xref: /minix3/bin/ksh/lex.c (revision 2718b5688b1550d32bf379153192626eee37752d)
1 /*	$NetBSD: lex.c,v 1.15 2011/10/16 17:12:11 joerg Exp $	*/
2 
3 /*
4  * lexical analysis and source input
5  */
6 #include <sys/cdefs.h>
7 
8 #ifndef lint
9 __RCSID("$NetBSD: lex.c,v 1.15 2011/10/16 17:12:11 joerg Exp $");
10 #endif
11 
12 
13 #include "sh.h"
14 #include <ctype.h>
15 
16 
/* Structure to keep track of the lexing state and the various pieces of info
 * needed for each particular state.
 *
 * A Lex_state is one entry of the state stack used by yylex(): ls_state
 * holds the state code and ls_info holds the data that belongs to that
 * state.  The ls_* macros defined next to each union member are shorthands
 * so code can write statep->ls_scsparen.nparen and the like.
 *
 * Element 0 of every block of states is not a real state; its ls_info.base
 * member links back to the previous block (null for the first block).
 */
typedef struct lex_state Lex_state;
struct lex_state {
	int ls_state;			/* state code (SBASE, SDQUOTE, ...) */
	union {
	    /* $(...) */
	    struct scsparen_info {
		    int nparen;		/* count open parenthesis */
		    int csstate; /* XXX remove */
#define ls_scsparen ls_info.u_scsparen
	    } u_scsparen;

	    /* $((...)) */
	    struct sasparen_info {
		    int nparen;		/* count open parenthesis */
		    int start;		/* marks start of $(( in output str */
#define ls_sasparen ls_info.u_sasparen
	    } u_sasparen;

	    /* ((...)) */
	    struct sletparen_info {
		    int nparen;		/* count open parenthesis */
#define ls_sletparen ls_info.u_sletparen
	    } u_sletparen;

	    /* `...` */
	    struct sbquote_info {
		    int indquotes;	/* true if in double quotes: "`...`" */
#define ls_sbquote ls_info.u_sbquote
	    } u_sbquote;

	    Lex_state *base;		/* used to point to next state block */
	} ls_info;
};
53 
54 typedef struct State_info State_info;
55 struct State_info {
56 	Lex_state	*base;
57 	Lex_state	*end;
58 };
59 
60 
61 static void	readhere ARGS((struct ioword *iop));
62 static int	getsc__ ARGS((void));
63 static void	getsc_line ARGS((Source *s));
64 static int	getsc_bn ARGS((void));
65 static char	*get_brace_var ARGS((XString *wsp, char *wp));
66 static int	arraysub ARGS((char **strp));
67 static const char *ungetsc ARGS((int c));
68 static void	gethere ARGS((void));
69 static Lex_state *push_state_ ARGS((State_info *si, Lex_state *old_end));
70 static Lex_state *pop_state_ ARGS((State_info *si, Lex_state *old_end));
71 
72 static int backslash_skip;
73 static int ignore_backslash_newline;
74 
75 /* optimized getsc_bn() */
76 #define getsc()		(*source->str != '\0' && *source->str != '\\' \
77 			 && !backslash_skip ? *source->str++ : getsc_bn())
78 /* optimized getsc__() */
79 #define	getsc_()	((*source->str != '\0') ? *source->str++ : getsc__())
80 
81 #define STATE_BSIZE	32
82 
83 #define PUSH_STATE(s)	do { \
84 			    if (++statep == state_info.end) \
85 				statep = push_state_(&state_info, statep); \
86 			    state = statep->ls_state = (s); \
87 			} while (0)
88 
89 #define POP_STATE()	do { \
90 			    if (--statep == state_info.base) \
91 				statep = pop_state_(&state_info, statep); \
92 			    state = statep->ls_state; \
93 			} while (0)
94 
95 
96 
97 /*
98  * Lexical analyzer
99  *
100  * tokens are not regular expressions, they are LL(1).
101  * for example, "${var:-${PWD}}", and "$(size $(whence ksh))".
102  * hence the state stack.
103  */
104 
105 int
yylex(cf)106 yylex(cf)
107 	int cf;
108 {
109 	Lex_state states[STATE_BSIZE], *statep;
110 	State_info state_info;
111 	register int c, state;
112 	XString ws;		/* expandable output word */
113 	register char *wp;	/* output word pointer */
114 	char *sp, *dp;
115 	int c2;
116 
117 
118   Again:
119 	states[0].ls_state = -1;
120 	states[0].ls_info.base = (Lex_state *) 0;
121 	statep = &states[1];
122 	state_info.base = states;
123 	state_info.end = &states[STATE_BSIZE];
124 
125 	Xinit(ws, wp, 64, ATEMP);
126 
127 	backslash_skip = 0;
128 	ignore_backslash_newline = 0;
129 
130 	if (cf&ONEWORD)
131 		state = SWORD;
132 #ifdef KSH
133 	else if (cf&LETEXPR) {
134 		*wp++ = OQUOTE;	 /* enclose arguments in (double) quotes */
135 		state = SLETPAREN;
136 		statep->ls_sletparen.nparen = 0;
137 	}
138 #endif /* KSH */
139 	else {		/* normal lexing */
140 		state = (cf & HEREDELIM) ? SHEREDELIM : SBASE;
141 		while ((c = getsc()) == ' ' || c == '\t')
142 			;
143 		if (c == '#') {
144 			ignore_backslash_newline++;
145 			while ((c = getsc()) != '\0' && c != '\n')
146 				;
147 			ignore_backslash_newline--;
148 		}
149 		ungetsc(c);
150 	}
151 	if (source->flags & SF_ALIAS) {	/* trailing ' ' in alias definition */
152 		source->flags &= ~SF_ALIAS;
153 		/* In POSIX mode, a trailing space only counts if we are
154 		 * parsing a simple command
155 		 */
156 		if (!Flag(FPOSIX) || (cf & CMDWORD))
157 			cf |= ALIAS;
158 	}
159 
160 	/* Initial state: one of SBASE SHEREDELIM SWORD SASPAREN */
161 	statep->ls_state = state;
162 
163 	/* collect non-special or quoted characters to form word */
164 	while (!((c = getsc()) == 0
165 		 || ((state == SBASE || state == SHEREDELIM)
166 		     && ctype(c, C_LEX1))))
167 	{
168 		Xcheck(ws, wp);
169 		switch (state) {
170 		  case SBASE:
171 			if (c == '[' && (cf & (VARASN|ARRAYVAR))) {
172 				*wp = EOS; /* temporary */
173 				if (is_wdvarname(Xstring(ws, wp), FALSE))
174 				{
175 					char *p, *tmp;
176 
177 					if (arraysub(&tmp)) {
178 						*wp++ = CHAR;
179 						*wp++ = c;
180 						for (p = tmp; *p; ) {
181 							Xcheck(ws, wp);
182 							*wp++ = CHAR;
183 							*wp++ = *p++;
184 						}
185 						afree(tmp, ATEMP);
186 						break;
187 					} else {
188 						Source *s;
189 
190 						s = pushs(SREREAD,
191 							  source->areap);
192 						s->start = s->str
193 							= s->u.freeme = tmp;
194 						s->next = source;
195 						source = s;
196 					}
197 				}
198 				*wp++ = CHAR;
199 				*wp++ = c;
200 				break;
201 			}
202 			/* fall through.. */
203 		  Sbase1:	/* includes *(...|...) pattern (*+?@!) */
204 #ifdef KSH
205 			if (c == '*' || c == '@' || c == '+' || c == '?'
206 			    || c == '!')
207 			{
208 				c2 = getsc();
209 				if (c2 == '(' /*)*/ ) {
210 					*wp++ = OPAT;
211 					*wp++ = c;
212 					PUSH_STATE(SPATTERN);
213 					break;
214 				}
215 				ungetsc(c2);
216 			}
217 #endif /* KSH */
218 			/* fall through.. */
219 		  Sbase2:	/* doesn't include *(...|...) pattern (*+?@!) */
220 			switch (c) {
221 			  case '\\':
222 				c = getsc();
223 #ifdef OS2
224 				if (isalnum((unsigned char)c)) {
225 					*wp++ = CHAR, *wp++ = '\\';
226 					*wp++ = CHAR, *wp++ = c;
227 				} else
228 #endif
229 				if (c) /* trailing \ is lost */
230 					*wp++ = QCHAR, *wp++ = c;
231 				break;
232 			  case '\'':
233 				*wp++ = OQUOTE;
234 				ignore_backslash_newline++;
235 				PUSH_STATE(SSQUOTE);
236 				break;
237 			  case '"':
238 				*wp++ = OQUOTE;
239 				PUSH_STATE(SDQUOTE);
240 				break;
241 			  default:
242 				goto Subst;
243 			}
244 			break;
245 
246 		  Subst:
247 			switch (c) {
248 			  Lex_state *s;
249 			  Lex_state *base;
250 
251 			  case '\\':
252 				c = getsc();
253 				switch (c) {
254 				  case '\\':
255 				  case '$': case '`':
256 					*wp++ = QCHAR, *wp++ = c;
257 					break;
258 				  case '"':
259 					if ((cf & HEREDOC) == 0) {
260 						*wp++ = QCHAR, *wp++ = c;
261 						break;
262 					}
263 					/* FALLTROUGH */
264 				  default:
265 					Xcheck(ws, wp);
266 					if (c) { /* trailing \ is lost */
267 						*wp++ = CHAR, *wp++ = '\\';
268 						*wp++ = CHAR, *wp++ = c;
269 					}
270 					break;
271 				}
272 				break;
273 			  case '$':
274 				c = getsc();
275 				if (c == '(') /*)*/ {
276 					c = getsc();
277 					if (c == '(') /*)*/ {
278 						PUSH_STATE(SASPAREN);
279 						statep->ls_sasparen.nparen = 2;
280 						statep->ls_sasparen.start =
281 							Xsavepos(ws, wp);
282 						*wp++ = EXPRSUB;
283 					} else {
284 						ungetsc(c);
285 						PUSH_STATE(SCSPAREN);
286 						statep->ls_scsparen.nparen = 1;
287 						statep->ls_scsparen.csstate = 0;
288 						*wp++ = COMSUB;
289 					}
290 				} else if (c == '{') /*}*/ {
291 					*wp++ = OSUBST;
292 					*wp++ = '{'; /*}*/
293 					wp = get_brace_var(&ws, wp);
294 					c = getsc();
295 					/* allow :# and :% (ksh88 compat) */
296 					if (c == ':') {
297 						*wp++ = CHAR, *wp++ = c;
298 						c = getsc();
299 					}
300 					/* If this is a trim operation,
301 					 * treat (,|,) specially in STBRACE.
302 					 */
303 					if (c == '#' || c == '%') {
304 						ungetsc(c);
305 						PUSH_STATE(STBRACE);
306 					} else {
307 						ungetsc(c);
308 						PUSH_STATE(SBRACE);
309 					}
310 				} else if (ctype(c, C_ALPHA)) {
311 					*wp++ = OSUBST;
312 					*wp++ = 'X';
313 					do {
314 						Xcheck(ws, wp);
315 						*wp++ = c;
316 						c = getsc();
317 					} while (ctype(c, C_ALPHA|C_DIGIT));
318 					*wp++ = '\0';
319 					*wp++ = CSUBST;
320 					*wp++ = 'X';
321 					ungetsc(c);
322 				} else if (ctype(c, C_DIGIT|C_VAR1)) {
323 					Xcheck(ws, wp);
324 					*wp++ = OSUBST;
325 					*wp++ = 'X';
326 					*wp++ = c;
327 					*wp++ = '\0';
328 					*wp++ = CSUBST;
329 					*wp++ = 'X';
330 				} else {
331 					*wp++ = CHAR, *wp++ = '$';
332 					ungetsc(c);
333 				}
334 				break;
335 			  case '`':
336 				PUSH_STATE(SBQUOTE);
337 				*wp++ = COMSUB;
338 				/* Need to know if we are inside double quotes
339 				 * since sh/at&t-ksh translate the \" to " in
340 				 * "`..\"..`".  POSIX also requires this.
341 				 * An earlier version of ksh misinterpreted
342 				 * the POSIX specification and performed
343 				 * removal of backslash escapes only if
344 				 * posix mode was not in effect.
345 				 */
346 				statep->ls_sbquote.indquotes = 0;
347 				s = statep;
348 				base = state_info.base;
349 				while (1) {
350 					for (; s != base; s--) {
351 						if (s->ls_state == SDQUOTE) {
352 							statep->ls_sbquote.indquotes = 1;
353 							break;
354 						}
355 					}
356 					if (s != base)
357 						break;
358 					if (!(s = s->ls_info.base))
359 						break;
360 					base = s-- - STATE_BSIZE;
361 				}
362 				break;
363 			  default:
364 				*wp++ = CHAR, *wp++ = c;
365 			}
366 			break;
367 
368 		  case SSQUOTE:
369 			if (c == '\'') {
370 				POP_STATE();
371 				*wp++ = CQUOTE;
372 				ignore_backslash_newline--;
373 			} else
374 				*wp++ = QCHAR, *wp++ = c;
375 			break;
376 
377 		  case SDQUOTE:
378 			if (c == '"') {
379 				POP_STATE();
380 				*wp++ = CQUOTE;
381 			} else
382 				goto Subst;
383 			break;
384 
385 		  case SCSPAREN: /* $( .. ) */
386 			/* todo: deal with $(...) quoting properly
387 			 * kludge to partly fake quoting inside $(..): doesn't
388 			 * really work because nested $(..) or ${..} inside
389 			 * double quotes aren't dealt with.
390 			 */
391 			switch (statep->ls_scsparen.csstate) {
392 			  case 0: /* normal */
393 				switch (c) {
394 				  case '(':
395 					statep->ls_scsparen.nparen++;
396 					break;
397 				  case ')':
398 					statep->ls_scsparen.nparen--;
399 					break;
400 				  case '\\':
401 					statep->ls_scsparen.csstate = 1;
402 					break;
403 				  case '"':
404 					statep->ls_scsparen.csstate = 2;
405 					break;
406 				  case '\'':
407 					statep->ls_scsparen.csstate = 4;
408 					ignore_backslash_newline++;
409 					break;
410 				}
411 				break;
412 
413 			  case 1: /* backslash in normal mode */
414 			  case 3: /* backslash in double quotes */
415 				--statep->ls_scsparen.csstate;
416 				break;
417 
418 			  case 2: /* double quotes */
419 				if (c == '"')
420 					statep->ls_scsparen.csstate = 0;
421 				else if (c == '\\')
422 					statep->ls_scsparen.csstate = 3;
423 				break;
424 
425 			  case 4: /* single quotes */
426 				if (c == '\'') {
427 					statep->ls_scsparen.csstate = 0;
428 					ignore_backslash_newline--;
429 				}
430 				break;
431 			}
432 			if (statep->ls_scsparen.nparen == 0) {
433 				POP_STATE();
434 				*wp++ = 0; /* end of COMSUB */
435 			} else
436 				*wp++ = c;
437 			break;
438 
439 		  case SASPAREN: /* $(( .. )) */
440 			/* todo: deal with $((...); (...)) properly */
441 			/* XXX should nest using existing state machine
442 			 *     (embed "..", $(...), etc.) */
443 			if (c == '(')
444 				statep->ls_sasparen.nparen++;
445 			else if (c == ')') {
446 				statep->ls_sasparen.nparen--;
447 				if (statep->ls_sasparen.nparen == 1) {
448 					/*(*/
449 					if ((c2 = getsc()) == ')') {
450 						POP_STATE();
451 						*wp++ = 0; /* end of EXPRSUB */
452 						break;
453 					} else {
454 						char *s;
455 
456 						ungetsc(c2);
457 						/* mismatched parenthesis -
458 						 * assume we were really
459 						 * parsing a $(..) expression
460 						 */
461 						s = Xrestpos(ws, wp,
462 						     statep->ls_sasparen.start);
463 						memmove(s + 1, s, wp - s);
464 						*s++ = COMSUB;
465 						*s = '('; /*)*/
466 						wp++;
467 						statep->ls_scsparen.nparen = 1;
468 						statep->ls_scsparen.csstate = 0;
469 						state = statep->ls_state
470 							= SCSPAREN;
471 
472 					}
473 				}
474 			}
475 			*wp++ = c;
476 			break;
477 
478 		  case SBRACE:
479 			/*{*/
480 			if (c == '}') {
481 				POP_STATE();
482 				*wp++ = CSUBST;
483 				*wp++ = /*{*/ '}';
484 			} else
485 				goto Sbase1;
486 			break;
487 
488 		  case STBRACE:
489 			/* Same as SBRACE, except (,|,) treated specially */
490 			/*{*/
491 			if (c == '}') {
492 				POP_STATE();
493 				*wp++ = CSUBST;
494 				*wp++ = /*{*/ '}';
495 			} else if (c == '|') {
496 				*wp++ = SPAT;
497 			} else if (c == '(') {
498 				*wp++ = OPAT;
499 				*wp++ = ' ';	/* simile for @ */
500 				PUSH_STATE(SPATTERN);
501 			} else
502 				goto Sbase1;
503 			break;
504 
505 		  case SBQUOTE:
506 			if (c == '`') {
507 				*wp++ = 0;
508 				POP_STATE();
509 			} else if (c == '\\') {
510 				switch (c = getsc()) {
511 				  case '\\':
512 				  case '$': case '`':
513 					*wp++ = c;
514 					break;
515 				  case '"':
516 					if (statep->ls_sbquote.indquotes) {
517 						*wp++ = c;
518 						break;
519 					}
520 					/* fall through.. */
521 				  default:
522 					if (c) { /* trailing \ is lost */
523 						*wp++ = '\\';
524 						*wp++ = c;
525 					}
526 					break;
527 				}
528 			} else
529 				*wp++ = c;
530 			break;
531 
532 		  case SWORD:	/* ONEWORD */
533 			goto Subst;
534 
535 #ifdef KSH
536 		  case SLETPAREN:	/* LETEXPR: (( ... )) */
537 			/*(*/
538 			if (c == ')') {
539 				if (statep->ls_sletparen.nparen > 0)
540 				    --statep->ls_sletparen.nparen;
541 				/*(*/
542 				else if ((c2 = getsc()) == ')') {
543 					c = 0;
544 					*wp++ = CQUOTE;
545 					goto Done;
546 				} else
547 					ungetsc(c2);
548 			} else if (c == '(')
549 				/* parenthesis inside quotes and backslashes
550 				 * are lost, but at&t ksh doesn't count them
551 				 * either
552 				 */
553 				++statep->ls_sletparen.nparen;
554 			goto Sbase2;
555 #endif /* KSH */
556 
557 		  case SHEREDELIM:	/* <<,<<- delimiter */
558 			/* XXX chuck this state (and the next) - use
559 			 * the existing states ($ and \`..` should be
560 			 * stripped of their specialness after the
561 			 * fact).
562 			 */
563 			/* here delimiters need a special case since
564 			 * $ and `..` are not to be treated specially
565 			 */
566 			if (c == '\\') {
567 				c = getsc();
568 				if (c) { /* trailing \ is lost */
569 					*wp++ = QCHAR;
570 					*wp++ = c;
571 				}
572 			} else if (c == '\'') {
573 				PUSH_STATE(SSQUOTE);
574 				*wp++ = OQUOTE;
575 				ignore_backslash_newline++;
576 			} else if (c == '"') {
577 				state = statep->ls_state = SHEREDQUOTE;
578 				*wp++ = OQUOTE;
579 			} else {
580 				*wp++ = CHAR;
581 				*wp++ = c;
582 			}
583 			break;
584 
585 		  case SHEREDQUOTE:	/* " in <<,<<- delimiter */
586 			if (c == '"') {
587 				*wp++ = CQUOTE;
588 				state = statep->ls_state = SHEREDELIM;
589 			} else {
590 				if (c == '\\') {
591 					switch (c = getsc()) {
592 					  case '\\': case '"':
593 					  case '$': case '`':
594 						break;
595 					  default:
596 						if (c) { /* trailing \ lost */
597 							*wp++ = CHAR;
598 							*wp++ = '\\';
599 						}
600 						break;
601 					}
602 				}
603 				*wp++ = CHAR;
604 				*wp++ = c;
605 			}
606 			break;
607 
608 		  case SPATTERN:	/* in *(...|...) pattern (*+?@!) */
609 			if ( /*(*/ c == ')') {
610 				*wp++ = CPAT;
611 				POP_STATE();
612 			} else if (c == '|') {
613 				*wp++ = SPAT;
614 			} else if (c == '(') {
615 				*wp++ = OPAT;
616 				*wp++ = ' ';	/* simile for @ */
617 				PUSH_STATE(SPATTERN);
618 			} else
619 				goto Sbase1;
620 			break;
621 		}
622 	}
623 Done:
624 	Xcheck(ws, wp);
625 	if (statep != &states[1])
626 		/* XXX figure out what is missing */
627 		yyerror("no closing quote\n");
628 
629 	/* This done to avoid tests for SHEREDELIM wherever SBASE tested */
630 	if (state == SHEREDELIM)
631 		state = SBASE;
632 
633 	dp = Xstring(ws, wp);
634 	if ((c == '<' || c == '>') && state == SBASE
635 	    && ((c2 = Xlength(ws, wp)) == 0
636 	        || (c2 == 2 && dp[0] == CHAR && digit(dp[1]))))
637 	{
638 		struct ioword *iop =
639 				(struct ioword *) alloc(sizeof(*iop), ATEMP);
640 
641 		if (c2 == 2)
642 			iop->unit = dp[1] - '0';
643 		else
644 			iop->unit = c == '>'; /* 0 for <, 1 for > */
645 
646 		c2 = getsc();
647 		/* <<, >>, <> are ok, >< is not */
648 		if (c == c2 || (c == '<' && c2 == '>')) {
649 			iop->flag = c == c2 ?
650 				  (c == '>' ? IOCAT : IOHERE) : IORDWR;
651 			if (iop->flag == IOHERE) {
652 				if ((c2 = getsc()) == '-') {
653 					iop->flag |= IOSKIP;
654 				} else {
655 					ungetsc(c2);
656 				}
657 			}
658 		} else if (c2 == '&')
659 			iop->flag = IODUP | (c == '<' ? IORDUP : 0);
660 		else {
661 			iop->flag = c == '>' ? IOWRITE : IOREAD;
662 			if (c == '>' && c2 == '|')
663 				iop->flag |= IOCLOB;
664 			else
665 				ungetsc(c2);
666 		}
667 
668 		iop->name = (char *) 0;
669 		iop->delim = (char *) 0;
670 		iop->heredoc = (char *) 0;
671 		Xfree(ws, wp);	/* free word */
672 		yylval.iop = iop;
673 		return REDIR;
674 	}
675 
676 	if (wp == dp && state == SBASE) {
677 		Xfree(ws, wp);	/* free word */
678 		/* no word, process LEX1 character */
679 		switch (c) {
680 		  default:
681 			return c;
682 
683 		  case '|':
684 		  case '&':
685 		  case ';':
686 			if ((c2 = getsc()) == c)
687 				c = (c == ';') ? BREAK :
688 				    (c == '|') ? LOGOR :
689 				    (c == '&') ? LOGAND :
690 				    YYERRCODE;
691 #ifdef KSH
692 			else if (c == '|' && c2 == '&')
693 				c = COPROC;
694 #endif /* KSH */
695 			else
696 				ungetsc(c2);
697 			return c;
698 
699 		  case '\n':
700 			gethere();
701 			if (cf & CONTIN)
702 				goto Again;
703 			return c;
704 
705 		  case '(':  /*)*/
706 #ifdef KSH
707 			if ((c2 = getsc()) == '(') /*)*/
708 				/* XXX need to handle ((...); (...)) */
709 				c = MDPAREN;
710 			else
711 				ungetsc(c2);
712 #endif /* KSH */
713 			return c;
714 		  /*(*/
715 		  case ')':
716 			return c;
717 		}
718 	}
719 
720 	*wp++ = EOS;		/* terminate word */
721 	yylval.cp = Xclose(ws, wp);
722 	if (state == SWORD
723 #ifdef KSH
724 		|| state == SLETPAREN
725 #endif /* KSH */
726 		)	/* ONEWORD? */
727 		return LWORD;
728 	ungetsc(c);		/* unget terminator */
729 
730 	/* copy word to unprefixed string ident */
731 	for (sp = yylval.cp, dp = ident; dp < ident+IDENT && (c = *sp++) == CHAR; )
732 		*dp++ = *sp++;
733 	/* Make sure the ident array stays '\0' padded */
734 	memset(dp, 0, (ident+IDENT) - dp + 1);
735 	if (c != EOS)
736 		*ident = '\0';	/* word is not unquoted */
737 
738 	if (*ident != '\0' && (cf&(KEYWORD|ALIAS))) {
739 		struct tbl *p;
740 		int h = hash(ident);
741 
742 		/* { */
743 		if ((cf & KEYWORD) && (p = tsearch(&keywords, ident, h))
744 		    && (!(cf & ESACONLY) || p->val.i == ESAC || p->val.i == '}'))
745 		{
746 			afree(yylval.cp, ATEMP);
747 			return p->val.i;
748 		}
749 		if ((cf & ALIAS) && (p = tsearch(&aliases, ident, h))
750 		    && (p->flag & ISSET))
751 		{
752 			register Source *s;
753 
754 			for (s = source; s->type == SALIAS; s = s->next)
755 				if (s->u.tblp == p)
756 					return LWORD;
757 			/* push alias expansion */
758 			s = pushs(SALIAS, source->areap);
759 			s->start = s->str = p->val.s;
760 			s->u.tblp = p;
761 			s->next = source;
762 			source = s;
763 			afree(yylval.cp, ATEMP);
764 			goto Again;
765 		}
766 	}
767 
768 	return LWORD;
769 }
770 
771 static void
gethere()772 gethere()
773 {
774 	register struct ioword **p;
775 
776 	for (p = heres; p < herep; p++)
777 		readhere(*p);
778 	herep = heres;
779 }
780 
781 /*
782  * read "<<word" text into temp file
783  */
784 
785 static void
readhere(iop)786 readhere(iop)
787 	struct ioword *iop;
788 {
789 	register int c;
790 	char *volatile eof;
791 	char *eofp;
792 	int skiptabs;
793 	XString xs;
794 	char *xp;
795 	int xpos;
796 
797 	eof = evalstr(iop->delim, 0);
798 
799 	if (!(iop->flag & IOEVAL))
800 		ignore_backslash_newline++;
801 
802 	Xinit(xs, xp, 256, ATEMP);
803 
804 	for (;;) {
805 		eofp = eof;
806 		skiptabs = iop->flag & IOSKIP;
807 		xpos = Xsavepos(xs, xp);
808 		while ((c = getsc()) != 0) {
809 			if (skiptabs) {
810 				if (c == '\t')
811 					continue;
812 				skiptabs = 0;
813 			}
814 			if (c != *eofp)
815 				break;
816 			Xcheck(xs, xp);
817 			Xput(xs, xp, c);
818 			eofp++;
819 		}
820 		/* Allow EOF here so commands with out trailing newlines
821 		 * will work (eg, ksh -c '...', $(...), etc).
822 		 */
823 		if (*eofp == '\0' && (c == 0 || c == '\n')) {
824 			xp = Xrestpos(xs, xp, xpos);
825 			break;
826 		}
827 		ungetsc(c);
828 		while ((c = getsc()) != '\n') {
829 			if (c == 0)
830 				yyerror("here document `%s' unclosed\n", eof);
831 			Xcheck(xs, xp);
832 			Xput(xs, xp, c);
833 		}
834 		Xcheck(xs, xp);
835 		Xput(xs, xp, c);
836 	}
837 	Xput(xs, xp, '\0');
838 	iop->heredoc = Xclose(xs, xp);
839 
840 	if (!(iop->flag & IOEVAL))
841 		ignore_backslash_newline--;
842 }
843 
844 void
845 #ifdef HAVE_PROTOTYPES
yyerror(const char * fmt,...)846 yyerror(const char *fmt, ...)
847 #else
848 yyerror(fmt, va_alist)
849 	const char *fmt;
850 	va_dcl
851 #endif
852 {
853 	va_list va;
854 
855 	/* pop aliases and re-reads */
856 	while (source->type == SALIAS || source->type == SREREAD)
857 		source = source->next;
858 	source->str = null;	/* zap pending input */
859 
860 	error_prefix(TRUE);
861 	SH_VA_START(va, fmt);
862 	shf_vfprintf(shl_out, fmt, va);
863 	va_end(va);
864 	errorf("%s", null);
865 }
866 
867 /*
868  * input for yylex with alias expansion
869  */
870 
871 Source *
pushs(type,areap)872 pushs(type, areap)
873 	int type;
874 	Area *areap;
875 {
876 	register Source *s;
877 
878 	s = (Source *) alloc(sizeof(Source), areap);
879 	s->type = type;
880 	s->str = null;
881 	s->start = NULL;
882 	s->line = 0;
883 	s->errline = 0;
884 	s->file = NULL;
885 	s->flags = 0;
886 	s->next = NULL;
887 	s->areap = areap;
888 	if (type == SFILE || type == SSTDIN) {
889 		char *dummy;
890 		Xinit(s->xs, dummy, 256, s->areap);
891 	} else
892 		memset(&s->xs, 0, sizeof(s->xs));
893 	return s;
894 }
895 
896 static int
getsc__()897 getsc__()
898 {
899 	register Source *s = source;
900 	register int c;
901 
902 	while ((c = *s->str++) == 0) {
903 		s->str = NULL;		/* return 0 for EOF by default */
904 		switch (s->type) {
905 		  case SEOF:
906 			s->str = null;
907 			return 0;
908 
909 		  case SSTDIN:
910 		  case SFILE:
911 			getsc_line(s);
912 			break;
913 
914 		  case SWSTR:
915 			break;
916 
917 		  case SSTRING:
918 			break;
919 
920 		  case SWORDS:
921 			s->start = s->str = *s->u.strv++;
922 			s->type = SWORDSEP;
923 			break;
924 
925 		  case SWORDSEP:
926 			if (*s->u.strv == NULL) {
927 				s->start = s->str = newline;
928 				s->type = SEOF;
929 			} else {
930 				s->start = s->str = space;
931 				s->type = SWORDS;
932 			}
933 			break;
934 
935 		  case SALIAS:
936 			if (s->flags & SF_ALIASEND) {
937 				/* pass on an unused SF_ALIAS flag */
938 				source = s->next;
939 				source->flags |= s->flags & SF_ALIAS;
940 				s = source;
941 			} else if (*s->u.tblp->val.s
942 				 && isspace((unsigned char)strchr(s->u.tblp->val.s, 0)[-1]))
943 			{
944 				source = s = s->next;	/* pop source stack */
945 				/* Note that this alias ended with a space,
946 				 * enabling alias expansion on the following
947 				 * word.
948 				 */
949 				s->flags |= SF_ALIAS;
950 			} else {
951 				/* At this point, we need to keep the current
952 				 * alias in the source list so recursive
953 				 * aliases can be detected and we also need
954 				 * to return the next character.  Do this
955 				 * by temporarily popping the alias to get
956 				 * the next character and then put it back
957 				 * in the source list with the SF_ALIASEND
958 				 * flag set.
959 				 */
960 				source = s->next;	/* pop source stack */
961 				source->flags |= s->flags & SF_ALIAS;
962 				c = getsc__();
963 				if (c) {
964 					s->flags |= SF_ALIASEND;
965 					s->ugbuf[0] = c; s->ugbuf[1] = '\0';
966 					s->start = s->str = s->ugbuf;
967 					s->next = source;
968 					source = s;
969 				} else {
970 					s = source;
971 					/* avoid reading eof twice */
972 					s->str = NULL;
973 					break;
974 				}
975 			}
976 			continue;
977 
978 		  case SREREAD:
979 			if (s->start != s->ugbuf) /* yuck */
980 				afree(s->u.freeme, ATEMP);
981 			source = s = s->next;
982 			continue;
983 		}
984 		if (s->str == NULL) {
985 			s->type = SEOF;
986 			s->start = s->str = null;
987 			return '\0';
988 		}
989 		if (s->flags & SF_ECHO) {
990 			shf_puts(s->str, shl_out);
991 			shf_flush(shl_out);
992 		}
993 	}
994 	return c;
995 }
996 
997 static void
getsc_line(s)998 getsc_line(s)
999 	Source *s;
1000 {
1001 	char *xp = Xstring(s->xs, xp);
1002 	int interactive = Flag(FTALKING) && s->type == SSTDIN;
1003 	int have_tty = interactive && (s->flags & SF_TTY);
1004 
1005 	/* Done here to ensure nothing odd happens when a timeout occurs */
1006 	XcheckN(s->xs, xp, LINE);
1007 	*xp = '\0';
1008 	s->start = s->str = xp;
1009 
1010 #ifdef KSH
1011 	if (have_tty && ksh_tmout) {
1012 		ksh_tmout_state = TMOUT_READING;
1013 		alarm(ksh_tmout);
1014 	}
1015 #endif /* KSH */
1016 #ifdef EDIT
1017 	if (have_tty && (0
1018 # ifdef VI
1019 			 || Flag(FVI)
1020 # endif /* VI */
1021 # ifdef EMACS
1022 			 || Flag(FEMACS) || Flag(FGMACS)
1023 # endif /* EMACS */
1024 		))
1025 	{
1026 		int nread;
1027 
1028 		nread = x_read(xp, LINE);
1029 		if (nread < 0)	/* read error */
1030 			nread = 0;
1031 		xp[nread] = '\0';
1032 		xp += nread;
1033 	}
1034 	else
1035 #endif /* EDIT */
1036 	{
1037 		if (interactive) {
1038 			pprompt(prompt, 0);
1039 		} else
1040 			s->line++;
1041 
1042 		while (1) {
1043 			char *p = shf_getse(xp, Xnleft(s->xs, xp), s->u.shf);
1044 
1045 			if (!p && shf_error(s->u.shf)
1046 			    && shf_errno(s->u.shf) == EINTR)
1047 			{
1048 				shf_clearerr(s->u.shf);
1049 				if (trap)
1050 					runtraps(0);
1051 				continue;
1052 			}
1053 			if (!p || (xp = p, xp[-1] == '\n'))
1054 				break;
1055 			/* double buffer size */
1056 			xp++; /* move past null so doubling works... */
1057 			XcheckN(s->xs, xp, Xlength(s->xs, xp));
1058 			xp--; /* ...and move back again */
1059 		}
1060 		/* flush any unwanted input so other programs/builtins
1061 		 * can read it.  Not very optimal, but less error prone
1062 		 * than flushing else where, dealing with redirections,
1063 		 * etc..
1064 		 * todo: reduce size of shf buffer (~128?) if SSTDIN
1065 		 */
1066 		if (s->type == SSTDIN)
1067 			shf_flush(s->u.shf);
1068 	}
1069 	/* XXX: temporary kludge to restore source after a
1070 	 * trap may have been executed.
1071 	 */
1072 	source = s;
1073 #ifdef KSH
1074 	if (have_tty && ksh_tmout)
1075 	{
1076 		ksh_tmout_state = TMOUT_EXECUTING;
1077 		alarm(0);
1078 	}
1079 #endif /* KSH */
1080 	s->start = s->str = Xstring(s->xs, xp);
1081 	strip_nuls(Xstring(s->xs, xp), Xlength(s->xs, xp));
1082 	/* Note: if input is all nulls, this is not eof */
1083 	if (Xlength(s->xs, xp) == 0) { /* EOF */
1084 		if (s->type == SFILE)
1085 			shf_fdclose(s->u.shf);
1086 		s->str = NULL;
1087 	} else if (interactive) {
1088 #ifdef HISTORY
1089 		char *p = Xstring(s->xs, xp);
1090 		if (cur_prompt == PS1)
1091 			while (*p && ctype(*p, C_IFS) && ctype(*p, C_IFSWS))
1092 				p++;
1093 		if (*p) {
1094 # ifdef EASY_HISTORY
1095 			if (cur_prompt == PS2)
1096 				histappend(Xstring(s->xs, xp), 1);
1097 			else
1098 # endif /* EASY_HISTORY */
1099 			{
1100 				s->line++;
1101 				histsave(s->line, s->str, 1);
1102 			}
1103 		}
1104 #endif /* HISTORY */
1105 	}
1106 	if (interactive)
1107 		set_prompt(PS2, (Source *) 0);
1108 }
1109 
1110 void
set_prompt(to,s)1111 set_prompt(to, s)
1112 	int to;
1113 	Source *s;
1114 {
1115 	cur_prompt = to;
1116 
1117 	switch (to) {
1118 	case PS1: /* command */
1119 #ifdef KSH
1120 		/* Substitute ! and !! here, before substitutions are done
1121 		 * so ! in expanded variables are not expanded.
1122 		 * NOTE: this is not what at&t ksh does (it does it after
1123 		 * substitutions, POSIX doesn't say which is to be done.
1124 		 */
1125 		{
1126 			struct shf *shf;
1127 			char * volatile ps1;
1128 			Area *saved_atemp;
1129 
1130 			ps1 = str_val(global("PS1"));
1131 			shf = shf_sopen((char *) 0, strlen(ps1) * 2,
1132 				SHF_WR | SHF_DYNAMIC, (struct shf *) 0);
1133 			while (*ps1) {
1134 				if (*ps1 != '!' || *++ps1 == '!')
1135 					shf_putchar(*ps1++, shf);
1136 				else
1137 					shf_fprintf(shf, "%d",
1138 						s ? s->line + 1 : 0);
1139 			}
1140 			ps1 = shf_sclose(shf);
1141 			saved_atemp = ATEMP;
1142 			newenv(E_ERRH);
1143 			if (ksh_sigsetjmp(e->jbuf, 0)) {
1144 				prompt = safe_prompt;
1145 				/* Don't print an error - assume it has already
1146 				 * been printed.  Reason is we may have forked
1147 				 * to run a command and the child may be
1148 				 * unwinding its stack through this code as it
1149 				 * exits.
1150 				 */
1151 			} else
1152 				prompt = str_save(substitute(ps1, 0),
1153 						 saved_atemp);
1154 			quitenv();
1155 		}
1156 #else /* KSH */
1157 		prompt = str_val(global("PS1"));
1158 #endif /* KSH */
1159 		break;
1160 
1161 	case PS2: /* command continuation */
1162 		prompt = str_val(global("PS2"));
1163 		break;
1164 	}
1165 }
1166 
1167 /* See also related routine, promptlen() in edit.c */
1168 void
pprompt(cp,ntruncate)1169 pprompt(cp, ntruncate)
1170 	const char *cp;
1171 	int ntruncate;
1172 {
1173 #if 0
1174 	char nbuf[32];
1175 	int c;
1176 
1177 	while (*cp != 0) {
1178 		if (*cp != '!')
1179 			c = *cp++;
1180 		else if (*++cp == '!')
1181 			c = *cp++;
1182 		else {
1183 			int len;
1184 			char *p;
1185 
1186 			shf_snprintf(p = nbuf, sizeof(nbuf), "%d",
1187 				source->line + 1);
1188 			len = strlen(nbuf);
1189 			if (ntruncate) {
1190 				if (ntruncate >= len) {
1191 					ntruncate -= len;
1192 					continue;
1193 				}
1194 				p += ntruncate;
1195 				len -= ntruncate;
1196 				ntruncate = 0;
1197 			}
1198 			shf_write(p, len, shl_out);
1199 			continue;
1200 		}
1201 		if (ntruncate)
1202 			--ntruncate;
1203 		else
1204 			shf_putc(c, shl_out);
1205 	}
1206 #endif /* 0 */
1207 	shf_puts(cp + ntruncate, shl_out);
1208 	shf_flush(shl_out);
1209 }
1210 
1211 /* Read the variable part of a ${...} expression (ie, up to but not including
1212  * the :[-+?=#%] or close-brace.
1213  */
1214 static char *
get_brace_var(wsp,wp)1215 get_brace_var(wsp, wp)
1216 	XString *wsp;
1217 	char *wp;
1218 {
1219 	enum parse_state {
1220 			   PS_INITIAL, PS_SAW_HASH, PS_IDENT,
1221 			   PS_NUMBER, PS_VAR1, PS_END
1222 			 }
1223 		state;
1224 	char c;
1225 
1226 	state = PS_INITIAL;
1227 	while (1) {
1228 		c = getsc();
1229 		/* State machine to figure out where the variable part ends. */
1230 		switch (state) {
1231 		  case PS_INITIAL:
1232 			if (c == '#') {
1233 				state = PS_SAW_HASH;
1234 				break;
1235 			}
1236 			/* fall through.. */
1237 		  case PS_SAW_HASH:
1238 			if (letter(c))
1239 				state = PS_IDENT;
1240 			else if (digit(c))
1241 				state = PS_NUMBER;
1242 			else if (ctype(c, C_VAR1))
1243 				state = PS_VAR1;
1244 			else
1245 				state = PS_END;
1246 			break;
1247 		  case PS_IDENT:
1248 			if (!letnum(c)) {
1249 				state = PS_END;
1250 				if (c == '[') {
1251 					char *tmp, *p;
1252 
1253 					if (!arraysub(&tmp))
1254 						yyerror("missing ]\n");
1255 					*wp++ = c;
1256 					for (p = tmp; *p; ) {
1257 						Xcheck(*wsp, wp);
1258 						*wp++ = *p++;
1259 					}
1260 					afree(tmp, ATEMP);
1261 					c = getsc(); /* the ] */
1262 				}
1263 			}
1264 			break;
1265 		  case PS_NUMBER:
1266 			if (!digit(c))
1267 				state = PS_END;
1268 			break;
1269 		  case PS_VAR1:
1270 			state = PS_END;
1271 			break;
1272 		  case PS_END: /* keep gcc happy */
1273 			break;
1274 		}
1275 		if (state == PS_END) {
1276 			*wp++ = '\0';	/* end of variable part */
1277 			ungetsc(c);
1278 			break;
1279 		}
1280 		Xcheck(*wsp, wp);
1281 		*wp++ = c;
1282 	}
1283 	return wp;
1284 }
1285 
1286 /*
1287  * Save an array subscript - returns true if matching bracket found, false
1288  * if eof or newline was found.
1289  * (Returned string double null terminated)
1290  */
1291 static int
arraysub(strp)1292 arraysub(strp)
1293 	char **strp;
1294 {
1295 	XString ws;
1296 	char	*wp;
1297 	char	c;
1298 	int 	depth = 1;	/* we are just past the initial [ */
1299 
1300 	Xinit(ws, wp, 32, ATEMP);
1301 
1302 	do {
1303 		c = getsc();
1304 		Xcheck(ws, wp);
1305 		*wp++ = c;
1306 		if (c == '[')
1307 			depth++;
1308 		else if (c == ']')
1309 			depth--;
1310 	} while (depth > 0 && c && c != '\n');
1311 
1312 	*wp++ = '\0';
1313 	*strp = Xclose(ws, wp);
1314 
1315 	return depth == 0 ? 1 : 0;
1316 }
1317 
1318 /* Unget a char: handles case when we are already at the start of the buffer */
1319 static const char *
ungetsc(c)1320 ungetsc(c)
1321 	int c;
1322 {
1323 	if (backslash_skip)
1324 		backslash_skip--;
1325 	/* Don't unget eof... */
1326 	if (source->str == null && c == '\0')
1327 		return source->str;
1328 	if (source->str > source->start)
1329 		source->str--;
1330 	else {
1331 		Source *s;
1332 
1333 		s = pushs(SREREAD, source->areap);
1334 		s->ugbuf[0] = c; s->ugbuf[1] = '\0';
1335 		s->start = s->str = s->ugbuf;
1336 		s->next = source;
1337 		source = s;
1338 	}
1339 	return source->str;
1340 }
1341 
1342 
1343 /* Called to get a char that isn't a \newline sequence. */
1344 static int
getsc_bn(void)1345 getsc_bn ARGS((void))
1346 {
1347 	int c, c2;
1348 
1349 	if (ignore_backslash_newline)
1350 		return getsc_();
1351 
1352 	if (backslash_skip == 1) {
1353 		backslash_skip = 2;
1354 		return getsc_();
1355 	}
1356 
1357 	backslash_skip = 0;
1358 
1359 	while (1) {
1360 		c = getsc_();
1361 		if (c == '\\') {
1362 			if ((c2 = getsc_()) == '\n')
1363 				/* ignore the \newline; get the next char... */
1364 				continue;
1365 			ungetsc(c2);
1366 			backslash_skip = 1;
1367 		}
1368 		return c;
1369 	}
1370 }
1371 
1372 static Lex_state *
push_state_(si,old_end)1373 push_state_(si, old_end)
1374 	State_info *si;
1375 	Lex_state *old_end;
1376 {
1377 	Lex_state	*new = alloc(sizeof(Lex_state) * STATE_BSIZE, ATEMP);
1378 
1379 	new[0].ls_info.base = old_end;
1380 	si->base = &new[0];
1381 	si->end = &new[STATE_BSIZE];
1382 	return &new[1];
1383 }
1384 
1385 static Lex_state *
pop_state_(si,old_end)1386 pop_state_(si, old_end)
1387 	State_info *si;
1388 	Lex_state *old_end;
1389 {
1390 	Lex_state *old_base = si->base;
1391 
1392 	si->base = old_end->ls_info.base - STATE_BSIZE;
1393 	si->end = old_end->ls_info.base;
1394 
1395 	afree(old_base, ATEMP);
1396 
1397 	return si->base + STATE_BSIZE - 1;
1398 }
1399