xref: /plan9/sys/src/ape/cmd/pdksh/lex.c (revision 7dd7cddf99dd7472612f1413b4da293630e6b1bc)
1 /*
2  * lexical analysis and source input
3  */
4 
5 #include "sh.h"
6 #include <ctype.h>
7 
8 
/* Structure to keep track of the lexing state and the various pieces of info
 * needed for each particular state.
 *
 * yylex() keeps these in blocks of STATE_BSIZE entries.  Slot 0 of every
 * block is not a real state: its ls_info.base member links back to the
 * previous block (a null pointer in the first block).
 *
 * The ls_* macros defined next to each union member are shorthands so the
 * lexer can write statep->ls_scsparen.nparen and so on.
 */
typedef struct lex_state Lex_state;
struct lex_state {
	int ls_state;			/* lexer state code for this entry */
	union {
	    /* $(...) */
	    struct scsparen_info {
		    int nparen;		/* count open parenthesis */
		    /* quoting sub-state used while scanning $(...):
		     * 0 normal, 1 backslash in normal mode, 2 double
		     * quotes, 3 backslash in double quotes, 4 single quotes
		     */
		    int csstate; /* XXX remove */
#define ls_scsparen ls_info.u_scsparen
	    } u_scsparen;

	    /* $((...)) */
	    struct sasparen_info {
		    int nparen;		/* count open parenthesis */
		    int start;		/* marks start of $(( in output str */
#define ls_sasparen ls_info.u_sasparen
	    } u_sasparen;

	    /* ((...)) */
	    struct sletparen_info {
		    int nparen;		/* count open parenthesis */
#define ls_sletparen ls_info.u_sletparen
	    } u_sletparen;

	    /* `...` */
	    struct sbquote_info {
		    int indquotes;	/* true if in double quotes: "`...`" */
#define ls_sbquote ls_info.u_sbquote
	    } u_sbquote;

	    Lex_state *base;		/* used to point to next state block */
	} ls_info;
};
45 
46 typedef struct State_info State_info;
47 struct State_info {
48 	Lex_state	*base;
49 	Lex_state	*end;
50 };
51 
52 
53 static void	readhere ARGS((struct ioword *iop));
54 static int	getsc__ ARGS((void));
55 static void	getsc_line ARGS((Source *s));
56 static int	getsc_bn ARGS((void));
57 static char	*get_brace_var ARGS((XString *wsp, char *wp));
58 static int	arraysub ARGS((char **strp));
59 static const char *ungetsc ARGS((int c));
60 static void	gethere ARGS((void));
61 static Lex_state *push_state_ ARGS((State_info *si, Lex_state *old_end));
62 static Lex_state *pop_state_ ARGS((State_info *si, Lex_state *old_end));
63 
64 static int backslash_skip;
65 static int ignore_backslash_newline;
66 
67 /* optimized getsc_bn() */
68 #define getsc()		(*source->str != '\0' && *source->str != '\\' \
69 			 && !backslash_skip ? *source->str++ : getsc_bn())
70 /* optimized getsc__() */
71 #define	getsc_()	((*source->str != '\0') ? *source->str++ : getsc__())
72 
73 #define STATE_BSIZE	32
74 
75 #define PUSH_STATE(s)	do { \
76 			    if (++statep == state_info.end) \
77 				statep = push_state_(&state_info, statep); \
78 			    state = statep->ls_state = (s); \
79 			} while (0)
80 
81 #define POP_STATE()	do { \
82 			    if (--statep == state_info.base) \
83 				statep = pop_state_(&state_info, statep); \
84 			    state = statep->ls_state; \
85 			} while (0)
86 
87 
88 
89 /*
90  * Lexical analyzer
91  *
92  * tokens are not regular expressions, they are LL(1).
93  * for example, "${var:-${PWD}}", and "$(size $(whence ksh))".
94  * hence the state stack.
95  */
96 
97 int
yylex(cf)98 yylex(cf)
99 	int cf;
100 {
101 	Lex_state states[STATE_BSIZE], *statep;
102 	State_info state_info;
103 	register int c, state;
104 	XString ws;		/* expandable output word */
105 	register char *wp;	/* output word pointer */
106 	char *sp, *dp;
107 	int c2;
108 
109 
110   Again:
111 	states[0].ls_state = -1;
112 	states[0].ls_info.base = (Lex_state *) 0;
113 	statep = &states[1];
114 	state_info.base = states;
115 	state_info.end = &states[STATE_BSIZE];
116 
117 	Xinit(ws, wp, 64, ATEMP);
118 
119 	backslash_skip = 0;
120 	ignore_backslash_newline = 0;
121 
122 	if (cf&ONEWORD)
123 		state = SWORD;
124 #ifdef KSH
125 	else if (cf&LETEXPR) {
126 		*wp++ = OQUOTE;	 /* enclose arguments in (double) quotes */
127 		state = SLETPAREN;
128 		statep->ls_sletparen.nparen = 0;
129 	}
130 #endif /* KSH */
131 	else {		/* normal lexing */
132 		state = (cf & HEREDELIM) ? SHEREDELIM : SBASE;
133 		while ((c = getsc()) == ' ' || c == '\t')
134 			;
135 		if (c == '#') {
136 			ignore_backslash_newline++;
137 			while ((c = getsc()) != '\0' && c != '\n')
138 				;
139 			ignore_backslash_newline--;
140 		}
141 		ungetsc(c);
142 	}
143 	if (source->flags & SF_ALIAS) {	/* trailing ' ' in alias definition */
144 		source->flags &= ~SF_ALIAS;
145 		/* In POSIX mode, a trailing space only counts if we are
146 		 * parsing a simple command
147 		 */
148 		if (!Flag(FPOSIX) || (cf & CMDWORD))
149 			cf |= ALIAS;
150 	}
151 
152 	/* Initial state: one of SBASE SHEREDELIM SWORD SASPAREN */
153 	statep->ls_state = state;
154 
155 	/* collect non-special or quoted characters to form word */
156 	while (!((c = getsc()) == 0
157 		 || ((state == SBASE || state == SHEREDELIM)
158 		     && ctype(c, C_LEX1))))
159 	{
160 		Xcheck(ws, wp);
161 		switch (state) {
162 		  case SBASE:
163 			if (c == '[' && (cf & (VARASN|ARRAYVAR))) {
164 				*wp = EOS; /* temporary */
165 				if (is_wdvarname(Xstring(ws, wp), FALSE))
166 				{
167 					char *p, *tmp;
168 
169 					if (arraysub(&tmp)) {
170 						*wp++ = CHAR;
171 						*wp++ = c;
172 						for (p = tmp; *p; ) {
173 							Xcheck(ws, wp);
174 							*wp++ = CHAR;
175 							*wp++ = *p++;
176 						}
177 						afree(tmp, ATEMP);
178 						break;
179 					} else {
180 						Source *s;
181 
182 						s = pushs(SREREAD,
183 							  source->areap);
184 						s->start = s->str
185 							= s->u.freeme = tmp;
186 						s->next = source;
187 						source = s;
188 					}
189 				}
190 				*wp++ = CHAR;
191 				*wp++ = c;
192 				break;
193 			}
194 			/* fall through.. */
195 		  Sbase1:	/* includes *(...|...) pattern (*+?@!) */
196 #ifdef KSH
197 			if (c == '*' || c == '@' || c == '+' || c == '?'
198 			    || c == '!')
199 			{
200 				c2 = getsc();
201 				if (c2 == '(' /*)*/ ) {
202 					*wp++ = OPAT;
203 					*wp++ = c;
204 					PUSH_STATE(SPATTERN);
205 					break;
206 				}
207 				ungetsc(c2);
208 			}
209 #endif /* KSH */
210 			/* fall through.. */
211 		  Sbase2:	/* doesn't include *(...|...) pattern (*+?@!) */
212 			switch (c) {
213 			  case '\\':
214 				c = getsc();
215 #ifdef OS2
216 				if (isalnum(c)) {
217 					*wp++ = CHAR, *wp++ = '\\';
218 					*wp++ = CHAR, *wp++ = c;
219 				} else
220 #endif
221 				if (c) /* trailing \ is lost */
222 					*wp++ = QCHAR, *wp++ = c;
223 				break;
224 			  case '\'':
225 				*wp++ = OQUOTE;
226 				ignore_backslash_newline++;
227 				PUSH_STATE(SSQUOTE);
228 				break;
229 			  case '"':
230 				*wp++ = OQUOTE;
231 				PUSH_STATE(SDQUOTE);
232 				break;
233 			  default:
234 				goto Subst;
235 			}
236 			break;
237 
238 		  Subst:
239 			switch (c) {
240 			  case '\\':
241 				c = getsc();
242 				switch (c) {
243 				  case '"': case '\\':
244 				  case '$': case '`':
245 					*wp++ = QCHAR, *wp++ = c;
246 					break;
247 				  default:
248 					Xcheck(ws, wp);
249 					if (c) { /* trailing \ is lost */
250 						*wp++ = CHAR, *wp++ = '\\';
251 						*wp++ = CHAR, *wp++ = c;
252 					}
253 					break;
254 				}
255 				break;
256 			  case '$':
257 				c = getsc();
258 				if (c == '(') /*)*/ {
259 					c = getsc();
260 					if (c == '(') /*)*/ {
261 						PUSH_STATE(SASPAREN);
262 						statep->ls_sasparen.nparen = 2;
263 						statep->ls_sasparen.start =
264 							Xsavepos(ws, wp);
265 						*wp++ = EXPRSUB;
266 					} else {
267 						ungetsc(c);
268 						PUSH_STATE(SCSPAREN);
269 						statep->ls_scsparen.nparen = 1;
270 						statep->ls_scsparen.csstate = 0;
271 						*wp++ = COMSUB;
272 					}
273 				} else if (c == '{') /*}*/ {
274 					*wp++ = OSUBST;
275 					*wp++ = '{'; /*}*/
276 					wp = get_brace_var(&ws, wp);
277 					c = getsc();
278 					/* allow :# and :% (ksh88 compat) */
279 					if (c == ':') {
280 						*wp++ = CHAR, *wp++ = c;
281 						c = getsc();
282 					}
283 					/* If this is a trim operation,
284 					 * treat (,|,) specially in STBRACE.
285 					 */
286 					if (c == '#' || c == '%') {
287 						ungetsc(c);
288 						PUSH_STATE(STBRACE);
289 					} else {
290 						ungetsc(c);
291 						PUSH_STATE(SBRACE);
292 					}
293 				} else if (ctype(c, C_ALPHA)) {
294 					*wp++ = OSUBST;
295 					*wp++ = 'X';
296 					do {
297 						Xcheck(ws, wp);
298 						*wp++ = c;
299 						c = getsc();
300 					} while (ctype(c, C_ALPHA|C_DIGIT));
301 					*wp++ = '\0';
302 					*wp++ = CSUBST;
303 					*wp++ = 'X';
304 					ungetsc(c);
305 				} else if (ctype(c, C_DIGIT|C_VAR1)) {
306 					Xcheck(ws, wp);
307 					*wp++ = OSUBST;
308 					*wp++ = 'X';
309 					*wp++ = c;
310 					*wp++ = '\0';
311 					*wp++ = CSUBST;
312 					*wp++ = 'X';
313 				} else {
314 					*wp++ = CHAR, *wp++ = '$';
315 					ungetsc(c);
316 				}
317 				break;
318 			  case '`':
319 				PUSH_STATE(SBQUOTE);
320 				*wp++ = COMSUB;
321 				/* Need to know if we are inside double quotes
322 				 * since sh/at&t-ksh translate the \" to " in
323 				 * "`..\"..`".
324 				 * This is not done in posix mode (section
325 				 * 3.2.3, Double Quotes: "The backquote shall
326 				 * retain its special meaning introducing the
327 				 * other form of command substitution (see
328 				 * 3.6.3). The portion of the quoted string
329 				 * from the initial backquote and the
330 				 * characters up to the next backquote that
331 				 * is not preceded by a backslash (having
332 				 * escape characters removed) defines that
333 				 * command whose output replaces `...` when
334 				 * the word is expanded."
335 				 * Section 3.6.3, Command Substitution:
336 				 * "Within the backquoted style of command
337 				 * substitution, backslash shall retain its
338 				 * literal meaning, except when followed by
339 				 * $ ` \.").
340 				 */
341 				statep->ls_sbquote.indquotes = 0;
342 				if (!Flag(FPOSIX)) {
343 					Lex_state *s = statep;
344 					Lex_state *base = state_info.base;
345 					while (1) {
346 						for (; s != base; s--) {
347 							if (s->ls_state == SDQUOTE) {
348 								statep->ls_sbquote.indquotes = 1;
349 								break;
350 							}
351 						}
352 						if (s != base)
353 							break;
354 						if (!(s = s->ls_info.base))
355 							break;
356 						base = s-- - STATE_BSIZE;
357 					}
358 				}
359 				break;
360 			  default:
361 				*wp++ = CHAR, *wp++ = c;
362 			}
363 			break;
364 
365 		  case SSQUOTE:
366 			if (c == '\'') {
367 				POP_STATE();
368 				*wp++ = CQUOTE;
369 				ignore_backslash_newline--;
370 			} else
371 				*wp++ = QCHAR, *wp++ = c;
372 			break;
373 
374 		  case SDQUOTE:
375 			if (c == '"') {
376 				POP_STATE();
377 				*wp++ = CQUOTE;
378 			} else
379 				goto Subst;
380 			break;
381 
382 		  case SCSPAREN: /* $( .. ) */
383 			/* todo: deal with $(...) quoting properly
384 			 * kludge to partly fake quoting inside $(..): doesn't
385 			 * really work because nested $(..) or ${..} inside
386 			 * double quotes aren't dealt with.
387 			 */
388 			switch (statep->ls_scsparen.csstate) {
389 			  case 0: /* normal */
390 				switch (c) {
391 				  case '(':
392 					statep->ls_scsparen.nparen++;
393 					break;
394 				  case ')':
395 					statep->ls_scsparen.nparen--;
396 					break;
397 				  case '\\':
398 					statep->ls_scsparen.csstate = 1;
399 					break;
400 				  case '"':
401 					statep->ls_scsparen.csstate = 2;
402 					break;
403 				  case '\'':
404 					statep->ls_scsparen.csstate = 4;
405 					ignore_backslash_newline++;
406 					break;
407 				}
408 				break;
409 
410 			  case 1: /* backslash in normal mode */
411 			  case 3: /* backslash in double quotes */
412 				--statep->ls_scsparen.csstate;
413 				break;
414 
415 			  case 2: /* double quotes */
416 				if (c == '"')
417 					statep->ls_scsparen.csstate = 0;
418 				else if (c == '\\')
419 					statep->ls_scsparen.csstate = 3;
420 				break;
421 
422 			  case 4: /* single quotes */
423 				if (c == '\'') {
424 					statep->ls_scsparen.csstate = 0;
425 					ignore_backslash_newline--;
426 				}
427 				break;
428 			}
429 			if (statep->ls_scsparen.nparen == 0) {
430 				POP_STATE();
431 				*wp++ = 0; /* end of COMSUB */
432 			} else
433 				*wp++ = c;
434 			break;
435 
436 		  case SASPAREN: /* $(( .. )) */
437 			/* todo: deal with $((...); (...)) properly */
438 			/* XXX should nest using existing state machine
439 			 *     (embed "..", $(...), etc.) */
440 			if (c == '(')
441 				statep->ls_sasparen.nparen++;
442 			else if (c == ')') {
443 				statep->ls_sasparen.nparen--;
444 				if (statep->ls_sasparen.nparen == 1) {
445 					/*(*/
446 					if ((c2 = getsc()) == ')') {
447 						POP_STATE();
448 						*wp++ = 0; /* end of EXPRSUB */
449 						break;
450 					} else {
451 						char *s;
452 
453 						ungetsc(c2);
454 						/* mismatched parenthesis -
455 						 * assume we were really
456 						 * parsing a $(..) expression
457 						 */
458 						s = Xrestpos(ws, wp,
459 						     statep->ls_sasparen.start);
460 						memmove(s + 1, s, wp - s);
461 						*s++ = COMSUB;
462 						*s = '('; /*)*/
463 						wp++;
464 						statep->ls_scsparen.nparen = 1;
465 						statep->ls_scsparen.csstate = 0;
466 						state = statep->ls_state
467 							= SCSPAREN;
468 
469 					}
470 				}
471 			}
472 			*wp++ = c;
473 			break;
474 
475 		  case SBRACE:
476 			/*{*/
477 			if (c == '}') {
478 				POP_STATE();
479 				*wp++ = CSUBST;
480 				*wp++ = /*{*/ '}';
481 			} else
482 				goto Sbase1;
483 			break;
484 
485 		  case STBRACE:
486 			/* Same as SBRACE, except (,|,) treated specially */
487 			/*{*/
488 			if (c == '}') {
489 				POP_STATE();
490 				*wp++ = CSUBST;
491 				*wp++ = /*{*/ '}';
492 			} else if (c == '|') {
493 				*wp++ = SPAT;
494 			} else if (c == '(') {
495 				*wp++ = OPAT;
496 				*wp++ = ' ';	/* simile for @ */
497 				PUSH_STATE(SPATTERN);
498 			} else
499 				goto Sbase1;
500 			break;
501 
502 		  case SBQUOTE:
503 			if (c == '`') {
504 				*wp++ = 0;
505 				POP_STATE();
506 			} else if (c == '\\') {
507 				switch (c = getsc()) {
508 				  case '\\':
509 				  case '$': case '`':
510 					*wp++ = c;
511 					break;
512 				  case '"':
513 					if (statep->ls_sbquote.indquotes) {
514 						*wp++ = c;
515 						break;
516 					}
517 					/* fall through.. */
518 				  default:
519 					if (c) { /* trailing \ is lost */
520 						*wp++ = '\\';
521 						*wp++ = c;
522 					}
523 					break;
524 				}
525 			} else
526 				*wp++ = c;
527 			break;
528 
529 		  case SWORD:	/* ONEWORD */
530 			goto Subst;
531 
532 #ifdef KSH
533 		  case SLETPAREN:	/* LETEXPR: (( ... )) */
534 			/*(*/
535 			if (c == ')') {
536 				if (statep->ls_sletparen.nparen > 0)
537 				    --statep->ls_sletparen.nparen;
538 				/*(*/
539 				else if ((c2 = getsc()) == ')') {
540 					c = 0;
541 					*wp++ = CQUOTE;
542 					goto Done;
543 				} else
544 					ungetsc(c2);
545 			} else if (c == '(')
546 				/* parenthesis inside quotes and backslashes
547 				 * are lost, but at&t ksh doesn't count them
548 				 * either
549 				 */
550 				++statep->ls_sletparen.nparen;
551 			goto Sbase2;
552 #endif /* KSH */
553 
554 		  case SHEREDELIM:	/* <<,<<- delimiter */
555 			/* XXX chuck this state (and the next) - use
556 			 * the existing states ($ and \`..` should be
557 			 * stripped of their specialness after the
558 			 * fact).
559 			 */
560 			/* here delimiters need a special case since
561 			 * $ and `..` are not to be treated specially
562 			 */
563 			if (c == '\\') {
564 				c = getsc();
565 				if (c) { /* trailing \ is lost */
566 					*wp++ = QCHAR;
567 					*wp++ = c;
568 				}
569 			} else if (c == '\'') {
570 				PUSH_STATE(SSQUOTE);
571 				*wp++ = OQUOTE;
572 				ignore_backslash_newline++;
573 			} else if (c == '"') {
574 				state = statep->ls_state = SHEREDQUOTE;
575 				*wp++ = OQUOTE;
576 			} else {
577 				*wp++ = CHAR;
578 				*wp++ = c;
579 			}
580 			break;
581 
582 		  case SHEREDQUOTE:	/* " in <<,<<- delimiter */
583 			if (c == '"') {
584 				*wp++ = CQUOTE;
585 				state = statep->ls_state = SHEREDELIM;
586 			} else {
587 				if (c == '\\') {
588 					switch (c = getsc()) {
589 					  case '\\': case '"':
590 					  case '$': case '`':
591 						break;
592 					  default:
593 						if (c) { /* trailing \ lost */
594 							*wp++ = CHAR;
595 							*wp++ = '\\';
596 						}
597 						break;
598 					}
599 				}
600 				*wp++ = CHAR;
601 				*wp++ = c;
602 			}
603 			break;
604 
605 		  case SPATTERN:	/* in *(...|...) pattern (*+?@!) */
606 			if ( /*(*/ c == ')') {
607 				*wp++ = CPAT;
608 				POP_STATE();
609 			} else if (c == '|') {
610 				*wp++ = SPAT;
611 			} else if (c == '(') {
612 				*wp++ = OPAT;
613 				*wp++ = ' ';	/* simile for @ */
614 				PUSH_STATE(SPATTERN);
615 			} else
616 				goto Sbase1;
617 			break;
618 		}
619 	}
620 Done:
621 	Xcheck(ws, wp);
622 	if (statep != &states[1])
623 		/* XXX figure out what is missing */
624 		yyerror("no closing quote\n");
625 
626 	/* This done to avoid tests for SHEREDELIM wherever SBASE tested */
627 	if (state == SHEREDELIM)
628 		state = SBASE;
629 
630 	dp = Xstring(ws, wp);
631 	if ((c == '<' || c == '>') && state == SBASE
632 	    && ((c2 = Xlength(ws, wp)) == 0
633 	        || (c2 == 2 && dp[0] == CHAR && digit(dp[1]))))
634 	{
635 		struct ioword *iop =
636 				(struct ioword *) alloc(sizeof(*iop), ATEMP);
637 
638 		if (c2 == 2)
639 			iop->unit = dp[1] - '0';
640 		else
641 			iop->unit = c == '>'; /* 0 for <, 1 for > */
642 
643 		c2 = getsc();
644 		/* <<, >>, <> are ok, >< is not */
645 		if (c == c2 || (c == '<' && c2 == '>')) {
646 			iop->flag = c == c2 ?
647 				  (c == '>' ? IOCAT : IOHERE) : IORDWR;
648 			if (iop->flag == IOHERE)
649 				if ((c2 = getsc()) == '-')
650 					iop->flag |= IOSKIP;
651 				else
652 					ungetsc(c2);
653 		} else if (c2 == '&')
654 			iop->flag = IODUP | (c == '<' ? IORDUP : 0);
655 		else {
656 			iop->flag = c == '>' ? IOWRITE : IOREAD;
657 			if (c == '>' && c2 == '|')
658 				iop->flag |= IOCLOB;
659 			else
660 				ungetsc(c2);
661 		}
662 
663 		iop->name = (char *) 0;
664 		iop->delim = (char *) 0;
665 		iop->heredoc = (char *) 0;
666 		Xfree(ws, wp);	/* free word */
667 		yylval.iop = iop;
668 		return REDIR;
669 	}
670 
671 	if (wp == dp && state == SBASE) {
672 		Xfree(ws, wp);	/* free word */
673 		/* no word, process LEX1 character */
674 		switch (c) {
675 		  default:
676 			return c;
677 
678 		  case '|':
679 		  case '&':
680 		  case ';':
681 			if ((c2 = getsc()) == c)
682 				c = (c == ';') ? BREAK :
683 				    (c == '|') ? LOGOR :
684 				    (c == '&') ? LOGAND :
685 				    YYERRCODE;
686 #ifdef KSH
687 			else if (c == '|' && c2 == '&')
688 				c = COPROC;
689 #endif /* KSH */
690 			else
691 				ungetsc(c2);
692 			return c;
693 
694 		  case '\n':
695 			gethere();
696 			if (cf & CONTIN)
697 				goto Again;
698 			return c;
699 
700 		  case '(':  /*)*/
701 #ifdef KSH
702 			if ((c2 = getsc()) == '(') /*)*/
703 				/* XXX need to handle ((...); (...)) */
704 				c = MDPAREN;
705 			else
706 				ungetsc(c2);
707 #endif /* KSH */
708 			return c;
709 		  /*(*/
710 		  case ')':
711 			return c;
712 		}
713 	}
714 
715 	*wp++ = EOS;		/* terminate word */
716 	yylval.cp = Xclose(ws, wp);
717 	if (state == SWORD
718 #ifdef KSH
719 		|| state == SLETPAREN
720 #endif /* KSH */
721 		)	/* ONEWORD? */
722 		return LWORD;
723 	ungetsc(c);		/* unget terminator */
724 
725 	/* copy word to unprefixed string ident */
726 	for (sp = yylval.cp, dp = ident; dp < ident+IDENT && (c = *sp++) == CHAR; )
727 		*dp++ = *sp++;
728 	/* Make sure the ident array stays '\0' paded */
729 	memset(dp, 0, (ident+IDENT) - dp + 1);
730 	if (c != EOS)
731 		*ident = '\0';	/* word is not unquoted */
732 
733 	if (*ident != '\0' && (cf&(KEYWORD|ALIAS))) {
734 		struct tbl *p;
735 		int h = hash(ident);
736 
737 		/* { */
738 		if ((cf & KEYWORD) && (p = tsearch(&keywords, ident, h))
739 		    && (!(cf & ESACONLY) || p->val.i == ESAC || p->val.i == '}'))
740 		{
741 			afree(yylval.cp, ATEMP);
742 			return p->val.i;
743 		}
744 		if ((cf & ALIAS) && (p = tsearch(&aliases, ident, h))
745 		    && (p->flag & ISSET))
746 		{
747 			register Source *s;
748 
749 			for (s = source; s->type == SALIAS; s = s->next)
750 				if (s->u.tblp == p)
751 					return LWORD;
752 			/* push alias expansion */
753 			s = pushs(SALIAS, source->areap);
754 			s->start = s->str = p->val.s;
755 			s->u.tblp = p;
756 			s->next = source;
757 			source = s;
758 			afree(yylval.cp, ATEMP);
759 			goto Again;
760 		}
761 	}
762 
763 	return LWORD;
764 }
765 
766 static void
gethere()767 gethere()
768 {
769 	register struct ioword **p;
770 
771 	for (p = heres; p < herep; p++)
772 		readhere(*p);
773 	herep = heres;
774 }
775 
776 /*
777  * read "<<word" text into temp file
778  */
779 
780 static void
readhere(iop)781 readhere(iop)
782 	struct ioword *iop;
783 {
784 	register int c;
785 	char *volatile eof;
786 	char *eofp;
787 	int skiptabs;
788 	XString xs;
789 	char *xp;
790 	int xpos;
791 
792 	eof = evalstr(iop->delim, 0);
793 
794 	if (!(iop->flag & IOEVAL))
795 		ignore_backslash_newline++;
796 
797 	Xinit(xs, xp, 256, ATEMP);
798 
799 	for (;;) {
800 		eofp = eof;
801 		skiptabs = iop->flag & IOSKIP;
802 		xpos = Xsavepos(xs, xp);
803 		while ((c = getsc()) != 0) {
804 			if (skiptabs) {
805 				if (c == '\t')
806 					continue;
807 				skiptabs = 0;
808 			}
809 			if (c != *eofp)
810 				break;
811 			Xcheck(xs, xp);
812 			Xput(xs, xp, c);
813 			eofp++;
814 		}
815 		/* Allow EOF here so commands with out trailing newlines
816 		 * will work (eg, ksh -c '...', $(...), etc).
817 		 */
818 		if (*eofp == '\0' && (c == 0 || c == '\n')) {
819 			xp = Xrestpos(xs, xp, xpos);
820 			break;
821 		}
822 		ungetsc(c);
823 		while ((c = getsc()) != '\n') {
824 			if (c == 0)
825 				yyerror("here document `%s' unclosed\n", eof);
826 			Xcheck(xs, xp);
827 			Xput(xs, xp, c);
828 		}
829 		Xcheck(xs, xp);
830 		Xput(xs, xp, c);
831 	}
832 	Xput(xs, xp, '\0');
833 	iop->heredoc = Xclose(xs, xp);
834 
835 	if (!(iop->flag & IOEVAL))
836 		ignore_backslash_newline--;
837 }
838 
839 void
840 #ifdef HAVE_PROTOTYPES
yyerror(const char * fmt,...)841 yyerror(const char *fmt, ...)
842 #else
843 yyerror(fmt, va_alist)
844 	const char *fmt;
845 	va_dcl
846 #endif
847 {
848 	va_list va;
849 
850 	/* pop aliases and re-reads */
851 	while (source->type == SALIAS || source->type == SREREAD)
852 		source = source->next;
853 	source->str = null;	/* zap pending input */
854 
855 	error_prefix(TRUE);
856 	SH_VA_START(va, fmt);
857 	shf_vfprintf(shl_out, fmt, va);
858 	va_end(va);
859 	errorf(null);
860 }
861 
862 /*
863  * input for yylex with alias expansion
864  */
865 
866 Source *
pushs(type,areap)867 pushs(type, areap)
868 	int type;
869 	Area *areap;
870 {
871 	register Source *s;
872 
873 	s = (Source *) alloc(sizeof(Source), areap);
874 	s->type = type;
875 	s->str = null;
876 	s->start = NULL;
877 	s->line = 0;
878 	s->errline = 0;
879 	s->file = NULL;
880 	s->flags = 0;
881 	s->next = NULL;
882 	s->areap = areap;
883 	if (type == SFILE || type == SSTDIN) {
884 		char *dummy;
885 		Xinit(s->xs, dummy, 256, s->areap);
886 	} else
887 		memset(&s->xs, 0, sizeof(s->xs));
888 	return s;
889 }
890 
891 static int
getsc__()892 getsc__()
893 {
894 	register Source *s = source;
895 	register int c;
896 
897 	while ((c = *s->str++) == 0) {
898 		s->str = NULL;		/* return 0 for EOF by default */
899 		switch (s->type) {
900 		  case SEOF:
901 			s->str = null;
902 			return 0;
903 
904 		  case SSTDIN:
905 		  case SFILE:
906 			getsc_line(s);
907 			break;
908 
909 		  case SWSTR:
910 			break;
911 
912 		  case SSTRING:
913 			break;
914 
915 		  case SWORDS:
916 			s->start = s->str = *s->u.strv++;
917 			s->type = SWORDSEP;
918 			break;
919 
920 		  case SWORDSEP:
921 			if (*s->u.strv == NULL) {
922 				s->start = s->str = newline;
923 				s->type = SEOF;
924 			} else {
925 				s->start = s->str = space;
926 				s->type = SWORDS;
927 			}
928 			break;
929 
930 		  case SALIAS:
931 			if (s->flags & SF_ALIASEND) {
932 				/* pass on an unused SF_ALIAS flag */
933 				source = s->next;
934 				source->flags |= s->flags & SF_ALIAS;
935 				s = source;
936 			} else if (*s->u.tblp->val.s
937 				 && isspace(strchr(s->u.tblp->val.s, 0)[-1]))
938 			{
939 				source = s = s->next;	/* pop source stack */
940 				/* Note that this alias ended with a space,
941 				 * enabling alias expansion on the following
942 				 * word.
943 				 */
944 				s->flags |= SF_ALIAS;
945 			} else {
946 				/* At this point, we need to keep the current
947 				 * alias in the source list so recursive
948 				 * aliases can be detected and we also need
949 				 * to return the next character.  Do this
950 				 * by temporarily popping the alias to get
951 				 * the next character and then put it back
952 				 * in the source list with the SF_ALIASEND
953 				 * flag set.
954 				 */
955 				source = s->next;	/* pop source stack */
956 				source->flags |= s->flags & SF_ALIAS;
957 				c = getsc__();
958 				if (c) {
959 					s->flags |= SF_ALIASEND;
960 					s->ugbuf[0] = c; s->ugbuf[1] = '\0';
961 					s->start = s->str = s->ugbuf;
962 					s->next = source;
963 					source = s;
964 				} else {
965 					s = source;
966 					/* avoid reading eof twice */
967 					s->str = NULL;
968 					break;
969 				}
970 			}
971 			continue;
972 
973 		  case SREREAD:
974 			if (s->start != s->ugbuf) /* yuck */
975 				afree(s->u.freeme, ATEMP);
976 			source = s = s->next;
977 			continue;
978 		}
979 		if (s->str == NULL) {
980 			s->type = SEOF;
981 			s->start = s->str = null;
982 			return '\0';
983 		}
984 		if (s->flags & SF_ECHO) {
985 			shf_puts(s->str, shl_out);
986 			shf_flush(shl_out);
987 		}
988 	}
989 	return c;
990 }
991 
992 static void
getsc_line(s)993 getsc_line(s)
994 	Source *s;
995 {
996 	char *xp = Xstring(s->xs, xp);
997 	int interactive = Flag(FTALKING) && s->type == SSTDIN;
998 	int have_tty = interactive && (s->flags & SF_TTY);
999 
1000 	/* Done here to ensure nothing odd happens when a timeout occurs */
1001 	XcheckN(s->xs, xp, LINE);
1002 	*xp = '\0';
1003 	s->start = s->str = xp;
1004 
1005 #ifdef KSH
1006 	if (have_tty && ksh_tmout) {
1007 		ksh_tmout_state = TMOUT_READING;
1008 		alarm(ksh_tmout);
1009 	}
1010 #endif /* KSH */
1011 #ifdef EDIT
1012 	if (have_tty && (0
1013 # ifdef VI
1014 			 || Flag(FVI)
1015 # endif /* VI */
1016 # ifdef EMACS
1017 			 || Flag(FEMACS) || Flag(FGMACS)
1018 # endif /* EMACS */
1019 		))
1020 	{
1021 		int nread;
1022 
1023 		nread = x_read(xp, LINE);
1024 		if (nread < 0)	/* read error */
1025 			nread = 0;
1026 		xp[nread] = '\0';
1027 		xp += nread;
1028 	}
1029 	else
1030 #endif /* EDIT */
1031 	{
1032 		if (interactive) {
1033 			pprompt(prompt, 0);
1034 		} else
1035 			s->line++;
1036 
1037 		while (1) {
1038 			char *p = shf_getse(xp, Xnleft(s->xs, xp), s->u.shf);
1039 
1040 			if (!p && shf_error(s->u.shf)
1041 			    && shf_errno(s->u.shf) == EINTR)
1042 			{
1043 				shf_clearerr(s->u.shf);
1044 				if (trap)
1045 					runtraps(0);
1046 				continue;
1047 			}
1048 			if (!p || (xp = p, xp[-1] == '\n'))
1049 				break;
1050 			/* double buffer size */
1051 			xp++; /* move past null so doubling works... */
1052 			XcheckN(s->xs, xp, Xlength(s->xs, xp));
1053 			xp--; /* ...and move back again */
1054 		}
1055 		/* flush any unwanted input so other programs/builtins
1056 		 * can read it.  Not very optimal, but less error prone
1057 		 * than flushing else where, dealing with redirections,
1058 		 * etc..
1059 		 * todo: reduce size of shf buffer (~128?) if SSTDIN
1060 		 */
1061 		if (s->type == SSTDIN)
1062 			shf_flush(s->u.shf);
1063 	}
1064 	/* XXX: temporary kludge to restore source after a
1065 	 * trap may have been executed.
1066 	 */
1067 	source = s;
1068 #ifdef KSH
1069 	if (have_tty && ksh_tmout)
1070 	{
1071 		ksh_tmout_state = TMOUT_EXECUTING;
1072 		alarm(0);
1073 	}
1074 #endif /* KSH */
1075 	s->start = s->str = Xstring(s->xs, xp);
1076 	strip_nuls(Xstring(s->xs, xp), Xlength(s->xs, xp));
1077 	/* Note: if input is all nulls, this is not eof */
1078 	if (Xlength(s->xs, xp) == 0) { /* EOF */
1079 		if (s->type == SFILE)
1080 			shf_fdclose(s->u.shf);
1081 		s->str = NULL;
1082 	} else if (interactive) {
1083 #ifdef HISTORY
1084 		char *p = Xstring(s->xs, xp);
1085 		if (cur_prompt == PS1)
1086 			while (*p && ctype(*p, C_IFS) && ctype(*p, C_IFSWS))
1087 				p++;
1088 		if (*p) {
1089 # ifdef EASY_HISTORY
1090 			if (cur_prompt == PS2)
1091 				histappend(Xstring(s->xs, xp), 1);
1092 			else
1093 # endif /* EASY_HISTORY */
1094 			{
1095 				s->line++;
1096 				histsave(s->line, s->str, 1);
1097 			}
1098 		}
1099 #endif /* HISTORY */
1100 	}
1101 	if (interactive)
1102 		set_prompt(PS2, (Source *) 0);
1103 }
1104 
1105 void
set_prompt(to,s)1106 set_prompt(to, s)
1107 	int to;
1108 	Source *s;
1109 {
1110 	cur_prompt = to;
1111 
1112 	switch (to) {
1113 	case PS1: /* command */
1114 #ifdef KSH
1115 		/* Substitute ! and !! here, before substitutions are done
1116 		 * so ! in expanded variables are not expanded.
1117 		 * NOTE: this is not what at&t ksh does (it does it after
1118 		 * substitutions, POSIX doesn't say which is to be done.
1119 		 */
1120 		{
1121 			struct shf *shf;
1122 			char *ps1;
1123 			Area *saved_atemp;
1124 
1125 			ps1 = str_val(global("PS1"));
1126 			shf = shf_sopen((char *) 0, strlen(ps1) * 2,
1127 				SHF_WR | SHF_DYNAMIC, (struct shf *) 0);
1128 			while (*ps1) {
1129 				if (*ps1 != '!' || *++ps1 == '!')
1130 					shf_putchar(*ps1++, shf);
1131 				else
1132 					shf_fprintf(shf, "%d",
1133 						s ? s->line + 1 : 0);
1134 			}
1135 			ps1 = shf_sclose(shf);
1136 			saved_atemp = ATEMP;
1137 			newenv(E_ERRH);
1138 			if (ksh_sigsetjmp(e->jbuf, 0)) {
1139 				prompt = safe_prompt;
1140 				/* Don't print an error - assume it has already
1141 				 * been printed.  Reason is we may have forked
1142 				 * to run a command and the child may be
1143 				 * unwinding its stack through this code as it
1144 				 * exits.
1145 				 */
1146 			} else
1147 				prompt = str_save(substitute(ps1, 0),
1148 						 saved_atemp);
1149 			quitenv();
1150 		}
1151 #else /* KSH */
1152 		prompt = str_val(global("PS1"));
1153 #endif /* KSH */
1154 		break;
1155 
1156 	case PS2: /* command continuation */
1157 		prompt = str_val(global("PS2"));
1158 		break;
1159 	}
1160 }
1161 
1162 /* See also related routine, promptlen() in edit.c */
1163 void
pprompt(cp,ntruncate)1164 pprompt(cp, ntruncate)
1165 	const char *cp;
1166 	int ntruncate;
1167 {
1168 #if 0
1169 	char nbuf[32];
1170 	int c;
1171 
1172 	while (*cp != 0) {
1173 		if (*cp != '!')
1174 			c = *cp++;
1175 		else if (*++cp == '!')
1176 			c = *cp++;
1177 		else {
1178 			int len;
1179 			char *p;
1180 
1181 			shf_snprintf(p = nbuf, sizeof(nbuf), "%d",
1182 				source->line + 1);
1183 			len = strlen(nbuf);
1184 			if (ntruncate) {
1185 				if (ntruncate >= len) {
1186 					ntruncate -= len;
1187 					continue;
1188 				}
1189 				p += ntruncate;
1190 				len -= ntruncate;
1191 				ntruncate = 0;
1192 			}
1193 			shf_write(p, len, shl_out);
1194 			continue;
1195 		}
1196 		if (ntruncate)
1197 			--ntruncate;
1198 		else
1199 			shf_putc(c, shl_out);
1200 	}
1201 #endif /* 0 */
1202 	shf_puts(cp + ntruncate, shl_out);
1203 	shf_flush(shl_out);
1204 }
1205 
1206 /* Read the variable part of a ${...} expression (ie, up to but not including
1207  * the :[-+?=#%] or close-brace.
1208  */
1209 static char *
get_brace_var(wsp,wp)1210 get_brace_var(wsp, wp)
1211 	XString *wsp;
1212 	char *wp;
1213 {
1214 	enum parse_state {
1215 			   PS_INITIAL, PS_SAW_HASH, PS_IDENT,
1216 			   PS_NUMBER, PS_VAR1, PS_END
1217 			 }
1218 		state;
1219 	char c;
1220 
1221 	state = PS_INITIAL;
1222 	while (1) {
1223 		c = getsc();
1224 		/* State machine to figure out where the variable part ends. */
1225 		switch (state) {
1226 		  case PS_INITIAL:
1227 			if (c == '#') {
1228 				state = PS_SAW_HASH;
1229 				break;
1230 			}
1231 			/* fall through.. */
1232 		  case PS_SAW_HASH:
1233 			if (letter(c))
1234 				state = PS_IDENT;
1235 			else if (digit(c))
1236 				state = PS_NUMBER;
1237 			else if (ctype(c, C_VAR1))
1238 				state = PS_VAR1;
1239 			else
1240 				state = PS_END;
1241 			break;
1242 		  case PS_IDENT:
1243 			if (!letnum(c)) {
1244 				state = PS_END;
1245 				if (c == '[') {
1246 					char *tmp, *p;
1247 
1248 					if (!arraysub(&tmp))
1249 						yyerror("missing ]\n");
1250 					*wp++ = c;
1251 					for (p = tmp; *p; ) {
1252 						Xcheck(*wsp, wp);
1253 						*wp++ = *p++;
1254 					}
1255 					afree(tmp, ATEMP);
1256 					c = getsc(); /* the ] */
1257 				}
1258 			}
1259 			break;
1260 		  case PS_NUMBER:
1261 			if (!digit(c))
1262 				state = PS_END;
1263 			break;
1264 		  case PS_VAR1:
1265 			state = PS_END;
1266 			break;
1267 		  case PS_END: /* keep gcc happy */
1268 			break;
1269 		}
1270 		if (state == PS_END) {
1271 			*wp++ = '\0';	/* end of variable part */
1272 			ungetsc(c);
1273 			break;
1274 		}
1275 		Xcheck(*wsp, wp);
1276 		*wp++ = c;
1277 	}
1278 	return wp;
1279 }
1280 
1281 /*
1282  * Save an array subscript - returns true if matching bracket found, false
1283  * if eof or newline was found.
1284  * (Returned string double null terminated)
1285  */
1286 static int
arraysub(strp)1287 arraysub(strp)
1288 	char **strp;
1289 {
1290 	XString ws;
1291 	char	*wp;
1292 	char	c;
1293 	int 	depth = 1;	/* we are just past the initial [ */
1294 
1295 	Xinit(ws, wp, 32, ATEMP);
1296 
1297 	do {
1298 		c = getsc();
1299 		Xcheck(ws, wp);
1300 		*wp++ = c;
1301 		if (c == '[')
1302 			depth++;
1303 		else if (c == ']')
1304 			depth--;
1305 	} while (depth > 0 && c && c != '\n');
1306 
1307 	*wp++ = '\0';
1308 	*strp = Xclose(ws, wp);
1309 
1310 	return depth == 0 ? 1 : 0;
1311 }
1312 
1313 /* Unget a char: handles case when we are already at the start of the buffer */
1314 static const char *
ungetsc(c)1315 ungetsc(c)
1316 	int c;
1317 {
1318 	if (backslash_skip)
1319 		backslash_skip--;
1320 	/* Don't unget eof... */
1321 	if (source->str == null && c == '\0')
1322 		return source->str;
1323 	if (source->str > source->start)
1324 		source->str--;
1325 	else {
1326 		Source *s;
1327 
1328 		s = pushs(SREREAD, source->areap);
1329 		s->ugbuf[0] = c; s->ugbuf[1] = '\0';
1330 		s->start = s->str = s->ugbuf;
1331 		s->next = source;
1332 		source = s;
1333 	}
1334 	return source->str;
1335 }
1336 
1337 
1338 /* Called to get a char that isn't a \newline sequence. */
1339 static int
getsc_bn(void)1340 getsc_bn ARGS((void))
1341 {
1342 	int c, c2;
1343 
1344 	if (ignore_backslash_newline)
1345 		return getsc_();
1346 
1347 	if (backslash_skip == 1) {
1348 		backslash_skip = 2;
1349 		return getsc_();
1350 	}
1351 
1352 	backslash_skip = 0;
1353 
1354 	while (1) {
1355 		c = getsc_();
1356 		if (c == '\\') {
1357 			if ((c2 = getsc_()) == '\n')
1358 				/* ignore the \newline; get the next char... */
1359 				continue;
1360 			ungetsc(c2);
1361 			backslash_skip = 1;
1362 		}
1363 		return c;
1364 	}
1365 }
1366 
1367 static Lex_state *
push_state_(si,old_end)1368 push_state_(si, old_end)
1369 	State_info *si;
1370 	Lex_state *old_end;
1371 {
1372 	Lex_state	*new = alloc(sizeof(Lex_state) * STATE_BSIZE, ATEMP);
1373 
1374 	new[0].ls_info.base = old_end;
1375 	si->base = &new[0];
1376 	si->end = &new[STATE_BSIZE];
1377 	return &new[1];
1378 }
1379 
1380 static Lex_state *
pop_state_(si,old_end)1381 pop_state_(si, old_end)
1382 	State_info *si;
1383 	Lex_state *old_end;
1384 {
1385 	Lex_state *old_base = si->base;
1386 
1387 	si->base = old_end->ls_info.base - STATE_BSIZE;
1388 	si->end = old_end->ls_info.base;
1389 
1390 	afree(old_base, ATEMP);
1391 
1392 	return si->base + STATE_BSIZE - 1;;
1393 }
1394