xref: /netbsd-src/bin/ksh/lex.c (revision 6883e45b8706cc17a9d0df56a891b10e5ffaa6ca)
1 /*	$NetBSD: lex.c,v 1.24 2019/09/26 11:01:09 mlelstv Exp $	*/
2 
3 /*
4  * lexical analysis and source input
5  */
6 #include <sys/cdefs.h>
7 
8 #ifndef lint
9 __RCSID("$NetBSD: lex.c,v 1.24 2019/09/26 11:01:09 mlelstv Exp $");
10 #endif
11 
12 
13 #include "sh.h"
14 #include <ctype.h>
15 
16 
17 /* Structure to keep track of the lexing state and the various pieces of info
18  * needed for each particular state.
19  */
/*
 * One entry of the lexer's state stack: the state code plus whatever
 * per-state bookkeeping that state needs.  Entry 0 of every stack block
 * uses `base' instead, to link back to the previous block.
 */
typedef struct lex_state {
	int ls_state;
	union {
	    /* $(...) */
	    struct scsparen_info {
		    int nparen;		/* count open parenthesis */
		    int csstate; /* XXX remove */
	    } u_scsparen;

	    /* $((...)) */
	    struct sasparen_info {
		    int nparen;		/* count open parenthesis */
		    int start;		/* marks start of $(( in output str */
	    } u_sasparen;

	    /* ((...)) */
	    struct sletparen_info {
		    int nparen;		/* count open parenthesis */
	    } u_sletparen;

	    /* `...` */
	    struct sbquote_info {
		    int indquotes;	/* true if in double quotes: "`...`" */
	    } u_sbquote;

	    struct lex_state *base;	/* used to point to next state block */
	} ls_info;
} Lex_state;

/* Shorthand accessors for the per-state members of ls_info. */
#define ls_scsparen ls_info.u_scsparen
#define ls_sasparen ls_info.u_sasparen
#define ls_sletparen ls_info.u_sletparen
#define ls_sbquote ls_info.u_sbquote

/* Bounds of the state-stack block currently in use. */
typedef struct State_info {
	Lex_state	*base;
	Lex_state	*end;
} State_info;
59 
60 
/* Prototypes for the file-local helpers defined further down. */
static void	readhere ARGS((struct ioword *iop));
static int	getsc__ ARGS((void));
static void	getsc_line ARGS((Source *s));
static int	getsc_bn ARGS((void));
static char	*get_brace_var ARGS((XString *wsp, char *wp));
static int	arraysub ARGS((char **strp));
static const char *ungetsc ARGS((int c));
static void	gethere ARGS((void));
static Lex_state *push_state_ ARGS((State_info *si, Lex_state *old_end));
static Lex_state *pop_state_ ARGS((State_info *si, Lex_state *old_end));

/* Backslash bookkeeping shared by getsc_bn() and ungetsc(): getsc_bn() sets
 * it to 1 when it returns a backslash that is not followed by a newline and
 * to 2 on the call after that; ungetsc() decrements it when it is non-zero.
 */
static int backslash_skip;
/* While non-zero, getsc_bn() hands characters through untouched, i.e.
 * backslash-newline pairs are not removed from the input.
 */
static int ignore_backslash_newline;

/* optimized getsc_bn(): take the next byte directly from source->str unless
 * it is NUL or a backslash, or a backslash skip is pending.
 */
#define getsc()		(*source->str != '\0' && *source->str != '\\' \
			 && !backslash_skip ? *source->str++ : getsc_bn())
/* optimized getsc__(): only call the function when the buffer is exhausted */
#define	getsc_()	((*source->str != '\0') ? *source->str++ : getsc__())

/* Number of Lex_state entries in one state-stack block. */
#define STATE_BSIZE	32

/* Push state s.  Relies on the caller's locals `statep', `state_info' and
 * `state'; a new block is obtained from push_state_() when the current
 * block is full.
 */
#define PUSH_STATE(s)	do { \
			    if (++statep == state_info.end) \
				statep = push_state_(&state_info, statep); \
			    state = statep->ls_state = (s); \
			} while (0)

/* Pop back to the previous state.  Relies on the same caller locals as
 * PUSH_STATE; pop_state_() is called when the pop reaches entry 0 of the
 * current block.
 */
#define POP_STATE()	do { \
			    if (--statep == state_info.base) \
				statep = pop_state_(&state_info, statep); \
			    state = statep->ls_state; \
			} while (0)
94 
95 
96 
97 /*
98  * Lexical analyzer
99  *
100  * tokens are not regular expressions, they are LL(1).
101  * for example, "${var:-${PWD}}", and "$(size $(whence ksh))".
102  * hence the state stack.
103  */
104 
/*
 * Read the next token from the current input `source'.
 *
 * cf is a bit mask steering the lexer; the bits tested here are ONEWORD,
 * LETEXPR, HEREDELIM, HEREDOC, VARASN, ARRAYVAR, CMDWORD, CONTIN, KEYWORD,
 * ESACONLY and ALIAS.
 *
 * Returns one of:
 *   REDIR	- an I/O redirection operator; yylval.iop describes it
 *   a LEX1 character, or BREAK / LOGOR / LOGAND / COPROC / MDPAREN for the
 *		  two-character operators, when no word was collected
 *   p->val.i	- the token stored for a reserved word (KEYWORD lookups)
 *   LWORD	- a word; yylval.cp holds the encoded word (CHAR, QCHAR,
 *		  OQUOTE, ... markers terminated by EOS)
 *
 * yyerror() is called (and does not return here) when a quote or
 * substitution is still open at the end of the word.
 */
int
yylex(cf)
	int cf;
{
	Lex_state states[STATE_BSIZE], *statep;
	State_info state_info;
	int c, state;
	XString ws;		/* expandable output word */
	char *wp;		/* output word pointer */
	char *sp, *dp;
	int c2;


  Again:
	/* states[0] is the bottom-of-stack marker; live states start at [1] */
	states[0].ls_state = -1;
	states[0].ls_info.base = (Lex_state *) 0;
	statep = &states[1];
	state_info.base = states;
	state_info.end = &states[STATE_BSIZE];

	Xinit(ws, wp, 64, ATEMP);

	backslash_skip = 0;
	ignore_backslash_newline = 0;

	if (cf&ONEWORD)
		state = SWORD;
#ifdef KSH
	else if (cf&LETEXPR) {
		*wp++ = OQUOTE;	 /* enclose arguments in (double) quotes */
		state = SLETPAREN;
		statep->ls_sletparen.nparen = 0;
	}
#endif /* KSH */
	else {		/* normal lexing */
		state = (cf & HEREDELIM) ? SHEREDELIM : SBASE;
		/* skip leading blanks */
		while ((c = getsc()) == ' ' || c == '\t')
			;
		/* skip a comment up to (not including) the newline or EOF */
		if (c == '#') {
			ignore_backslash_newline++;
			while ((c = getsc()) != '\0' && c != '\n')
				;
			ignore_backslash_newline--;
		}
		ungetsc(c);
	}
	if (source->flags & SF_ALIAS) {	/* trailing ' ' in alias definition */
		source->flags &= ~SF_ALIAS;
		/* In POSIX mode, a trailing space only counts if we are
		 * parsing a simple command
		 */
		if (!Flag(FPOSIX) || (cf & CMDWORD))
			cf |= ALIAS;
	}

	/* Initial state: one of SBASE SHEREDELIM SWORD SLETPAREN */
	statep->ls_state = state;

	/* collect non-special or quoted characters to form word */
	while (!((c = getsc()) == 0
		 || ((state == SBASE || state == SHEREDELIM)
		     && ctype(c, C_LEX1))))
	{
		Xcheck(ws, wp);
		switch (state) {
		  case SBASE:
			if (c == '[' && (cf & (VARASN|ARRAYVAR))) {
				*wp = EOS; /* temporary */
				if (is_wdvarname(Xstring(ws, wp), false))
				{
					char *p, *tmp;

					if (arraysub(&tmp)) {
						*wp++ = CHAR;
						*wp++ = c;
						for (p = tmp; *p; ) {
							Xcheck(ws, wp);
							*wp++ = CHAR;
							*wp++ = *p++;
						}
						afree(tmp, ATEMP);
						break;
					} else {
						Source *s;

						/* no matching ]: push the text
						 * arraysub() consumed back as
						 * a re-read source
						 */
						s = pushs(SREREAD,
							  source->areap);
						s->start = s->str
							= s->u.freeme = tmp;
						s->next = source;
						source = s;
					}
				}
				*wp++ = CHAR;
				*wp++ = c;
				break;
			}
			/* fall through.. */
		  Sbase1:	/* includes *(...|...) pattern (*+?@!) */
#ifdef KSH
			if (c == '*' || c == '@' || c == '+' || c == '?'
			    || c == '!')
			{
				c2 = getsc();
				if (c2 == '(' /*)*/ ) {
					*wp++ = OPAT;
					*wp++ = c;
					PUSH_STATE(SPATTERN);
					break;
				}
				ungetsc(c2);
			}
#endif /* KSH */
			/* fall through.. */
		  Sbase2:	/* doesn't include *(...|...) pattern (*+?@!) */
			switch (c) {
			  case '\\':
				c = getsc();
				if (c) /* trailing \ is lost */
					*wp++ = QCHAR, *wp++ = c;
				break;
			  case '\'':
				*wp++ = OQUOTE;
				ignore_backslash_newline++;
				PUSH_STATE(SSQUOTE);
				break;
			  case '"':
				*wp++ = OQUOTE;
				PUSH_STATE(SDQUOTE);
				break;
			  default:
				goto Subst;
			}
			break;

		  Subst:
			/* handling shared by SBASE (via Sbase2), SDQUOTE
			 * and SWORD: backslash, $ and ` processing
			 */
			switch (c) {
			  Lex_state *s;
			  Lex_state *base;

			  case '\\':
				c = getsc();
				switch (c) {
				  case '\\':
				  case '$': case '`':
					*wp++ = QCHAR, *wp++ = c;
					break;
				  case '"':
					if ((cf & HEREDOC) == 0) {
						*wp++ = QCHAR, *wp++ = c;
						break;
					}
					/* FALLTHROUGH */
				  default:
					Xcheck(ws, wp);
					if (c) { /* trailing \ is lost */
						*wp++ = CHAR, *wp++ = '\\';
						*wp++ = CHAR, *wp++ = c;
					}
					break;
				}
				break;
			  case '$':
				c = getsc();
				if (c == '(') /*)*/ {
					c = getsc();
					if (c == '(') /*)*/ {
						PUSH_STATE(SASPAREN);
						statep->ls_sasparen.nparen = 2;
						statep->ls_sasparen.start =
							Xsavepos(ws, wp);
						*wp++ = EXPRSUB;
					} else {
						ungetsc(c);
						PUSH_STATE(SCSPAREN);
						statep->ls_scsparen.nparen = 1;
						statep->ls_scsparen.csstate = 0;
						*wp++ = COMSUB;
					}
				} else if (c == '{') /*}*/ {
					*wp++ = OSUBST;
					*wp++ = '{'; /*}*/
					wp = get_brace_var(&ws, wp);
					c = getsc();
					/* allow :# and :% (ksh88 compat) */
					if (c == ':') {
						*wp++ = CHAR, *wp++ = c;
						c = getsc();
					}
					/* If this is a trim operation,
					 * treat (,|,) specially in STBRACE.
					 */
					if (c == '#' || c == '%') {
						ungetsc(c);
						PUSH_STATE(STBRACE);
					} else {
						ungetsc(c);
						PUSH_STATE(SBRACE);
					}
				} else if (ctype(c, C_ALPHA)) {
					/* $name: emitted as OSUBST 'X' name
					 * NUL CSUBST 'X'
					 */
					*wp++ = OSUBST;
					*wp++ = 'X';
					do {
						Xcheck(ws, wp);
						*wp++ = c;
						c = getsc();
					} while (ctype(c, C_ALPHA|C_DIGIT));
					*wp++ = '\0';
					*wp++ = CSUBST;
					*wp++ = 'X';
					ungetsc(c);
				} else if (ctype(c, C_DIGIT|C_VAR1)) {
					/* single-character parameter */
					Xcheck(ws, wp);
					*wp++ = OSUBST;
					*wp++ = 'X';
					*wp++ = c;
					*wp++ = '\0';
					*wp++ = CSUBST;
					*wp++ = 'X';
				} else {
					/* a plain '$' */
					*wp++ = CHAR, *wp++ = '$';
					ungetsc(c);
				}
				break;
			  case '`':
				PUSH_STATE(SBQUOTE);
				*wp++ = COMSUB;
				/* Need to know if we are inside double quotes
				 * since sh/at&t-ksh translate the \" to " in
				 * "`..\"..`".  POSIX also requires this.
				 * An earlier version of ksh misinterpreted
				 * the POSIX specification and performed
				 * removal of backslash escapes only if
				 * posix mode was not in effect.
				 */
				statep->ls_sbquote.indquotes = 0;
				/* walk down the state stack, following the
				 * block links, looking for an SDQUOTE entry
				 */
				s = statep;
				base = state_info.base;
				while (1) {
					for (; s != base; s--) {
						if (s->ls_state == SDQUOTE) {
							statep->ls_sbquote.indquotes = 1;
							break;
						}
					}
					if (s != base)
						break;
					if (!(s = s->ls_info.base))
						break;
					base = s-- - STATE_BSIZE;
				}
				break;
			  default:
				*wp++ = CHAR, *wp++ = c;
			}
			break;

		  case SSQUOTE:
			if (c == '\'') {
				POP_STATE();
				*wp++ = CQUOTE;
				ignore_backslash_newline--;
			} else
				*wp++ = QCHAR, *wp++ = c;
			break;

		  case SDQUOTE:
			if (c == '"') {
				POP_STATE();
				*wp++ = CQUOTE;
			} else
				goto Subst;
			break;

		  case SCSPAREN: /* $( .. ) */
			/* todo: deal with $(...) quoting properly
			 * kludge to partly fake quoting inside $(..): doesn't
			 * really work because nested $(..) or ${..} inside
			 * double quotes aren't dealt with.
			 */
			switch (statep->ls_scsparen.csstate) {
			  case 0: /* normal */
				switch (c) {
				  case '(':
					statep->ls_scsparen.nparen++;
					break;
				  case ')':
					statep->ls_scsparen.nparen--;
					break;
				  case '\\':
					statep->ls_scsparen.csstate = 1;
					break;
				  case '"':
					statep->ls_scsparen.csstate = 2;
					break;
				  case '\'':
					statep->ls_scsparen.csstate = 4;
					ignore_backslash_newline++;
					break;
				}
				break;

			  case 1: /* backslash in normal mode */
			  case 3: /* backslash in double quotes */
				--statep->ls_scsparen.csstate;
				break;

			  case 2: /* double quotes */
				if (c == '"')
					statep->ls_scsparen.csstate = 0;
				else if (c == '\\')
					statep->ls_scsparen.csstate = 3;
				break;

			  case 4: /* single quotes */
				if (c == '\'') {
					statep->ls_scsparen.csstate = 0;
					ignore_backslash_newline--;
				}
				break;
			}
			if (statep->ls_scsparen.nparen == 0) {
				POP_STATE();
				*wp++ = 0; /* end of COMSUB */
			} else
				*wp++ = c;
			break;

		  case SASPAREN: /* $(( .. )) */
			/* todo: deal with $((...); (...)) properly */
			/* XXX should nest using existing state machine
			 *     (embed "..", $(...), etc.) */
			if (c == '(')
				statep->ls_sasparen.nparen++;
			else if (c == ')') {
				statep->ls_sasparen.nparen--;
				if (statep->ls_sasparen.nparen == 1) {
					/*(*/
					if ((c2 = getsc()) == ')') {
						POP_STATE();
						*wp++ = 0; /* end of EXPRSUB */
						break;
					} else {
						char *s;

						ungetsc(c2);
						/* mismatched parenthesis -
						 * assume we were really
						 * parsing a $(..) expression
						 */
						/* shift the collected text up
						 * one byte and rewrite its
						 * start as COMSUB '('
						 */
						s = Xrestpos(ws, wp,
						     statep->ls_sasparen.start);
						memmove(s + 1, s, wp - s);
						*s++ = COMSUB;
						*s = '('; /*)*/
						wp++;
						statep->ls_scsparen.nparen = 1;
						statep->ls_scsparen.csstate = 0;
						state = statep->ls_state
							= SCSPAREN;

					}
				}
			}
			*wp++ = c;
			break;

		  case SBRACE:
			/*{*/
			if (c == '}') {
				POP_STATE();
				*wp++ = CSUBST;
				*wp++ = /*{*/ '}';
			} else
				goto Sbase1;
			break;

		  case STBRACE:
			/* Same as SBRACE, except (,|,) treated specially */
			/*{*/
			if (c == '}') {
				POP_STATE();
				*wp++ = CSUBST;
				*wp++ = /*{*/ '}';
			} else if (c == '|') {
				*wp++ = SPAT;
			} else if (c == '(') {
				*wp++ = OPAT;
				*wp++ = ' ';	/* simile for @ */
				PUSH_STATE(SPATTERN);
			} else
				goto Sbase1;
			break;

		  case SBQUOTE:
			if (c == '`') {
				*wp++ = 0;
				POP_STATE();
			} else if (c == '\\') {
				switch (c = getsc()) {
				  case '\\':
				  case '$': case '`':
					*wp++ = c;
					break;
				  case '"':
					if (statep->ls_sbquote.indquotes) {
						*wp++ = c;
						break;
					}
					/* fall through.. */
				  default:
					if (c) { /* trailing \ is lost */
						*wp++ = '\\';
						*wp++ = c;
					}
					break;
				}
			} else
				*wp++ = c;
			break;

		  case SWORD:	/* ONEWORD */
			goto Subst;

#ifdef KSH
		  case SLETPAREN:	/* LETEXPR: (( ... )) */
			/*(*/
			if (c == ')') {
				if (statep->ls_sletparen.nparen > 0)
				    --statep->ls_sletparen.nparen;
				/*(*/
				else if ((c2 = getsc()) == ')') {
					c = 0;
					*wp++ = CQUOTE;
					goto Done;
				} else
					ungetsc(c2);
			} else if (c == '(')
				/* parenthesis inside quotes and backslashes
				 * are lost, but at&t ksh doesn't count them
				 * either
				 */
				++statep->ls_sletparen.nparen;
			goto Sbase2;
#endif /* KSH */

		  case SHEREDELIM:	/* <<,<<- delimiter */
			/* XXX chuck this state (and the next) - use
			 * the existing states ($ and \`..` should be
			 * stripped of their specialness after the
			 * fact).
			 */
			/* here delimiters need a special case since
			 * $ and `..` are not to be treated specially
			 */
			if (c == '\\') {
				c = getsc();
				if (c) { /* trailing \ is lost */
					*wp++ = QCHAR;
					*wp++ = c;
				}
			} else if (c == '\'') {
				PUSH_STATE(SSQUOTE);
				*wp++ = OQUOTE;
				ignore_backslash_newline++;
			} else if (c == '"') {
				/* switched in place, not pushed */
				state = statep->ls_state = SHEREDQUOTE;
				*wp++ = OQUOTE;
			} else {
				*wp++ = CHAR;
				*wp++ = c;
			}
			break;

		  case SHEREDQUOTE:	/* " in <<,<<- delimiter */
			if (c == '"') {
				*wp++ = CQUOTE;
				state = statep->ls_state = SHEREDELIM;
			} else {
				if (c == '\\') {
					switch (c = getsc()) {
					  case '\\': case '"':
					  case '$': case '`':
						break;
					  default:
						if (c) { /* trailing \ lost */
							*wp++ = CHAR;
							*wp++ = '\\';
						}
						break;
					}
				}
				*wp++ = CHAR;
				*wp++ = c;
			}
			break;

		  case SPATTERN:	/* in *(...|...) pattern (*+?@!) */
			if ( /*(*/ c == ')') {
				*wp++ = CPAT;
				POP_STATE();
			} else if (c == '|') {
				*wp++ = SPAT;
			} else if (c == '(') {
				*wp++ = OPAT;
				*wp++ = ' ';	/* simile for @ */
				PUSH_STATE(SPATTERN);
			} else
				goto Sbase1;
			break;
		}
	}
Done:
	Xcheck(ws, wp);
	/* the state stack must be back at its initial entry, otherwise some
	 * quote or substitution was left open
	 */
	if (statep != &states[1])
		/* XXX figure out what is missing */
		yyerror("no closing quote\n");

	/* This done to avoid tests for SHEREDELIM wherever SBASE tested */
	if (state == SHEREDELIM)
		state = SBASE;

	dp = Xstring(ws, wp);
	/* redirection: '<' or '>' preceded by nothing or by a single
	 * unquoted digit
	 */
	if ((c == '<' || c == '>') && state == SBASE
	    && ((c2 = Xlength(ws, wp)) == 0
	        || (c2 == 2 && dp[0] == CHAR && digit(dp[1]))))
	{
		struct ioword *iop =
				(struct ioword *) alloc(sizeof(*iop), ATEMP);

		if (c2 == 2)
			iop->unit = dp[1] - '0';
		else
			iop->unit = c == '>'; /* 0 for <, 1 for > */

		c2 = getsc();
		/* <<, >>, <> are ok, >< is not */
		if (c == c2 || (c == '<' && c2 == '>')) {
			iop->flag = c == c2 ?
				  (c == '>' ? IOCAT : IOHERE) : IORDWR;
			if (iop->flag == IOHERE) {
				if ((c2 = getsc()) == '-') {
					iop->flag |= IOSKIP;
				} else {
					ungetsc(c2);
				}
			}
		} else if (c2 == '&')
			iop->flag = IODUP | (c == '<' ? IORDUP : 0);
		else {
			iop->flag = c == '>' ? IOWRITE : IOREAD;
			if (c == '>' && c2 == '|')
				iop->flag |= IOCLOB;
			else
				ungetsc(c2);
		}

		iop->name = (char *) 0;
		iop->delim = (char *) 0;
		iop->heredoc = (char *) 0;
		Xfree(ws, wp);	/* free word */
		yylval.iop = iop;
		return REDIR;
	}

	if (wp == dp && state == SBASE) {
		Xfree(ws, wp);	/* free word */
		/* no word, process LEX1 character */
		switch (c) {
		  default:
			return c;

		  case '|':
		  case '&':
		  case ';':
			if ((c2 = getsc()) == c)
				c = (c == ';') ? BREAK :
				    (c == '|') ? LOGOR :
				    (c == '&') ? LOGAND :
				    YYERRCODE;
#ifdef KSH
			else if (c == '|' && c2 == '&')
				c = COPROC;
#endif /* KSH */
			else
				ungetsc(c2);
			return c;

		  case '\n':
			/* read the bodies of any pending here documents */
			gethere();
			if (cf & CONTIN)
				goto Again;
			return c;

		  case '(':  /*)*/
#ifdef KSH
			if ((c2 = getsc()) == '(') /*)*/
				/* XXX need to handle ((...); (...)) */
				c = MDPAREN;
			else
				ungetsc(c2);
#endif /* KSH */
			return c;
		  /*(*/
		  case ')':
			return c;
		}
	}

	*wp++ = EOS;		/* terminate word */
	yylval.cp = Xclose(ws, wp);
	if (state == SWORD
#ifdef KSH
		|| state == SLETPAREN
#endif /* KSH */
		)	/* ONEWORD? */
		return LWORD;
	ungetsc(c);		/* unget terminator */

	/* copy word to unprefixed string ident */
	for (sp = yylval.cp, dp = ident; dp < ident+IDENT && (c = *sp++) == CHAR; )
		*dp++ = *sp++;
	/* Make sure the ident array stays '\0' padded */
	memset(dp, 0, (ident+IDENT) - dp + 1);
	/* the copy stopped on something other than EOS: the word has a
	 * non-CHAR element or did not fit, so it cannot be a keyword/alias
	 */
	if (c != EOS)
		*ident = '\0';	/* word is not unquoted */

	if (*ident != '\0' && (cf&(KEYWORD|ALIAS))) {
		struct tbl *p;
		int h = hash(ident);

		/* { */
		if ((cf & KEYWORD) && (p = mytsearch(&keywords, ident, h))
		    && (!(cf & ESACONLY) || p->val.i == ESAC || p->val.i == '}'))
		{
			afree(yylval.cp, ATEMP);
			return p->val.i;
		}
		if ((cf & ALIAS) && (p = mytsearch(&aliases, ident, h))
		    && (p->flag & ISSET))
		{
			Source *s;

			/* an alias already being expanded on the source
			 * stack is returned as a plain word
			 */
			for (s = source; s->type == SALIAS; s = s->next)
				if (s->u.tblp == p)
					return LWORD;
			/* push alias expansion */
			s = pushs(SALIAS, source->areap);
			s->start = s->str = p->val.s;
			s->u.tblp = p;
			s->next = source;
			source = s;
			afree(yylval.cp, ATEMP);
			goto Again;
		}
	}

	return LWORD;
}
764 
765 static void
gethere()766 gethere()
767 {
768 	struct ioword **p;
769 
770 	for (p = heres; p < herep; p++)
771 		readhere(*p);
772 	herep = heres;
773 }
774 
775 /*
776  * read "<<word" text into temp file
777  */
778 
779 static void
readhere(iop)780 readhere(iop)
781 	struct ioword *iop;
782 {
783 	int c;
784 	char *volatile eof;
785 	char *eofp;
786 	int skiptabs;
787 	XString xs;
788 	char *xp;
789 	int xpos;
790 
791 	eof = evalstr(iop->delim, 0);
792 
793 	if (!(iop->flag & IOEVAL))
794 		ignore_backslash_newline++;
795 
796 	Xinit(xs, xp, 256, ATEMP);
797 
798 	for (;;) {
799 		eofp = eof;
800 		skiptabs = iop->flag & IOSKIP;
801 		xpos = Xsavepos(xs, xp);
802 		while ((c = getsc()) != 0) {
803 			if (skiptabs) {
804 				if (c == '\t')
805 					continue;
806 				skiptabs = 0;
807 			}
808 			if (c != *eofp)
809 				break;
810 			Xcheck(xs, xp);
811 			Xput(xs, xp, c);
812 			eofp++;
813 		}
814 		/* Allow EOF here so commands with out trailing newlines
815 		 * will work (eg, ksh -c '...', $(...), etc).
816 		 */
817 		if (*eofp == '\0' && (c == 0 || c == '\n')) {
818 			xp = Xrestpos(xs, xp, xpos);
819 			break;
820 		}
821 		ungetsc(c);
822 		while ((c = getsc()) != '\n') {
823 			if (c == 0)
824 				yyerror("here document `%s' unclosed\n", eof);
825 			Xcheck(xs, xp);
826 			Xput(xs, xp, c);
827 		}
828 		Xcheck(xs, xp);
829 		Xput(xs, xp, c);
830 	}
831 	Xput(xs, xp, '\0');
832 	iop->heredoc = Xclose(xs, xp);
833 
834 	if (!(iop->flag & IOEVAL))
835 		ignore_backslash_newline--;
836 }
837 
838 void
yyerror(const char * fmt,...)839 yyerror(const char *fmt, ...)
840 {
841 	va_list va;
842 
843 	/* pop aliases and re-reads */
844 	while (source->type == SALIAS || source->type == SREREAD)
845 		source = source->next;
846 	source->str = null;	/* zap pending input */
847 
848 	error_prefix(true);
849 	va_start(va, fmt);
850 	shf_vfprintf(shl_out, fmt, va);
851 	va_end(va);
852 	errorf("%s", null);
853 }
854 
855 /*
856  * input for yylex with alias expansion
857  */
858 
859 Source *
pushs(type,areap)860 pushs(type, areap)
861 	int type;
862 	Area *areap;
863 {
864 	Source *s;
865 
866 	s = (Source *) alloc(sizeof(Source), areap);
867 	s->type = type;
868 	s->str = null;
869 	s->start = NULL;
870 	s->line = 0;
871 	s->errline = 0;
872 	s->file = NULL;
873 	s->flags = 0;
874 	s->next = NULL;
875 	s->areap = areap;
876 	if (type == SFILE || type == SSTDIN) {
877 		char *dummy;
878 		Xinit(s->xs, dummy, 256, s->areap);
879 	} else
880 		memset(&s->xs, 0, sizeof(s->xs));
881 	return s;
882 }
883 
/*
 * Slow path of getsc_(): called when the current source buffer is
 * exhausted.  Refills or pops the source according to its type and returns
 * the next character, or 0 at end of input.
 */
static int
getsc__()
{
	Source *s = source;
	int c;

	while ((c = *s->str++) == 0) {
		s->str = NULL;		/* return 0 for EOF by default */
		switch (s->type) {
		  case SEOF:
			s->str = null;
			return 0;

		  case SSTDIN:
		  case SFILE:
			/* read the next line into s->xs */
			getsc_line(s);
			break;

		  case SWSTR:
			break;

		  case SSTRING:
			break;

		  case SWORDS:
			/* next word of the vector, then a separator */
			s->start = s->str = *s->u.strv++;
			s->type = SWORDSEP;
			break;

		  case SWORDSEP:
			/* newline after the last word, space otherwise */
			if (*s->u.strv == NULL) {
				s->start = s->str = newline;
				s->type = SEOF;
			} else {
				s->start = s->str = space;
				s->type = SWORDS;
			}
			break;

		  case SALIAS:
			if (s->flags & SF_ALIASEND) {
				/* pass on an unused SF_ALIAS flag */
				source = s->next;
				source->flags |= s->flags & SF_ALIAS;
				s = source;
			} else if (*s->u.tblp->val.s
				 && isspace((unsigned char)strchr(s->u.tblp->val.s, 0)[-1]))
			{
				source = s = s->next;	/* pop source stack */
				/* Note that this alias ended with a space,
				 * enabling alias expansion on the following
				 * word.
				 */
				s->flags |= SF_ALIAS;
			} else {
				/* At this point, we need to keep the current
				 * alias in the source list so recursive
				 * aliases can be detected and we also need
				 * to return the next character.  Do this
				 * by temporarily popping the alias to get
				 * the next character and then put it back
				 * in the source list with the SF_ALIASEND
				 * flag set.
				 */
				source = s->next;	/* pop source stack */
				source->flags |= s->flags & SF_ALIAS;
				c = getsc__();
				if (c) {
					/* re-push the alias holding just the
					 * character read; the loop condition
					 * then picks it up from ugbuf
					 */
					s->flags |= SF_ALIASEND;
					s->ugbuf[0] = c; s->ugbuf[1] = '\0';
					s->start = s->str = s->ugbuf;
					s->next = source;
					source = s;
				} else {
					s = source;
					/* avoid reading eof twice */
					s->str = NULL;
					break;
				}
			}
			continue;

		  case SREREAD:
			/* a re-read built by ungetsc() uses ugbuf and owns
			 * no separate text to free
			 */
			if (s->start != s->ugbuf) /* yuck */
				afree(s->u.freeme, ATEMP);
			source = s = s->next;
			continue;
		}
		/* nothing was supplied above: this source is finished */
		if (s->str == NULL) {
			s->type = SEOF;
			s->start = s->str = null;
			return '\0';
		}
		if (s->flags & SF_ECHO) {
			shf_puts(s->str, shl_out);
			shf_flush(shl_out);
		}
	}
	return c;
}
984 
/*
 * Read the next line of input for an SFILE/SSTDIN source into s->xs and
 * point s->start/s->str at it.  On end of input s->str is set to NULL.
 * For an interactive source this also prints the prompt (or uses the line
 * editor), records the line in the history and switches to the PS2 prompt.
 */
static void
getsc_line(s)
	Source *s;
{
	char *xp = Xstring(s->xs, xp);
	int interactive = Flag(FTALKING) && s->type == SSTDIN;
	int have_tty = interactive && (s->flags & SF_TTY);

	/* Done here to ensure nothing odd happens when a timeout occurs */
	XcheckN(s->xs, xp, LINE);
	*xp = '\0';
	s->start = s->str = xp;

#ifdef KSH
	/* arm the read timeout */
	if (have_tty && ksh_tmout) {
		ksh_tmout_state = TMOUT_READING;
		alarm(ksh_tmout);
	}
#endif /* KSH */
#ifdef EDIT
	if (have_tty && (0
# ifdef VI
			 || Flag(FVI)
# endif /* VI */
# ifdef EMACS
			 || Flag(FEMACS) || Flag(FGMACS)
# endif /* EMACS */
		))
	{
		int nread;

		/* line editing enabled: x_read() supplies the line */
		nread = x_read(xp, LINE);
		if (nread < 0)	/* read error */
			nread = 0;
		xp[nread] = '\0';
		xp += nread;
	}
	else
#endif /* EDIT */
	{
		if (interactive) {
			pprompt(prompt, 0);
		} else
			s->line++;

		while (1) {
			char *p = shf_getse(xp, Xnleft(s->xs, xp), s->u.shf);

			/* interrupted read: run pending traps and retry */
			if (!p && shf_error(s->u.shf)
			    && shf_errno(s->u.shf) == EINTR)
			{
				shf_clearerr(s->u.shf);
				if (trap)
					runtraps(0);
				continue;
			}
			/* stop on EOF/error or once a full line was read */
			if (!p || (xp = p, xp[-1] == '\n'))
				break;
			/* double buffer size */
			xp++; /* move past null so doubling works... */
			XcheckN(s->xs, xp, Xlength(s->xs, xp));
			xp--; /* ...and move back again */
		}
		/* flush any unwanted input so other programs/builtins
		 * can read it.  Not very optimal, but less error prone
		 * than flushing else where, dealing with redirections,
		 * etc..
		 * todo: reduce size of shf buffer (~128?) if SSTDIN
		 */
		if (s->type == SSTDIN)
			shf_flush(s->u.shf);
	}
	/* XXX: temporary kludge to restore source after a
	 * trap may have been executed.
	 */
	source = s;
#ifdef KSH
	/* disarm the read timeout */
	if (have_tty && ksh_tmout)
	{
		ksh_tmout_state = TMOUT_EXECUTING;
		alarm(0);
	}
#endif /* KSH */
	s->start = s->str = Xstring(s->xs, xp);
	/* NOTE(review): strip_nuls() is defined elsewhere; presumably it
	 * removes embedded NUL bytes from the line - confirm.
	 */
	strip_nuls(Xstring(s->xs, xp), Xlength(s->xs, xp));
	/* Note: if input is all nulls, this is not eof */
	if (Xlength(s->xs, xp) == 0) { /* EOF */
		if (s->type == SFILE)
			shf_fdclose(s->u.shf);
		s->str = NULL;
	} else if (interactive) {
#ifdef HISTORY
		char *p = Xstring(s->xs, xp);
		/* at the PS1 prompt, skip leading IFS white space so that
		 * a blank line is not saved
		 */
		if (cur_prompt == PS1)
			while (*p && ctype(*p, C_IFS) && ctype(*p, C_IFSWS))
				p++;
		if (*p) {
# ifdef EASY_HISTORY
			if (cur_prompt == PS2)
				histappend(Xstring(s->xs, xp), 1);
			else
# endif /* EASY_HISTORY */
			{
				s->line++;
				histsave(s->line, s->str, 1);
			}
		}
#endif /* HISTORY */
	}
	if (interactive)
		set_prompt(PS2, (Source *) 0);
}
1097 
/*
 * Select the prompt to use: records `to' in cur_prompt and sets the global
 * `prompt' string.
 *
 * to: PS1 (command) or PS2 (command continuation); other values only
 *     update cur_prompt.
 * s:  source whose line number replaces '!' in PS1 (KSH only); may be
 *     NULL, in which case 0 is used.
 */
void
set_prompt(to, s)
	int to;
	Source *s;
{
	cur_prompt = to;

	switch (to) {
	case PS1: /* command */
#ifdef KSH
		/* Substitute ! and !! here, before substitutions are done
		 * so ! in expanded variables are not expanded.
		 * NOTE: this is not what at&t ksh does (it does it after
		 * substitutions, POSIX doesn't say which is to be done.
		 */
		{
			struct shf *shf;
			char * volatile ps1;
			Area *saved_atemp;

			ps1 = str_val(global("PS1"));
			shf = shf_sopen((char *) 0, strlen(ps1) * 2,
				SHF_WR | SHF_DYNAMIC, (struct shf *) 0);
			/* "!!" yields a literal '!', a lone '!' yields the
			 * line number, everything else is copied
			 */
			while (*ps1) {
				if (*ps1 != '!' || *++ps1 == '!')
					shf_putchar(*ps1++, shf);
				else
					shf_fprintf(shf, "%d",
						s ? s->line + 1 : 0);
			}
			ps1 = shf_sclose(shf);
			/* remember the current ATEMP: newenv() is entered
			 * next and the result is saved in the area that was
			 * current before it
			 */
			saved_atemp = ATEMP;
			newenv(E_ERRH);
			if (ksh_sigsetjmp(e->jbuf, 0)) {
				prompt = safe_prompt;
				/* Don't print an error - assume it has already
				 * been printed.  Reason is we may have forked
				 * to run a command and the child may be
				 * unwinding its stack through this code as it
				 * exits.
				 */
			} else
				prompt = str_save(substitute(ps1, 0),
						 saved_atemp);
			quitenv();
		}
#else /* KSH */
		prompt = str_val(global("PS1"));
#endif /* KSH */
		break;

	case PS2: /* command continuation */
		prompt = str_val(global("PS2"));
		break;
	}
}
1154 
1155 /* See also related routine, promptlen() in edit.c */
1156 void
pprompt(cp,ntruncate)1157 pprompt(cp, ntruncate)
1158 	const char *cp;
1159 	int ntruncate;
1160 {
1161 #if 0
1162 	char nbuf[32];
1163 	int c;
1164 
1165 	while (*cp != 0) {
1166 		if (*cp != '!')
1167 			c = *cp++;
1168 		else if (*++cp == '!')
1169 			c = *cp++;
1170 		else {
1171 			int len;
1172 			char *p;
1173 
1174 			shf_snprintf(p = nbuf, sizeof(nbuf), "%d",
1175 				source->line + 1);
1176 			len = strlen(nbuf);
1177 			if (ntruncate) {
1178 				if (ntruncate >= len) {
1179 					ntruncate -= len;
1180 					continue;
1181 				}
1182 				p += ntruncate;
1183 				len -= ntruncate;
1184 				ntruncate = 0;
1185 			}
1186 			shf_write(p, len, shl_out);
1187 			continue;
1188 		}
1189 		if (ntruncate)
1190 			--ntruncate;
1191 		else
1192 			shf_putc(c, shl_out);
1193 	}
1194 #endif /* 0 */
1195 	shf_puts(cp + ntruncate, shl_out);
1196 	shf_flush(shl_out);
1197 }
1198 
/* Read the variable part of a ${...} expression (ie, up to but not including
 * the :[-+?=#%] or close-brace).
 *
 * wsp: the expandable string being built; wp: current write position in it.
 * The characters of the variable part are appended followed by a '\0', the
 * character that ended it is pushed back with ungetsc(), and the updated
 * write position is returned.
 */
static char *
get_brace_var(wsp, wp)
	XString *wsp;
	char *wp;
{
	enum parse_state {
			   PS_INITIAL, PS_SAW_HASH, PS_IDENT,
			   PS_NUMBER, PS_VAR1, PS_END
			 }
		state;
	char c;

	state = PS_INITIAL;
	while (1) {
		c = getsc();
		/* State machine to figure out where the variable part ends. */
		switch (state) {
		  case PS_INITIAL:
			/* a leading '#' is kept and the name follows it */
			if (c == '#') {
				state = PS_SAW_HASH;
				break;
			}
			/* fall through.. */
		  case PS_SAW_HASH:
			if (letter(c))
				state = PS_IDENT;
			else if (digit(c))
				state = PS_NUMBER;
			else if (ctype(c, C_VAR1))
				state = PS_VAR1;
			else
				state = PS_END;
			break;
		  case PS_IDENT:
			if (!letnum(c)) {
				state = PS_END;
				if (c == '[') {
					char *tmp, *p;

					if (!arraysub(&tmp))
						yyerror("missing ]\n");
					*wp++ = c;
					for (p = tmp; *p; ) {
						Xcheck(*wsp, wp);
						*wp++ = *p++;
					}
					afree(tmp, ATEMP);
					/* arraysub() already stored the
					 * closing ] in tmp, so this reads the
					 * character that follows it
					 */
					c = getsc();
				}
			}
			break;
		  case PS_NUMBER:
			if (!digit(c))
				state = PS_END;
			break;
		  case PS_VAR1:
			/* special parameters are a single character */
			state = PS_END;
			break;
		  case PS_END: /* keep gcc happy */
			break;
		}
		if (state == PS_END) {
			*wp++ = '\0';	/* end of variable part */
			ungetsc(c);
			break;
		}
		Xcheck(*wsp, wp);
		*wp++ = c;
	}
	return wp;
}
1273 
1274 /*
1275  * Save an array subscript - returns true if matching bracket found, false
1276  * if eof or newline was found.
1277  * (Returned string double null terminated)
1278  */
1279 static int
arraysub(strp)1280 arraysub(strp)
1281 	char **strp;
1282 {
1283 	XString ws;
1284 	char	*wp;
1285 	char	c;
1286 	int 	depth = 1;	/* we are just past the initial [ */
1287 
1288 	Xinit(ws, wp, 32, ATEMP);
1289 
1290 	do {
1291 		c = getsc();
1292 		Xcheck(ws, wp);
1293 		*wp++ = c;
1294 		if (c == '[')
1295 			depth++;
1296 		else if (c == ']')
1297 			depth--;
1298 	} while (depth > 0 && c && c != '\n');
1299 
1300 	*wp++ = '\0';
1301 	*strp = Xclose(ws, wp);
1302 
1303 	return depth == 0 ? 1 : 0;
1304 }
1305 
1306 /* Unget a char: handles case when we are already at the start of the buffer */
1307 static const char *
ungetsc(c)1308 ungetsc(c)
1309 	int c;
1310 {
1311 	if (backslash_skip)
1312 		backslash_skip--;
1313 	/* Don't unget eof... */
1314 	if (source->str == null && c == '\0')
1315 		return source->str;
1316 	if (source->str > source->start)
1317 		source->str--;
1318 	else {
1319 		Source *s;
1320 
1321 		s = pushs(SREREAD, source->areap);
1322 		s->ugbuf[0] = c; s->ugbuf[1] = '\0';
1323 		s->start = s->str = s->ugbuf;
1324 		s->next = source;
1325 		source = s;
1326 	}
1327 	return source->str;
1328 }
1329 
1330 
1331 /* Called to get a char that isn't a \newline sequence. */
1332 static int
getsc_bn(void)1333 getsc_bn ARGS((void))
1334 {
1335 	int c, c2;
1336 
1337 	if (ignore_backslash_newline)
1338 		return getsc_();
1339 
1340 	if (backslash_skip == 1) {
1341 		backslash_skip = 2;
1342 		return getsc_();
1343 	}
1344 
1345 	backslash_skip = 0;
1346 
1347 	while (1) {
1348 		c = getsc_();
1349 		if (c == '\\') {
1350 			if ((c2 = getsc_()) == '\n')
1351 				/* ignore the \newline; get the next char... */
1352 				continue;
1353 			ungetsc(c2);
1354 			backslash_skip = 1;
1355 		}
1356 		return c;
1357 	}
1358 }
1359 
1360 static Lex_state *
push_state_(si,old_end)1361 push_state_(si, old_end)
1362 	State_info *si;
1363 	Lex_state *old_end;
1364 {
1365 	Lex_state	*new = alloc(sizeof(Lex_state) * STATE_BSIZE, ATEMP);
1366 
1367 	new[0].ls_info.base = old_end;
1368 	si->base = &new[0];
1369 	si->end = &new[STATE_BSIZE];
1370 	return &new[1];
1371 }
1372 
1373 static Lex_state *
pop_state_(si,old_end)1374 pop_state_(si, old_end)
1375 	State_info *si;
1376 	Lex_state *old_end;
1377 {
1378 	Lex_state *old_base = si->base;
1379 
1380 	si->base = old_end->ls_info.base - STATE_BSIZE;
1381 	si->end = old_end->ls_info.base;
1382 
1383 	afree(old_base, ATEMP);
1384 
1385 	return si->base + STATE_BSIZE - 1;
1386 }
1387