/*	$NetBSD: lex.c,v 1.24 2019/09/26 11:01:09 mlelstv Exp $	*/

/*
 * lexical analysis and source input
 */
6 #include <sys/cdefs.h>
7
8 #ifndef lint
9 __RCSID("$NetBSD: lex.c,v 1.24 2019/09/26 11:01:09 mlelstv Exp $");
10 #endif
11
12
13 #include "sh.h"
14 #include <ctype.h>
15
16
/* Structure to keep track of the lexing state and the various pieces of info
 * needed for each particular state.
 *
 * States live in blocks of STATE_BSIZE entries.  Entry 0 of a block is
 * never used as a state: its ls_info.base member links the block to the
 * previous one (null for the first block, which is an automatic array in
 * yylex()).  See push_state_() and pop_state_().
 */
typedef struct lex_state Lex_state;
struct lex_state {
	int ls_state;		/* state code (SBASE, SDQUOTE, ...) */
	union {
	    /* $(...) */
	    struct scsparen_info {
		    int nparen;		/* count open parenthesis */
		    /* Quoting sub-state used while copying $(...):
		     * 0 normal, 1 backslash in normal mode,
		     * 2 double quotes, 3 backslash in double quotes,
		     * 4 single quotes.
		     */
		    int csstate; /* XXX remove */
#define ls_scsparen ls_info.u_scsparen
	    } u_scsparen;

	    /* $((...)) */
	    struct sasparen_info {
		    int nparen;		/* count open parenthesis */
		    int start;		/* marks start of $(( in output str */
#define ls_sasparen ls_info.u_sasparen
	    } u_sasparen;

	    /* ((...)) */
	    struct sletparen_info {
		    int nparen;		/* count open parenthesis */
#define ls_sletparen ls_info.u_sletparen
	    } u_sletparen;

	    /* `...` */
	    struct sbquote_info {
		    int indquotes;	/* true if in double quotes: "`...`" */
#define ls_sbquote ls_info.u_sbquote
	    } u_sbquote;

	    Lex_state *base;	/* used to point to next state block */
	} ls_info;
};
53
54 typedef struct State_info State_info;
55 struct State_info {
56 Lex_state *base;
57 Lex_state *end;
58 };
59
60
61 static void readhere ARGS((struct ioword *iop));
62 static int getsc__ ARGS((void));
63 static void getsc_line ARGS((Source *s));
64 static int getsc_bn ARGS((void));
65 static char *get_brace_var ARGS((XString *wsp, char *wp));
66 static int arraysub ARGS((char **strp));
67 static const char *ungetsc ARGS((int c));
68 static void gethere ARGS((void));
69 static Lex_state *push_state_ ARGS((State_info *si, Lex_state *old_end));
70 static Lex_state *pop_state_ ARGS((State_info *si, Lex_state *old_end));
71
72 static int backslash_skip;
73 static int ignore_backslash_newline;
74
75 /* optimized getsc_bn() */
76 #define getsc() (*source->str != '\0' && *source->str != '\\' \
77 && !backslash_skip ? *source->str++ : getsc_bn())
78 /* optimized getsc__() */
79 #define getsc_() ((*source->str != '\0') ? *source->str++ : getsc__())
80
81 #define STATE_BSIZE 32
82
83 #define PUSH_STATE(s) do { \
84 if (++statep == state_info.end) \
85 statep = push_state_(&state_info, statep); \
86 state = statep->ls_state = (s); \
87 } while (0)
88
89 #define POP_STATE() do { \
90 if (--statep == state_info.base) \
91 statep = pop_state_(&state_info, statep); \
92 state = statep->ls_state; \
93 } while (0)
94
95
96
97 /*
98 * Lexical analyzer
99 *
100 * tokens are not regular expressions, they are LL(1).
101 * for example, "${var:-${PWD}}", and "$(size $(whence ksh))".
102 * hence the state stack.
103 */
104
105 int
yylex(cf)106 yylex(cf)
107 int cf;
108 {
109 Lex_state states[STATE_BSIZE], *statep;
110 State_info state_info;
111 int c, state;
112 XString ws; /* expandable output word */
113 char *wp; /* output word pointer */
114 char *sp, *dp;
115 int c2;
116
117
118 Again:
119 states[0].ls_state = -1;
120 states[0].ls_info.base = (Lex_state *) 0;
121 statep = &states[1];
122 state_info.base = states;
123 state_info.end = &states[STATE_BSIZE];
124
125 Xinit(ws, wp, 64, ATEMP);
126
127 backslash_skip = 0;
128 ignore_backslash_newline = 0;
129
130 if (cf&ONEWORD)
131 state = SWORD;
132 #ifdef KSH
133 else if (cf&LETEXPR) {
134 *wp++ = OQUOTE; /* enclose arguments in (double) quotes */
135 state = SLETPAREN;
136 statep->ls_sletparen.nparen = 0;
137 }
138 #endif /* KSH */
139 else { /* normal lexing */
140 state = (cf & HEREDELIM) ? SHEREDELIM : SBASE;
141 while ((c = getsc()) == ' ' || c == '\t')
142 ;
143 if (c == '#') {
144 ignore_backslash_newline++;
145 while ((c = getsc()) != '\0' && c != '\n')
146 ;
147 ignore_backslash_newline--;
148 }
149 ungetsc(c);
150 }
151 if (source->flags & SF_ALIAS) { /* trailing ' ' in alias definition */
152 source->flags &= ~SF_ALIAS;
153 /* In POSIX mode, a trailing space only counts if we are
154 * parsing a simple command
155 */
156 if (!Flag(FPOSIX) || (cf & CMDWORD))
157 cf |= ALIAS;
158 }
159
160 /* Initial state: one of SBASE SHEREDELIM SWORD SASPAREN */
161 statep->ls_state = state;
162
163 /* collect non-special or quoted characters to form word */
164 while (!((c = getsc()) == 0
165 || ((state == SBASE || state == SHEREDELIM)
166 && ctype(c, C_LEX1))))
167 {
168 Xcheck(ws, wp);
169 switch (state) {
170 case SBASE:
171 if (c == '[' && (cf & (VARASN|ARRAYVAR))) {
172 *wp = EOS; /* temporary */
173 if (is_wdvarname(Xstring(ws, wp), false))
174 {
175 char *p, *tmp;
176
177 if (arraysub(&tmp)) {
178 *wp++ = CHAR;
179 *wp++ = c;
180 for (p = tmp; *p; ) {
181 Xcheck(ws, wp);
182 *wp++ = CHAR;
183 *wp++ = *p++;
184 }
185 afree(tmp, ATEMP);
186 break;
187 } else {
188 Source *s;
189
190 s = pushs(SREREAD,
191 source->areap);
192 s->start = s->str
193 = s->u.freeme = tmp;
194 s->next = source;
195 source = s;
196 }
197 }
198 *wp++ = CHAR;
199 *wp++ = c;
200 break;
201 }
202 /* fall through.. */
203 Sbase1: /* includes *(...|...) pattern (*+?@!) */
204 #ifdef KSH
205 if (c == '*' || c == '@' || c == '+' || c == '?'
206 || c == '!')
207 {
208 c2 = getsc();
209 if (c2 == '(' /*)*/ ) {
210 *wp++ = OPAT;
211 *wp++ = c;
212 PUSH_STATE(SPATTERN);
213 break;
214 }
215 ungetsc(c2);
216 }
217 #endif /* KSH */
218 /* fall through.. */
219 Sbase2: /* doesn't include *(...|...) pattern (*+?@!) */
220 switch (c) {
221 case '\\':
222 c = getsc();
223 if (c) /* trailing \ is lost */
224 *wp++ = QCHAR, *wp++ = c;
225 break;
226 case '\'':
227 *wp++ = OQUOTE;
228 ignore_backslash_newline++;
229 PUSH_STATE(SSQUOTE);
230 break;
231 case '"':
232 *wp++ = OQUOTE;
233 PUSH_STATE(SDQUOTE);
234 break;
235 default:
236 goto Subst;
237 }
238 break;
239
240 Subst:
241 switch (c) {
242 Lex_state *s;
243 Lex_state *base;
244
245 case '\\':
246 c = getsc();
247 switch (c) {
248 case '\\':
249 case '$': case '`':
250 *wp++ = QCHAR, *wp++ = c;
251 break;
252 case '"':
253 if ((cf & HEREDOC) == 0) {
254 *wp++ = QCHAR, *wp++ = c;
255 break;
256 }
257 /* FALLTHROUGH */
258 default:
259 Xcheck(ws, wp);
260 if (c) { /* trailing \ is lost */
261 *wp++ = CHAR, *wp++ = '\\';
262 *wp++ = CHAR, *wp++ = c;
263 }
264 break;
265 }
266 break;
267 case '$':
268 c = getsc();
269 if (c == '(') /*)*/ {
270 c = getsc();
271 if (c == '(') /*)*/ {
272 PUSH_STATE(SASPAREN);
273 statep->ls_sasparen.nparen = 2;
274 statep->ls_sasparen.start =
275 Xsavepos(ws, wp);
276 *wp++ = EXPRSUB;
277 } else {
278 ungetsc(c);
279 PUSH_STATE(SCSPAREN);
280 statep->ls_scsparen.nparen = 1;
281 statep->ls_scsparen.csstate = 0;
282 *wp++ = COMSUB;
283 }
284 } else if (c == '{') /*}*/ {
285 *wp++ = OSUBST;
286 *wp++ = '{'; /*}*/
287 wp = get_brace_var(&ws, wp);
288 c = getsc();
289 /* allow :# and :% (ksh88 compat) */
290 if (c == ':') {
291 *wp++ = CHAR, *wp++ = c;
292 c = getsc();
293 }
294 /* If this is a trim operation,
295 * treat (,|,) specially in STBRACE.
296 */
297 if (c == '#' || c == '%') {
298 ungetsc(c);
299 PUSH_STATE(STBRACE);
300 } else {
301 ungetsc(c);
302 PUSH_STATE(SBRACE);
303 }
304 } else if (ctype(c, C_ALPHA)) {
305 *wp++ = OSUBST;
306 *wp++ = 'X';
307 do {
308 Xcheck(ws, wp);
309 *wp++ = c;
310 c = getsc();
311 } while (ctype(c, C_ALPHA|C_DIGIT));
312 *wp++ = '\0';
313 *wp++ = CSUBST;
314 *wp++ = 'X';
315 ungetsc(c);
316 } else if (ctype(c, C_DIGIT|C_VAR1)) {
317 Xcheck(ws, wp);
318 *wp++ = OSUBST;
319 *wp++ = 'X';
320 *wp++ = c;
321 *wp++ = '\0';
322 *wp++ = CSUBST;
323 *wp++ = 'X';
324 } else {
325 *wp++ = CHAR, *wp++ = '$';
326 ungetsc(c);
327 }
328 break;
329 case '`':
330 PUSH_STATE(SBQUOTE);
331 *wp++ = COMSUB;
332 /* Need to know if we are inside double quotes
333 * since sh/at&t-ksh translate the \" to " in
334 * "`..\"..`". POSIX also requires this.
335 * An earlier version of ksh misinterpreted
336 * the POSIX specification and performed
337 * removal of backslash escapes only if
338 * posix mode was not in effect.
339 */
340 statep->ls_sbquote.indquotes = 0;
341 s = statep;
342 base = state_info.base;
343 while (1) {
344 for (; s != base; s--) {
345 if (s->ls_state == SDQUOTE) {
346 statep->ls_sbquote.indquotes = 1;
347 break;
348 }
349 }
350 if (s != base)
351 break;
352 if (!(s = s->ls_info.base))
353 break;
354 base = s-- - STATE_BSIZE;
355 }
356 break;
357 default:
358 *wp++ = CHAR, *wp++ = c;
359 }
360 break;
361
362 case SSQUOTE:
363 if (c == '\'') {
364 POP_STATE();
365 *wp++ = CQUOTE;
366 ignore_backslash_newline--;
367 } else
368 *wp++ = QCHAR, *wp++ = c;
369 break;
370
371 case SDQUOTE:
372 if (c == '"') {
373 POP_STATE();
374 *wp++ = CQUOTE;
375 } else
376 goto Subst;
377 break;
378
379 case SCSPAREN: /* $( .. ) */
380 /* todo: deal with $(...) quoting properly
381 * kludge to partly fake quoting inside $(..): doesn't
382 * really work because nested $(..) or ${..} inside
383 * double quotes aren't dealt with.
384 */
385 switch (statep->ls_scsparen.csstate) {
386 case 0: /* normal */
387 switch (c) {
388 case '(':
389 statep->ls_scsparen.nparen++;
390 break;
391 case ')':
392 statep->ls_scsparen.nparen--;
393 break;
394 case '\\':
395 statep->ls_scsparen.csstate = 1;
396 break;
397 case '"':
398 statep->ls_scsparen.csstate = 2;
399 break;
400 case '\'':
401 statep->ls_scsparen.csstate = 4;
402 ignore_backslash_newline++;
403 break;
404 }
405 break;
406
407 case 1: /* backslash in normal mode */
408 case 3: /* backslash in double quotes */
409 --statep->ls_scsparen.csstate;
410 break;
411
412 case 2: /* double quotes */
413 if (c == '"')
414 statep->ls_scsparen.csstate = 0;
415 else if (c == '\\')
416 statep->ls_scsparen.csstate = 3;
417 break;
418
419 case 4: /* single quotes */
420 if (c == '\'') {
421 statep->ls_scsparen.csstate = 0;
422 ignore_backslash_newline--;
423 }
424 break;
425 }
426 if (statep->ls_scsparen.nparen == 0) {
427 POP_STATE();
428 *wp++ = 0; /* end of COMSUB */
429 } else
430 *wp++ = c;
431 break;
432
433 case SASPAREN: /* $(( .. )) */
434 /* todo: deal with $((...); (...)) properly */
435 /* XXX should nest using existing state machine
436 * (embed "..", $(...), etc.) */
437 if (c == '(')
438 statep->ls_sasparen.nparen++;
439 else if (c == ')') {
440 statep->ls_sasparen.nparen--;
441 if (statep->ls_sasparen.nparen == 1) {
442 /*(*/
443 if ((c2 = getsc()) == ')') {
444 POP_STATE();
445 *wp++ = 0; /* end of EXPRSUB */
446 break;
447 } else {
448 char *s;
449
450 ungetsc(c2);
451 /* mismatched parenthesis -
452 * assume we were really
453 * parsing a $(..) expression
454 */
455 s = Xrestpos(ws, wp,
456 statep->ls_sasparen.start);
457 memmove(s + 1, s, wp - s);
458 *s++ = COMSUB;
459 *s = '('; /*)*/
460 wp++;
461 statep->ls_scsparen.nparen = 1;
462 statep->ls_scsparen.csstate = 0;
463 state = statep->ls_state
464 = SCSPAREN;
465
466 }
467 }
468 }
469 *wp++ = c;
470 break;
471
472 case SBRACE:
473 /*{*/
474 if (c == '}') {
475 POP_STATE();
476 *wp++ = CSUBST;
477 *wp++ = /*{*/ '}';
478 } else
479 goto Sbase1;
480 break;
481
482 case STBRACE:
483 /* Same as SBRACE, except (,|,) treated specially */
484 /*{*/
485 if (c == '}') {
486 POP_STATE();
487 *wp++ = CSUBST;
488 *wp++ = /*{*/ '}';
489 } else if (c == '|') {
490 *wp++ = SPAT;
491 } else if (c == '(') {
492 *wp++ = OPAT;
493 *wp++ = ' '; /* simile for @ */
494 PUSH_STATE(SPATTERN);
495 } else
496 goto Sbase1;
497 break;
498
499 case SBQUOTE:
500 if (c == '`') {
501 *wp++ = 0;
502 POP_STATE();
503 } else if (c == '\\') {
504 switch (c = getsc()) {
505 case '\\':
506 case '$': case '`':
507 *wp++ = c;
508 break;
509 case '"':
510 if (statep->ls_sbquote.indquotes) {
511 *wp++ = c;
512 break;
513 }
514 /* fall through.. */
515 default:
516 if (c) { /* trailing \ is lost */
517 *wp++ = '\\';
518 *wp++ = c;
519 }
520 break;
521 }
522 } else
523 *wp++ = c;
524 break;
525
526 case SWORD: /* ONEWORD */
527 goto Subst;
528
529 #ifdef KSH
530 case SLETPAREN: /* LETEXPR: (( ... )) */
531 /*(*/
532 if (c == ')') {
533 if (statep->ls_sletparen.nparen > 0)
534 --statep->ls_sletparen.nparen;
535 /*(*/
536 else if ((c2 = getsc()) == ')') {
537 c = 0;
538 *wp++ = CQUOTE;
539 goto Done;
540 } else
541 ungetsc(c2);
542 } else if (c == '(')
543 /* parenthesis inside quotes and backslashes
544 * are lost, but at&t ksh doesn't count them
545 * either
546 */
547 ++statep->ls_sletparen.nparen;
548 goto Sbase2;
549 #endif /* KSH */
550
551 case SHEREDELIM: /* <<,<<- delimiter */
552 /* XXX chuck this state (and the next) - use
553 * the existing states ($ and \`..` should be
554 * stripped of their specialness after the
555 * fact).
556 */
557 /* here delimiters need a special case since
558 * $ and `..` are not to be treated specially
559 */
560 if (c == '\\') {
561 c = getsc();
562 if (c) { /* trailing \ is lost */
563 *wp++ = QCHAR;
564 *wp++ = c;
565 }
566 } else if (c == '\'') {
567 PUSH_STATE(SSQUOTE);
568 *wp++ = OQUOTE;
569 ignore_backslash_newline++;
570 } else if (c == '"') {
571 state = statep->ls_state = SHEREDQUOTE;
572 *wp++ = OQUOTE;
573 } else {
574 *wp++ = CHAR;
575 *wp++ = c;
576 }
577 break;
578
579 case SHEREDQUOTE: /* " in <<,<<- delimiter */
580 if (c == '"') {
581 *wp++ = CQUOTE;
582 state = statep->ls_state = SHEREDELIM;
583 } else {
584 if (c == '\\') {
585 switch (c = getsc()) {
586 case '\\': case '"':
587 case '$': case '`':
588 break;
589 default:
590 if (c) { /* trailing \ lost */
591 *wp++ = CHAR;
592 *wp++ = '\\';
593 }
594 break;
595 }
596 }
597 *wp++ = CHAR;
598 *wp++ = c;
599 }
600 break;
601
602 case SPATTERN: /* in *(...|...) pattern (*+?@!) */
603 if ( /*(*/ c == ')') {
604 *wp++ = CPAT;
605 POP_STATE();
606 } else if (c == '|') {
607 *wp++ = SPAT;
608 } else if (c == '(') {
609 *wp++ = OPAT;
610 *wp++ = ' '; /* simile for @ */
611 PUSH_STATE(SPATTERN);
612 } else
613 goto Sbase1;
614 break;
615 }
616 }
617 Done:
618 Xcheck(ws, wp);
619 if (statep != &states[1])
620 /* XXX figure out what is missing */
621 yyerror("no closing quote\n");
622
623 /* This done to avoid tests for SHEREDELIM wherever SBASE tested */
624 if (state == SHEREDELIM)
625 state = SBASE;
626
627 dp = Xstring(ws, wp);
628 if ((c == '<' || c == '>') && state == SBASE
629 && ((c2 = Xlength(ws, wp)) == 0
630 || (c2 == 2 && dp[0] == CHAR && digit(dp[1]))))
631 {
632 struct ioword *iop =
633 (struct ioword *) alloc(sizeof(*iop), ATEMP);
634
635 if (c2 == 2)
636 iop->unit = dp[1] - '0';
637 else
638 iop->unit = c == '>'; /* 0 for <, 1 for > */
639
640 c2 = getsc();
641 /* <<, >>, <> are ok, >< is not */
642 if (c == c2 || (c == '<' && c2 == '>')) {
643 iop->flag = c == c2 ?
644 (c == '>' ? IOCAT : IOHERE) : IORDWR;
645 if (iop->flag == IOHERE) {
646 if ((c2 = getsc()) == '-') {
647 iop->flag |= IOSKIP;
648 } else {
649 ungetsc(c2);
650 }
651 }
652 } else if (c2 == '&')
653 iop->flag = IODUP | (c == '<' ? IORDUP : 0);
654 else {
655 iop->flag = c == '>' ? IOWRITE : IOREAD;
656 if (c == '>' && c2 == '|')
657 iop->flag |= IOCLOB;
658 else
659 ungetsc(c2);
660 }
661
662 iop->name = (char *) 0;
663 iop->delim = (char *) 0;
664 iop->heredoc = (char *) 0;
665 Xfree(ws, wp); /* free word */
666 yylval.iop = iop;
667 return REDIR;
668 }
669
670 if (wp == dp && state == SBASE) {
671 Xfree(ws, wp); /* free word */
672 /* no word, process LEX1 character */
673 switch (c) {
674 default:
675 return c;
676
677 case '|':
678 case '&':
679 case ';':
680 if ((c2 = getsc()) == c)
681 c = (c == ';') ? BREAK :
682 (c == '|') ? LOGOR :
683 (c == '&') ? LOGAND :
684 YYERRCODE;
685 #ifdef KSH
686 else if (c == '|' && c2 == '&')
687 c = COPROC;
688 #endif /* KSH */
689 else
690 ungetsc(c2);
691 return c;
692
693 case '\n':
694 gethere();
695 if (cf & CONTIN)
696 goto Again;
697 return c;
698
699 case '(': /*)*/
700 #ifdef KSH
701 if ((c2 = getsc()) == '(') /*)*/
702 /* XXX need to handle ((...); (...)) */
703 c = MDPAREN;
704 else
705 ungetsc(c2);
706 #endif /* KSH */
707 return c;
708 /*(*/
709 case ')':
710 return c;
711 }
712 }
713
714 *wp++ = EOS; /* terminate word */
715 yylval.cp = Xclose(ws, wp);
716 if (state == SWORD
717 #ifdef KSH
718 || state == SLETPAREN
719 #endif /* KSH */
720 ) /* ONEWORD? */
721 return LWORD;
722 ungetsc(c); /* unget terminator */
723
724 /* copy word to unprefixed string ident */
725 for (sp = yylval.cp, dp = ident; dp < ident+IDENT && (c = *sp++) == CHAR; )
726 *dp++ = *sp++;
727 /* Make sure the ident array stays '\0' padded */
728 memset(dp, 0, (ident+IDENT) - dp + 1);
729 if (c != EOS)
730 *ident = '\0'; /* word is not unquoted */
731
732 if (*ident != '\0' && (cf&(KEYWORD|ALIAS))) {
733 struct tbl *p;
734 int h = hash(ident);
735
736 /* { */
737 if ((cf & KEYWORD) && (p = mytsearch(&keywords, ident, h))
738 && (!(cf & ESACONLY) || p->val.i == ESAC || p->val.i == '}'))
739 {
740 afree(yylval.cp, ATEMP);
741 return p->val.i;
742 }
743 if ((cf & ALIAS) && (p = mytsearch(&aliases, ident, h))
744 && (p->flag & ISSET))
745 {
746 Source *s;
747
748 for (s = source; s->type == SALIAS; s = s->next)
749 if (s->u.tblp == p)
750 return LWORD;
751 /* push alias expansion */
752 s = pushs(SALIAS, source->areap);
753 s->start = s->str = p->val.s;
754 s->u.tblp = p;
755 s->next = source;
756 source = s;
757 afree(yylval.cp, ATEMP);
758 goto Again;
759 }
760 }
761
762 return LWORD;
763 }
764
765 static void
gethere()766 gethere()
767 {
768 struct ioword **p;
769
770 for (p = heres; p < herep; p++)
771 readhere(*p);
772 herep = heres;
773 }
774
775 /*
776 * read "<<word" text into temp file
777 */
778
779 static void
readhere(iop)780 readhere(iop)
781 struct ioword *iop;
782 {
783 int c;
784 char *volatile eof;
785 char *eofp;
786 int skiptabs;
787 XString xs;
788 char *xp;
789 int xpos;
790
791 eof = evalstr(iop->delim, 0);
792
793 if (!(iop->flag & IOEVAL))
794 ignore_backslash_newline++;
795
796 Xinit(xs, xp, 256, ATEMP);
797
798 for (;;) {
799 eofp = eof;
800 skiptabs = iop->flag & IOSKIP;
801 xpos = Xsavepos(xs, xp);
802 while ((c = getsc()) != 0) {
803 if (skiptabs) {
804 if (c == '\t')
805 continue;
806 skiptabs = 0;
807 }
808 if (c != *eofp)
809 break;
810 Xcheck(xs, xp);
811 Xput(xs, xp, c);
812 eofp++;
813 }
814 /* Allow EOF here so commands with out trailing newlines
815 * will work (eg, ksh -c '...', $(...), etc).
816 */
817 if (*eofp == '\0' && (c == 0 || c == '\n')) {
818 xp = Xrestpos(xs, xp, xpos);
819 break;
820 }
821 ungetsc(c);
822 while ((c = getsc()) != '\n') {
823 if (c == 0)
824 yyerror("here document `%s' unclosed\n", eof);
825 Xcheck(xs, xp);
826 Xput(xs, xp, c);
827 }
828 Xcheck(xs, xp);
829 Xput(xs, xp, c);
830 }
831 Xput(xs, xp, '\0');
832 iop->heredoc = Xclose(xs, xp);
833
834 if (!(iop->flag & IOEVAL))
835 ignore_backslash_newline--;
836 }
837
838 void
yyerror(const char * fmt,...)839 yyerror(const char *fmt, ...)
840 {
841 va_list va;
842
843 /* pop aliases and re-reads */
844 while (source->type == SALIAS || source->type == SREREAD)
845 source = source->next;
846 source->str = null; /* zap pending input */
847
848 error_prefix(true);
849 va_start(va, fmt);
850 shf_vfprintf(shl_out, fmt, va);
851 va_end(va);
852 errorf("%s", null);
853 }
854
855 /*
856 * input for yylex with alias expansion
857 */
858
859 Source *
pushs(type,areap)860 pushs(type, areap)
861 int type;
862 Area *areap;
863 {
864 Source *s;
865
866 s = (Source *) alloc(sizeof(Source), areap);
867 s->type = type;
868 s->str = null;
869 s->start = NULL;
870 s->line = 0;
871 s->errline = 0;
872 s->file = NULL;
873 s->flags = 0;
874 s->next = NULL;
875 s->areap = areap;
876 if (type == SFILE || type == SSTDIN) {
877 char *dummy;
878 Xinit(s->xs, dummy, 256, s->areap);
879 } else
880 memset(&s->xs, 0, sizeof(s->xs));
881 return s;
882 }
883
884 static int
getsc__()885 getsc__()
886 {
887 Source *s = source;
888 int c;
889
890 while ((c = *s->str++) == 0) {
891 s->str = NULL; /* return 0 for EOF by default */
892 switch (s->type) {
893 case SEOF:
894 s->str = null;
895 return 0;
896
897 case SSTDIN:
898 case SFILE:
899 getsc_line(s);
900 break;
901
902 case SWSTR:
903 break;
904
905 case SSTRING:
906 break;
907
908 case SWORDS:
909 s->start = s->str = *s->u.strv++;
910 s->type = SWORDSEP;
911 break;
912
913 case SWORDSEP:
914 if (*s->u.strv == NULL) {
915 s->start = s->str = newline;
916 s->type = SEOF;
917 } else {
918 s->start = s->str = space;
919 s->type = SWORDS;
920 }
921 break;
922
923 case SALIAS:
924 if (s->flags & SF_ALIASEND) {
925 /* pass on an unused SF_ALIAS flag */
926 source = s->next;
927 source->flags |= s->flags & SF_ALIAS;
928 s = source;
929 } else if (*s->u.tblp->val.s
930 && isspace((unsigned char)strchr(s->u.tblp->val.s, 0)[-1]))
931 {
932 source = s = s->next; /* pop source stack */
933 /* Note that this alias ended with a space,
934 * enabling alias expansion on the following
935 * word.
936 */
937 s->flags |= SF_ALIAS;
938 } else {
939 /* At this point, we need to keep the current
940 * alias in the source list so recursive
941 * aliases can be detected and we also need
942 * to return the next character. Do this
943 * by temporarily popping the alias to get
944 * the next character and then put it back
945 * in the source list with the SF_ALIASEND
946 * flag set.
947 */
948 source = s->next; /* pop source stack */
949 source->flags |= s->flags & SF_ALIAS;
950 c = getsc__();
951 if (c) {
952 s->flags |= SF_ALIASEND;
953 s->ugbuf[0] = c; s->ugbuf[1] = '\0';
954 s->start = s->str = s->ugbuf;
955 s->next = source;
956 source = s;
957 } else {
958 s = source;
959 /* avoid reading eof twice */
960 s->str = NULL;
961 break;
962 }
963 }
964 continue;
965
966 case SREREAD:
967 if (s->start != s->ugbuf) /* yuck */
968 afree(s->u.freeme, ATEMP);
969 source = s = s->next;
970 continue;
971 }
972 if (s->str == NULL) {
973 s->type = SEOF;
974 s->start = s->str = null;
975 return '\0';
976 }
977 if (s->flags & SF_ECHO) {
978 shf_puts(s->str, shl_out);
979 shf_flush(shl_out);
980 }
981 }
982 return c;
983 }
984
985 static void
getsc_line(s)986 getsc_line(s)
987 Source *s;
988 {
989 char *xp = Xstring(s->xs, xp);
990 int interactive = Flag(FTALKING) && s->type == SSTDIN;
991 int have_tty = interactive && (s->flags & SF_TTY);
992
993 /* Done here to ensure nothing odd happens when a timeout occurs */
994 XcheckN(s->xs, xp, LINE);
995 *xp = '\0';
996 s->start = s->str = xp;
997
998 #ifdef KSH
999 if (have_tty && ksh_tmout) {
1000 ksh_tmout_state = TMOUT_READING;
1001 alarm(ksh_tmout);
1002 }
1003 #endif /* KSH */
1004 #ifdef EDIT
1005 if (have_tty && (0
1006 # ifdef VI
1007 || Flag(FVI)
1008 # endif /* VI */
1009 # ifdef EMACS
1010 || Flag(FEMACS) || Flag(FGMACS)
1011 # endif /* EMACS */
1012 ))
1013 {
1014 int nread;
1015
1016 nread = x_read(xp, LINE);
1017 if (nread < 0) /* read error */
1018 nread = 0;
1019 xp[nread] = '\0';
1020 xp += nread;
1021 }
1022 else
1023 #endif /* EDIT */
1024 {
1025 if (interactive) {
1026 pprompt(prompt, 0);
1027 } else
1028 s->line++;
1029
1030 while (1) {
1031 char *p = shf_getse(xp, Xnleft(s->xs, xp), s->u.shf);
1032
1033 if (!p && shf_error(s->u.shf)
1034 && shf_errno(s->u.shf) == EINTR)
1035 {
1036 shf_clearerr(s->u.shf);
1037 if (trap)
1038 runtraps(0);
1039 continue;
1040 }
1041 if (!p || (xp = p, xp[-1] == '\n'))
1042 break;
1043 /* double buffer size */
1044 xp++; /* move past null so doubling works... */
1045 XcheckN(s->xs, xp, Xlength(s->xs, xp));
1046 xp--; /* ...and move back again */
1047 }
1048 /* flush any unwanted input so other programs/builtins
1049 * can read it. Not very optimal, but less error prone
1050 * than flushing else where, dealing with redirections,
1051 * etc..
1052 * todo: reduce size of shf buffer (~128?) if SSTDIN
1053 */
1054 if (s->type == SSTDIN)
1055 shf_flush(s->u.shf);
1056 }
1057 /* XXX: temporary kludge to restore source after a
1058 * trap may have been executed.
1059 */
1060 source = s;
1061 #ifdef KSH
1062 if (have_tty && ksh_tmout)
1063 {
1064 ksh_tmout_state = TMOUT_EXECUTING;
1065 alarm(0);
1066 }
1067 #endif /* KSH */
1068 s->start = s->str = Xstring(s->xs, xp);
1069 strip_nuls(Xstring(s->xs, xp), Xlength(s->xs, xp));
1070 /* Note: if input is all nulls, this is not eof */
1071 if (Xlength(s->xs, xp) == 0) { /* EOF */
1072 if (s->type == SFILE)
1073 shf_fdclose(s->u.shf);
1074 s->str = NULL;
1075 } else if (interactive) {
1076 #ifdef HISTORY
1077 char *p = Xstring(s->xs, xp);
1078 if (cur_prompt == PS1)
1079 while (*p && ctype(*p, C_IFS) && ctype(*p, C_IFSWS))
1080 p++;
1081 if (*p) {
1082 # ifdef EASY_HISTORY
1083 if (cur_prompt == PS2)
1084 histappend(Xstring(s->xs, xp), 1);
1085 else
1086 # endif /* EASY_HISTORY */
1087 {
1088 s->line++;
1089 histsave(s->line, s->str, 1);
1090 }
1091 }
1092 #endif /* HISTORY */
1093 }
1094 if (interactive)
1095 set_prompt(PS2, (Source *) 0);
1096 }
1097
1098 void
set_prompt(to,s)1099 set_prompt(to, s)
1100 int to;
1101 Source *s;
1102 {
1103 cur_prompt = to;
1104
1105 switch (to) {
1106 case PS1: /* command */
1107 #ifdef KSH
1108 /* Substitute ! and !! here, before substitutions are done
1109 * so ! in expanded variables are not expanded.
1110 * NOTE: this is not what at&t ksh does (it does it after
1111 * substitutions, POSIX doesn't say which is to be done.
1112 */
1113 {
1114 struct shf *shf;
1115 char * volatile ps1;
1116 Area *saved_atemp;
1117
1118 ps1 = str_val(global("PS1"));
1119 shf = shf_sopen((char *) 0, strlen(ps1) * 2,
1120 SHF_WR | SHF_DYNAMIC, (struct shf *) 0);
1121 while (*ps1) {
1122 if (*ps1 != '!' || *++ps1 == '!')
1123 shf_putchar(*ps1++, shf);
1124 else
1125 shf_fprintf(shf, "%d",
1126 s ? s->line + 1 : 0);
1127 }
1128 ps1 = shf_sclose(shf);
1129 saved_atemp = ATEMP;
1130 newenv(E_ERRH);
1131 if (ksh_sigsetjmp(e->jbuf, 0)) {
1132 prompt = safe_prompt;
1133 /* Don't print an error - assume it has already
1134 * been printed. Reason is we may have forked
1135 * to run a command and the child may be
1136 * unwinding its stack through this code as it
1137 * exits.
1138 */
1139 } else
1140 prompt = str_save(substitute(ps1, 0),
1141 saved_atemp);
1142 quitenv();
1143 }
1144 #else /* KSH */
1145 prompt = str_val(global("PS1"));
1146 #endif /* KSH */
1147 break;
1148
1149 case PS2: /* command continuation */
1150 prompt = str_val(global("PS2"));
1151 break;
1152 }
1153 }
1154
1155 /* See also related routine, promptlen() in edit.c */
1156 void
pprompt(cp,ntruncate)1157 pprompt(cp, ntruncate)
1158 const char *cp;
1159 int ntruncate;
1160 {
1161 #if 0
1162 char nbuf[32];
1163 int c;
1164
1165 while (*cp != 0) {
1166 if (*cp != '!')
1167 c = *cp++;
1168 else if (*++cp == '!')
1169 c = *cp++;
1170 else {
1171 int len;
1172 char *p;
1173
1174 shf_snprintf(p = nbuf, sizeof(nbuf), "%d",
1175 source->line + 1);
1176 len = strlen(nbuf);
1177 if (ntruncate) {
1178 if (ntruncate >= len) {
1179 ntruncate -= len;
1180 continue;
1181 }
1182 p += ntruncate;
1183 len -= ntruncate;
1184 ntruncate = 0;
1185 }
1186 shf_write(p, len, shl_out);
1187 continue;
1188 }
1189 if (ntruncate)
1190 --ntruncate;
1191 else
1192 shf_putc(c, shl_out);
1193 }
1194 #endif /* 0 */
1195 shf_puts(cp + ntruncate, shl_out);
1196 shf_flush(shl_out);
1197 }
1198
1199 /* Read the variable part of a ${...} expression (ie, up to but not including
1200 * the :[-+?=#%] or close-brace.
1201 */
1202 static char *
get_brace_var(wsp,wp)1203 get_brace_var(wsp, wp)
1204 XString *wsp;
1205 char *wp;
1206 {
1207 enum parse_state {
1208 PS_INITIAL, PS_SAW_HASH, PS_IDENT,
1209 PS_NUMBER, PS_VAR1, PS_END
1210 }
1211 state;
1212 char c;
1213
1214 state = PS_INITIAL;
1215 while (1) {
1216 c = getsc();
1217 /* State machine to figure out where the variable part ends. */
1218 switch (state) {
1219 case PS_INITIAL:
1220 if (c == '#') {
1221 state = PS_SAW_HASH;
1222 break;
1223 }
1224 /* fall through.. */
1225 case PS_SAW_HASH:
1226 if (letter(c))
1227 state = PS_IDENT;
1228 else if (digit(c))
1229 state = PS_NUMBER;
1230 else if (ctype(c, C_VAR1))
1231 state = PS_VAR1;
1232 else
1233 state = PS_END;
1234 break;
1235 case PS_IDENT:
1236 if (!letnum(c)) {
1237 state = PS_END;
1238 if (c == '[') {
1239 char *tmp, *p;
1240
1241 if (!arraysub(&tmp))
1242 yyerror("missing ]\n");
1243 *wp++ = c;
1244 for (p = tmp; *p; ) {
1245 Xcheck(*wsp, wp);
1246 *wp++ = *p++;
1247 }
1248 afree(tmp, ATEMP);
1249 c = getsc(); /* the ] */
1250 }
1251 }
1252 break;
1253 case PS_NUMBER:
1254 if (!digit(c))
1255 state = PS_END;
1256 break;
1257 case PS_VAR1:
1258 state = PS_END;
1259 break;
1260 case PS_END: /* keep gcc happy */
1261 break;
1262 }
1263 if (state == PS_END) {
1264 *wp++ = '\0'; /* end of variable part */
1265 ungetsc(c);
1266 break;
1267 }
1268 Xcheck(*wsp, wp);
1269 *wp++ = c;
1270 }
1271 return wp;
1272 }
1273
1274 /*
1275 * Save an array subscript - returns true if matching bracket found, false
1276 * if eof or newline was found.
1277 * (Returned string double null terminated)
1278 */
1279 static int
arraysub(strp)1280 arraysub(strp)
1281 char **strp;
1282 {
1283 XString ws;
1284 char *wp;
1285 char c;
1286 int depth = 1; /* we are just past the initial [ */
1287
1288 Xinit(ws, wp, 32, ATEMP);
1289
1290 do {
1291 c = getsc();
1292 Xcheck(ws, wp);
1293 *wp++ = c;
1294 if (c == '[')
1295 depth++;
1296 else if (c == ']')
1297 depth--;
1298 } while (depth > 0 && c && c != '\n');
1299
1300 *wp++ = '\0';
1301 *strp = Xclose(ws, wp);
1302
1303 return depth == 0 ? 1 : 0;
1304 }
1305
1306 /* Unget a char: handles case when we are already at the start of the buffer */
1307 static const char *
ungetsc(c)1308 ungetsc(c)
1309 int c;
1310 {
1311 if (backslash_skip)
1312 backslash_skip--;
1313 /* Don't unget eof... */
1314 if (source->str == null && c == '\0')
1315 return source->str;
1316 if (source->str > source->start)
1317 source->str--;
1318 else {
1319 Source *s;
1320
1321 s = pushs(SREREAD, source->areap);
1322 s->ugbuf[0] = c; s->ugbuf[1] = '\0';
1323 s->start = s->str = s->ugbuf;
1324 s->next = source;
1325 source = s;
1326 }
1327 return source->str;
1328 }
1329
1330
1331 /* Called to get a char that isn't a \newline sequence. */
1332 static int
getsc_bn(void)1333 getsc_bn ARGS((void))
1334 {
1335 int c, c2;
1336
1337 if (ignore_backslash_newline)
1338 return getsc_();
1339
1340 if (backslash_skip == 1) {
1341 backslash_skip = 2;
1342 return getsc_();
1343 }
1344
1345 backslash_skip = 0;
1346
1347 while (1) {
1348 c = getsc_();
1349 if (c == '\\') {
1350 if ((c2 = getsc_()) == '\n')
1351 /* ignore the \newline; get the next char... */
1352 continue;
1353 ungetsc(c2);
1354 backslash_skip = 1;
1355 }
1356 return c;
1357 }
1358 }
1359
1360 static Lex_state *
push_state_(si,old_end)1361 push_state_(si, old_end)
1362 State_info *si;
1363 Lex_state *old_end;
1364 {
1365 Lex_state *new = alloc(sizeof(Lex_state) * STATE_BSIZE, ATEMP);
1366
1367 new[0].ls_info.base = old_end;
1368 si->base = &new[0];
1369 si->end = &new[STATE_BSIZE];
1370 return &new[1];
1371 }
1372
1373 static Lex_state *
pop_state_(si,old_end)1374 pop_state_(si, old_end)
1375 State_info *si;
1376 Lex_state *old_end;
1377 {
1378 Lex_state *old_base = si->base;
1379
1380 si->base = old_end->ls_info.base - STATE_BSIZE;
1381 si->end = old_end->ls_info.base;
1382
1383 afree(old_base, ATEMP);
1384
1385 return si->base + STATE_BSIZE - 1;
1386 }
1387