1 /*
2 * lexical analysis and source input
3 */
4
5 #include "sh.h"
6 #include <ctype.h>
7
8
/*
 * One entry of the lexer's state stack.
 *
 * ls_state holds the state code; ls_info carries the extra data needed
 * by the states that track something (parenthesis depth, quoting, ...).
 * The `base' member of the union is not tied to a lexing state: it is
 * used in the first entry of a block of states to link to another block.
 */
typedef struct lex_state Lex_state;
struct lex_state {
	int ls_state;
	union {
		/* $(...) */
		struct scsparen_info {
			int nparen;	/* number of open parentheses */
			int csstate;	/* quoting sub-state; XXX remove */
		} u_scsparen;

		/* $((...)) */
		struct sasparen_info {
			int nparen;	/* number of open parentheses */
			int start;	/* where the $(( starts in the output string */
		} u_sasparen;

		/* ((...)) */
		struct sletparen_info {
			int nparen;	/* number of open parentheses */
		} u_sletparen;

		/* `...` */
		struct sbquote_info {
			int indquotes;	/* true if inside double quotes: "`...`" */
		} u_sbquote;

		Lex_state *base;	/* link to the next block of states */
	} ls_info;
};

/* Shorthands to reach the per-state data directly from a Lex_state */
#define ls_scsparen	ls_info.u_scsparen
#define ls_sasparen	ls_info.u_sasparen
#define ls_sletparen	ls_info.u_sletparen
#define ls_sbquote	ls_info.u_sbquote
45
46 typedef struct State_info State_info;
47 struct State_info {
48 Lex_state *base;
49 Lex_state *end;
50 };
51
52
53 static void readhere ARGS((struct ioword *iop));
54 static int getsc__ ARGS((void));
55 static void getsc_line ARGS((Source *s));
56 static int getsc_bn ARGS((void));
57 static char *get_brace_var ARGS((XString *wsp, char *wp));
58 static int arraysub ARGS((char **strp));
59 static const char *ungetsc ARGS((int c));
60 static void gethere ARGS((void));
61 static Lex_state *push_state_ ARGS((State_info *si, Lex_state *old_end));
62 static Lex_state *pop_state_ ARGS((State_info *si, Lex_state *old_end));
63
64 static int backslash_skip;
65 static int ignore_backslash_newline;
66
/* Inline fast path of getsc_bn(): taken when the next buffered character
 * is neither the end of the buffer nor a backslash and no backslash is
 * pending; everything else is left to getsc_bn().
 */
#define getsc() \
	(*source->str != '\0' && *source->str != '\\' && !backslash_skip \
	 ? *source->str++ : getsc_bn())

/* Inline fast path of getsc__(): only an exhausted buffer needs the call. */
#define getsc_()	((*source->str != '\0') ? *source->str++ : getsc__())

/* Number of Lex_state entries in one block of the state stack */
#define STATE_BSIZE	32

/* Enter lexing state (s).  Uses yylex()'s locals statep, state and
 * state_info; a new block is obtained when the current one is full.
 */
#define PUSH_STATE(s)	do { \
		if (++statep == state_info.end) \
			statep = push_state_(&state_info, statep); \
		state = statep->ls_state = (s); \
	} while (0)

/* Leave the current lexing state and resume the one below it, going back
 * to the previous block when the current block becomes empty.
 */
#define POP_STATE()	do { \
		if (--statep == state_info.base) \
			statep = pop_state_(&state_info, statep); \
		state = statep->ls_state; \
	} while (0)
86
87
88
89 /*
90 * Lexical analyzer
91 *
92 * tokens are not regular expressions, they are LL(1).
93 * for example, "${var:-${PWD}}", and "$(size $(whence ksh))".
94 * hence the state stack.
95 */
96
97 int
yylex(cf)98 yylex(cf)
99 int cf;
100 {
101 Lex_state states[STATE_BSIZE], *statep;
102 State_info state_info;
103 register int c, state;
104 XString ws; /* expandable output word */
105 register char *wp; /* output word pointer */
106 char *sp, *dp;
107 int c2;
108
109
110 Again:
111 states[0].ls_state = -1;
112 states[0].ls_info.base = (Lex_state *) 0;
113 statep = &states[1];
114 state_info.base = states;
115 state_info.end = &states[STATE_BSIZE];
116
117 Xinit(ws, wp, 64, ATEMP);
118
119 backslash_skip = 0;
120 ignore_backslash_newline = 0;
121
122 if (cf&ONEWORD)
123 state = SWORD;
124 #ifdef KSH
125 else if (cf&LETEXPR) {
126 *wp++ = OQUOTE; /* enclose arguments in (double) quotes */
127 state = SLETPAREN;
128 statep->ls_sletparen.nparen = 0;
129 }
130 #endif /* KSH */
131 else { /* normal lexing */
132 state = (cf & HEREDELIM) ? SHEREDELIM : SBASE;
133 while ((c = getsc()) == ' ' || c == '\t')
134 ;
135 if (c == '#') {
136 ignore_backslash_newline++;
137 while ((c = getsc()) != '\0' && c != '\n')
138 ;
139 ignore_backslash_newline--;
140 }
141 ungetsc(c);
142 }
143 if (source->flags & SF_ALIAS) { /* trailing ' ' in alias definition */
144 source->flags &= ~SF_ALIAS;
145 /* In POSIX mode, a trailing space only counts if we are
146 * parsing a simple command
147 */
148 if (!Flag(FPOSIX) || (cf & CMDWORD))
149 cf |= ALIAS;
150 }
151
152 /* Initial state: one of SBASE SHEREDELIM SWORD SASPAREN */
153 statep->ls_state = state;
154
155 /* collect non-special or quoted characters to form word */
156 while (!((c = getsc()) == 0
157 || ((state == SBASE || state == SHEREDELIM)
158 && ctype(c, C_LEX1))))
159 {
160 Xcheck(ws, wp);
161 switch (state) {
162 case SBASE:
163 if (c == '[' && (cf & (VARASN|ARRAYVAR))) {
164 *wp = EOS; /* temporary */
165 if (is_wdvarname(Xstring(ws, wp), FALSE))
166 {
167 char *p, *tmp;
168
169 if (arraysub(&tmp)) {
170 *wp++ = CHAR;
171 *wp++ = c;
172 for (p = tmp; *p; ) {
173 Xcheck(ws, wp);
174 *wp++ = CHAR;
175 *wp++ = *p++;
176 }
177 afree(tmp, ATEMP);
178 break;
179 } else {
180 Source *s;
181
182 s = pushs(SREREAD,
183 source->areap);
184 s->start = s->str
185 = s->u.freeme = tmp;
186 s->next = source;
187 source = s;
188 }
189 }
190 *wp++ = CHAR;
191 *wp++ = c;
192 break;
193 }
194 /* fall through.. */
195 Sbase1: /* includes *(...|...) pattern (*+?@!) */
196 #ifdef KSH
197 if (c == '*' || c == '@' || c == '+' || c == '?'
198 || c == '!')
199 {
200 c2 = getsc();
201 if (c2 == '(' /*)*/ ) {
202 *wp++ = OPAT;
203 *wp++ = c;
204 PUSH_STATE(SPATTERN);
205 break;
206 }
207 ungetsc(c2);
208 }
209 #endif /* KSH */
210 /* fall through.. */
211 Sbase2: /* doesn't include *(...|...) pattern (*+?@!) */
212 switch (c) {
213 case '\\':
214 c = getsc();
215 #ifdef OS2
216 if (isalnum(c)) {
217 *wp++ = CHAR, *wp++ = '\\';
218 *wp++ = CHAR, *wp++ = c;
219 } else
220 #endif
221 if (c) /* trailing \ is lost */
222 *wp++ = QCHAR, *wp++ = c;
223 break;
224 case '\'':
225 *wp++ = OQUOTE;
226 ignore_backslash_newline++;
227 PUSH_STATE(SSQUOTE);
228 break;
229 case '"':
230 *wp++ = OQUOTE;
231 PUSH_STATE(SDQUOTE);
232 break;
233 default:
234 goto Subst;
235 }
236 break;
237
238 Subst:
239 switch (c) {
240 case '\\':
241 c = getsc();
242 switch (c) {
243 case '"': case '\\':
244 case '$': case '`':
245 *wp++ = QCHAR, *wp++ = c;
246 break;
247 default:
248 Xcheck(ws, wp);
249 if (c) { /* trailing \ is lost */
250 *wp++ = CHAR, *wp++ = '\\';
251 *wp++ = CHAR, *wp++ = c;
252 }
253 break;
254 }
255 break;
256 case '$':
257 c = getsc();
258 if (c == '(') /*)*/ {
259 c = getsc();
260 if (c == '(') /*)*/ {
261 PUSH_STATE(SASPAREN);
262 statep->ls_sasparen.nparen = 2;
263 statep->ls_sasparen.start =
264 Xsavepos(ws, wp);
265 *wp++ = EXPRSUB;
266 } else {
267 ungetsc(c);
268 PUSH_STATE(SCSPAREN);
269 statep->ls_scsparen.nparen = 1;
270 statep->ls_scsparen.csstate = 0;
271 *wp++ = COMSUB;
272 }
273 } else if (c == '{') /*}*/ {
274 *wp++ = OSUBST;
275 *wp++ = '{'; /*}*/
276 wp = get_brace_var(&ws, wp);
277 c = getsc();
278 /* allow :# and :% (ksh88 compat) */
279 if (c == ':') {
280 *wp++ = CHAR, *wp++ = c;
281 c = getsc();
282 }
283 /* If this is a trim operation,
284 * treat (,|,) specially in STBRACE.
285 */
286 if (c == '#' || c == '%') {
287 ungetsc(c);
288 PUSH_STATE(STBRACE);
289 } else {
290 ungetsc(c);
291 PUSH_STATE(SBRACE);
292 }
293 } else if (ctype(c, C_ALPHA)) {
294 *wp++ = OSUBST;
295 *wp++ = 'X';
296 do {
297 Xcheck(ws, wp);
298 *wp++ = c;
299 c = getsc();
300 } while (ctype(c, C_ALPHA|C_DIGIT));
301 *wp++ = '\0';
302 *wp++ = CSUBST;
303 *wp++ = 'X';
304 ungetsc(c);
305 } else if (ctype(c, C_DIGIT|C_VAR1)) {
306 Xcheck(ws, wp);
307 *wp++ = OSUBST;
308 *wp++ = 'X';
309 *wp++ = c;
310 *wp++ = '\0';
311 *wp++ = CSUBST;
312 *wp++ = 'X';
313 } else {
314 *wp++ = CHAR, *wp++ = '$';
315 ungetsc(c);
316 }
317 break;
318 case '`':
319 PUSH_STATE(SBQUOTE);
320 *wp++ = COMSUB;
321 /* Need to know if we are inside double quotes
322 * since sh/at&t-ksh translate the \" to " in
323 * "`..\"..`".
324 * This is not done in posix mode (section
325 * 3.2.3, Double Quotes: "The backquote shall
326 * retain its special meaning introducing the
327 * other form of command substitution (see
328 * 3.6.3). The portion of the quoted string
329 * from the initial backquote and the
330 * characters up to the next backquote that
331 * is not preceded by a backslash (having
332 * escape characters removed) defines that
333 * command whose output replaces `...` when
334 * the word is expanded."
335 * Section 3.6.3, Command Substitution:
336 * "Within the backquoted style of command
337 * substitution, backslash shall retain its
338 * literal meaning, except when followed by
339 * $ ` \.").
340 */
341 statep->ls_sbquote.indquotes = 0;
342 if (!Flag(FPOSIX)) {
343 Lex_state *s = statep;
344 Lex_state *base = state_info.base;
345 while (1) {
346 for (; s != base; s--) {
347 if (s->ls_state == SDQUOTE) {
348 statep->ls_sbquote.indquotes = 1;
349 break;
350 }
351 }
352 if (s != base)
353 break;
354 if (!(s = s->ls_info.base))
355 break;
356 base = s-- - STATE_BSIZE;
357 }
358 }
359 break;
360 default:
361 *wp++ = CHAR, *wp++ = c;
362 }
363 break;
364
365 case SSQUOTE:
366 if (c == '\'') {
367 POP_STATE();
368 *wp++ = CQUOTE;
369 ignore_backslash_newline--;
370 } else
371 *wp++ = QCHAR, *wp++ = c;
372 break;
373
374 case SDQUOTE:
375 if (c == '"') {
376 POP_STATE();
377 *wp++ = CQUOTE;
378 } else
379 goto Subst;
380 break;
381
382 case SCSPAREN: /* $( .. ) */
383 /* todo: deal with $(...) quoting properly
384 * kludge to partly fake quoting inside $(..): doesn't
385 * really work because nested $(..) or ${..} inside
386 * double quotes aren't dealt with.
387 */
388 switch (statep->ls_scsparen.csstate) {
389 case 0: /* normal */
390 switch (c) {
391 case '(':
392 statep->ls_scsparen.nparen++;
393 break;
394 case ')':
395 statep->ls_scsparen.nparen--;
396 break;
397 case '\\':
398 statep->ls_scsparen.csstate = 1;
399 break;
400 case '"':
401 statep->ls_scsparen.csstate = 2;
402 break;
403 case '\'':
404 statep->ls_scsparen.csstate = 4;
405 ignore_backslash_newline++;
406 break;
407 }
408 break;
409
410 case 1: /* backslash in normal mode */
411 case 3: /* backslash in double quotes */
412 --statep->ls_scsparen.csstate;
413 break;
414
415 case 2: /* double quotes */
416 if (c == '"')
417 statep->ls_scsparen.csstate = 0;
418 else if (c == '\\')
419 statep->ls_scsparen.csstate = 3;
420 break;
421
422 case 4: /* single quotes */
423 if (c == '\'') {
424 statep->ls_scsparen.csstate = 0;
425 ignore_backslash_newline--;
426 }
427 break;
428 }
429 if (statep->ls_scsparen.nparen == 0) {
430 POP_STATE();
431 *wp++ = 0; /* end of COMSUB */
432 } else
433 *wp++ = c;
434 break;
435
436 case SASPAREN: /* $(( .. )) */
437 /* todo: deal with $((...); (...)) properly */
438 /* XXX should nest using existing state machine
439 * (embed "..", $(...), etc.) */
440 if (c == '(')
441 statep->ls_sasparen.nparen++;
442 else if (c == ')') {
443 statep->ls_sasparen.nparen--;
444 if (statep->ls_sasparen.nparen == 1) {
445 /*(*/
446 if ((c2 = getsc()) == ')') {
447 POP_STATE();
448 *wp++ = 0; /* end of EXPRSUB */
449 break;
450 } else {
451 char *s;
452
453 ungetsc(c2);
454 /* mismatched parenthesis -
455 * assume we were really
456 * parsing a $(..) expression
457 */
458 s = Xrestpos(ws, wp,
459 statep->ls_sasparen.start);
460 memmove(s + 1, s, wp - s);
461 *s++ = COMSUB;
462 *s = '('; /*)*/
463 wp++;
464 statep->ls_scsparen.nparen = 1;
465 statep->ls_scsparen.csstate = 0;
466 state = statep->ls_state
467 = SCSPAREN;
468
469 }
470 }
471 }
472 *wp++ = c;
473 break;
474
475 case SBRACE:
476 /*{*/
477 if (c == '}') {
478 POP_STATE();
479 *wp++ = CSUBST;
480 *wp++ = /*{*/ '}';
481 } else
482 goto Sbase1;
483 break;
484
485 case STBRACE:
486 /* Same as SBRACE, except (,|,) treated specially */
487 /*{*/
488 if (c == '}') {
489 POP_STATE();
490 *wp++ = CSUBST;
491 *wp++ = /*{*/ '}';
492 } else if (c == '|') {
493 *wp++ = SPAT;
494 } else if (c == '(') {
495 *wp++ = OPAT;
496 *wp++ = ' '; /* simile for @ */
497 PUSH_STATE(SPATTERN);
498 } else
499 goto Sbase1;
500 break;
501
502 case SBQUOTE:
503 if (c == '`') {
504 *wp++ = 0;
505 POP_STATE();
506 } else if (c == '\\') {
507 switch (c = getsc()) {
508 case '\\':
509 case '$': case '`':
510 *wp++ = c;
511 break;
512 case '"':
513 if (statep->ls_sbquote.indquotes) {
514 *wp++ = c;
515 break;
516 }
517 /* fall through.. */
518 default:
519 if (c) { /* trailing \ is lost */
520 *wp++ = '\\';
521 *wp++ = c;
522 }
523 break;
524 }
525 } else
526 *wp++ = c;
527 break;
528
529 case SWORD: /* ONEWORD */
530 goto Subst;
531
532 #ifdef KSH
533 case SLETPAREN: /* LETEXPR: (( ... )) */
534 /*(*/
535 if (c == ')') {
536 if (statep->ls_sletparen.nparen > 0)
537 --statep->ls_sletparen.nparen;
538 /*(*/
539 else if ((c2 = getsc()) == ')') {
540 c = 0;
541 *wp++ = CQUOTE;
542 goto Done;
543 } else
544 ungetsc(c2);
545 } else if (c == '(')
546 /* parenthesis inside quotes and backslashes
547 * are lost, but at&t ksh doesn't count them
548 * either
549 */
550 ++statep->ls_sletparen.nparen;
551 goto Sbase2;
552 #endif /* KSH */
553
554 case SHEREDELIM: /* <<,<<- delimiter */
555 /* XXX chuck this state (and the next) - use
556 * the existing states ($ and \`..` should be
557 * stripped of their specialness after the
558 * fact).
559 */
560 /* here delimiters need a special case since
561 * $ and `..` are not to be treated specially
562 */
563 if (c == '\\') {
564 c = getsc();
565 if (c) { /* trailing \ is lost */
566 *wp++ = QCHAR;
567 *wp++ = c;
568 }
569 } else if (c == '\'') {
570 PUSH_STATE(SSQUOTE);
571 *wp++ = OQUOTE;
572 ignore_backslash_newline++;
573 } else if (c == '"') {
574 state = statep->ls_state = SHEREDQUOTE;
575 *wp++ = OQUOTE;
576 } else {
577 *wp++ = CHAR;
578 *wp++ = c;
579 }
580 break;
581
582 case SHEREDQUOTE: /* " in <<,<<- delimiter */
583 if (c == '"') {
584 *wp++ = CQUOTE;
585 state = statep->ls_state = SHEREDELIM;
586 } else {
587 if (c == '\\') {
588 switch (c = getsc()) {
589 case '\\': case '"':
590 case '$': case '`':
591 break;
592 default:
593 if (c) { /* trailing \ lost */
594 *wp++ = CHAR;
595 *wp++ = '\\';
596 }
597 break;
598 }
599 }
600 *wp++ = CHAR;
601 *wp++ = c;
602 }
603 break;
604
605 case SPATTERN: /* in *(...|...) pattern (*+?@!) */
606 if ( /*(*/ c == ')') {
607 *wp++ = CPAT;
608 POP_STATE();
609 } else if (c == '|') {
610 *wp++ = SPAT;
611 } else if (c == '(') {
612 *wp++ = OPAT;
613 *wp++ = ' '; /* simile for @ */
614 PUSH_STATE(SPATTERN);
615 } else
616 goto Sbase1;
617 break;
618 }
619 }
620 Done:
621 Xcheck(ws, wp);
622 if (statep != &states[1])
623 /* XXX figure out what is missing */
624 yyerror("no closing quote\n");
625
626 /* This done to avoid tests for SHEREDELIM wherever SBASE tested */
627 if (state == SHEREDELIM)
628 state = SBASE;
629
630 dp = Xstring(ws, wp);
631 if ((c == '<' || c == '>') && state == SBASE
632 && ((c2 = Xlength(ws, wp)) == 0
633 || (c2 == 2 && dp[0] == CHAR && digit(dp[1]))))
634 {
635 struct ioword *iop =
636 (struct ioword *) alloc(sizeof(*iop), ATEMP);
637
638 if (c2 == 2)
639 iop->unit = dp[1] - '0';
640 else
641 iop->unit = c == '>'; /* 0 for <, 1 for > */
642
643 c2 = getsc();
644 /* <<, >>, <> are ok, >< is not */
645 if (c == c2 || (c == '<' && c2 == '>')) {
646 iop->flag = c == c2 ?
647 (c == '>' ? IOCAT : IOHERE) : IORDWR;
648 if (iop->flag == IOHERE)
649 if ((c2 = getsc()) == '-')
650 iop->flag |= IOSKIP;
651 else
652 ungetsc(c2);
653 } else if (c2 == '&')
654 iop->flag = IODUP | (c == '<' ? IORDUP : 0);
655 else {
656 iop->flag = c == '>' ? IOWRITE : IOREAD;
657 if (c == '>' && c2 == '|')
658 iop->flag |= IOCLOB;
659 else
660 ungetsc(c2);
661 }
662
663 iop->name = (char *) 0;
664 iop->delim = (char *) 0;
665 iop->heredoc = (char *) 0;
666 Xfree(ws, wp); /* free word */
667 yylval.iop = iop;
668 return REDIR;
669 }
670
671 if (wp == dp && state == SBASE) {
672 Xfree(ws, wp); /* free word */
673 /* no word, process LEX1 character */
674 switch (c) {
675 default:
676 return c;
677
678 case '|':
679 case '&':
680 case ';':
681 if ((c2 = getsc()) == c)
682 c = (c == ';') ? BREAK :
683 (c == '|') ? LOGOR :
684 (c == '&') ? LOGAND :
685 YYERRCODE;
686 #ifdef KSH
687 else if (c == '|' && c2 == '&')
688 c = COPROC;
689 #endif /* KSH */
690 else
691 ungetsc(c2);
692 return c;
693
694 case '\n':
695 gethere();
696 if (cf & CONTIN)
697 goto Again;
698 return c;
699
700 case '(': /*)*/
701 #ifdef KSH
702 if ((c2 = getsc()) == '(') /*)*/
703 /* XXX need to handle ((...); (...)) */
704 c = MDPAREN;
705 else
706 ungetsc(c2);
707 #endif /* KSH */
708 return c;
709 /*(*/
710 case ')':
711 return c;
712 }
713 }
714
715 *wp++ = EOS; /* terminate word */
716 yylval.cp = Xclose(ws, wp);
717 if (state == SWORD
718 #ifdef KSH
719 || state == SLETPAREN
720 #endif /* KSH */
721 ) /* ONEWORD? */
722 return LWORD;
723 ungetsc(c); /* unget terminator */
724
725 /* copy word to unprefixed string ident */
726 for (sp = yylval.cp, dp = ident; dp < ident+IDENT && (c = *sp++) == CHAR; )
727 *dp++ = *sp++;
728 /* Make sure the ident array stays '\0' paded */
729 memset(dp, 0, (ident+IDENT) - dp + 1);
730 if (c != EOS)
731 *ident = '\0'; /* word is not unquoted */
732
733 if (*ident != '\0' && (cf&(KEYWORD|ALIAS))) {
734 struct tbl *p;
735 int h = hash(ident);
736
737 /* { */
738 if ((cf & KEYWORD) && (p = tsearch(&keywords, ident, h))
739 && (!(cf & ESACONLY) || p->val.i == ESAC || p->val.i == '}'))
740 {
741 afree(yylval.cp, ATEMP);
742 return p->val.i;
743 }
744 if ((cf & ALIAS) && (p = tsearch(&aliases, ident, h))
745 && (p->flag & ISSET))
746 {
747 register Source *s;
748
749 for (s = source; s->type == SALIAS; s = s->next)
750 if (s->u.tblp == p)
751 return LWORD;
752 /* push alias expansion */
753 s = pushs(SALIAS, source->areap);
754 s->start = s->str = p->val.s;
755 s->u.tblp = p;
756 s->next = source;
757 source = s;
758 afree(yylval.cp, ATEMP);
759 goto Again;
760 }
761 }
762
763 return LWORD;
764 }
765
766 static void
gethere()767 gethere()
768 {
769 register struct ioword **p;
770
771 for (p = heres; p < herep; p++)
772 readhere(*p);
773 herep = heres;
774 }
775
776 /*
777 * read "<<word" text into temp file
778 */
779
780 static void
readhere(iop)781 readhere(iop)
782 struct ioword *iop;
783 {
784 register int c;
785 char *volatile eof;
786 char *eofp;
787 int skiptabs;
788 XString xs;
789 char *xp;
790 int xpos;
791
792 eof = evalstr(iop->delim, 0);
793
794 if (!(iop->flag & IOEVAL))
795 ignore_backslash_newline++;
796
797 Xinit(xs, xp, 256, ATEMP);
798
799 for (;;) {
800 eofp = eof;
801 skiptabs = iop->flag & IOSKIP;
802 xpos = Xsavepos(xs, xp);
803 while ((c = getsc()) != 0) {
804 if (skiptabs) {
805 if (c == '\t')
806 continue;
807 skiptabs = 0;
808 }
809 if (c != *eofp)
810 break;
811 Xcheck(xs, xp);
812 Xput(xs, xp, c);
813 eofp++;
814 }
815 /* Allow EOF here so commands with out trailing newlines
816 * will work (eg, ksh -c '...', $(...), etc).
817 */
818 if (*eofp == '\0' && (c == 0 || c == '\n')) {
819 xp = Xrestpos(xs, xp, xpos);
820 break;
821 }
822 ungetsc(c);
823 while ((c = getsc()) != '\n') {
824 if (c == 0)
825 yyerror("here document `%s' unclosed\n", eof);
826 Xcheck(xs, xp);
827 Xput(xs, xp, c);
828 }
829 Xcheck(xs, xp);
830 Xput(xs, xp, c);
831 }
832 Xput(xs, xp, '\0');
833 iop->heredoc = Xclose(xs, xp);
834
835 if (!(iop->flag & IOEVAL))
836 ignore_backslash_newline--;
837 }
838
839 void
840 #ifdef HAVE_PROTOTYPES
yyerror(const char * fmt,...)841 yyerror(const char *fmt, ...)
842 #else
843 yyerror(fmt, va_alist)
844 const char *fmt;
845 va_dcl
846 #endif
847 {
848 va_list va;
849
850 /* pop aliases and re-reads */
851 while (source->type == SALIAS || source->type == SREREAD)
852 source = source->next;
853 source->str = null; /* zap pending input */
854
855 error_prefix(TRUE);
856 SH_VA_START(va, fmt);
857 shf_vfprintf(shl_out, fmt, va);
858 va_end(va);
859 errorf(null);
860 }
861
862 /*
863 * input for yylex with alias expansion
864 */
865
866 Source *
pushs(type,areap)867 pushs(type, areap)
868 int type;
869 Area *areap;
870 {
871 register Source *s;
872
873 s = (Source *) alloc(sizeof(Source), areap);
874 s->type = type;
875 s->str = null;
876 s->start = NULL;
877 s->line = 0;
878 s->errline = 0;
879 s->file = NULL;
880 s->flags = 0;
881 s->next = NULL;
882 s->areap = areap;
883 if (type == SFILE || type == SSTDIN) {
884 char *dummy;
885 Xinit(s->xs, dummy, 256, s->areap);
886 } else
887 memset(&s->xs, 0, sizeof(s->xs));
888 return s;
889 }
890
891 static int
getsc__()892 getsc__()
893 {
894 register Source *s = source;
895 register int c;
896
897 while ((c = *s->str++) == 0) {
898 s->str = NULL; /* return 0 for EOF by default */
899 switch (s->type) {
900 case SEOF:
901 s->str = null;
902 return 0;
903
904 case SSTDIN:
905 case SFILE:
906 getsc_line(s);
907 break;
908
909 case SWSTR:
910 break;
911
912 case SSTRING:
913 break;
914
915 case SWORDS:
916 s->start = s->str = *s->u.strv++;
917 s->type = SWORDSEP;
918 break;
919
920 case SWORDSEP:
921 if (*s->u.strv == NULL) {
922 s->start = s->str = newline;
923 s->type = SEOF;
924 } else {
925 s->start = s->str = space;
926 s->type = SWORDS;
927 }
928 break;
929
930 case SALIAS:
931 if (s->flags & SF_ALIASEND) {
932 /* pass on an unused SF_ALIAS flag */
933 source = s->next;
934 source->flags |= s->flags & SF_ALIAS;
935 s = source;
936 } else if (*s->u.tblp->val.s
937 && isspace(strchr(s->u.tblp->val.s, 0)[-1]))
938 {
939 source = s = s->next; /* pop source stack */
940 /* Note that this alias ended with a space,
941 * enabling alias expansion on the following
942 * word.
943 */
944 s->flags |= SF_ALIAS;
945 } else {
946 /* At this point, we need to keep the current
947 * alias in the source list so recursive
948 * aliases can be detected and we also need
949 * to return the next character. Do this
950 * by temporarily popping the alias to get
951 * the next character and then put it back
952 * in the source list with the SF_ALIASEND
953 * flag set.
954 */
955 source = s->next; /* pop source stack */
956 source->flags |= s->flags & SF_ALIAS;
957 c = getsc__();
958 if (c) {
959 s->flags |= SF_ALIASEND;
960 s->ugbuf[0] = c; s->ugbuf[1] = '\0';
961 s->start = s->str = s->ugbuf;
962 s->next = source;
963 source = s;
964 } else {
965 s = source;
966 /* avoid reading eof twice */
967 s->str = NULL;
968 break;
969 }
970 }
971 continue;
972
973 case SREREAD:
974 if (s->start != s->ugbuf) /* yuck */
975 afree(s->u.freeme, ATEMP);
976 source = s = s->next;
977 continue;
978 }
979 if (s->str == NULL) {
980 s->type = SEOF;
981 s->start = s->str = null;
982 return '\0';
983 }
984 if (s->flags & SF_ECHO) {
985 shf_puts(s->str, shl_out);
986 shf_flush(shl_out);
987 }
988 }
989 return c;
990 }
991
992 static void
getsc_line(s)993 getsc_line(s)
994 Source *s;
995 {
996 char *xp = Xstring(s->xs, xp);
997 int interactive = Flag(FTALKING) && s->type == SSTDIN;
998 int have_tty = interactive && (s->flags & SF_TTY);
999
1000 /* Done here to ensure nothing odd happens when a timeout occurs */
1001 XcheckN(s->xs, xp, LINE);
1002 *xp = '\0';
1003 s->start = s->str = xp;
1004
1005 #ifdef KSH
1006 if (have_tty && ksh_tmout) {
1007 ksh_tmout_state = TMOUT_READING;
1008 alarm(ksh_tmout);
1009 }
1010 #endif /* KSH */
1011 #ifdef EDIT
1012 if (have_tty && (0
1013 # ifdef VI
1014 || Flag(FVI)
1015 # endif /* VI */
1016 # ifdef EMACS
1017 || Flag(FEMACS) || Flag(FGMACS)
1018 # endif /* EMACS */
1019 ))
1020 {
1021 int nread;
1022
1023 nread = x_read(xp, LINE);
1024 if (nread < 0) /* read error */
1025 nread = 0;
1026 xp[nread] = '\0';
1027 xp += nread;
1028 }
1029 else
1030 #endif /* EDIT */
1031 {
1032 if (interactive) {
1033 pprompt(prompt, 0);
1034 } else
1035 s->line++;
1036
1037 while (1) {
1038 char *p = shf_getse(xp, Xnleft(s->xs, xp), s->u.shf);
1039
1040 if (!p && shf_error(s->u.shf)
1041 && shf_errno(s->u.shf) == EINTR)
1042 {
1043 shf_clearerr(s->u.shf);
1044 if (trap)
1045 runtraps(0);
1046 continue;
1047 }
1048 if (!p || (xp = p, xp[-1] == '\n'))
1049 break;
1050 /* double buffer size */
1051 xp++; /* move past null so doubling works... */
1052 XcheckN(s->xs, xp, Xlength(s->xs, xp));
1053 xp--; /* ...and move back again */
1054 }
1055 /* flush any unwanted input so other programs/builtins
1056 * can read it. Not very optimal, but less error prone
1057 * than flushing else where, dealing with redirections,
1058 * etc..
1059 * todo: reduce size of shf buffer (~128?) if SSTDIN
1060 */
1061 if (s->type == SSTDIN)
1062 shf_flush(s->u.shf);
1063 }
1064 /* XXX: temporary kludge to restore source after a
1065 * trap may have been executed.
1066 */
1067 source = s;
1068 #ifdef KSH
1069 if (have_tty && ksh_tmout)
1070 {
1071 ksh_tmout_state = TMOUT_EXECUTING;
1072 alarm(0);
1073 }
1074 #endif /* KSH */
1075 s->start = s->str = Xstring(s->xs, xp);
1076 strip_nuls(Xstring(s->xs, xp), Xlength(s->xs, xp));
1077 /* Note: if input is all nulls, this is not eof */
1078 if (Xlength(s->xs, xp) == 0) { /* EOF */
1079 if (s->type == SFILE)
1080 shf_fdclose(s->u.shf);
1081 s->str = NULL;
1082 } else if (interactive) {
1083 #ifdef HISTORY
1084 char *p = Xstring(s->xs, xp);
1085 if (cur_prompt == PS1)
1086 while (*p && ctype(*p, C_IFS) && ctype(*p, C_IFSWS))
1087 p++;
1088 if (*p) {
1089 # ifdef EASY_HISTORY
1090 if (cur_prompt == PS2)
1091 histappend(Xstring(s->xs, xp), 1);
1092 else
1093 # endif /* EASY_HISTORY */
1094 {
1095 s->line++;
1096 histsave(s->line, s->str, 1);
1097 }
1098 }
1099 #endif /* HISTORY */
1100 }
1101 if (interactive)
1102 set_prompt(PS2, (Source *) 0);
1103 }
1104
1105 void
set_prompt(to,s)1106 set_prompt(to, s)
1107 int to;
1108 Source *s;
1109 {
1110 cur_prompt = to;
1111
1112 switch (to) {
1113 case PS1: /* command */
1114 #ifdef KSH
1115 /* Substitute ! and !! here, before substitutions are done
1116 * so ! in expanded variables are not expanded.
1117 * NOTE: this is not what at&t ksh does (it does it after
1118 * substitutions, POSIX doesn't say which is to be done.
1119 */
1120 {
1121 struct shf *shf;
1122 char *ps1;
1123 Area *saved_atemp;
1124
1125 ps1 = str_val(global("PS1"));
1126 shf = shf_sopen((char *) 0, strlen(ps1) * 2,
1127 SHF_WR | SHF_DYNAMIC, (struct shf *) 0);
1128 while (*ps1) {
1129 if (*ps1 != '!' || *++ps1 == '!')
1130 shf_putchar(*ps1++, shf);
1131 else
1132 shf_fprintf(shf, "%d",
1133 s ? s->line + 1 : 0);
1134 }
1135 ps1 = shf_sclose(shf);
1136 saved_atemp = ATEMP;
1137 newenv(E_ERRH);
1138 if (ksh_sigsetjmp(e->jbuf, 0)) {
1139 prompt = safe_prompt;
1140 /* Don't print an error - assume it has already
1141 * been printed. Reason is we may have forked
1142 * to run a command and the child may be
1143 * unwinding its stack through this code as it
1144 * exits.
1145 */
1146 } else
1147 prompt = str_save(substitute(ps1, 0),
1148 saved_atemp);
1149 quitenv();
1150 }
1151 #else /* KSH */
1152 prompt = str_val(global("PS1"));
1153 #endif /* KSH */
1154 break;
1155
1156 case PS2: /* command continuation */
1157 prompt = str_val(global("PS2"));
1158 break;
1159 }
1160 }
1161
1162 /* See also related routine, promptlen() in edit.c */
1163 void
pprompt(cp,ntruncate)1164 pprompt(cp, ntruncate)
1165 const char *cp;
1166 int ntruncate;
1167 {
1168 #if 0
1169 char nbuf[32];
1170 int c;
1171
1172 while (*cp != 0) {
1173 if (*cp != '!')
1174 c = *cp++;
1175 else if (*++cp == '!')
1176 c = *cp++;
1177 else {
1178 int len;
1179 char *p;
1180
1181 shf_snprintf(p = nbuf, sizeof(nbuf), "%d",
1182 source->line + 1);
1183 len = strlen(nbuf);
1184 if (ntruncate) {
1185 if (ntruncate >= len) {
1186 ntruncate -= len;
1187 continue;
1188 }
1189 p += ntruncate;
1190 len -= ntruncate;
1191 ntruncate = 0;
1192 }
1193 shf_write(p, len, shl_out);
1194 continue;
1195 }
1196 if (ntruncate)
1197 --ntruncate;
1198 else
1199 shf_putc(c, shl_out);
1200 }
1201 #endif /* 0 */
1202 shf_puts(cp + ntruncate, shl_out);
1203 shf_flush(shl_out);
1204 }
1205
1206 /* Read the variable part of a ${...} expression (ie, up to but not including
1207 * the :[-+?=#%] or close-brace.
1208 */
1209 static char *
get_brace_var(wsp,wp)1210 get_brace_var(wsp, wp)
1211 XString *wsp;
1212 char *wp;
1213 {
1214 enum parse_state {
1215 PS_INITIAL, PS_SAW_HASH, PS_IDENT,
1216 PS_NUMBER, PS_VAR1, PS_END
1217 }
1218 state;
1219 char c;
1220
1221 state = PS_INITIAL;
1222 while (1) {
1223 c = getsc();
1224 /* State machine to figure out where the variable part ends. */
1225 switch (state) {
1226 case PS_INITIAL:
1227 if (c == '#') {
1228 state = PS_SAW_HASH;
1229 break;
1230 }
1231 /* fall through.. */
1232 case PS_SAW_HASH:
1233 if (letter(c))
1234 state = PS_IDENT;
1235 else if (digit(c))
1236 state = PS_NUMBER;
1237 else if (ctype(c, C_VAR1))
1238 state = PS_VAR1;
1239 else
1240 state = PS_END;
1241 break;
1242 case PS_IDENT:
1243 if (!letnum(c)) {
1244 state = PS_END;
1245 if (c == '[') {
1246 char *tmp, *p;
1247
1248 if (!arraysub(&tmp))
1249 yyerror("missing ]\n");
1250 *wp++ = c;
1251 for (p = tmp; *p; ) {
1252 Xcheck(*wsp, wp);
1253 *wp++ = *p++;
1254 }
1255 afree(tmp, ATEMP);
1256 c = getsc(); /* the ] */
1257 }
1258 }
1259 break;
1260 case PS_NUMBER:
1261 if (!digit(c))
1262 state = PS_END;
1263 break;
1264 case PS_VAR1:
1265 state = PS_END;
1266 break;
1267 case PS_END: /* keep gcc happy */
1268 break;
1269 }
1270 if (state == PS_END) {
1271 *wp++ = '\0'; /* end of variable part */
1272 ungetsc(c);
1273 break;
1274 }
1275 Xcheck(*wsp, wp);
1276 *wp++ = c;
1277 }
1278 return wp;
1279 }
1280
1281 /*
1282 * Save an array subscript - returns true if matching bracket found, false
1283 * if eof or newline was found.
1284 * (Returned string double null terminated)
1285 */
1286 static int
arraysub(strp)1287 arraysub(strp)
1288 char **strp;
1289 {
1290 XString ws;
1291 char *wp;
1292 char c;
1293 int depth = 1; /* we are just past the initial [ */
1294
1295 Xinit(ws, wp, 32, ATEMP);
1296
1297 do {
1298 c = getsc();
1299 Xcheck(ws, wp);
1300 *wp++ = c;
1301 if (c == '[')
1302 depth++;
1303 else if (c == ']')
1304 depth--;
1305 } while (depth > 0 && c && c != '\n');
1306
1307 *wp++ = '\0';
1308 *strp = Xclose(ws, wp);
1309
1310 return depth == 0 ? 1 : 0;
1311 }
1312
1313 /* Unget a char: handles case when we are already at the start of the buffer */
1314 static const char *
ungetsc(c)1315 ungetsc(c)
1316 int c;
1317 {
1318 if (backslash_skip)
1319 backslash_skip--;
1320 /* Don't unget eof... */
1321 if (source->str == null && c == '\0')
1322 return source->str;
1323 if (source->str > source->start)
1324 source->str--;
1325 else {
1326 Source *s;
1327
1328 s = pushs(SREREAD, source->areap);
1329 s->ugbuf[0] = c; s->ugbuf[1] = '\0';
1330 s->start = s->str = s->ugbuf;
1331 s->next = source;
1332 source = s;
1333 }
1334 return source->str;
1335 }
1336
1337
1338 /* Called to get a char that isn't a \newline sequence. */
1339 static int
getsc_bn(void)1340 getsc_bn ARGS((void))
1341 {
1342 int c, c2;
1343
1344 if (ignore_backslash_newline)
1345 return getsc_();
1346
1347 if (backslash_skip == 1) {
1348 backslash_skip = 2;
1349 return getsc_();
1350 }
1351
1352 backslash_skip = 0;
1353
1354 while (1) {
1355 c = getsc_();
1356 if (c == '\\') {
1357 if ((c2 = getsc_()) == '\n')
1358 /* ignore the \newline; get the next char... */
1359 continue;
1360 ungetsc(c2);
1361 backslash_skip = 1;
1362 }
1363 return c;
1364 }
1365 }
1366
1367 static Lex_state *
push_state_(si,old_end)1368 push_state_(si, old_end)
1369 State_info *si;
1370 Lex_state *old_end;
1371 {
1372 Lex_state *new = alloc(sizeof(Lex_state) * STATE_BSIZE, ATEMP);
1373
1374 new[0].ls_info.base = old_end;
1375 si->base = &new[0];
1376 si->end = &new[STATE_BSIZE];
1377 return &new[1];
1378 }
1379
1380 static Lex_state *
pop_state_(si,old_end)1381 pop_state_(si, old_end)
1382 State_info *si;
1383 Lex_state *old_end;
1384 {
1385 Lex_state *old_base = si->base;
1386
1387 si->base = old_end->ls_info.base - STATE_BSIZE;
1388 si->end = old_end->ls_info.base;
1389
1390 afree(old_base, ATEMP);
1391
1392 return si->base + STATE_BSIZE - 1;;
1393 }
1394