1 #include "rc.h"
2 #include "exec.h"
3 #include "io.h"
4 #include "fns.h"
5
6 Rune getnext(void);
7
8 int
wordchr(Rune c)9 wordchr(Rune c) /* is c in the alphabet of words (non-delimiters)? */
10 {
11 return c != EOF &&
12 (c >= Runeself || strchr("\n \t#;&|^$=`'{}()<>", c) == nil);
13 }
14
15 /*
16 * is c in the alphabet of identifiers? as in the c compiler, treat
17 * non-ascii as alphabetic.
18 */
19 int
idchr(Rune c)20 idchr(Rune c)
21 {
22 /*
23 * Formerly:
24 * return 'a'<=c && c<='z' || 'A'<=c && c<='Z' || '0'<=c && c<='9'
25 * || c=='_' || c=='*';
26 */
27 return c != EOF && (c >= Runeself ||
28 c > ' ' &&
29 strchr("!\"#$%&'()+,-./:;<=>?@[\\]^`{|}~", c) == nil);
30 }
31
32 Rune future = EOF;
33 int doprompt = 1;
34 int inquote; /* are we processing a quoted word ('...')? */
35 int incomm; /* are we ignoring input in a comment (#...\n)? */
36 /*
37 * Look ahead in the input stream
38 */
39
40 Rune
nextc(void)41 nextc(void)
42 {
43 if(future==EOF)
44 future = getnext();
45 return future;
46 }
47 /*
48 * Consume the lookahead character.
49 */
50
51 Rune
advance(void)52 advance(void)
53 {
54 Rune c = nextc();
55
56 lastc = future;
57 future = EOF;
58 return c;
59 }
60 /*
61 * read a character from the input stream
62 */
63
64 Rune
getnext(void)65 getnext(void)
66 {
67 Rune c;
68 char buf[UTFmax+1];
69 static Rune peekc = EOF;
70
71 if(peekc!=EOF){
72 c = peekc;
73 peekc = EOF;
74 return c;
75 }
76 if(runq->eof)
77 return EOF;
78 if(doprompt)
79 pprompt();
80 rutf(runq->cmdfd, buf, &c);
81 if(!inquote && c=='\\'){
82 rutf(runq->cmdfd, buf, &c);
83 if(c=='\n' && !incomm){ /* don't continue a comment */
84 doprompt = 1;
85 c=' ';
86 }
87 else{
88 peekc = c;
89 c='\\';
90 }
91 }
92 doprompt = doprompt || c=='\n' || c==EOF;
93 if(c==EOF)
94 runq->eof++;
95 else if(flag['V'] || ndot>=2 && flag['v']) pchr(err, c);
96 return c;
97 }
98
99 void
pprompt(void)100 pprompt(void)
101 {
102 var *prompt;
103 if(runq->iflag){
104 pstr(err, promptstr);
105 flush(err);
106 prompt = vlook("prompt");
107 if(prompt->val && prompt->val->next)
108 promptstr = prompt->val->next->word;
109 else
110 promptstr="\t";
111 }
112 runq->lineno++;
113 doprompt = 0;
114 }
115
116 void
skipwhite(void)117 skipwhite(void)
118 {
119 Rune c;
120
121 for(;;){
122 c = nextc();
123 /* Why did this used to be if(!inquote && c=='#') ?? */
124 if(c=='#'){
125 incomm = 1;
126 for(;;){
127 c = nextc();
128 if(c=='\n' || c==EOF) {
129 incomm = 0;
130 break;
131 }
132 advance();
133 }
134 }
135 if(c==' ' || c=='\t')
136 advance();
137 else return;
138 }
139 }
140
141 void
skipnl(void)142 skipnl(void)
143 {
144 Rune c, c0;
145
146 for(c0 = nextc(); ; c0 = c){
147 skipwhite();
148 c = nextc();
149 if(c != c0)
150 lastword = 0; /* change of whitespace or c is not ws */
151 if(c!='\n')
152 return;
153 lastword = 0; /* new line; continue */
154 advance();
155 }
156 }
157
158 int
nextis(Rune c)159 nextis(Rune c)
160 {
161 if(nextc()==c){
162 advance();
163 return 1;
164 }
165 return 0;
166 }
167
168 char*
addutf(char * p,Rune c)169 addutf(char *p, Rune c)
170 {
171 if(p==0)
172 return 0;
173 if(p >= &tok[NTOK-1-UTFmax*2]){
174 *p = 0;
175 yyerror("token buffer too short");
176 return 0;
177 }
178 p += runetochar(p, &c);
179 return p;
180 }
181
/* set when the last token read was '$', '$#' or '$"' */
int lastdol;
/* set when the last token read was a word or compound word terminator */
int lastword;
184
185 int
yylex(void)186 yylex(void)
187 {
188 Rune c, d = nextc();
189 char *w = tok;
190 struct tree *t;
191
192 yylval.tree = 0;
193 /*
194 * Embarrassing sneakiness: if the last token read was a quoted or
195 * unquoted WORD then we alter the meaning of what follows. If the
196 * next character is `(', we return SUB (a subscript paren) and
197 * consume the `('. Otherwise, if the next character is the first
198 * character of a simple or compound word, we insert a `^' before it.
199 */
200 if(lastword){
201 lastword = 0;
202 if(d=='('){
203 advance();
204 strcpy(tok, "( [SUB]");
205 return SUB;
206 }
207 if(wordchr(d) || d=='\'' || d=='`' || d=='$' || d=='"'){
208 strcpy(tok, "^");
209 return '^';
210 }
211 }
212 skipwhite();
213 switch(c = advance()){
214 case EOF:
215 lastdol = 0;
216 strcpy(tok, "EOF");
217 return EOF;
218 case '$':
219 lastdol = 1;
220 if(nextis('#')){
221 strcpy(tok, "$#");
222 return COUNT;
223 }
224 if(nextis('"')){
225 strcpy(tok, "$\"");
226 return '"';
227 }
228 strcpy(tok, "$");
229 return '$';
230 case '&':
231 lastdol = 0;
232 if(nextis('&')){
233 skipnl();
234 strcpy(tok, "&&");
235 return ANDAND;
236 }
237 strcpy(tok, "&");
238 return '&';
239 case '|':
240 lastdol = 0;
241 if(nextis(c)){
242 skipnl();
243 strcpy(tok, "||");
244 return OROR;
245 }
246 case '<':
247 case '>':
248 lastdol = 0;
249 /*
250 * funny redirection tokens:
251 * redir: arrow | arrow '[' fd ']'
252 * arrow: '<' | '<<' | '>' | '>>' | '|'
253 * fd: digit | digit '=' | digit '=' digit
254 * digit: '0'|'1'|'2'|'3'|'4'|'5'|'6'|'7'|'8'|'9'
255 * some possibilities are nonsensical and get a message.
256 */
257 *w++=c;
258 t = newtree();
259 switch(c){
260 case '|':
261 t->type = PIPE;
262 t->fd0 = 1;
263 t->fd1 = 0;
264 break;
265 case '>':
266 t->type = REDIR;
267 if(nextis(c)){
268 t->rtype = APPEND;
269 *w++=c;
270 }
271 else t->rtype = WRITE;
272 t->fd0 = 1;
273 break;
274 case '<':
275 t->type = REDIR;
276 if(nextis(c)){
277 t->rtype = HERE;
278 *w++=c;
279 } else if (nextis('>')){
280 t->rtype = RDWR;
281 *w++=c;
282 } else t->rtype = READ;
283 t->fd0 = 0;
284 break;
285 }
286 if(nextis('[')){
287 *w++='[';
288 c = advance();
289 *w++=c;
290 if(c<'0' || '9'<c){
291 RedirErr:
292 *w = 0;
293 yyerror(t->type==PIPE?"pipe syntax"
294 :"redirection syntax");
295 return EOF;
296 }
297 t->fd0 = 0;
298 do{
299 t->fd0 = t->fd0*10+c-'0';
300 *w++=c;
301 c = advance();
302 }while('0'<=c && c<='9');
303 if(c=='='){
304 *w++='=';
305 if(t->type==REDIR)
306 t->type = DUP;
307 c = advance();
308 if('0'<=c && c<='9'){
309 t->rtype = DUPFD;
310 t->fd1 = t->fd0;
311 t->fd0 = 0;
312 do{
313 t->fd0 = t->fd0*10+c-'0';
314 *w++=c;
315 c = advance();
316 }while('0'<=c && c<='9');
317 }
318 else{
319 if(t->type==PIPE)
320 goto RedirErr;
321 t->rtype = CLOSE;
322 }
323 }
324 if(c!=']'
325 || t->type==DUP && (t->rtype==HERE || t->rtype==APPEND))
326 goto RedirErr;
327 *w++=']';
328 }
329 *w='\0';
330 yylval.tree = t;
331 if(t->type==PIPE)
332 skipnl();
333 return t->type;
334 case '\'':
335 lastdol = 0;
336 lastword = 1;
337 inquote = 1;
338 for(;;){
339 c = advance();
340 if(c==EOF)
341 break;
342 if(c=='\''){
343 if(nextc()!='\'')
344 break;
345 advance();
346 }
347 w = addutf(w, c);
348 }
349 if(w!=0)
350 *w='\0';
351 t = token(tok, WORD);
352 t->quoted = 1;
353 yylval.tree = t;
354 inquote = 0;
355 return t->type;
356 }
357 if(!wordchr(c)){
358 lastdol = 0;
359 addutf(tok, c);
360 return c;
361 }
362 for(;;){
363 if(c=='*' || c=='[' || c=='?' || c==GLOB)
364 w = addutf(w, GLOB);
365 w = addutf(w, c);
366 c = nextc();
367 if(lastdol?!idchr(c):!wordchr(c)) break;
368 advance();
369 }
370 lastword = 1;
371 lastdol = 0;
372 if(w!=0)
373 *w='\0';
374 t = klook(tok);
375 if(t->type!=WORD)
376 lastword = 0;
377 t->quoted = 0;
378 yylval.tree = t;
379 return t->type;
380 }
381