xref: /plan9/sys/src/cmd/rc/lex.c (revision ec59a3ddbfceee0efe34584c2c9981a5e5ff1ec4)
1 #include "rc.h"
2 #include "exec.h"
3 #include "io.h"
4 #include "getflags.h"
5 #include "fns.h"
6 int getnext(void);
7 
/*
 * Report whether c may appear inside an unquoted word.
 * Returns 1 unless c is EOF, NUL, or one of the characters
 * newline, space, tab, # ; & | ^ $ = ` ' { } ( ) < >.
 */
int
wordchr(int c)
{
	if(c==EOF)
		return 0;
	/* strchr also matches the terminating NUL, so c==0 is rejected here */
	if(strchr("\n \t#;&|^$=`'{}()<>", c))
		return 0;
	return 1;
}
13 
/*
 * Report whether c may appear in a variable name following '$'.
 * Anything at or below space (which covers NUL, control characters
 * and EOF) is rejected, as is the listed punctuation.  Every other
 * character is accepted.
 *
 * Historical note: this used to accept only letters, digits,
 * '_' and '*'.
 */
int
idchr(int c)
{
	if(c<=' ')
		return 0;
	return strchr("!\"#$%&'()+,-./:;<=>?@[\\]^`{|}~", c)==0;
}
/* one-character lookahead; EOF means nothing is buffered */
int future = EOF;
/* nonzero when pprompt() must run before the next character is read */
int doprompt = 1;
/* nonzero while inside a quoted word; getnext() then leaves backslashes alone */
int inquote;
27 /*
28  * Look ahead in the input stream
29  */
30 
31 int
32 nextc(void)
33 {
34 	if(future==EOF)
35 		future = getnext();
36 	return future;
37 }
38 /*
39  * Consume the lookahead character.
40  */
41 
42 int
43 advance(void)
44 {
45 	int c = nextc();
46 	lastc = future;
47 	future = EOF;
48 	return c;
49 }
50 /*
51  * read a character from the input stream
52  */
53 
54 int
55 getnext(void)
56 {
57 	int c;
58 	static peekc = EOF;
59 	if(peekc!=EOF){
60 		c = peekc;
61 		peekc = EOF;
62 		return c;
63 	}
64 	if(runq->eof)
65 		return EOF;
66 	if(doprompt)
67 		pprompt();
68 	c = rchr(runq->cmdfd);
69 	if(!inquote && c=='\\'){
70 		c = rchr(runq->cmdfd);
71 		if(c=='\n'){
72 			doprompt = 1;
73 			c=' ';
74 		}
75 		else{
76 			peekc = c;
77 			c='\\';
78 		}
79 	}
80 	doprompt = doprompt || c=='\n' || c==EOF;
81 	if(c==EOF)
82 		runq->eof++;
83 	else if(flag['V'] || ndot>=2 && flag['v']) pchr(err, c);
84 	return c;
85 }
86 
87 void
88 pprompt(void)
89 {
90 	var *prompt;
91 	if(runq->iflag){
92 		pstr(err, promptstr);
93 		flush(err);
94 		prompt = vlook("prompt");
95 		if(prompt->val && prompt->val->next)
96 			promptstr = prompt->val->next->word;
97 		else
98 			promptstr="\t";
99 	}
100 	runq->lineno++;
101 	doprompt = 0;
102 }
103 
/*
 * Skip blanks, tabs, and a trailing '#' comment.
 * The newline (or EOF) that ends a comment is left unread, as is
 * any other character that is not a blank or tab.
 */
void
skipwhite(void)
{
	int c;

	for(;;){
		c = nextc();
		if(c==' ' || c=='\t'){
			advance();
			continue;
		}
		/*
		 * Historical question kept from the original source: why did
		 * this test used to be  !inquote && c=='#'  ??
		 */
		if(c=='#'){
			/* discard everything up to, but not including, newline/EOF */
			while((c = nextc())!='\n' && c!=EOF)
				advance();
		}
		return;
	}
}
123 
/*
 * Skip white space and comments across any number of newlines,
 * stopping at the first character that is none of those.
 */
void
skipnl(void)
{
	skipwhite();
	while(nextc()=='\n'){
		advance();
		skipwhite();
	}
}
136 
/*
 * If the lookahead character equals c, consume it and return 1;
 * otherwise leave the input untouched and return 0.
 */
int
nextis(int c)
{
	if(nextc()!=c)
		return 0;
	advance();
	return 1;
}
146 
147 char*
148 addtok(char *p, int val)
149 {
150 	if(p==0)
151 		return 0;
152 	if(p==&tok[NTOK-1]){
153 		*p = 0;
154 		yyerror("token buffer too short");
155 		return 0;
156 	}
157 	*p++=val;
158 	return p;
159 }
160 
/*
 * Append c to the token buffer at p and, when twobyte(c) or
 * threebyte(c) marks it as the lead byte of a multi-byte sequence,
 * also read and append the one or two bytes that follow.
 * Returns the next free slot, or 0 after a buffer overflow.
 */
char*
addutf(char *p, int c)
{
	int extra;

	p = addtok(p, c);
	if(twobyte(c))
		extra = 1;
	else if(threebyte(c))
		extra = 2;
	else
		extra = 0;
	while(extra-- > 0)
		p = addtok(p, advance());
	return p;
}
/* set when the last token read was '$', '$#' or '$"'; selects idchr() for the word that follows */
int lastdol;
/* set when the last token read was a word or compound word terminator */
int lastword;
175 
176 int
177 yylex(void)
178 {
179 	int c, d = nextc();
180 	char *w = tok;
181 	struct tree *t;
182 	yylval.tree = 0;
183 	/*
184 	 * Embarassing sneakiness:  if the last token read was a quoted or unquoted
185 	 * WORD then we alter the meaning of what follows.  If the next character
186 	 * is `(', we return SUB (a subscript paren) and consume the `('.  Otherwise,
187 	 * if the next character is the first character of a simple or compound word,
188 	 * we insert a `^' before it.
189 	 */
190 	if(lastword){
191 		lastword = 0;
192 		if(d=='('){
193 			advance();
194 			strcpy(tok, "( [SUB]");
195 			return SUB;
196 		}
197 		if(wordchr(d) || d=='\'' || d=='`' || d=='$' || d=='"'){
198 			strcpy(tok, "^");
199 			return '^';
200 		}
201 	}
202 	inquote = 0;
203 	skipwhite();
204 	switch(c = advance()){
205 	case EOF:
206 		lastdol = 0;
207 		strcpy(tok, "EOF");
208 		return EOF;
209 	case '$':
210 		lastdol = 1;
211 		if(nextis('#')){
212 			strcpy(tok, "$#");
213 			return COUNT;
214 		}
215 		if(nextis('"')){
216 			strcpy(tok, "$\"");
217 			return '"';
218 		}
219 		strcpy(tok, "$");
220 		return '$';
221 	case '&':
222 		lastdol = 0;
223 		if(nextis('&')){
224 			skipnl();
225 			strcpy(tok, "&&");
226 			return ANDAND;
227 		}
228 		strcpy(tok, "&");
229 		return '&';
230 	case '|':
231 		lastdol = 0;
232 		if(nextis(c)){
233 			skipnl();
234 			strcpy(tok, "||");
235 			return OROR;
236 		}
237 	case '<':
238 	case '>':
239 		lastdol = 0;
240 		/*
241 		 * funny redirection tokens:
242 		 *	redir:	arrow | arrow '[' fd ']'
243 		 *	arrow:	'<' | '<<' | '>' | '>>' | '|'
244 		 *	fd:	digit | digit '=' | digit '=' digit
245 		 *	digit:	'0'|'1'|'2'|'3'|'4'|'5'|'6'|'7'|'8'|'9'
246 		 * some possibilities are nonsensical and get a message.
247 		 */
248 		*w++=c;
249 		t = newtree();
250 		switch(c){
251 		case '|':
252 			t->type = PIPE;
253 			t->fd0 = 1;
254 			t->fd1 = 0;
255 			break;
256 		case '>':
257 			t->type = REDIR;
258 			if(nextis(c)){
259 				t->rtype = APPEND;
260 				*w++=c;
261 			}
262 			else t->rtype = WRITE;
263 			t->fd0 = 1;
264 			break;
265 		case '<':
266 			t->type = REDIR;
267 			if(nextis(c)){
268 				t->rtype = HERE;
269 				*w++=c;
270 			} else if (nextis('>')){
271 				t->rtype = RDWR;
272 				*w++=c;
273 			} else t->rtype = READ;
274 			t->fd0 = 0;
275 			break;
276 		}
277 		if(nextis('[')){
278 			*w++='[';
279 			c = advance();
280 			*w++=c;
281 			if(c<'0' || '9'<c){
282 			RedirErr:
283 				*w = 0;
284 				yyerror(t->type==PIPE?"pipe syntax"
285 						:"redirection syntax");
286 				return EOF;
287 			}
288 			t->fd0 = 0;
289 			do{
290 				t->fd0 = t->fd0*10+c-'0';
291 				*w++=c;
292 				c = advance();
293 			}while('0'<=c && c<='9');
294 			if(c=='='){
295 				*w++='=';
296 				if(t->type==REDIR)
297 					t->type = DUP;
298 				c = advance();
299 				if('0'<=c && c<='9'){
300 					t->rtype = DUPFD;
301 					t->fd1 = t->fd0;
302 					t->fd0 = 0;
303 					do{
304 						t->fd0 = t->fd0*10+c-'0';
305 						*w++=c;
306 						c = advance();
307 					}while('0'<=c && c<='9');
308 				}
309 				else{
310 					if(t->type==PIPE)
311 						goto RedirErr;
312 					t->rtype = CLOSE;
313 				}
314 			}
315 			if(c!=']'
316 			|| t->type==DUP && (t->rtype==HERE || t->rtype==APPEND))
317 				goto RedirErr;
318 			*w++=']';
319 		}
320 		*w='\0';
321 		yylval.tree = t;
322 		if(t->type==PIPE)
323 			skipnl();
324 		return t->type;
325 	case '\'':
326 		lastdol = 0;
327 		lastword = 1;
328 		inquote = 1;
329 		for(;;){
330 			c = advance();
331 			if(c==EOF)
332 				break;
333 			if(c=='\''){
334 				if(nextc()!='\'')
335 					break;
336 				advance();
337 			}
338 			w = addutf(w, c);
339 		}
340 		if(w!=0)
341 			*w='\0';
342 		t = token(tok, WORD);
343 		t->quoted = 1;
344 		yylval.tree = t;
345 		return t->type;
346 	}
347 	if(!wordchr(c)){
348 		lastdol = 0;
349 		tok[0] = c;
350 		tok[1]='\0';
351 		return c;
352 	}
353 	for(;;){
354 		/* next line should have (char)c==GLOB, but ken's compiler is broken */
355 		if(c=='*' || c=='[' || c=='?' || c==(unsigned char)GLOB)
356 			w = addtok(w, GLOB);
357 		w = addutf(w, c);
358 		c = nextc();
359 		if(lastdol?!idchr(c):!wordchr(c)) break;
360 		advance();
361 	}
362 
363 	lastword = 1;
364 	lastdol = 0;
365 	if(w!=0)
366 		*w='\0';
367 	t = klook(tok);
368 	if(t->type!=WORD)
369 		lastword = 0;
370 	t->quoted = 0;
371 	yylval.tree = t;
372 	return t->type;
373 }
374