/* xref: /plan9/sys/src/cmd/rc/lex.c (revision 4e3613ab15c331a9ada113286cc0f2a35bc0373d) */
1 #include "rc.h"
2 #include "exec.h"
3 #include "io.h"
4 #include "getflags.h"
5 #include "fns.h"
6 int getnext(void);
7 
/*
 * Report whether c can be part of an unquoted word: anything except
 * EOF, white space, newline and the shell's special characters.
 */
int
wordchr(int c)
{
	/* strchr also matches the string's terminating NUL, so c==0 is rejected */
	return !strchr("\n \t#;&|^$=`'{}()<>", c) && c!=EOF;
}

/*
 * Report whether c can be part of a variable name following '$'.
 * Accepts every character above space that is not in the punctuation
 * list below; note that '*' and '_' are deliberately not in the list.
 */
int
idchr(int c)
{
	/*
	 * Formerly:
	 * return 'a'<=c && c<='z' || 'A'<=c && c<='Z' || '0'<=c && c<='9'
	 *	|| c=='_' || c=='*';
	 */
	return c>' ' && !strchr("!\"#$%&'()+,-./:;<=>?@[\\]^`{|}~", c);
}
int future = EOF;	/* one-character lookahead filled by nextc(); EOF means empty */
int doprompt = 1;	/* nonzero: getnext() calls pprompt() before its next read */
int inquote;		/* set by yylex() inside '...'; getnext() then leaves backslash alone */
int incomm;		/* set by skipwhite() inside a # comment; stops backslash-newline joining */
28 /*
29  * Look ahead in the input stream
30  */
31 
32 int
nextc(void)33 nextc(void)
34 {
35 	if(future==EOF)
36 		future = getnext();
37 	return future;
38 }
39 /*
40  * Consume the lookahead character.
41  */
42 
43 int
advance(void)44 advance(void)
45 {
46 	int c = nextc();
47 	lastc = future;
48 	future = EOF;
49 	return c;
50 }
51 /*
52  * read a character from the input stream
53  */
54 
55 int
getnext(void)56 getnext(void)
57 {
58 	int c;
59 	static int peekc = EOF;
60 	if(peekc!=EOF){
61 		c = peekc;
62 		peekc = EOF;
63 		return c;
64 	}
65 	if(runq->eof)
66 		return EOF;
67 	if(doprompt)
68 		pprompt();
69 	c = rchr(runq->cmdfd);
70 	if(!inquote && c=='\\'){
71 		c = rchr(runq->cmdfd);
72 		if(c=='\n' && !incomm){		/* don't continue a comment */
73 			doprompt = 1;
74 			c=' ';
75 		}
76 		else{
77 			peekc = c;
78 			c='\\';
79 		}
80 	}
81 	doprompt = doprompt || c=='\n' || c==EOF;
82 	if(c==EOF)
83 		runq->eof++;
84 	else if(flag['V'] || ndot>=2 && flag['v']) pchr(err, c);
85 	return c;
86 }
87 
88 void
pprompt(void)89 pprompt(void)
90 {
91 	var *prompt;
92 	if(runq->iflag){
93 		pstr(err, promptstr);
94 		flush(err);
95 		prompt = vlook("prompt");
96 		if(prompt->val && prompt->val->next)
97 			promptstr = prompt->val->next->word;
98 		else
99 			promptstr="\t";
100 	}
101 	runq->lineno++;
102 	doprompt = 0;
103 }
104 
105 void
skipwhite(void)106 skipwhite(void)
107 {
108 	int c;
109 	for(;;){
110 		c = nextc();
111 		/* Why did this used to be  if(!inquote && c=='#') ?? */
112 		if(c=='#'){
113 			incomm = 1;
114 			for(;;){
115 				c = nextc();
116 				if(c=='\n' || c==EOF) {
117 					incomm = 0;
118 					break;
119 				}
120 				advance();
121 			}
122 		}
123 		if(c==' ' || c=='\t')
124 			advance();
125 		else return;
126 	}
127 }
128 
/*
 * Skip white space, comments and any number of newlines, stopping with
 * the lookahead on the first character that is none of these.
 */
void
skipnl(void)
{
	int c;

	for(;;){
		skipwhite();
		c = nextc();
		if(c!='\n')
			return;
		advance();
	}
}

/*
 * If the lookahead character equals c, consume it and return 1;
 * otherwise leave the input untouched and return 0.
 */
int
nextis(int c)
{
	if(nextc()==c){
		advance();
		return 1;
	}
	return 0;
}

152 char*
addtok(char * p,int val)153 addtok(char *p, int val)
154 {
155 	if(p==0)
156 		return 0;
157 	if(p >= &tok[NTOK]){
158 		*p = 0;
159 		yyerror("token buffer too short");
160 		return 0;
161 	}
162 	*p++=val;
163 	return p;
164 }
165 
166 char*
addutf(char * p,int c)167 addutf(char *p, int c)
168 {
169 	uchar b, m;
170 	int i;
171 
172 	p = addtok(p, c);	/* 1-byte UTF runes are special */
173 	if(c < Runeself)
174 		return p;
175 
176 	m = 0xc0;
177 	b = 0x80;
178 	for(i=1; i < UTFmax; i++){
179 		if((c&m) == b)
180 			break;
181 		p = addtok(p, advance());
182 		b = m;
183 		m = (m >> 1)|0x80;
184 	}
185 	return p;
186 }
187 
188 int lastdol;	/* was the last token read '$' or '$#' or '"'? */
189 int lastword;	/* was the last token read a word or compound word terminator? */
190 
191 int
yylex(void)192 yylex(void)
193 {
194 	int c, d = nextc();
195 	char *w = tok;
196 	struct tree *t;
197 	yylval.tree = 0;
198 	/*
199 	 * Embarassing sneakiness:  if the last token read was a quoted or unquoted
200 	 * WORD then we alter the meaning of what follows.  If the next character
201 	 * is `(', we return SUB (a subscript paren) and consume the `('.  Otherwise,
202 	 * if the next character is the first character of a simple or compound word,
203 	 * we insert a `^' before it.
204 	 */
205 	if(lastword){
206 		lastword = 0;
207 		if(d=='('){
208 			advance();
209 			strcpy(tok, "( [SUB]");
210 			return SUB;
211 		}
212 		if(wordchr(d) || d=='\'' || d=='`' || d=='$' || d=='"'){
213 			strcpy(tok, "^");
214 			return '^';
215 		}
216 	}
217 	inquote = 0;
218 	skipwhite();
219 	switch(c = advance()){
220 	case EOF:
221 		lastdol = 0;
222 		strcpy(tok, "EOF");
223 		return EOF;
224 	case '$':
225 		lastdol = 1;
226 		if(nextis('#')){
227 			strcpy(tok, "$#");
228 			return COUNT;
229 		}
230 		if(nextis('"')){
231 			strcpy(tok, "$\"");
232 			return '"';
233 		}
234 		strcpy(tok, "$");
235 		return '$';
236 	case '&':
237 		lastdol = 0;
238 		if(nextis('&')){
239 			skipnl();
240 			strcpy(tok, "&&");
241 			return ANDAND;
242 		}
243 		strcpy(tok, "&");
244 		return '&';
245 	case '|':
246 		lastdol = 0;
247 		if(nextis(c)){
248 			skipnl();
249 			strcpy(tok, "||");
250 			return OROR;
251 		}
252 	case '<':
253 	case '>':
254 		lastdol = 0;
255 		/*
256 		 * funny redirection tokens:
257 		 *	redir:	arrow | arrow '[' fd ']'
258 		 *	arrow:	'<' | '<<' | '>' | '>>' | '|'
259 		 *	fd:	digit | digit '=' | digit '=' digit
260 		 *	digit:	'0'|'1'|'2'|'3'|'4'|'5'|'6'|'7'|'8'|'9'
261 		 * some possibilities are nonsensical and get a message.
262 		 */
263 		*w++=c;
264 		t = newtree();
265 		switch(c){
266 		case '|':
267 			t->type = PIPE;
268 			t->fd0 = 1;
269 			t->fd1 = 0;
270 			break;
271 		case '>':
272 			t->type = REDIR;
273 			if(nextis(c)){
274 				t->rtype = APPEND;
275 				*w++=c;
276 			}
277 			else t->rtype = WRITE;
278 			t->fd0 = 1;
279 			break;
280 		case '<':
281 			t->type = REDIR;
282 			if(nextis(c)){
283 				t->rtype = HERE;
284 				*w++=c;
285 			} else if (nextis('>')){
286 				t->rtype = RDWR;
287 				*w++=c;
288 			} else t->rtype = READ;
289 			t->fd0 = 0;
290 			break;
291 		}
292 		if(nextis('[')){
293 			*w++='[';
294 			c = advance();
295 			*w++=c;
296 			if(c<'0' || '9'<c){
297 			RedirErr:
298 				*w = 0;
299 				yyerror(t->type==PIPE?"pipe syntax"
300 						:"redirection syntax");
301 				return EOF;
302 			}
303 			t->fd0 = 0;
304 			do{
305 				t->fd0 = t->fd0*10+c-'0';
306 				*w++=c;
307 				c = advance();
308 			}while('0'<=c && c<='9');
309 			if(c=='='){
310 				*w++='=';
311 				if(t->type==REDIR)
312 					t->type = DUP;
313 				c = advance();
314 				if('0'<=c && c<='9'){
315 					t->rtype = DUPFD;
316 					t->fd1 = t->fd0;
317 					t->fd0 = 0;
318 					do{
319 						t->fd0 = t->fd0*10+c-'0';
320 						*w++=c;
321 						c = advance();
322 					}while('0'<=c && c<='9');
323 				}
324 				else{
325 					if(t->type==PIPE)
326 						goto RedirErr;
327 					t->rtype = CLOSE;
328 				}
329 			}
330 			if(c!=']'
331 			|| t->type==DUP && (t->rtype==HERE || t->rtype==APPEND))
332 				goto RedirErr;
333 			*w++=']';
334 		}
335 		*w='\0';
336 		yylval.tree = t;
337 		if(t->type==PIPE)
338 			skipnl();
339 		return t->type;
340 	case '\'':
341 		lastdol = 0;
342 		lastword = 1;
343 		inquote = 1;
344 		for(;;){
345 			c = advance();
346 			if(c==EOF)
347 				break;
348 			if(c=='\''){
349 				if(nextc()!='\'')
350 					break;
351 				advance();
352 			}
353 			w = addutf(w, c);
354 		}
355 		if(w!=0)
356 			*w='\0';
357 		t = token(tok, WORD);
358 		t->quoted = 1;
359 		yylval.tree = t;
360 		return t->type;
361 	}
362 	if(!wordchr(c)){
363 		lastdol = 0;
364 		tok[0] = c;
365 		tok[1]='\0';
366 		return c;
367 	}
368 	for(;;){
369 		if(c=='*' || c=='[' || c=='?' || c==GLOB)
370 			w = addtok(w, GLOB);
371 		w = addutf(w, c);
372 		c = nextc();
373 		if(lastdol?!idchr(c):!wordchr(c)) break;
374 		advance();
375 	}
376 
377 	lastword = 1;
378 	lastdol = 0;
379 	if(w!=0)
380 		*w='\0';
381 	t = klook(tok);
382 	if(t->type!=WORD)
383 		lastword = 0;
384 	t->quoted = 0;
385 	yylval.tree = t;
386 	return t->type;
387 }
388