xref: /plan9/sys/src/cmd/rc/lex.c (revision f9e1cf08d3be51592e03e639fc848a68dc31a55e)
1 #include "rc.h"
2 #include "exec.h"
3 #include "io.h"
4 #include "getflags.h"
5 #include "fns.h"
6 int getnext(void);
7 
/*
 * Can c appear in an unquoted word?  EOF, NUL and every character with
 * a syntactic meaning of its own (white space, '#', operators, quotes,
 * brackets) cannot.
 */
int
wordchr(int c)
{
	static char special[] = "\n \t#;&|^$=`'{}()<>";

	if(c==EOF)
		return 0;
	/* strchr also matches the terminating NUL, so c==0 is not a word char */
	return strchr(special, c)==0;
}
13 
/*
 * Can c continue the word that follows '$', '$#' or '$"'?
 * yylex uses this instead of wordchr while lastdol is set.
 */
int
idchr(int c)
{
	/*
	 * Historically only letters, digits, '_' and '*' qualified:
	 *	'a'<=c && c<='z' || 'A'<=c && c<='Z' || '0'<=c && c<='9'
	 *	|| c=='_' || c=='*'
	 * Now anything above space qualifies unless it is one of the
	 * punctuation characters listed below.
	 */
	if(c<=' ')
		return 0;
	return strchr("!\"#$%&'()+,-./:;<=>?@[\\]^`{|}~", c)==0;
}
/* one character of lookahead held by nextc(); EOF means nothing is held */
int future = EOF;
/* nonzero when getnext() must call pprompt() before reading more input */
int doprompt = 1;
/* set by yylex while inside '...'; getnext() then leaves backslashes alone */
int inquote = 0;
/* set by skipwhite() inside a # comment; backslash-newline does not continue it */
int incomm = 0;
28 /*
29  * Look ahead in the input stream
30  */
31 
32 int
33 nextc(void)
34 {
35 	if(future==EOF)
36 		future = getnext();
37 	return future;
38 }
39 /*
40  * Consume the lookahead character.
41  */
42 
43 int
44 advance(void)
45 {
46 	int c = nextc();
47 	lastc = future;
48 	future = EOF;
49 	return c;
50 }
51 /*
52  * read a character from the input stream
53  */
54 
55 int
56 getnext(void)
57 {
58 	int c;
59 	static int peekc = EOF;
60 	if(peekc!=EOF){
61 		c = peekc;
62 		peekc = EOF;
63 		return c;
64 	}
65 	if(runq->eof)
66 		return EOF;
67 	if(doprompt)
68 		pprompt();
69 	c = rchr(runq->cmdfd);
70 	if(!inquote && c=='\\'){
71 		c = rchr(runq->cmdfd);
72 		if(c=='\n' && !incomm){		/* don't continue a comment */
73 			doprompt = 1;
74 			c=' ';
75 		}
76 		else{
77 			peekc = c;
78 			c='\\';
79 		}
80 	}
81 	doprompt = doprompt || c=='\n' || c==EOF;
82 	if(c==EOF)
83 		runq->eof++;
84 	else if(flag['V'] || ndot>=2 && flag['v']) pchr(err, c);
85 	return c;
86 }
87 
88 void
89 pprompt(void)
90 {
91 	var *prompt;
92 	if(runq->iflag){
93 		pstr(err, promptstr);
94 		flush(err);
95 		prompt = vlook("prompt");
96 		if(prompt->val && prompt->val->next)
97 			promptstr = prompt->val->next->word;
98 		else
99 			promptstr="\t";
100 	}
101 	runq->lineno++;
102 	doprompt = 0;
103 }
104 
105 void
106 skipwhite(void)
107 {
108 	int c;
109 	for(;;){
110 		c = nextc();
111 		/* Why did this used to be  if(!inquote && c=='#') ?? */
112 		if(c=='#'){
113 			incomm = 1;
114 			for(;;){
115 				c = nextc();
116 				if(c=='\n' || c==EOF) {
117 					incomm = 0;
118 					break;
119 				}
120 				advance();
121 			}
122 		}
123 		if(c==' ' || c=='\t')
124 			advance();
125 		else return;
126 	}
127 }
128 
/*
 * Skip white space and comments together with any newlines among them,
 * stopping at the first character that is neither.
 */
void
skipnl(void)
{
	skipwhite();
	while(nextc()=='\n'){
		advance();
		skipwhite();
	}
}
141 
/*
 * If the lookahead character is c, consume it and return 1;
 * otherwise leave the input alone and return 0.
 */
int
nextis(int c)
{
	if(nextc()!=c)
		return 0;
	advance();
	return 1;
}
151 
152 char*
153 addtok(char *p, int val)
154 {
155 	if(p==0)
156 		return 0;
157 	if(p==&tok[NTOK-1]){
158 		*p = 0;
159 		yyerror("token buffer too short");
160 		return 0;
161 	}
162 	*p++=val;
163 	return p;
164 }
165 
/*
 * Append c to the token at p.  When twobyte(c) or threebyte(c) holds,
 * also consume the one or two bytes that follow in the input and append
 * them.  Returns the new end of the token, or 0 as addtok does on overflow.
 */
char*
addutf(char *p, int c)
{
	int trailing;

	p = addtok(p, c);
	if(twobyte(c))		/* 2-byte escape */
		trailing = 1;
	else if(threebyte(c))	/* 3-byte escape */
		trailing = 2;
	else
		trailing = 0;
	while(trailing-- > 0)
		p = addtok(p, advance());
	return p;
}
178 int lastdol;	/* was the last token read '$' or '$#' or '"'? */
179 int lastword;	/* was the last token read a word or compound word terminator? */
180 
181 int
182 yylex(void)
183 {
184 	int c, d = nextc();
185 	char *w = tok;
186 	struct tree *t;
187 	yylval.tree = 0;
188 	/*
189 	 * Embarassing sneakiness:  if the last token read was a quoted or unquoted
190 	 * WORD then we alter the meaning of what follows.  If the next character
191 	 * is `(', we return SUB (a subscript paren) and consume the `('.  Otherwise,
192 	 * if the next character is the first character of a simple or compound word,
193 	 * we insert a `^' before it.
194 	 */
195 	if(lastword){
196 		lastword = 0;
197 		if(d=='('){
198 			advance();
199 			strcpy(tok, "( [SUB]");
200 			return SUB;
201 		}
202 		if(wordchr(d) || d=='\'' || d=='`' || d=='$' || d=='"'){
203 			strcpy(tok, "^");
204 			return '^';
205 		}
206 	}
207 	inquote = 0;
208 	skipwhite();
209 	switch(c = advance()){
210 	case EOF:
211 		lastdol = 0;
212 		strcpy(tok, "EOF");
213 		return EOF;
214 	case '$':
215 		lastdol = 1;
216 		if(nextis('#')){
217 			strcpy(tok, "$#");
218 			return COUNT;
219 		}
220 		if(nextis('"')){
221 			strcpy(tok, "$\"");
222 			return '"';
223 		}
224 		strcpy(tok, "$");
225 		return '$';
226 	case '&':
227 		lastdol = 0;
228 		if(nextis('&')){
229 			skipnl();
230 			strcpy(tok, "&&");
231 			return ANDAND;
232 		}
233 		strcpy(tok, "&");
234 		return '&';
235 	case '|':
236 		lastdol = 0;
237 		if(nextis(c)){
238 			skipnl();
239 			strcpy(tok, "||");
240 			return OROR;
241 		}
242 	case '<':
243 	case '>':
244 		lastdol = 0;
245 		/*
246 		 * funny redirection tokens:
247 		 *	redir:	arrow | arrow '[' fd ']'
248 		 *	arrow:	'<' | '<<' | '>' | '>>' | '|'
249 		 *	fd:	digit | digit '=' | digit '=' digit
250 		 *	digit:	'0'|'1'|'2'|'3'|'4'|'5'|'6'|'7'|'8'|'9'
251 		 * some possibilities are nonsensical and get a message.
252 		 */
253 		*w++=c;
254 		t = newtree();
255 		switch(c){
256 		case '|':
257 			t->type = PIPE;
258 			t->fd0 = 1;
259 			t->fd1 = 0;
260 			break;
261 		case '>':
262 			t->type = REDIR;
263 			if(nextis(c)){
264 				t->rtype = APPEND;
265 				*w++=c;
266 			}
267 			else t->rtype = WRITE;
268 			t->fd0 = 1;
269 			break;
270 		case '<':
271 			t->type = REDIR;
272 			if(nextis(c)){
273 				t->rtype = HERE;
274 				*w++=c;
275 			} else if (nextis('>')){
276 				t->rtype = RDWR;
277 				*w++=c;
278 			} else t->rtype = READ;
279 			t->fd0 = 0;
280 			break;
281 		}
282 		if(nextis('[')){
283 			*w++='[';
284 			c = advance();
285 			*w++=c;
286 			if(c<'0' || '9'<c){
287 			RedirErr:
288 				*w = 0;
289 				yyerror(t->type==PIPE?"pipe syntax"
290 						:"redirection syntax");
291 				return EOF;
292 			}
293 			t->fd0 = 0;
294 			do{
295 				t->fd0 = t->fd0*10+c-'0';
296 				*w++=c;
297 				c = advance();
298 			}while('0'<=c && c<='9');
299 			if(c=='='){
300 				*w++='=';
301 				if(t->type==REDIR)
302 					t->type = DUP;
303 				c = advance();
304 				if('0'<=c && c<='9'){
305 					t->rtype = DUPFD;
306 					t->fd1 = t->fd0;
307 					t->fd0 = 0;
308 					do{
309 						t->fd0 = t->fd0*10+c-'0';
310 						*w++=c;
311 						c = advance();
312 					}while('0'<=c && c<='9');
313 				}
314 				else{
315 					if(t->type==PIPE)
316 						goto RedirErr;
317 					t->rtype = CLOSE;
318 				}
319 			}
320 			if(c!=']'
321 			|| t->type==DUP && (t->rtype==HERE || t->rtype==APPEND))
322 				goto RedirErr;
323 			*w++=']';
324 		}
325 		*w='\0';
326 		yylval.tree = t;
327 		if(t->type==PIPE)
328 			skipnl();
329 		return t->type;
330 	case '\'':
331 		lastdol = 0;
332 		lastword = 1;
333 		inquote = 1;
334 		for(;;){
335 			c = advance();
336 			if(c==EOF)
337 				break;
338 			if(c=='\''){
339 				if(nextc()!='\'')
340 					break;
341 				advance();
342 			}
343 			w = addutf(w, c);
344 		}
345 		if(w!=0)
346 			*w='\0';
347 		t = token(tok, WORD);
348 		t->quoted = 1;
349 		yylval.tree = t;
350 		return t->type;
351 	}
352 	if(!wordchr(c)){
353 		lastdol = 0;
354 		tok[0] = c;
355 		tok[1]='\0';
356 		return c;
357 	}
358 	for(;;){
359 		/* next line should have (char)c==GLOB, but ken's compiler is broken */
360 		if(c=='*' || c=='[' || c=='?' || c==(unsigned char)GLOB)
361 			w = addtok(w, GLOB);
362 		w = addutf(w, c);
363 		c = nextc();
364 		if(lastdol?!idchr(c):!wordchr(c)) break;
365 		advance();
366 	}
367 
368 	lastword = 1;
369 	lastdol = 0;
370 	if(w!=0)
371 		*w='\0';
372 	t = klook(tok);
373 	if(t->type!=WORD)
374 		lastword = 0;
375 	t->quoted = 0;
376 	yylval.tree = t;
377 	return t->type;
378 }
379