xref: /plan9-contrib/sys/src/cmd/rc/lex.c (revision 9b943567965ba040fd275927fbe088656eb8ce4f)
1 #include "rc.h"
2 #include "exec.h"
3 #include "io.h"
4 #include "getflags.h"
5 #include "fns.h"
6 int getnext(void);
7 
/*
 * Report whether c may appear in an unquoted word.
 * Returns 1 for a word character and 0 for EOF or for any of the
 * shell's delimiter characters.  NUL is rejected as well, because
 * strchr also matches the terminator of the delimiter string.
 */
int
wordchr(int c)
{
	if(c==EOF)
		return 0;
	return strchr("\n \t#;&|^$=`'{}()<>", c)==0;
}
13 
/*
 * Report whether c may appear in a variable name following '$'.
 * Anything above the space character is accepted unless it is one of
 * the listed punctuation characters; '*' and '_' are deliberately
 * not in that list.
 *
 * An earlier version accepted only letters, digits, '_' and '*':
 *	'a'<=c && c<='z' || 'A'<=c && c<='Z' || '0'<=c && c<='9'
 *		|| c=='_' || c=='*'
 */
int
idchr(int c)
{
	if(c<=' ')
		return 0;
	return strchr("!\"#$%&'()+,-./:;<=>?@[\\]^`{|}~", c)==0;
}
int future = EOF;	/* one-character lookahead filled by nextc(); EOF means nothing is buffered */
int doprompt = 1;	/* nonzero: getnext() calls pprompt() before reading its next character */
int inquote;	/* nonzero: getnext() leaves backslashes alone; yylex sets it while reading a quoted word */
27 /*
28  * Look ahead in the input stream
29  */
30 
31 int
32 nextc(void)
33 {
34 	if(future==EOF)
35 		future = getnext();
36 	return future;
37 }
38 /*
39  * Consume the lookahead character.
40  */
41 
42 int
43 advance(void)
44 {
45 	int c = nextc();
46 	lastc = future;
47 	future = EOF;
48 	return c;
49 }
50 /*
51  * read a character from the input stream
52  */
53 
54 int
55 getnext(void)
56 {
57 	int c;
58 	static peekc = EOF;
59 	if(peekc!=EOF){
60 		c = peekc;
61 		peekc = EOF;
62 		return c;
63 	}
64 	if(runq->eof)
65 		return EOF;
66 	if(doprompt)
67 		pprompt();
68 	c = rchr(runq->cmdfd);
69 	if(!inquote && c=='\\'){
70 		c = rchr(runq->cmdfd);
71 		if(c=='\n'){
72 			doprompt = 1;
73 			c=' ';
74 		}
75 		else{
76 			peekc = c;
77 			c='\\';
78 		}
79 	}
80 	doprompt = doprompt || c=='\n' || c==EOF;
81 	if(c==EOF)
82 		runq->eof++;
83 	else if(flag['V'] || ndot>=2 && flag['v']) pchr(err, c);
84 	return c;
85 }
86 
87 void
88 pprompt(void)
89 {
90 	var *prompt;
91 	if(runq->iflag){
92 		pstr(err, promptstr);
93 		flush(err);
94 		prompt = vlook("prompt");
95 		if(prompt->val && prompt->val->next)
96 			promptstr = prompt->val->next->word;
97 		else
98 			promptstr="\t";
99 	}
100 	runq->lineno++;
101 	doprompt = 0;
102 }
103 
104 void
105 skipwhite(void)
106 {
107 	int c;
108 	for(;;){
109 		c = nextc();
110 		if(c=='#'){	/* Why did this used to be  if(!inquote && c=='#') ?? */
111 			for(;;){
112 				c = nextc();
113 				if(c=='\n' || c==EOF)
114 					break;
115 				advance();
116 			}
117 		}
118 		if(c==' ' || c=='\t')
119 			advance();
120 		else return;
121 	}
122 }
123 
/*
 * Skip white space, comments and newlines.
 * Stops with the lookahead at the first character that skipwhite()
 * does not skip and that is not a newline.
 */
void
skipnl(void)
{
	skipwhite();
	while(nextc()=='\n'){
		advance();
		skipwhite();
	}
}
136 
/*
 * If the next input character is c, consume it and return 1;
 * otherwise leave the input alone and return 0.
 */
int
nextis(int c)
{
	if(nextc()!=c)
		return 0;
	advance();
	return 1;
}
146 
147 char*
148 addtok(char *p, int val)
149 {
150 	if(p==0)
151 		return 0;
152 	if(p==&tok[NTOK]){
153 		*p = 0;
154 		yyerror("token buffer too short");
155 		return 0;
156 	}
157 	*p++=val;
158 	return p;
159 }
160 
/*
 * Append c to the token at p and, when c is the lead byte of a
 * two- or three-byte sequence (per the twobyte/threebyte tests),
 * consume and append the one or two bytes that follow it.
 * Returns the new end of the token, or nil once addtok has failed.
 */
char*
addutf(char *p, int c)
{
	int ntrail;

	p = addtok(p, c);
	if(twobyte(c))
		ntrail = 1;
	else if(threebyte(c))
		ntrail = 2;
	else
		ntrail = 0;
	while(ntrail-- > 0)
		p = addtok(p, advance());
	return p;
}
int lastdol;	/* was the last token read '$' or '$#' or '$"' (token '"')?  if so yylex ends the next word at the first non-idchr() character */
int lastword;	/* was the last token read a word or compound word terminator?  if so yylex may return SUB or insert a '^' before the next token */
175 
176 int
177 yylex(void)
178 {
179 	int c, d = nextc();
180 	char *w = tok;
181 	struct tree *t;
182 	yylval.tree = 0;
183 	/*
184 	 * Embarassing sneakiness:  if the last token read was a quoted or unquoted
185 	 * WORD then we alter the meaning of what follows.  If the next character
186 	 * is `(', we return SUB (a subscript paren) and consume the `('.  Otherwise,
187 	 * if the next character is the first character of a simple or compound word,
188 	 * we insert a `^' before it.
189 	 */
190 	if(lastword){
191 		lastword = 0;
192 		if(d=='('){
193 			advance();
194 			strcpy(tok, "( [SUB]");
195 			return SUB;
196 		}
197 		if(wordchr(d) || d=='\'' || d=='`' || d=='$' || d=='"'){
198 			strcpy(tok, "^");
199 			return '^';
200 		}
201 	}
202 	inquote = 0;
203 	skipwhite();
204 	switch(c = advance()){
205 	case EOF:
206 		lastdol = 0;
207 		strcpy(tok, "EOF");
208 		return EOF;
209 	case '$':
210 		lastdol = 1;
211 		if(nextis('#')){
212 			strcpy(tok, "$#");
213 			return COUNT;
214 		}
215 		if(nextis('"')){
216 			strcpy(tok, "$\"");
217 			return '"';
218 		}
219 		strcpy(tok, "$");
220 		return '$';
221 	case '&':
222 		lastdol = 0;
223 		if(nextis('&')){
224 			skipnl();
225 			strcpy(tok, "&&");
226 			return ANDAND;
227 		}
228 		strcpy(tok, "&");
229 		return '&';
230 	case '|':
231 		lastdol = 0;
232 		if(nextis(c)){
233 			skipnl();
234 			strcpy(tok, "||");
235 			return OROR;
236 		}
237 	case '<':
238 	case '>':
239 		lastdol = 0;
240 		/*
241 		 * funny redirection tokens:
242 		 *	redir:	arrow | arrow '[' fd ']'
243 		 *	arrow:	'<' | '<<' | '>' | '>>' | '|'
244 		 *	fd:	digit | digit '=' | digit '=' digit
245 		 *	digit:	'0'|'1'|'2'|'3'|'4'|'5'|'6'|'7'|'8'|'9'
246 		 * some possibilities are nonsensical and get a message.
247 		 */
248 		*w++=c;
249 		t = newtree();
250 		switch(c){
251 		case '|':
252 			t->type = PIPE;
253 			t->fd0 = 1;
254 			t->fd1 = 0;
255 			break;
256 		case '>':
257 			t->type = REDIR;
258 			if(nextis(c)){
259 				t->rtype = APPEND;
260 				*w++=c;
261 			}
262 			else t->rtype = WRITE;
263 			t->fd0 = 1;
264 			break;
265 		case '<':
266 			t->type = REDIR;
267 			if(nextis(c)){
268 				t->rtype = HERE;
269 				*w++=c;
270 			}
271 			else t->rtype = READ;
272 			t->fd0 = 0;
273 			break;
274 		}
275 		if(nextis('[')){
276 			*w++='[';
277 			c = advance();
278 			*w++=c;
279 			if(c<'0' || '9'<c){
280 			RedirErr:
281 				*w = 0;
282 				yyerror(t->type==PIPE?"pipe syntax"
283 						:"redirection syntax");
284 				return EOF;
285 			}
286 			t->fd0 = 0;
287 			do{
288 				t->fd0 = t->fd0*10+c-'0';
289 				*w++=c;
290 				c = advance();
291 			}while('0'<=c && c<='9');
292 			if(c=='='){
293 				*w++='=';
294 				if(t->type==REDIR)
295 					t->type = DUP;
296 				c = advance();
297 				if('0'<=c && c<='9'){
298 					t->rtype = DUPFD;
299 					t->fd1 = t->fd0;
300 					t->fd0 = 0;
301 					do{
302 						t->fd0 = t->fd0*10+c-'0';
303 						*w++=c;
304 						c = advance();
305 					}while('0'<=c && c<='9');
306 				}
307 				else{
308 					if(t->type==PIPE)
309 						goto RedirErr;
310 					t->rtype = CLOSE;
311 				}
312 			}
313 			if(c!=']'
314 			|| t->type==DUP && (t->rtype==HERE || t->rtype==APPEND))
315 				goto RedirErr;
316 			*w++=']';
317 		}
318 		*w='\0';
319 		yylval.tree = t;
320 		if(t->type==PIPE)
321 			skipnl();
322 		return t->type;
323 	case '\'':
324 		lastdol = 0;
325 		lastword = 1;
326 		inquote = 1;
327 		for(;;){
328 			c = advance();
329 			if(c==EOF)
330 				break;
331 			if(c=='\''){
332 				if(nextc()!='\'')
333 					break;
334 				advance();
335 			}
336 			w = addutf(w, c);
337 		}
338 		if(w!=0)
339 			*w='\0';
340 		t = token(tok, WORD);
341 		t->quoted = 1;
342 		yylval.tree = t;
343 		return t->type;
344 	}
345 	if(!wordchr(c)){
346 		lastdol = 0;
347 		tok[0] = c;
348 		tok[1]='\0';
349 		return c;
350 	}
351 	for(;;){
352 		/* next line should have (char)c==GLOB, but ken's compiler is broken */
353 		if(c=='*' || c=='[' || c=='?' || c==(unsigned char)GLOB)
354 			w = addtok(w, GLOB);
355 		w = addutf(w, c);
356 		c = nextc();
357 		if(lastdol?!idchr(c):!wordchr(c)) break;
358 		advance();
359 	}
360 
361 	lastword = 1;
362 	lastdol = 0;
363 	if(w!=0)
364 		*w='\0';
365 	t = klook(tok);
366 	if(t->type!=WORD)
367 		lastword = 0;
368 	t->quoted = 0;
369 	yylval.tree = t;
370 	return t->type;
371 }
372