xref: /plan9-contrib/sys/src/cmd/rc/lex.c (revision c6df144405f586b73992827d584728dc975dff14)
1 #include "rc.h"
2 #include "exec.h"
3 #include "io.h"
4 #include "fns.h"
5 
6 Rune getnext(void);	/* forward declaration: nextc() calls it before its definition below */
7 
8 int
wordchr(Rune c)9 wordchr(Rune c)		/* is c in the alphabet of words (non-delimiters)? */
10 {
11 	return c != EOF &&
12 		(c >= Runeself || strchr("\n \t#;&|^$=`'{}()<>", c) == nil);
13 }
14 
15 /*
16  * is c in the alphabet of identifiers?  as in the c compiler, treat
17  * non-ascii as alphabetic.
18  */
19 int
idchr(Rune c)20 idchr(Rune c)
21 {
22 	/*
23 	 * Formerly:
24 	 * return 'a'<=c && c<='z' || 'A'<=c && c<='Z' || '0'<=c && c<='9'
25 	 *	|| c=='_' || c=='*';
26 	 */
27 	return c != EOF && (c >= Runeself ||
28 		c > ' ' &&
29 		  strchr("!\"#$%&'()+,-./:;<=>?@[\\]^`{|}~", c) == nil);
30 }
31 
32 Rune future = EOF;	/* one-rune lookahead filled by nextc(); EOF when empty */
33 int doprompt = 1;	/* should getnext() call pprompt() before reading more input? */
34 int inquote;		/* are we processing a quoted word ('...')? */
35 int incomm;		/* are we ignoring input in a comment (#...\n)? */
36 /*
37  * Look ahead in the input stream
38  */
39 
40 Rune
nextc(void)41 nextc(void)
42 {
43 	if(future==EOF)
44 		future = getnext();
45 	return future;
46 }
47 /*
48  * Consume the lookahead character.
49  */
50 
51 Rune
advance(void)52 advance(void)
53 {
54 	Rune c = nextc();
55 
56 	lastc = future;
57 	future = EOF;
58 	return c;
59 }
60 /*
61  * read a character from the input stream
62  */
63 
64 Rune
getnext(void)65 getnext(void)
66 {
67 	Rune c;
68 	char buf[UTFmax+1];
69 	static Rune peekc = EOF;
70 
71 	if(peekc!=EOF){
72 		c = peekc;
73 		peekc = EOF;
74 		return c;
75 	}
76 	if(runq->eof)
77 		return EOF;
78 	if(doprompt)
79 		pprompt();
80 	rutf(runq->cmdfd, buf, &c);
81 	if(!inquote && c=='\\'){
82 		rutf(runq->cmdfd, buf, &c);
83 		if(c=='\n' && !incomm){		/* don't continue a comment */
84 			doprompt = 1;
85 			c=' ';
86 		}
87 		else{
88 			peekc = c;
89 			c='\\';
90 		}
91 	}
92 	doprompt = doprompt || c=='\n' || c==EOF;
93 	if(c==EOF)
94 		runq->eof++;
95 	else if(flag['V'] || ndot>=2 && flag['v']) pchr(err, c);
96 	return c;
97 }
98 
99 void
pprompt(void)100 pprompt(void)
101 {
102 	var *prompt;
103 	if(runq->iflag){
104 		pstr(err, promptstr);
105 		flush(err);
106 		prompt = vlook("prompt");
107 		if(prompt->val && prompt->val->next)
108 			promptstr = prompt->val->next->word;
109 		else
110 			promptstr="\t";
111 	}
112 	runq->lineno++;
113 	doprompt = 0;
114 }
115 
116 void
skipwhite(void)117 skipwhite(void)
118 {
119 	Rune c;
120 
121 	for(;;){
122 		c = nextc();
123 		/* Why did this used to be  if(!inquote && c=='#') ?? */
124 		if(c=='#'){
125 			incomm = 1;
126 			for(;;){
127 				c = nextc();
128 				if(c=='\n' || c==EOF) {
129 					incomm = 0;
130 					break;
131 				}
132 				advance();
133 			}
134 		}
135 		if(c==' ' || c=='\t')
136 			advance();
137 		else return;
138 	}
139 }
140 
141 void
skipnl(void)142 skipnl(void)
143 {
144 	Rune c, c0;
145 
146 	for(c0 = nextc(); ; c0 = c){
147 		skipwhite();
148 		c = nextc();
149 		if(c != c0)
150 			lastword = 0; /* change of whitespace or c is not ws */
151 		if(c!='\n')
152 			return;
153 		lastword = 0;			/* new line; continue */
154 		advance();
155 	}
156 }
157 
158 int
nextis(Rune c)159 nextis(Rune c)
160 {
161 	if(nextc()==c){
162 		advance();
163 		return 1;
164 	}
165 	return 0;
166 }
167 
168 char*
addutf(char * p,Rune c)169 addutf(char *p, Rune c)
170 {
171 	if(p==0)
172 		return 0;
173 	if(p >= &tok[NTOK-1-UTFmax*2]){
174 		*p = 0;
175 		yyerror("token buffer too short");
176 		return 0;
177 	}
178 	p += runetochar(p, &c);
179 	return p;
180 }
181 
182 int lastdol;	/* was the last token read '$' or '$#' or '$"'?  if so yylex ends the next word at the first non-idchr rune */
183 int lastword;	/* was the last token read a word or compound word terminator?  if so yylex may return SUB or an implicit '^' next */
184 
185 int
yylex(void)186 yylex(void)
187 {
188 	Rune c, d = nextc();
189 	char *w = tok;
190 	struct tree *t;
191 
192 	yylval.tree = 0;
193 	/*
194 	 * Embarrassing sneakiness: if the last token read was a quoted or
195 	 * unquoted WORD then we alter the meaning of what follows.  If the
196 	 * next character is `(', we return SUB (a subscript paren) and
197 	 * consume the `('.  Otherwise, if the next character is the first
198 	 * character of a simple or compound word, we insert a `^' before it.
199 	 */
200 	if(lastword){
201 		lastword = 0;
202 		if(d=='('){
203 			advance();
204 			strcpy(tok, "( [SUB]");
205 			return SUB;
206 		}
207 		if(wordchr(d) || d=='\'' || d=='`' || d=='$' || d=='"'){
208 			strcpy(tok, "^");
209 			return '^';
210 		}
211 	}
212 	skipwhite();
213 	switch(c = advance()){
214 	case EOF:
215 		lastdol = 0;
216 		strcpy(tok, "EOF");
217 		return EOF;
218 	case '$':
219 		lastdol = 1;
220 		if(nextis('#')){
221 			strcpy(tok, "$#");
222 			return COUNT;
223 		}
224 		if(nextis('"')){
225 			strcpy(tok, "$\"");
226 			return '"';
227 		}
228 		strcpy(tok, "$");
229 		return '$';
230 	case '&':
231 		lastdol = 0;
232 		if(nextis('&')){
233 			skipnl();
234 			strcpy(tok, "&&");
235 			return ANDAND;
236 		}
237 		strcpy(tok, "&");
238 		return '&';
239 	case '|':
240 		lastdol = 0;
241 		if(nextis(c)){
242 			skipnl();
243 			strcpy(tok, "||");
244 			return OROR;
245 		}
246 	case '<':
247 	case '>':
248 		lastdol = 0;
249 		/*
250 		 * funny redirection tokens:
251 		 *	redir:	arrow | arrow '[' fd ']'
252 		 *	arrow:	'<' | '<<' | '>' | '>>' | '|'
253 		 *	fd:	digit | digit '=' | digit '=' digit
254 		 *	digit:	'0'|'1'|'2'|'3'|'4'|'5'|'6'|'7'|'8'|'9'
255 		 * some possibilities are nonsensical and get a message.
256 		 */
257 		*w++=c;
258 		t = newtree();
259 		switch(c){
260 		case '|':
261 			t->type = PIPE;
262 			t->fd0 = 1;
263 			t->fd1 = 0;
264 			break;
265 		case '>':
266 			t->type = REDIR;
267 			if(nextis(c)){
268 				t->rtype = APPEND;
269 				*w++=c;
270 			}
271 			else t->rtype = WRITE;
272 			t->fd0 = 1;
273 			break;
274 		case '<':
275 			t->type = REDIR;
276 			if(nextis(c)){
277 				t->rtype = HERE;
278 				*w++=c;
279 			} else if (nextis('>')){
280 				t->rtype = RDWR;
281 				*w++=c;
282 			} else t->rtype = READ;
283 			t->fd0 = 0;
284 			break;
285 		}
286 		if(nextis('[')){
287 			*w++='[';
288 			c = advance();
289 			*w++=c;
290 			if(c<'0' || '9'<c){
291 			RedirErr:
292 				*w = 0;
293 				yyerror(t->type==PIPE?"pipe syntax"
294 						:"redirection syntax");
295 				return EOF;
296 			}
297 			t->fd0 = 0;
298 			do{
299 				t->fd0 = t->fd0*10+c-'0';
300 				*w++=c;
301 				c = advance();
302 			}while('0'<=c && c<='9');
303 			if(c=='='){
304 				*w++='=';
305 				if(t->type==REDIR)
306 					t->type = DUP;
307 				c = advance();
308 				if('0'<=c && c<='9'){
309 					t->rtype = DUPFD;
310 					t->fd1 = t->fd0;
311 					t->fd0 = 0;
312 					do{
313 						t->fd0 = t->fd0*10+c-'0';
314 						*w++=c;
315 						c = advance();
316 					}while('0'<=c && c<='9');
317 				}
318 				else{
319 					if(t->type==PIPE)
320 						goto RedirErr;
321 					t->rtype = CLOSE;
322 				}
323 			}
324 			if(c!=']'
325 			|| t->type==DUP && (t->rtype==HERE || t->rtype==APPEND))
326 				goto RedirErr;
327 			*w++=']';
328 		}
329 		*w='\0';
330 		yylval.tree = t;
331 		if(t->type==PIPE)
332 			skipnl();
333 		return t->type;
334 	case '\'':
335 		lastdol = 0;
336 		lastword = 1;
337 		inquote = 1;
338 		for(;;){
339 			c = advance();
340 			if(c==EOF)
341 				break;
342 			if(c=='\''){
343 				if(nextc()!='\'')
344 					break;
345 				advance();
346 			}
347 			w = addutf(w, c);
348 		}
349 		if(w!=0)
350 			*w='\0';
351 		t = token(tok, WORD);
352 		t->quoted = 1;
353 		yylval.tree = t;
354 		inquote = 0;
355 		return t->type;
356 	}
357 	if(!wordchr(c)){
358 		lastdol = 0;
359 		addutf(tok, c);
360 		return c;
361 	}
362 	for(;;){
363 		if(c=='*' || c=='[' || c=='?' || c==GLOB)
364 			w = addutf(w, GLOB);
365 		w = addutf(w, c);
366 		c = nextc();
367 		if(lastdol?!idchr(c):!wordchr(c)) break;
368 		advance();
369 	}
370 	lastword = 1;
371 	lastdol = 0;
372 	if(w!=0)
373 		*w='\0';
374 	t = klook(tok);
375 	if(t->type!=WORD)
376 		lastword = 0;
377 	t->quoted = 0;
378 	yylval.tree = t;
379 	return t->type;
380 }
381