xref: /plan9/sys/src/cmd/lex/parser.y (revision 169d3509e3830be480b499459c3245b23637fe07)
1 %token CHAR CCL NCCL STR DELIM SCON ITER NEWE NULLS
2 %left SCON '/' NEWE
3 %left '|'
4 %left '$' '^'
5 %left CHAR CCL NCCL '(' '.' STR NULLS
6 %left ITER
7 %left CAT
8 %left '*' '+' '?'
9 
10 %{
11 # include "ldefs.h"
12 #define YYSTYPE union _yystype_
13 union _yystype_
14 {
15 	int	i;
16 	uchar	*cp;
17 };
18 %}
19 %%
20 %{
21 int i;
22 int j,k;
23 int g;
24 uchar *p;
25 %}
26 acc	:	lexinput
27 	={
28 # ifdef DEBUG
29 		if(debug) sect2dump();
30 # endif
31 	}
32 	;
33 lexinput:	defns delim prods end
34 	|	defns delim end
35 	={
36 		if(!funcflag)phead2();
37 		funcflag = TRUE;
38 	}
39 	| error
40 	={
41 # ifdef DEBUG
42 		if(debug) {
43 			sect1dump();
44 			sect2dump();
45 			}
46 # endif
47 		}
48 	;
49 end:		delim | ;
50 defns:	defns STR STR
51 	={	strcpy((char*)dp,(char*)$2.cp);
52 		def[dptr] = dp;
53 		dp += strlen((char*)$2.cp) + 1;
54 		strcpy((char*)dp,(char*)$3.cp);
55 		subs[dptr++] = dp;
56 		if(dptr >= DEFSIZE)
57 			error("Too many definitions");
58 		dp += strlen((char*)$3.cp) + 1;
59 		if(dp >= dchar+DEFCHAR)
60 			error("Definitions too long");
61 		subs[dptr]=def[dptr]=0;	/* for lookup - require ending null */
62 	}
63 	|
64 	;
65 delim:	DELIM
66 	={
67 # ifdef DEBUG
68 		if(sect == DEFSECTION && debug) sect1dump();
69 # endif
70 		sect++;
71 		}
72 	;
73 prods:	prods pr
74 	={	$$.i = mn2(RNEWE,$1.i,$2.i);
75 		}
76 	|	pr
77 	={	$$.i = $1.i;}
78 	;
79 pr:	r NEWE
80 	={
81 		if(divflg == TRUE)
82 			i = mn1(S1FINAL,casecount);
83 		else i = mn1(FINAL,casecount);
84 		$$.i = mn2(RCAT,$1.i,i);
85 		divflg = FALSE;
86 		casecount++;
87 		}
88 	| error NEWE
89 	={
90 # ifdef DEBUG
91 		if(debug) sect2dump();
92 # endif
93 		}
94 r:	CHAR
95 	={	$$.i = mn0($1.i); }
96 	| STR
97 	={
98 		p = $1.cp;
99 		i = mn0(*p++);
100 		while(*p)
101 			i = mn2(RSTR,i,*p++);
102 		$$.i = i;
103 		}
104 	| '.'
105 	={	symbol['\n'] = 0;
106 		if(psave == FALSE){
107 			p = ccptr;
108 			psave = ccptr;
109 			for(i=1;i<'\n';i++){
110 				symbol[i] = 1;
111 				*ccptr++ = i;
112 				}
113 			for(i='\n'+1;i<NCH;i++){
114 				symbol[i] = 1;
115 				*ccptr++ = i;
116 				}
117 			*ccptr++ = 0;
118 			if(ccptr > ccl+CCLSIZE)
119 				error("Too many large character classes");
120 			}
121 		else
122 			p = psave;
123 		$$.i = mnp(RCCL, p);
124 		cclinter(1);
125 		}
126 	| CCL
127 	={	$$.i = mnp(RCCL,$1.cp); }
128 	| NCCL
129 	={	$$.i = mnp(RNCCL,$1.cp); }
130 	| r '*'
131 	={	$$.i = mn1(STAR,$1.i); }
132 	| r '+'
133 	={	$$.i = mn1(PLUS,$1.i); }
134 	| r '?'
135 	={	$$.i = mn1(QUEST,$1.i); }
136 	| r '|' r
137 	={	$$.i = mn2(BAR,$1.i,$3.i); }
138 	| r r %prec CAT
139 	={	$$.i = mn2(RCAT,$1.i,$2.i); }
140 	| r '/' r
141 	={	if(!divflg){
142 			j = mn1(S2FINAL,-casecount);
143 			i = mn2(RCAT,$1.i,j);
144 			$$.i = mn2(DIV,i,$3.i);
145 			}
146 		else {
147 			$$.i = mn2(RCAT,$1.i,$3.i);
148 			warning("Extra slash removed");
149 			}
150 		divflg = TRUE;
151 		}
152 	| r ITER ',' ITER '}'
153 	={	if($2.i > $4.i){
154 			i = $2.i;
155 			$2.i = $4.i;
156 			$4.i = i;
157 			}
158 		if($4.i <= 0)
159 			warning("Iteration range must be positive");
160 		else {
161 			j = $1.i;
162 			for(k = 2; k<=$2.i;k++)
163 				j = mn2(RCAT,j,dupl($1.i));
164 			for(i = $2.i+1; i<=$4.i; i++){
165 				g = dupl($1.i);
166 				for(k=2;k<=i;k++)
167 					g = mn2(RCAT,g,dupl($1.i));
168 				j = mn2(BAR,j,g);
169 				}
170 			$$.i = j;
171 			}
172 	}
173 	| r ITER '}'
174 	={
175 		if($2.i < 0)warning("Can't have negative iteration");
176 		else if($2.i == 0) $$.i = mn0(RNULLS);
177 		else {
178 			j = $1.i;
179 			for(k=2;k<=$2.i;k++)
180 				j = mn2(RCAT,j,dupl($1.i));
181 			$$.i = j;
182 			}
183 		}
184 	| r ITER ',' '}'
185 	={
186 				/* from n to infinity */
187 		if($2.i < 0)warning("Can't have negative iteration");
188 		else if($2.i == 0) $$.i = mn1(STAR,$1.i);
189 		else if($2.i == 1)$$.i = mn1(PLUS,$1.i);
190 		else {		/* >= 2 iterations minimum */
191 			j = $1.i;
192 			for(k=2;k<$2.i;k++)
193 				j = mn2(RCAT,j,dupl($1.i));
194 			k = mn1(PLUS,dupl($1.i));
195 			$$.i = mn2(RCAT,j,k);
196 			}
197 		}
198 	| SCON r
199 	={	$$.i = mn2(RSCON,$2.i,(uintptr)$1.cp); }
200 	| '^' r
201 	={	$$.i = mn1(CARAT,$2.i); }
202 	| r '$'
203 	={	i = mn0('\n');
204 		if(!divflg){
205 			j = mn1(S2FINAL,-casecount);
206 			k = mn2(RCAT,$1.i,j);
207 			$$.i = mn2(DIV,k,i);
208 			}
209 		else $$.i = mn2(RCAT,$1.i,i);
210 		divflg = TRUE;
211 		}
212 	| '(' r ')'
213 	={	$$.i = $2.i; }
214 	|	NULLS
215 	={	$$.i = mn0(RNULLS); }
216 	;
217 %%
218 int
219 yylex(void)
220 {
221 	uchar *p;
222 	int c, i;
223 	uchar  *t, *xp;
224 	int n, j, k, x;
225 	static int sectbegin;
226 	static uchar token[TOKENSIZE];
227 	static int iter;
228 
229 # ifdef DEBUG
230 	yylval.i = 0;
231 	yylval.p = 0;
232 # endif
233 
234 	if(sect == DEFSECTION) {		/* definitions section */
235 		while(!eof) {
236 			if(prev == '\n'){		/* next char is at beginning of line */
237 				getl(p=buf);
238 				switch(*p){
239 				case '%':
240 					switch(*(p+1)){
241 					case '%':
242 						lgate();
243 						Bprint(&fout,"#define YYNEWLINE %d\n",'\n');
244 						Bprint(&fout,"yylex(void){\nint nstr; extern int yyprevious;\n");
245 						sectbegin = TRUE;
246 						i = treesize*(sizeof(*name)+sizeof(*left)+
247 							sizeof(*right)+sizeof(*nullstr)+sizeof(*parent))+ALITTLEEXTRA;
248 						p = myalloc(i,1);
249 						if(p == 0)
250 							error("Too little core for parse tree");
251 						free(p);
252 						name = myalloc(treesize,sizeof(*name));
253 						left = myalloc(treesize,sizeof(*left));
254 						right = myalloc(treesize,sizeof(*right));
255 						nullstr = myalloc(treesize,sizeof(*nullstr));
256 						parent = myalloc(treesize,sizeof(*parent));
257 						ptr = myalloc(treesize,sizeof(*ptr));
258 						if(name == 0 || left == 0 || right == 0 || parent == 0 || nullstr == 0 || ptr == 0)
259 							error("Too little core for parse tree");
260 						return(freturn(DELIM));
261 					case 'p': case 'P':	/* has overridden number of positions */
262 						while(*p && !isdigit(*p))p++;
263 						maxpos = atol((char*)p);
264 # ifdef DEBUG
265 						if (debug) print("positions (%%p) now %d\n",maxpos);
266 # endif
267 						if(report == 2)report = 1;
268 						continue;
269 					case 'n': case 'N':	/* has overridden number of states */
270 						while(*p && !isdigit(*p))p++;
271 						nstates = atol((char*)p);
272 # ifdef DEBUG
273 						if(debug)print( " no. states (%%n) now %d\n",nstates);
274 # endif
275 						if(report == 2)report = 1;
276 						continue;
277 					case 'e': case 'E':		/* has overridden number of tree nodes */
278 						while(*p && !isdigit(*p))p++;
279 						treesize = atol((char*)p);
280 # ifdef DEBUG
281 						if (debug) print("treesize (%%e) now %d\n",treesize);
282 # endif
283 						if(report == 2)report = 1;
284 						continue;
285 					case 'o': case 'O':
286 						while (*p && !isdigit(*p))p++;
287 						outsize = atol((char*)p);
288 						if (report ==2) report=1;
289 						continue;
290 					case 'a': case 'A':		/* has overridden number of transitions */
291 						while(*p && !isdigit(*p))p++;
292 						if(report == 2)report = 1;
293 						ntrans = atol((char*)p);
294 # ifdef DEBUG
295 						if (debug)print("N. trans (%%a) now %d\n",ntrans);
296 # endif
297 						continue;
298 					case 'k': case 'K': /* overriden packed char classes */
299 						while (*p && !isdigit(*p))p++;
300 						if (report==2) report=1;
301 						free(pchar);
302 						pchlen = atol((char*)p);
303 # ifdef DEBUG
304 						if (debug) print( "Size classes (%%k) now %d\n",pchlen);
305 # endif
306 						pchar=pcptr=myalloc(pchlen, sizeof(*pchar));
307 						continue;
308 					case '{':
309 						lgate();
310 						while(getl(p) && strcmp((char*)p,"%}") != 0)
311 							Bprint(&fout, "%s\n",(char*)p);
312 						if(p[0] == '%') continue;
313 						error("Premature eof");
314 					case 's': case 'S':		/* start conditions */
315 						lgate();
316 						while(*p && strchr(" \t,", *p) == 0) p++;
317 						n = TRUE;
318 						while(n){
319 							while(*p && strchr(" \t,", *p)) p++;
320 							t = p;
321 							while(*p && strchr(" \t,", *p) == 0)p++;
322 							if(!*p) n = FALSE;
323 							*p++ = 0;
324 							if (*t == 0) continue;
325 							i = sptr*2;
326 							Bprint(&fout,"#define %s %d\n",(char*)t,i);
327 							strcpy((char*)sp, (char*)t);
328 							sname[sptr++] = sp;
329 							sname[sptr] = 0;	/* required by lookup */
330 							if(sptr >= STARTSIZE)
331 								error("Too many start conditions");
332 							sp += strlen((char*)sp) + 1;
333 							if(sp >= stchar+STARTCHAR)
334 								error("Start conditions too long");
335 						}
336 						continue;
337 					default:
338 						warning("Invalid request %s",p);
339 						continue;
340 					}	/* end of switch after seeing '%' */
341 				case ' ': case '\t':		/* must be code */
342 					lgate();
343 					Bprint(&fout, "%s\n",(char*)p);
344 					continue;
345 				default:		/* definition */
346 					while(*p && !isspace(*p)) p++;
347 					if(*p == 0)
348 						continue;
349 					prev = *p;
350 					*p = 0;
351 					bptr = p+1;
352 					yylval.cp = buf;
353 					if(isdigit(buf[0]))
354 						warning("Substitution strings may not begin with digits");
355 					return(freturn(STR));
356 				}
357 			}
358 			/* still sect 1, but prev != '\n' */
359 			else {
360 				p = bptr;
361 				while(*p && isspace(*p)) p++;
362 				if(*p == 0)
363 					warning("No translation given - null string assumed");
364 				strcpy((char*)token, (char*)p);
365 				yylval.cp = token;
366 				prev = '\n';
367 				return(freturn(STR));
368 			}
369 		}
370 		/* end of section one processing */
371 	} else if(sect == RULESECTION){		/* rules and actions */
372 		while(!eof){
373 			switch(c=gch()){
374 			case '\0':
375 				return(freturn(0));
376 			case '\n':
377 				if(prev == '\n') continue;
378 				x = NEWE;
379 				break;
380 			case ' ':
381 			case '\t':
382 				if(sectbegin == TRUE){
383 					cpyact();
384 					while((c=gch()) && c != '\n');
385 					continue;
386 				}
387 				if(!funcflag)phead2();
388 				funcflag = TRUE;
389 				Bprint(&fout,"case %d:\n",casecount);
390 				if(cpyact())
391 					Bprint(&fout,"break;\n");
392 				while((c=gch()) && c != '\n');
393 				if(peek == ' ' || peek == '\t' || sectbegin == TRUE){
394 					warning("Executable statements should occur right after %%");
395 					continue;
396 				}
397 				x = NEWE;
398 				break;
399 			case '%':
400 				if(prev != '\n') goto character;
401 				if(peek == '{'){	/* included code */
402 					getl(buf);
403 					while(!eof && getl(buf) && strcmp("%}",(char*)buf) != 0)
404 						Bprint(&fout,"%s\n",(char*)buf);
405 					continue;
406 				}
407 				if(peek == '%'){
408 					gch();
409 					gch();
410 					x = DELIM;
411 					break;
412 				}
413 				goto character;
414 			case '|':
415 				if(peek == ' ' || peek == '\t' || peek == '\n'){
416 					Bprint(&fout,"%d\n",30000+casecount++);
417 					continue;
418 				}
419 				x = '|';
420 				break;
421 			case '$':
422 				if(peek == '\n' || peek == ' ' || peek == '\t' || peek == '|' || peek == '/'){
423 					x = c;
424 					break;
425 				}
426 				goto character;
427 			case '^':
428 				if(prev != '\n' && scon != TRUE) goto character;	/* valid only at line begin */
429 				x = c;
430 				break;
431 			case '?':
432 			case '+':
433 			case '.':
434 			case '*':
435 			case '(':
436 			case ')':
437 			case ',':
438 			case '/':
439 				x = c;
440 				break;
441 			case '}':
442 				iter = FALSE;
443 				x = c;
444 				break;
445 			case '{':	/* either iteration or definition */
446 				if(isdigit(c=gch())){	/* iteration */
447 					iter = TRUE;
448 				ieval:
449 					i = 0;
450 					while(isdigit(c)){
451 						token[i++] = c;
452 						c = gch();
453 					}
454 					token[i] = 0;
455 					yylval.i = atol((char*)token);
456 					munputc(c);
457 					x = ITER;
458 					break;
459 				} else {		/* definition */
460 					i = 0;
461 					while(c && c!='}'){
462 						token[i++] = c;
463 						c = gch();
464 					}
465 					token[i] = 0;
466 					i = lookup(token,def);
467 					if(i < 0)
468 						warning("Definition %s not found",token);
469 					else
470 						munputs(subs[i]);
471 					continue;
472 				}
473 			case '<':		/* start condition ? */
474 				if(prev != '\n')		/* not at line begin, not start */
475 					goto character;
476 				t = slptr;
477 				do {
478 					i = 0;
479 					c = gch();
480 					while(c != ',' && c && c != '>'){
481 						token[i++] = c;
482 						c = gch();
483 					}
484 					token[i] = 0;
485 					if(i == 0)
486 						goto character;
487 					i = lookup(token,sname);
488 					if(i < 0) {
489 						warning("Undefined start condition %s",token);
490 						continue;
491 					}
492 					*slptr++ = i+1;
493 				} while(c && c != '>');
494 				*slptr++ = 0;
495 				/* check if previous value re-usable */
496 				for (xp=slist; xp<t; ){
497 					if (strcmp((char*)xp, (char*)t)==0)
498 						break;
499 					while (*xp++);
500 				}
501 				if (xp<t){
502 					/* re-use previous pointer to string */
503 					slptr=t;
504 					t=xp;
505 				}
506 				if(slptr > slist+STARTSIZE)		/* note not packed ! */
507 					error("Too many start conditions used");
508 				yylval.cp = t;
509 				x = SCON;
510 				break;
511 			case '"':
512 				i = 0;
513 				while((c=gch()) && c != '"' && c != '\n'){
514 					if(c == '\\') c = usescape(gch());
515 					token[i++] = c;
516 					if(i > TOKENSIZE){
517 						warning("String too long");
518 						i = TOKENSIZE-1;
519 						break;
520 					}
521 				}
522 				if(c == '\n') {
523 					yyline--;
524 					warning("Non-terminated string");
525 					yyline++;
526 				}
527 				token[i] = 0;
528 				if(i == 0)x = NULLS;
529 				else if(i == 1){
530 					yylval.i = token[0];
531 					x = CHAR;
532 				} else {
533 					yylval.cp = token;
534 					x = STR;
535 				}
536 				break;
537 			case '[':
538 				for(i=1;i<NCH;i++) symbol[i] = 0;
539 				x = CCL;
540 				if((c = gch()) == '^'){
541 					x = NCCL;
542 					c = gch();
543 				}
544 				while(c != ']' && c){
545 					if(c == '\\') c = usescape(gch());
546 					symbol[c] = 1;
547 					j = c;
548 					if((c=gch()) == '-' && peek != ']'){		/* range specified */
549 						c = gch();
550 						if(c == '\\') c = usescape(gch());
551 						k = c;
552 						if(j > k) {
553 							n = j;
554 							j = k;
555 							k = n;
556 						}
557 						if(!(('A' <= j && k <= 'Z') ||
558 						     ('a' <= j && k <= 'z') ||
559 						     ('0' <= j && k <= '9')))
560 							warning("Non-portable Character Class");
561 						for(n=j+1;n<=k;n++)
562 							symbol[n] = 1;		/* implementation dependent */
563 						c = gch();
564 					}
565 				}
566 				/* try to pack ccl's */
567 				i = 0;
568 				for(j=0;j<NCH;j++)
569 					if(symbol[j])token[i++] = j;
570 				token[i] = 0;
571 				p = ccl;
572 				while(p <ccptr && strcmp((char*)token,(char*)p) != 0)p++;
573 				if(p < ccptr)	/* found it */
574 					yylval.cp = p;
575 				else {
576 					yylval.cp = ccptr;
577 					strcpy((char*)ccptr,(char*)token);
578 					ccptr += strlen((char*)token) + 1;
579 					if(ccptr >= ccl+CCLSIZE)
580 						error("Too many large character classes");
581 				}
582 				cclinter(x==CCL);
583 				break;
584 			case '\\':
585 				c = usescape(gch());
586 			default:
587 			character:
588 				if(iter){	/* second part of an iteration */
589 					iter = FALSE;
590 					if('0' <= c && c <= '9')
591 						goto ieval;
592 				}
593 				if(isalpha(peek)){
594 					i = 0;
595 					yylval.cp = token;
596 					token[i++] = c;
597 					while(isalpha(peek))
598 						token[i++] = gch();
599 					if(peek == '?' || peek == '*' || peek == '+')
600 						munputc(token[--i]);
601 					token[i] = 0;
602 					if(i == 1){
603 						yylval.i = token[0];
604 						x = CHAR;
605 					}
606 					else x = STR;
607 				} else {
608 					yylval.i = c;
609 					x = CHAR;
610 				}
611 			}
612 			scon = FALSE;
613 			if(x == SCON)scon = TRUE;
614 			sectbegin = FALSE;
615 			return(freturn(x));
616 		}
617 	}
618 	/* section three */
619 	ptail();
620 # ifdef DEBUG
621 	if(debug)
622 		Bprint(&fout,"\n/*this comes from section three - debug */\n");
623 # endif
624 	while(getl(buf) && !eof)
625 		Bprint(&fout,"%s\n",(char*)buf);
626 	return(freturn(0));
627 }
628 /* end of yylex */
629 # ifdef DEBUG
630 int
freturn(int i)631 freturn(int i)
632 {
633 	if(yydebug) {
634 		print("now return ");
635 		if(i < NCH) allprint(i);
636 		else print("%d",i);
637 		print("   yylval = ");
638 		switch(i){
639 			case STR: case CCL: case NCCL:
640 				strpt(yylval.cp);
641 				break;
642 			case CHAR:
643 				allprint(yylval.i);
644 				break;
645 			default:
646 				print("%d",yylval.i);
647 				break;
648 		}
649 		print("\n");
650 	}
651 	return(i);
652 }
653 # endif
654