1 %token CHAR CCL NCCL STR DELIM SCON ITER NEWE NULLS
2 %left SCON '/' NEWE
3 %left '|'
4 %left '$' '^'
5 %left CHAR CCL NCCL '(' '.' STR NULLS
6 %left ITER
7 %left CAT
8 %left '*' '+' '?'
9
10 %{
11 # include "ldefs.h"
12 #define YYSTYPE union _yystype_
13 union _yystype_
14 {
15 int i;
16 uchar *cp;
17 };
18 %}
19 %%
20 %{
21 int i;
22 int j,k;
23 int g;
24 uchar *p;
25 %}
26 acc : lexinput
27 ={
28 # ifdef DEBUG
29 if(debug) sect2dump();
30 # endif
31 }
32 ;
33 lexinput: defns delim prods end
34 | defns delim end
35 ={
36 if(!funcflag)phead2();
37 funcflag = TRUE;
38 }
39 | error
40 ={
41 # ifdef DEBUG
42 if(debug) {
43 sect1dump();
44 sect2dump();
45 }
46 # endif
47 }
48 ;
49 end: delim | ;
50 defns: defns STR STR
51 ={ strcpy((char*)dp,(char*)$2.cp);
52 def[dptr] = dp;
53 dp += strlen((char*)$2.cp) + 1;
54 strcpy((char*)dp,(char*)$3.cp);
55 subs[dptr++] = dp;
56 if(dptr >= DEFSIZE)
57 error("Too many definitions");
58 dp += strlen((char*)$3.cp) + 1;
59 if(dp >= dchar+DEFCHAR)
60 error("Definitions too long");
61 subs[dptr]=def[dptr]=0; /* for lookup - require ending null */
62 }
63 |
64 ;
65 delim: DELIM
66 ={
67 # ifdef DEBUG
68 if(sect == DEFSECTION && debug) sect1dump();
69 # endif
70 sect++;
71 }
72 ;
73 prods: prods pr
74 ={ $$.i = mn2(RNEWE,$1.i,$2.i);
75 }
76 | pr
77 ={ $$.i = $1.i;}
78 ;
79 pr: r NEWE
80 ={
81 if(divflg == TRUE)
82 i = mn1(S1FINAL,casecount);
83 else i = mn1(FINAL,casecount);
84 $$.i = mn2(RCAT,$1.i,i);
85 divflg = FALSE;
86 casecount++;
87 }
88 | error NEWE
89 ={
90 # ifdef DEBUG
91 if(debug) sect2dump();
92 # endif
93 }
94 r: CHAR
95 ={ $$.i = mn0($1.i); }
96 | STR
97 ={
98 p = $1.cp;
99 i = mn0(*p++);
100 while(*p)
101 i = mn2(RSTR,i,*p++);
102 $$.i = i;
103 }
104 | '.'
105 ={ symbol['\n'] = 0;
106 if(psave == FALSE){
107 p = ccptr;
108 psave = ccptr;
109 for(i=1;i<'\n';i++){
110 symbol[i] = 1;
111 *ccptr++ = i;
112 }
113 for(i='\n'+1;i<NCH;i++){
114 symbol[i] = 1;
115 *ccptr++ = i;
116 }
117 *ccptr++ = 0;
118 if(ccptr > ccl+CCLSIZE)
119 error("Too many large character classes");
120 }
121 else
122 p = psave;
123 $$.i = mnp(RCCL, p);
124 cclinter(1);
125 }
126 | CCL
127 ={ $$.i = mnp(RCCL,$1.cp); }
128 | NCCL
129 ={ $$.i = mnp(RNCCL,$1.cp); }
130 | r '*'
131 ={ $$.i = mn1(STAR,$1.i); }
132 | r '+'
133 ={ $$.i = mn1(PLUS,$1.i); }
134 | r '?'
135 ={ $$.i = mn1(QUEST,$1.i); }
136 | r '|' r
137 ={ $$.i = mn2(BAR,$1.i,$3.i); }
138 | r r %prec CAT
139 ={ $$.i = mn2(RCAT,$1.i,$2.i); }
140 | r '/' r
141 ={ if(!divflg){
142 j = mn1(S2FINAL,-casecount);
143 i = mn2(RCAT,$1.i,j);
144 $$.i = mn2(DIV,i,$3.i);
145 }
146 else {
147 $$.i = mn2(RCAT,$1.i,$3.i);
148 warning("Extra slash removed");
149 }
150 divflg = TRUE;
151 }
152 | r ITER ',' ITER '}'
153 ={ if($2.i > $4.i){
154 i = $2.i;
155 $2.i = $4.i;
156 $4.i = i;
157 }
158 if($4.i <= 0)
159 warning("Iteration range must be positive");
160 else {
161 j = $1.i;
162 for(k = 2; k<=$2.i;k++)
163 j = mn2(RCAT,j,dupl($1.i));
164 for(i = $2.i+1; i<=$4.i; i++){
165 g = dupl($1.i);
166 for(k=2;k<=i;k++)
167 g = mn2(RCAT,g,dupl($1.i));
168 j = mn2(BAR,j,g);
169 }
170 $$.i = j;
171 }
172 }
173 | r ITER '}'
174 ={
175 if($2.i < 0)warning("Can't have negative iteration");
176 else if($2.i == 0) $$.i = mn0(RNULLS);
177 else {
178 j = $1.i;
179 for(k=2;k<=$2.i;k++)
180 j = mn2(RCAT,j,dupl($1.i));
181 $$.i = j;
182 }
183 }
184 | r ITER ',' '}'
185 ={
186 /* from n to infinity */
187 if($2.i < 0)warning("Can't have negative iteration");
188 else if($2.i == 0) $$.i = mn1(STAR,$1.i);
189 else if($2.i == 1)$$.i = mn1(PLUS,$1.i);
190 else { /* >= 2 iterations minimum */
191 j = $1.i;
192 for(k=2;k<$2.i;k++)
193 j = mn2(RCAT,j,dupl($1.i));
194 k = mn1(PLUS,dupl($1.i));
195 $$.i = mn2(RCAT,j,k);
196 }
197 }
198 | SCON r
199 ={ $$.i = mn2(RSCON,$2.i,(uintptr)$1.cp); }
200 | '^' r
201 ={ $$.i = mn1(CARAT,$2.i); }
202 | r '$'
203 ={ i = mn0('\n');
204 if(!divflg){
205 j = mn1(S2FINAL,-casecount);
206 k = mn2(RCAT,$1.i,j);
207 $$.i = mn2(DIV,k,i);
208 }
209 else $$.i = mn2(RCAT,$1.i,i);
210 divflg = TRUE;
211 }
212 | '(' r ')'
213 ={ $$.i = $2.i; }
214 | NULLS
215 ={ $$.i = mn0(RNULLS); }
216 ;
217 %%
218 int
219 yylex(void)
220 {
221 uchar *p;
222 int c, i;
223 uchar *t, *xp;
224 int n, j, k, x;
225 static int sectbegin;
226 static uchar token[TOKENSIZE];
227 static int iter;
228
229 # ifdef DEBUG
230 yylval.i = 0;
231 yylval.p = 0;
232 # endif
233
234 if(sect == DEFSECTION) { /* definitions section */
235 while(!eof) {
236 if(prev == '\n'){ /* next char is at beginning of line */
237 getl(p=buf);
238 switch(*p){
239 case '%':
240 switch(*(p+1)){
241 case '%':
242 lgate();
243 Bprint(&fout,"#define YYNEWLINE %d\n",'\n');
244 Bprint(&fout,"yylex(void){\nint nstr; extern int yyprevious;\n");
245 sectbegin = TRUE;
246 i = treesize*(sizeof(*name)+sizeof(*left)+
247 sizeof(*right)+sizeof(*nullstr)+sizeof(*parent))+ALITTLEEXTRA;
248 p = myalloc(i,1);
249 if(p == 0)
250 error("Too little core for parse tree");
251 free(p);
252 name = myalloc(treesize,sizeof(*name));
253 left = myalloc(treesize,sizeof(*left));
254 right = myalloc(treesize,sizeof(*right));
255 nullstr = myalloc(treesize,sizeof(*nullstr));
256 parent = myalloc(treesize,sizeof(*parent));
257 ptr = myalloc(treesize,sizeof(*ptr));
258 if(name == 0 || left == 0 || right == 0 || parent == 0 || nullstr == 0 || ptr == 0)
259 error("Too little core for parse tree");
260 return(freturn(DELIM));
261 case 'p': case 'P': /* has overridden number of positions */
262 while(*p && !isdigit(*p))p++;
263 maxpos = atol((char*)p);
264 # ifdef DEBUG
265 if (debug) print("positions (%%p) now %d\n",maxpos);
266 # endif
267 if(report == 2)report = 1;
268 continue;
269 case 'n': case 'N': /* has overridden number of states */
270 while(*p && !isdigit(*p))p++;
271 nstates = atol((char*)p);
272 # ifdef DEBUG
273 if(debug)print( " no. states (%%n) now %d\n",nstates);
274 # endif
275 if(report == 2)report = 1;
276 continue;
277 case 'e': case 'E': /* has overridden number of tree nodes */
278 while(*p && !isdigit(*p))p++;
279 treesize = atol((char*)p);
280 # ifdef DEBUG
281 if (debug) print("treesize (%%e) now %d\n",treesize);
282 # endif
283 if(report == 2)report = 1;
284 continue;
285 case 'o': case 'O':
286 while (*p && !isdigit(*p))p++;
287 outsize = atol((char*)p);
288 if (report ==2) report=1;
289 continue;
290 case 'a': case 'A': /* has overridden number of transitions */
291 while(*p && !isdigit(*p))p++;
292 if(report == 2)report = 1;
293 ntrans = atol((char*)p);
294 # ifdef DEBUG
295 if (debug)print("N. trans (%%a) now %d\n",ntrans);
296 # endif
297 continue;
298 case 'k': case 'K': /* overriden packed char classes */
299 while (*p && !isdigit(*p))p++;
300 if (report==2) report=1;
301 free(pchar);
302 pchlen = atol((char*)p);
303 # ifdef DEBUG
304 if (debug) print( "Size classes (%%k) now %d\n",pchlen);
305 # endif
306 pchar=pcptr=myalloc(pchlen, sizeof(*pchar));
307 continue;
308 case '{':
309 lgate();
310 while(getl(p) && strcmp((char*)p,"%}") != 0)
311 Bprint(&fout, "%s\n",(char*)p);
312 if(p[0] == '%') continue;
313 error("Premature eof");
314 case 's': case 'S': /* start conditions */
315 lgate();
316 while(*p && strchr(" \t,", *p) == 0) p++;
317 n = TRUE;
318 while(n){
319 while(*p && strchr(" \t,", *p)) p++;
320 t = p;
321 while(*p && strchr(" \t,", *p) == 0)p++;
322 if(!*p) n = FALSE;
323 *p++ = 0;
324 if (*t == 0) continue;
325 i = sptr*2;
326 Bprint(&fout,"#define %s %d\n",(char*)t,i);
327 strcpy((char*)sp, (char*)t);
328 sname[sptr++] = sp;
329 sname[sptr] = 0; /* required by lookup */
330 if(sptr >= STARTSIZE)
331 error("Too many start conditions");
332 sp += strlen((char*)sp) + 1;
333 if(sp >= stchar+STARTCHAR)
334 error("Start conditions too long");
335 }
336 continue;
337 default:
338 warning("Invalid request %s",p);
339 continue;
340 } /* end of switch after seeing '%' */
341 case ' ': case '\t': /* must be code */
342 lgate();
343 Bprint(&fout, "%s\n",(char*)p);
344 continue;
345 default: /* definition */
346 while(*p && !isspace(*p)) p++;
347 if(*p == 0)
348 continue;
349 prev = *p;
350 *p = 0;
351 bptr = p+1;
352 yylval.cp = buf;
353 if(isdigit(buf[0]))
354 warning("Substitution strings may not begin with digits");
355 return(freturn(STR));
356 }
357 }
358 /* still sect 1, but prev != '\n' */
359 else {
360 p = bptr;
361 while(*p && isspace(*p)) p++;
362 if(*p == 0)
363 warning("No translation given - null string assumed");
364 strcpy((char*)token, (char*)p);
365 yylval.cp = token;
366 prev = '\n';
367 return(freturn(STR));
368 }
369 }
370 /* end of section one processing */
371 } else if(sect == RULESECTION){ /* rules and actions */
372 while(!eof){
373 switch(c=gch()){
374 case '\0':
375 return(freturn(0));
376 case '\n':
377 if(prev == '\n') continue;
378 x = NEWE;
379 break;
380 case ' ':
381 case '\t':
382 if(sectbegin == TRUE){
383 cpyact();
384 while((c=gch()) && c != '\n');
385 continue;
386 }
387 if(!funcflag)phead2();
388 funcflag = TRUE;
389 Bprint(&fout,"case %d:\n",casecount);
390 if(cpyact())
391 Bprint(&fout,"break;\n");
392 while((c=gch()) && c != '\n');
393 if(peek == ' ' || peek == '\t' || sectbegin == TRUE){
394 warning("Executable statements should occur right after %%");
395 continue;
396 }
397 x = NEWE;
398 break;
399 case '%':
400 if(prev != '\n') goto character;
401 if(peek == '{'){ /* included code */
402 getl(buf);
403 while(!eof && getl(buf) && strcmp("%}",(char*)buf) != 0)
404 Bprint(&fout,"%s\n",(char*)buf);
405 continue;
406 }
407 if(peek == '%'){
408 gch();
409 gch();
410 x = DELIM;
411 break;
412 }
413 goto character;
414 case '|':
415 if(peek == ' ' || peek == '\t' || peek == '\n'){
416 Bprint(&fout,"%d\n",30000+casecount++);
417 continue;
418 }
419 x = '|';
420 break;
421 case '$':
422 if(peek == '\n' || peek == ' ' || peek == '\t' || peek == '|' || peek == '/'){
423 x = c;
424 break;
425 }
426 goto character;
427 case '^':
428 if(prev != '\n' && scon != TRUE) goto character; /* valid only at line begin */
429 x = c;
430 break;
431 case '?':
432 case '+':
433 case '.':
434 case '*':
435 case '(':
436 case ')':
437 case ',':
438 case '/':
439 x = c;
440 break;
441 case '}':
442 iter = FALSE;
443 x = c;
444 break;
445 case '{': /* either iteration or definition */
446 if(isdigit(c=gch())){ /* iteration */
447 iter = TRUE;
448 ieval:
449 i = 0;
450 while(isdigit(c)){
451 token[i++] = c;
452 c = gch();
453 }
454 token[i] = 0;
455 yylval.i = atol((char*)token);
456 munputc(c);
457 x = ITER;
458 break;
459 } else { /* definition */
460 i = 0;
461 while(c && c!='}'){
462 token[i++] = c;
463 c = gch();
464 }
465 token[i] = 0;
466 i = lookup(token,def);
467 if(i < 0)
468 warning("Definition %s not found",token);
469 else
470 munputs(subs[i]);
471 continue;
472 }
473 case '<': /* start condition ? */
474 if(prev != '\n') /* not at line begin, not start */
475 goto character;
476 t = slptr;
477 do {
478 i = 0;
479 c = gch();
480 while(c != ',' && c && c != '>'){
481 token[i++] = c;
482 c = gch();
483 }
484 token[i] = 0;
485 if(i == 0)
486 goto character;
487 i = lookup(token,sname);
488 if(i < 0) {
489 warning("Undefined start condition %s",token);
490 continue;
491 }
492 *slptr++ = i+1;
493 } while(c && c != '>');
494 *slptr++ = 0;
495 /* check if previous value re-usable */
496 for (xp=slist; xp<t; ){
497 if (strcmp((char*)xp, (char*)t)==0)
498 break;
499 while (*xp++);
500 }
501 if (xp<t){
502 /* re-use previous pointer to string */
503 slptr=t;
504 t=xp;
505 }
506 if(slptr > slist+STARTSIZE) /* note not packed ! */
507 error("Too many start conditions used");
508 yylval.cp = t;
509 x = SCON;
510 break;
511 case '"':
512 i = 0;
513 while((c=gch()) && c != '"' && c != '\n'){
514 if(c == '\\') c = usescape(gch());
515 token[i++] = c;
516 if(i > TOKENSIZE){
517 warning("String too long");
518 i = TOKENSIZE-1;
519 break;
520 }
521 }
522 if(c == '\n') {
523 yyline--;
524 warning("Non-terminated string");
525 yyline++;
526 }
527 token[i] = 0;
528 if(i == 0)x = NULLS;
529 else if(i == 1){
530 yylval.i = token[0];
531 x = CHAR;
532 } else {
533 yylval.cp = token;
534 x = STR;
535 }
536 break;
537 case '[':
538 for(i=1;i<NCH;i++) symbol[i] = 0;
539 x = CCL;
540 if((c = gch()) == '^'){
541 x = NCCL;
542 c = gch();
543 }
544 while(c != ']' && c){
545 if(c == '\\') c = usescape(gch());
546 symbol[c] = 1;
547 j = c;
548 if((c=gch()) == '-' && peek != ']'){ /* range specified */
549 c = gch();
550 if(c == '\\') c = usescape(gch());
551 k = c;
552 if(j > k) {
553 n = j;
554 j = k;
555 k = n;
556 }
557 if(!(('A' <= j && k <= 'Z') ||
558 ('a' <= j && k <= 'z') ||
559 ('0' <= j && k <= '9')))
560 warning("Non-portable Character Class");
561 for(n=j+1;n<=k;n++)
562 symbol[n] = 1; /* implementation dependent */
563 c = gch();
564 }
565 }
566 /* try to pack ccl's */
567 i = 0;
568 for(j=0;j<NCH;j++)
569 if(symbol[j])token[i++] = j;
570 token[i] = 0;
571 p = ccl;
572 while(p <ccptr && strcmp((char*)token,(char*)p) != 0)p++;
573 if(p < ccptr) /* found it */
574 yylval.cp = p;
575 else {
576 yylval.cp = ccptr;
577 strcpy((char*)ccptr,(char*)token);
578 ccptr += strlen((char*)token) + 1;
579 if(ccptr >= ccl+CCLSIZE)
580 error("Too many large character classes");
581 }
582 cclinter(x==CCL);
583 break;
584 case '\\':
585 c = usescape(gch());
586 default:
587 character:
588 if(iter){ /* second part of an iteration */
589 iter = FALSE;
590 if('0' <= c && c <= '9')
591 goto ieval;
592 }
593 if(isalpha(peek)){
594 i = 0;
595 yylval.cp = token;
596 token[i++] = c;
597 while(isalpha(peek))
598 token[i++] = gch();
599 if(peek == '?' || peek == '*' || peek == '+')
600 munputc(token[--i]);
601 token[i] = 0;
602 if(i == 1){
603 yylval.i = token[0];
604 x = CHAR;
605 }
606 else x = STR;
607 } else {
608 yylval.i = c;
609 x = CHAR;
610 }
611 }
612 scon = FALSE;
613 if(x == SCON)scon = TRUE;
614 sectbegin = FALSE;
615 return(freturn(x));
616 }
617 }
618 /* section three */
619 ptail();
620 # ifdef DEBUG
621 if(debug)
622 Bprint(&fout,"\n/*this comes from section three - debug */\n");
623 # endif
624 while(getl(buf) && !eof)
625 Bprint(&fout,"%s\n",(char*)buf);
626 return(freturn(0));
627 }
628 /* end of yylex */
629 # ifdef DEBUG
630 int
freturn(int i)631 freturn(int i)
632 {
633 if(yydebug) {
634 print("now return ");
635 if(i < NCH) allprint(i);
636 else print("%d",i);
637 print(" yylval = ");
638 switch(i){
639 case STR: case CCL: case NCCL:
640 strpt(yylval.cp);
641 break;
642 case CHAR:
643 allprint(yylval.i);
644 break;
645 default:
646 print("%d",yylval.i);
647 break;
648 }
649 print("\n");
650 }
651 return(i);
652 }
653 # endif
654