1 /*-
2 * Copyright (c) 1979, 1993
3 * The Regents of the University of California. All rights reserved.
4 *
5 * %sccs.include.proprietary.c%
6 */
7
8 #ifndef lint
9 static char sccsid[] = "@(#)ey2.c 8.1 (Berkeley) 06/06/93";
10 #endif /* not lint */
11
12 # include "ey.h"
/*
 * Token codes produced by gettok().  They start at 257, above any
 * value a single 8-bit input character can take, because gettok()
 * also returns plain characters as themselves.
 */
#define IDENTIFIER	257	/* a name or quoted literal */
#define MARK		258	/* %% or \\ section separator */
#define TERM		259	/* %term / %token / %0 */
#define LEFT		260	/* %left / %< */
#define BINARY		261	/* %binary / %nonassoc / %2 */
#define RIGHT		262	/* %right / %> */
#define PREC		263	/* %prec / %= */
#define LCURLY		264	/* %{ opening a block of code to copy */
#define C_IDENTIFIER	265	/* name followed by colon */
#define NUMBER		266	/* numeric constant; value left in numbval */

/* Stream opener defined outside this file; called as copen(name, mode_char). */
FILE *copen();
25
setup(argc,argv)26 setup(argc,argv) int argc; char *argv[];
27 { int i,j,lev,t;
28 int c;
29
30 foutput = stdout;
31 i = 1;
32 while( argc >= 2 && argv[1][0] == '-' ) {
33 while( *++(argv[1]) ){
34 switch( *argv[1] ){
35 case 'v':
36 case 'V':
37 foutput = copen("y.output", 'w' );
38 if( foutput == 0 ) error( "cannot open y.output");
39 continue;
40 case 'o':
41 case 'O':
42 oflag = 1;
43 continue;
44 case 'r':
45 case 'R':
46 oflag = 1;
47 rflag = 1;
48 continue;
49 default: error( "illegal option: %c", *argv[1]);
50 }
51 }
52 argv++;
53 argc--;
54 }
55
56 ftable = copen( oflag ? "yacc.tmp" : "y.tab.c" , 'w' );
57 if( ftable==0 ) error( "cannot open table file" );
58 if( argc > 1 ) { cin = copen( argv[1], 'r' );
59 if( cin == 0 ) error( "cannot open input" );
60 }
61 settab();
62 fprintf( cout , "#\n");
63 ctokn = "$end";
64 defin(0); /* eof */
65 extval = 0400; /* beginning of assigned values */
66 ctokn = "error";
67 defin(0);
68 ctokn = "$accept";
69 defin(1);
70 mem=mem0;
71 cnamp = cnames;
72 lev=0;
73 i=0;
74
75 while( ( t = gettok() ) != EOF ) {
76 switch( t ){
77 case IDENTIFIER: j = chfind(0);
78 trmlev[j] = lev;
79 continue;
80 case ',':
81 case ';': continue;
82 case TERM: lev=0; continue;
83 case LEFT: lev=(++i<<3)|01; continue;
84 case BINARY: lev=(++i<<3)|02; continue;
85 case RIGHT: lev=(++i<<3)|03; continue;
86 case MARK:
87 defout();
88 if( rflag ){ /* RATFOR */
89 fprintf( cout , "define yyerrok yyerrf = 0\n" );
90 fprintf( cout , "define yyclearin yychar = -1\n" );
91 fprintf( cout , "subroutine yyactr(yyprdn)\n");
92 fprintf( cout , "common/yycomn/yylval,yyval,yypv,yyvalv(150)\n" );
93 fprintf( cout , "common/yylcom/yychar,yyerrf,yydebu\n" );
94 fprintf( cout , "integer yychar, yyerrf, yydebu\n" );
95 fprintf( cout , "integer yyprdn,yyval,yylval,yypv,yyvalv\n" );
96 }
97 else {
98 fprintf( cout , "#define yyclearin yychar = -1\n" );
99 fprintf( cout , "#define yyerrok yyerrflag = 0\n" );
100 fprintf( cout , "extern int yychar, yyerrflag;\n" );
101 fprintf( cout , "\nint yyval 0;\nint *yypv;\nint yylval 0;");
102 fprintf( cout , "\nyyactr(__np__){\n");
103 }
104 break;
105 case LCURLY: defout();
106 cpycode();
107 continue;
108 case NUMBER:
109 trmset[j].value = numbval;
110 if( j < ndefout && j>2 )
111 error("please define type # of %s earlier", trmset[j].name );
112 continue;
113 default: error("bad precedence syntax, input %d", t );
114 }
115 break;
116 }
117 prdptr[0]=mem;
118 /* added production */
119 *mem++ = NTBASE;
120 *mem++ = NTBASE+1;
121 *mem++ = 1;
122 *mem++ = 0;
123 prdptr[1]=mem;
124 i=0;
125
126 /* i is 0 when a rule can begin, 1 otherwise */
127
128 for(;;) switch( t=gettok() ) {
129 case C_IDENTIFIER: if( mem == prdptr[1] ) { /* first time */
130 if( rflag ){
131 fprintf( cout , "goto 1000\n" );
132 }
133 else fprintf( cout , "\nswitch(__np__){\n");
134 }
135 if( i != 0 ) error( "previous rule not terminated" );
136 *mem = chfind(1);
137 if( *mem < NTBASE )error( "token illegal on lhs of grammar rule" );
138 i=1;
139 ++mem;
140 continue;
141 case IDENTIFIER:
142 *mem=chfind(1);
143 if(*mem < NTBASE)levprd[nprod]=trmlev[*mem];
144 mem++;
145 if(i==0) error("missing :");
146 continue;
147 case '=': levprd[nprod] |= 04;
148 if( i==0 ) error("semicolon preceeds action");
149 fprintf( cout , rflag?"\n%d ":"\ncase %d:", nprod );
150 cpyact();
151 fprintf( cout , rflag ? " return" : " break;" );
152 case '|':
153 case ';': if(i){
154 *mem++ = -nprod;
155 prdptr[++nprod] = mem;
156 levprd[nprod]=0;
157 i=0;}
158 if (t=='|'){i=1;*mem++ = *prdptr[nprod-1];}
159 continue;
160 case 0: /* End Of File */
161 case EOF:
162 case MARK: if( i != 0 ) error( "rule not terminated before %%%% or EOF" );
163 settab();
164 finact();
165 /* copy the programs which follow the rules */
166 if( t == MARK ){
167 while (( c=fgetc( cin)) != EOF ) fputc(c,cout);
168 }
169 return;
170 case PREC:
171 if( i==0 ) error( "%%prec must appear inside rule" );
172 if( gettok()!=IDENTIFIER)error("illegal %%prec syntax" );
173 j=chfind(2);
174 if(j>=NTBASE)error("nonterminal %s illegal after %%prec", nontrst[j-NTBASE].name);
175 levprd[nprod]=trmlev[j];
176 continue;
177 case LCURLY:
178 if( i!=0 ) error( "%%{ appears within a rule" );
179 cpycode();
180 continue;
181 default: error( "syntax error, input %d", t );
182 }
183 }
184
finact()185 finact(){
186 /* finish action routine */
187 register i;
188
189 if( rflag ){
190
191 fprintf( cout , "\n1000 goto(" );
192 for( i=1; i<nprod; ++i ){
193 fprintf( cout , "%d,", (levprd[i]&04)==0?999:i );
194 }
195 fprintf( cout , "999),yyprdn\n" );
196 fprintf( cout , "999 return\nend\n" );
197 fprintf( cout , "define YYERRCODE %d\n", trmset[2].value );
198 }
199 else {
200 fprintf( cout , "\n}\n}\n" );
201 fprintf( cout , "int yyerrval %d;\n", trmset[2].value );
202 }
203 }
defin(t)204 defin(t) {
205 /* define ctokn to be a terminal if t=0
206 or a nonterminal if t=1 */
207 char *cp,*p;
208 int c;
209
210
211 if (t) {
212 if( ++nnonter >= ntlim ) error("too many nonterminals, limit %d",ntlim);
213 nontrst[nnonter].name = ctokn;
214 return( NTBASE + nnonter );
215 }
216 else {
217 if( ++nterms >= tlim ) error("too many terminals, limit %d",tlim );
218 trmset[nterms].name = ctokn;
219 if( ctokn[0]==' ' && ctokn[2]=='\0' ) /* single character literal */
220 trmset[nterms].value = ctokn[1];
221 else if ( ctokn[0]==' ' && ctokn[1]=='\\' ) { /* escape sequence */
222 if( ctokn[3] == '\0' ){ /* single character escape sequence */
223 switch ( ctokn[2] ){
224 /* character which is escaped */
225 case 'n': trmset[nterms].value = '\n'; break;
226 case 'r': trmset[nterms].value = '\r'; break;
227 case 'b': trmset[nterms].value = '\b'; break;
228 case 't': trmset[nterms].value = '\t'; break;
229 case '\'': trmset[nterms].value = '\''; break;
230 case '"': trmset[nterms].value = '"'; break;
231 case '\\': trmset[nterms].value = '\\'; break;
232 default: error( "invalid escape" );
233 }
234 }
235 else if( ctokn[2] <= '7' && ctokn[2]>='0' ){ /* \nnn sequence */
236 if( ctokn[3]<'0' || ctokn[3] > '7' || ctokn[4]<'0' ||
237 ctokn[4]>'7' || ctokn[5] != '\0' ) error("illegal \\nnn construction" );
238 trmset[nterms].value = 64*(ctokn[2]-'0')+8*(ctokn[3]-'0')+ctokn[4]-'0';
239 if( trmset[nterms].value == 0 ) error( "'\\000' is illegal" );
240 }
241 }
242 else {
243 trmset[nterms].value = extval++;
244
245 }
246 trmlev[nterms] = 0;
247 return( nterms );
248 }
249 }
250
defout()251 defout(){ /* write out the defines (at the end of the declaration section) */
252
253 _REGISTER int i, c;
254 _REGISTER char *cp;
255
256 for( i=ndefout; i<=nterms; ++i ){
257
258 cp = trmset[i].name;
259 if( *cp == ' ' ) ++cp; /* literals */
260
261 for( ; (c= *cp)!='\0'; ++cp ){
262
263 if( c>='a' && c<='z' ||
264 c>='A' && c<='Z' ||
265 c>='0' && c<='9' ||
266 c=='_' ) ; /* VOID */
267 else goto nodef;
268 }
269
270 /* define it */
271
272 fprintf( cout , "%c define %s %d\n", rflag?' ':'#', trmset[i].name, trmset[i].value );
273
274 nodef: ;
275 }
276
277 ndefout = nterms+1;
278
279 }
280
chstash(c)281 chstash( c ){
282 /* put character away into cnames */
283 if( cnamp >= &cnames[cnamsz] ) error("too many characters in id's and literals" );
284 else *cnamp++ = c;
285 }
286
gettok()287 int gettok() {
288 int j, base;
289 static int peekline; /* number of '\n' seen in lookahead */
290 auto int c, match, reserve;
291
292 begin:
293 reserve = 0;
294 if( peekc>=0 ) {
295 c = peekc;
296 lineno += peekline;
297 peekc = -1;
298 peekline = 0;
299 }
300 else c = fgetc( cin);
301 while( c==' ' || c=='\n' || c=='\t' || c == '\014'){
302 if( c == '\n' ) ++lineno;
303 c=fgetc( cin);
304 }
305 if (c=='/')
306 {if (fgetc( cin)!='*')error("illegal /");
307 c=fgetc( cin);
308 while(c != EOF) {
309 if( c == '\n' ) ++lineno;
310 if (c=='*')
311 {if((c=fgetc( cin))=='/')break;}
312 else c=fgetc( cin);}
313 if (!c) return(0);
314 goto begin;}
315 j=0;
316 switch(c){
317 case '"':
318 case '\'': match = c;
319 ctokn = cnamp;
320 chstash( ' ' );
321 while(1){
322 c = fgetc( cin);
323 if( c == '\n' || c == '\0' )
324 error("illegal or missing ' or \"");
325 if( c == '\\' ){
326 c = fgetc( cin);
327 chstash( '\\' );
328 }
329 else if( c == match ) break;
330 chstash( c );
331 }
332 break;
333 case '%':
334 case '\\': switch(c=fgetc( cin))
335 {case '0': return(TERM);
336 case '<': return(LEFT);
337 case '2': return(BINARY);
338 case '>': return(RIGHT);
339 case '%':
340 case '\\': return(MARK);
341 case '=': return(PREC);
342 case '{': return(LCURLY);
343 default: reserve = 1;
344 }
345 default: if( c >= '0' && c <= '9' ){ /* number */
346 numbval = c-'0' ;
347 base = (c=='0') ? 8 : 10 ;
348 for( c=fgetc( cin); c>='0' && c<='9'; c=fgetc( cin) ){
349 numbval = numbval*base + c - '0';
350 }
351 peekc = c;
352 return(NUMBER);
353 }
354 else if( (c>='a'&&c<='z')||(c>='A'&&c<='Z')||c=='_'||c=='.'||c=='$'){
355 ctokn = cnamp;
356 while( (c>='a'&&c<='z') ||
357 (c>='A'&&c<='Z') ||
358 (c>='0'&&c<='9') ||
359 c=='_' || c=='.' || c=='$' ) {
360 chstash( c );
361 if( peekc>=0 ) { c = peekc; peekc = -1; }
362 else c = fgetc( cin);
363 }
364 }
365 else return(c);
366
367 peekc=c;
368 }
369 chstash( '\0' );
370
371 if( reserve ){ /* find a reserved word */
372 if( compare("term")) return( TERM );
373 if( compare("TERM")) return( TERM );
374 if( compare("token")) return( TERM );
375 if( compare("TOKEN")) return( TERM );
376 if( compare("left")) return( LEFT );
377 if( compare("LEFT")) return( LEFT );
378 if( compare("nonassoc")) return( BINARY );
379 if( compare("NONASSOC")) return( BINARY );
380 if( compare("binary")) return( BINARY );
381 if( compare("BINARY")) return( BINARY );
382 if( compare("right")) return( RIGHT );
383 if( compare("RIGHT")) return( RIGHT );
384 if( compare("prec")) return( PREC );
385 if( compare("PREC")) return( PREC );
386 error("invalid escape, or illegal reserved word: %s", ctokn );
387 }
388
389 /* look ahead to distinguish IDENTIFIER from C_IDENTIFIER */
390
391 look:
392 while( peekc==' ' || peekc=='\t' || peekc == '\n' || peekc == '\014' )
393 {
394 if( peekc == '\n' ) ++peekline;
395 peekc = fgetc( cin);
396 }
397
398 if( peekc != ':' ) return( IDENTIFIER );
399 peekc = -1;
400 lineno += peekline;
401 peekline = 0;
402 return( C_IDENTIFIER );
403 }
chfind(t)404 chfind(t)
405
406 { int i,j;
407
408 if (ctokn[0]==' ')t=0;
409 for(i=1;i<=nterms;i++)
410 if(compare(trmset[i].name)){
411 cnamp = ctokn;
412 return( i );
413 }
414 for(i=1;i<=nnonter;i++)
415 if(compare(nontrst[i].name)) {
416 cnamp = ctokn;
417 return( i+NTBASE );
418 }
419 /* cannot find name */
420 if( t>1 && ctokn[0] != ' ' )
421 error( "%s should have been defined earlier", ctokn );
422 return( defin( t ) );
423 }
424
cpycode()425 cpycode(){ /* copies code between \{ and \} */
426
427 int c;
428 c = fgetc( cin);
429 if( c == '\n' ) {
430 c = fgetc( cin);
431 lineno++;
432 }
433 while( c != EOF ){
434 if( c=='\\' )
435 if( (c=fgetc( cin)) == '}' ) return;
436 else fputc('\\',cout);
437 if( c=='%' )
438 if( (c=fgetc( cin)) == '}' ) return;
439 else fputc('%',cout);
440 fputc( c, cout );
441 if( c == '\n' ) ++lineno;
442 c = fgetc( cin);
443 }
444 error("eof before %%}");
445 }
446
cpyact()447 cpyact(){ /* copy C action to the next ; or closing } */
448 int brac, c, match, *i, j, s;
449
450 brac = 0;
451
452 loop:
453 c = fgetc( cin);
454 swt:
455 switch( c ){
456
457 case ';':
458 if( brac == 0 ){
459 fputc( c, cout );
460 return;
461 }
462 goto lcopy;
463
464 case '{':
465 brac++;
466 goto lcopy;
467
468 case '$':
469 s = 1;
470 c = fgetc( cin);
471 if( c == '$' ){
472 fprintf( cout , "yyval");
473 goto loop;
474 }
475 if( c == '-' ){
476 s = -s;
477 c = fgetc( cin);
478 }
479 if( c>='0' && c <= '9' ){
480 j=0;
481 while( c>='0' && c<= '9' ){
482 j= j*10+c-'0';
483 c = fgetc( cin);
484 }
485 if( rflag ) fprintf( cout , "yyvalv(yypv%c%d)", s==1?'+':'-', j );
486 else fprintf( cout , "yypv[%d]", s*j );
487 goto swt;
488 }
489 fputc( '$' , cout);
490 if( s<0 ) fputc('-', cout);
491 goto swt;
492
493 case '}':
494 brac--;
495 if( brac == 0 ){
496 fputc( c , cout);
497 return;
498 }
499 goto lcopy;
500
501 case '/': /* look for comments */
502 fputc( c ,cout);
503 c = fgetc( cin);
504 if( c != '*' ) goto swt;
505
506 /* it really is a comment */
507
508 fputc( c , cout);
509 while( (c=fgetc( cin)) != EOF ){
510 if( c=='*' ){
511 fputc( c , cout);
512 if( (c=fgetc( cin)) == '/' ) goto lcopy;
513 }
514 fputc( c , cout);
515 }
516 error( "EOF inside comment" );
517
518 case '\'': /* character constant */
519 match = '\'';
520 goto string;
521
522 case '"': /* character string */
523 match = '"';
524
525 string:
526
527 fputc( c , cout);
528 while( (c=fgetc( cin)) != EOF ){
529
530 if( c=='\\' ){
531 fputc( c , cout);
532 c=fgetc( cin);
533 }
534 else if( c==match ) goto lcopy;
535 fputc( c , cout);
536 }
537 error( "EOF in string or character constant" );
538
539 case '\0':
540 error("action does not terminate");
541 case '\n': ++lineno;
542 goto lcopy;
543
544 }
545
546 lcopy:
547 fputc( c , cout);
548 goto loop;
549 }
550