xref: /openbsd-src/usr.bin/yacc/lalr.c (revision 2b0358df1d88d06ef4139321dd05bd5e05d91eaf)
1 /*	$OpenBSD: lalr.c,v 1.8 2003/06/19 16:34:53 pvalchev Exp $	*/
2 /*	$NetBSD: lalr.c,v 1.4 1996/03/19 03:21:33 jtc Exp $	*/
3 
4 /*
5  * Copyright (c) 1989 The Regents of the University of California.
6  * All rights reserved.
7  *
8  * This code is derived from software contributed to Berkeley by
9  * Robert Paul Corbett.
10  *
11  * Redistribution and use in source and binary forms, with or without
12  * modification, are permitted provided that the following conditions
13  * are met:
14  * 1. Redistributions of source code must retain the above copyright
15  *    notice, this list of conditions and the following disclaimer.
16  * 2. Redistributions in binary form must reproduce the above copyright
17  *    notice, this list of conditions and the following disclaimer in the
18  *    documentation and/or other materials provided with the distribution.
19  * 3. Neither the name of the University nor the names of its contributors
20  *    may be used to endorse or promote products derived from this software
21  *    without specific prior written permission.
22  *
23  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
24  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
25  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
26  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
27  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
28  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
29  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
30  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
31  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
32  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
33  * SUCH DAMAGE.
34  */
35 
36 #ifndef lint
37 #if 0
38 static char sccsid[] = "@(#)lalr.c	5.3 (Berkeley) 6/1/90";
39 #else
40 static char rcsid[] = "$OpenBSD: lalr.c,v 1.8 2003/06/19 16:34:53 pvalchev Exp $";
41 #endif
42 #endif /* not lint */
43 
44 #include "defs.h"
45 
46 typedef
47   struct shorts
48     {
49       struct shorts *next;
50       short value;
51     }
52   shorts;
53 
54 int tokensetsize;
55 short *lookaheads;
56 short *LAruleno;
57 unsigned *LA;
58 short *accessing_symbol;
59 core **state_table;
60 shifts **shift_table;
61 reductions **reduction_table;
62 short *goto_map;
63 short *from_state;
64 short *to_state;
65 
66 short **transpose();
67 void set_state_table(void);
68 void set_accessing_symbol(void);
69 void set_shift_table(void);
70 void set_reduction_table(void);
71 void set_maxrhs(void);
72 void initialize_LA(void);
73 void set_goto_map(void);
74 void initialize_F(void);
75 void build_relations(void);
76 void compute_FOLLOWS(void);
77 void compute_lookaheads(void);
78 int map_goto(int, int);
79 void digraph(short **);
80 void add_lookback_edge(int, int, int);
81 void traverse(int);
82 
83 static int infinity;
84 static int maxrhs;
85 static int ngotos;
86 static unsigned *F;
87 static short **includes;
88 static shorts **lookback;
89 static short **R;
90 static short *INDEX;
91 static short *VERTICES;
92 static int top;
93 
94 void
95 lalr(void)
96 {
97     tokensetsize = WORDSIZE(ntokens);
98 
99     set_state_table();
100     set_accessing_symbol();
101     set_shift_table();
102     set_reduction_table();
103     set_maxrhs();
104     initialize_LA();
105     set_goto_map();
106     initialize_F();
107     build_relations();
108     compute_FOLLOWS();
109     compute_lookaheads();
110 }
111 
112 
113 void
114 set_state_table(void)
115 {
116     core *sp;
117 
118     state_table = NEW2(nstates, core *);
119     for (sp = first_state; sp; sp = sp->next)
120 	state_table[sp->number] = sp;
121 }
122 
123 
124 void
125 set_accessing_symbol(void)
126 {
127     core *sp;
128 
129     accessing_symbol = NEW2(nstates, short);
130     for (sp = first_state; sp; sp = sp->next)
131 	accessing_symbol[sp->number] = sp->accessing_symbol;
132 }
133 
134 
135 void
136 set_shift_table(void)
137 {
138     shifts *sp;
139 
140     shift_table = NEW2(nstates, shifts *);
141     for (sp = first_shift; sp; sp = sp->next)
142 	shift_table[sp->number] = sp;
143 }
144 
145 
146 void
147 set_reduction_table(void)
148 {
149     reductions *rp;
150 
151     reduction_table = NEW2(nstates, reductions *);
152     for (rp = first_reduction; rp; rp = rp->next)
153 	reduction_table[rp->number] = rp;
154 }
155 
156 
157 void
158 set_maxrhs(void)
159 {
160   short *itemp;
161   short *item_end;
162   int length;
163   int max;
164 
165   length = 0;
166   max = 0;
167   item_end = ritem + nitems;
168   for (itemp = ritem; itemp < item_end; itemp++)
169     {
170       if (*itemp >= 0)
171 	{
172 	  length++;
173 	}
174       else
175 	{
176 	  if (length > max) max = length;
177 	  length = 0;
178 	}
179     }
180 
181   maxrhs = max;
182 }
183 
184 
185 void
186 initialize_LA(void)
187 {
188   int i, j, k;
189   reductions *rp;
190 
191   lookaheads = NEW2(nstates + 1, short);
192 
193   k = 0;
194   for (i = 0; i < nstates; i++)
195     {
196       lookaheads[i] = k;
197       rp = reduction_table[i];
198       if (rp)
199 	k += rp->nreds;
200     }
201   lookaheads[nstates] = k;
202 
203   LA = NEW2(k * tokensetsize, unsigned);
204   LAruleno = NEW2(k, short);
205   lookback = NEW2(k, shorts *);
206 
207   k = 0;
208   for (i = 0; i < nstates; i++)
209     {
210       rp = reduction_table[i];
211       if (rp)
212 	{
213 	  for (j = 0; j < rp->nreds; j++)
214 	    {
215 	      LAruleno[k] = rp->rules[j];
216 	      k++;
217 	    }
218 	}
219     }
220 }
221 
222 void
223 set_goto_map(void)
224 {
225   shifts *sp;
226   int i;
227   int symbol;
228   int k;
229   short *temp_map;
230   int state2;
231   int state1;
232 
233   goto_map = NEW2(nvars + 1, short) - ntokens;
234   temp_map = NEW2(nvars + 1, short) - ntokens;
235 
236   ngotos = 0;
237   for (sp = first_shift; sp; sp = sp->next)
238     {
239       for (i = sp->nshifts - 1; i >= 0; i--)
240 	{
241 	  symbol = accessing_symbol[sp->shift[i]];
242 
243 	  if (ISTOKEN(symbol)) break;
244 
245 	  if (ngotos == MAXSHORT)
246 	    fatal("too many gotos");
247 
248 	  ngotos++;
249 	  goto_map[symbol]++;
250         }
251     }
252 
253   k = 0;
254   for (i = ntokens; i < nsyms; i++)
255     {
256       temp_map[i] = k;
257       k += goto_map[i];
258     }
259 
260   for (i = ntokens; i < nsyms; i++)
261     goto_map[i] = temp_map[i];
262 
263   goto_map[nsyms] = ngotos;
264   temp_map[nsyms] = ngotos;
265 
266   from_state = NEW2(ngotos, short);
267   to_state = NEW2(ngotos, short);
268 
269   for (sp = first_shift; sp; sp = sp->next)
270     {
271       state1 = sp->number;
272       for (i = sp->nshifts - 1; i >= 0; i--)
273 	{
274 	  state2 = sp->shift[i];
275 	  symbol = accessing_symbol[state2];
276 
277 	  if (ISTOKEN(symbol)) break;
278 
279 	  k = temp_map[symbol]++;
280 	  from_state[k] = state1;
281 	  to_state[k] = state2;
282 	}
283     }
284 
285   FREE(temp_map + ntokens);
286 }
287 
288 
289 
290 /*  Map_goto maps a state/symbol pair into its numeric representation.	*/
291 
292 int
293 map_goto(int state, int symbol)
294 {
295     int high;
296     int low;
297     int middle;
298     int s;
299 
300     low = goto_map[symbol];
301     high = goto_map[symbol + 1];
302 
303     for (;;)
304     {
305 	assert(low <= high);
306 	middle = (low + high) >> 1;
307 	s = from_state[middle];
308 	if (s == state)
309 	    return (middle);
310 	else if (s < state)
311 	    low = middle + 1;
312 	else
313 	    high = middle - 1;
314     }
315 }
316 
317 
318 void
319 initialize_F(void)
320 {
321   int i;
322   int j;
323   int k;
324   shifts *sp;
325   short *edge;
326   unsigned *rowp;
327   short *rp;
328   short **reads;
329   int nedges;
330   int stateno;
331   int symbol;
332   int nwords;
333 
334   nwords = ngotos * tokensetsize;
335   F = NEW2(nwords, unsigned);
336 
337   reads = NEW2(ngotos, short *);
338   edge = NEW2(ngotos + 1, short);
339   nedges = 0;
340 
341   rowp = F;
342   for (i = 0; i < ngotos; i++)
343     {
344       stateno = to_state[i];
345       sp = shift_table[stateno];
346 
347       if (sp)
348 	{
349 	  k = sp->nshifts;
350 
351 	  for (j = 0; j < k; j++)
352 	    {
353 	      symbol = accessing_symbol[sp->shift[j]];
354 	      if (ISVAR(symbol))
355 		break;
356 	      SETBIT(rowp, symbol);
357 	    }
358 
359 	  for (; j < k; j++)
360 	    {
361 	      symbol = accessing_symbol[sp->shift[j]];
362 	      if (nullable[symbol])
363 		edge[nedges++] = map_goto(stateno, symbol);
364 	    }
365 
366 	  if (nedges)
367 	    {
368 	      reads[i] = rp = NEW2(nedges + 1, short);
369 
370 	      for (j = 0; j < nedges; j++)
371 		rp[j] = edge[j];
372 
373 	      rp[nedges] = -1;
374 	      nedges = 0;
375 	    }
376 	}
377 
378       rowp += tokensetsize;
379     }
380 
381   SETBIT(F, 0);
382   digraph(reads);
383 
384   for (i = 0; i < ngotos; i++)
385     {
386       if (reads[i])
387 	FREE(reads[i]);
388     }
389 
390   FREE(reads);
391   FREE(edge);
392 }
393 
394 
395 void
396 build_relations(void)
397 {
398   int i;
399   int j;
400   int k;
401   short *rulep;
402   short *rp;
403   shifts *sp;
404   int length;
405   int nedges;
406   int done;
407   int state1;
408   int stateno;
409   int symbol1;
410   int symbol2;
411   short *shortp;
412   short *edge;
413   short *states;
414   short **new_includes;
415 
416   includes = NEW2(ngotos, short *);
417   edge = NEW2(ngotos + 1, short);
418   states = NEW2(maxrhs + 1, short);
419 
420   for (i = 0; i < ngotos; i++)
421     {
422       nedges = 0;
423       state1 = from_state[i];
424       symbol1 = accessing_symbol[to_state[i]];
425 
426       for (rulep = derives[symbol1]; *rulep >= 0; rulep++)
427 	{
428 	  length = 1;
429 	  states[0] = state1;
430 	  stateno = state1;
431 
432 	  for (rp = ritem + rrhs[*rulep]; *rp >= 0; rp++)
433 	    {
434 	      symbol2 = *rp;
435 	      sp = shift_table[stateno];
436 	      k = sp->nshifts;
437 
438 	      for (j = 0; j < k; j++)
439 		{
440 		  stateno = sp->shift[j];
441 		  if (accessing_symbol[stateno] == symbol2) break;
442 		}
443 
444 	      states[length++] = stateno;
445 	    }
446 
447 	  add_lookback_edge(stateno, *rulep, i);
448 
449 	  length--;
450 	  done = 0;
451 	  while (!done)
452 	    {
453 	      done = 1;
454 	      rp--;
455 	      if (ISVAR(*rp))
456 		{
457 		  stateno = states[--length];
458 		  edge[nedges++] = map_goto(stateno, *rp);
459 		  if (nullable[*rp] && length > 0) done = 0;
460 		}
461 	    }
462 	}
463 
464       if (nedges)
465 	{
466 	  includes[i] = shortp = NEW2(nedges + 1, short);
467 	  for (j = 0; j < nedges; j++)
468 	    shortp[j] = edge[j];
469 	  shortp[nedges] = -1;
470 	}
471     }
472 
473   new_includes = transpose(includes, ngotos);
474 
475   for (i = 0; i < ngotos; i++)
476     if (includes[i])
477       FREE(includes[i]);
478 
479   FREE(includes);
480 
481   includes = new_includes;
482 
483   FREE(edge);
484   FREE(states);
485 }
486 
487 void
488 add_lookback_edge(int stateno, int ruleno, int gotono)
489 {
490     int i, k;
491     int found;
492     shorts *sp;
493 
494     i = lookaheads[stateno];
495     k = lookaheads[stateno + 1];
496     found = 0;
497     while (!found && i < k)
498     {
499 	if (LAruleno[i] == ruleno)
500 	    found = 1;
501 	else
502 	    ++i;
503     }
504     assert(found);
505 
506     sp = NEW(shorts);
507     sp->next = lookback[i];
508     sp->value = gotono;
509     lookback[i] = sp;
510 }
511 
512 
513 
514 short **
515 transpose(short **R, int n)
516 {
517   short **new_R;
518   short **temp_R;
519   short *nedges;
520   short *sp;
521   int i;
522   int k;
523 
524   nedges = NEW2(n, short);
525 
526   for (i = 0; i < n; i++)
527     {
528       sp = R[i];
529       if (sp)
530 	{
531 	  while (*sp >= 0)
532 	    nedges[*sp++]++;
533 	}
534     }
535 
536   new_R = NEW2(n, short *);
537   temp_R = NEW2(n, short *);
538 
539   for (i = 0; i < n; i++)
540     {
541       k = nedges[i];
542       if (k > 0)
543 	{
544 	  sp = NEW2(k + 1, short);
545 	  new_R[i] = sp;
546 	  temp_R[i] = sp;
547 	  sp[k] = -1;
548 	}
549     }
550 
551   FREE(nedges);
552 
553   for (i = 0; i < n; i++)
554     {
555       sp = R[i];
556       if (sp)
557 	{
558 	  while (*sp >= 0)
559 	    *temp_R[*sp++]++ = i;
560 	}
561     }
562 
563   FREE(temp_R);
564 
565   return (new_R);
566 }
567 
568 
569 void
570 compute_FOLLOWS(void)
571 {
572   digraph(includes);
573 }
574 
575 void
576 compute_lookaheads(void)
577 {
578   int i, n;
579   unsigned *fp1, *fp2, *fp3;
580   shorts *sp, *next;
581   unsigned *rowp;
582 
583   rowp = LA;
584   n = lookaheads[nstates];
585   for (i = 0; i < n; i++)
586     {
587       fp3 = rowp + tokensetsize;
588       for (sp = lookback[i]; sp; sp = sp->next)
589 	{
590 	  fp1 = rowp;
591 	  fp2 = F + tokensetsize * sp->value;
592 	  while (fp1 < fp3)
593 	    *fp1++ |= *fp2++;
594 	}
595       rowp = fp3;
596     }
597 
598   for (i = 0; i < n; i++)
599     for (sp = lookback[i]; sp; sp = next)
600       {
601         next = sp->next;
602         FREE(sp);
603       }
604 
605   FREE(lookback);
606   FREE(F);
607 }
608 
609 void
610 digraph(short **relation)
611 {
612   int i;
613 
614   infinity = ngotos + 2;
615   INDEX = NEW2(ngotos + 1, short);
616   VERTICES = NEW2(ngotos + 1, short);
617   top = 0;
618 
619   R = relation;
620 
621   for (i = 0; i < ngotos; i++)
622     INDEX[i] = 0;
623 
624   for (i = 0; i < ngotos; i++)
625     {
626       if (INDEX[i] == 0 && R[i])
627 	traverse(i);
628     }
629 
630   FREE(INDEX);
631   FREE(VERTICES);
632 }
633 
634 
635 void
636 traverse(int i)
637 {
638   unsigned *fp1;
639   unsigned *fp2;
640   unsigned *fp3;
641   int j;
642   short *rp;
643 
644   int height;
645   unsigned *base;
646 
647   VERTICES[++top] = i;
648   INDEX[i] = height = top;
649 
650   base = F + i * tokensetsize;
651   fp3 = base + tokensetsize;
652 
653   rp = R[i];
654   if (rp)
655     {
656       while ((j = *rp++) >= 0)
657 	{
658 	  if (INDEX[j] == 0)
659 	    traverse(j);
660 
661 	  if (INDEX[i] > INDEX[j])
662 	    INDEX[i] = INDEX[j];
663 
664 	  fp1 = base;
665 	  fp2 = F + j * tokensetsize;
666 
667 	  while (fp1 < fp3)
668 	    *fp1++ |= *fp2++;
669 	}
670     }
671 
672   if (INDEX[i] == height)
673     {
674       for (;;)
675 	{
676 	  j = VERTICES[top--];
677 	  INDEX[j] = infinity;
678 
679 	  if (i == j)
680 	    break;
681 
682 	  fp1 = base;
683 	  fp2 = F + j * tokensetsize;
684 
685 	  while (fp1 < fp3)
686 	    *fp2++ = *fp1++;
687 	}
688     }
689 }
690