xref: /openbsd-src/usr.bin/yacc/lalr.c (revision b2ea75c1b17e1a9a339660e7ed45cd24946b230e)
1 /*	$OpenBSD: lalr.c,v 1.4 2001/07/16 06:29:44 pvalchev Exp $	*/
2 /*	$NetBSD: lalr.c,v 1.4 1996/03/19 03:21:33 jtc Exp $	*/
3 
4 /*
5  * Copyright (c) 1989 The Regents of the University of California.
6  * All rights reserved.
7  *
8  * This code is derived from software contributed to Berkeley by
9  * Robert Paul Corbett.
10  *
11  * Redistribution and use in source and binary forms, with or without
12  * modification, are permitted provided that the following conditions
13  * are met:
14  * 1. Redistributions of source code must retain the above copyright
15  *    notice, this list of conditions and the following disclaimer.
16  * 2. Redistributions in binary form must reproduce the above copyright
17  *    notice, this list of conditions and the following disclaimer in the
18  *    documentation and/or other materials provided with the distribution.
19  * 3. All advertising materials mentioning features or use of this software
20  *    must display the following acknowledgement:
21  *	This product includes software developed by the University of
22  *	California, Berkeley and its contributors.
23  * 4. Neither the name of the University nor the names of its contributors
24  *    may be used to endorse or promote products derived from this software
25  *    without specific prior written permission.
26  *
27  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
28  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
29  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
30  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
31  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
32  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
33  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
34  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
35  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
36  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
37  * SUCH DAMAGE.
38  */
39 
40 #ifndef lint
41 #if 0
42 static char sccsid[] = "@(#)lalr.c	5.3 (Berkeley) 6/1/90";
43 #else
44 static char rcsid[] = "$OpenBSD: lalr.c,v 1.4 2001/07/16 06:29:44 pvalchev Exp $";
45 #endif
46 #endif /* not lint */
47 
48 #include "defs.h"
49 
50 typedef
51   struct shorts
52     {
53       struct shorts *next;
54       short value;
55     }
56   shorts;
57 
58 int tokensetsize;
59 short *lookaheads;
60 short *LAruleno;
61 unsigned *LA;
62 short *accessing_symbol;
63 core **state_table;
64 shifts **shift_table;
65 reductions **reduction_table;
66 short *goto_map;
67 short *from_state;
68 short *to_state;
69 
70 short **transpose();
71 void set_state_table __P((void));
72 void set_accessing_symbol __P((void));
73 void set_shift_table __P((void));
74 void set_reduction_table __P((void));
75 void set_maxrhs __P((void));
76 void initialize_LA __P((void));
77 void set_goto_map __P((void));
78 void initialize_F __P((void));
79 void build_relations __P((void));
80 void compute_FOLLOWS __P((void));
81 void compute_lookaheads __P((void));
82 int map_goto __P((int, int));
83 void digraph __P((short **));
84 void add_lookback_edge __P((int, int, int));
85 void traverse __P((int));
86 
87 static int infinity;
88 static int maxrhs;
89 static int ngotos;
90 static unsigned *F;
91 static short **includes;
92 static shorts **lookback;
93 static short **R;
94 static short *INDEX;
95 static short *VERTICES;
96 static int top;
97 
98 void
99 lalr()
100 {
101     tokensetsize = WORDSIZE(ntokens);
102 
103     set_state_table();
104     set_accessing_symbol();
105     set_shift_table();
106     set_reduction_table();
107     set_maxrhs();
108     initialize_LA();
109     set_goto_map();
110     initialize_F();
111     build_relations();
112     compute_FOLLOWS();
113     compute_lookaheads();
114 }
115 
116 
117 void
118 set_state_table()
119 {
120     register core *sp;
121 
122     state_table = NEW2(nstates, core *);
123     for (sp = first_state; sp; sp = sp->next)
124 	state_table[sp->number] = sp;
125 }
126 
127 
128 void
129 set_accessing_symbol()
130 {
131     register core *sp;
132 
133     accessing_symbol = NEW2(nstates, short);
134     for (sp = first_state; sp; sp = sp->next)
135 	accessing_symbol[sp->number] = sp->accessing_symbol;
136 }
137 
138 
139 void
140 set_shift_table()
141 {
142     register shifts *sp;
143 
144     shift_table = NEW2(nstates, shifts *);
145     for (sp = first_shift; sp; sp = sp->next)
146 	shift_table[sp->number] = sp;
147 }
148 
149 
150 void
151 set_reduction_table()
152 {
153     register reductions *rp;
154 
155     reduction_table = NEW2(nstates, reductions *);
156     for (rp = first_reduction; rp; rp = rp->next)
157 	reduction_table[rp->number] = rp;
158 }
159 
160 
161 void
162 set_maxrhs()
163 {
164   register short *itemp;
165   register short *item_end;
166   register int length;
167   register int max;
168 
169   length = 0;
170   max = 0;
171   item_end = ritem + nitems;
172   for (itemp = ritem; itemp < item_end; itemp++)
173     {
174       if (*itemp >= 0)
175 	{
176 	  length++;
177 	}
178       else
179 	{
180 	  if (length > max) max = length;
181 	  length = 0;
182 	}
183     }
184 
185   maxrhs = max;
186 }
187 
188 
189 void
190 initialize_LA()
191 {
192   register int i, j, k;
193   register reductions *rp;
194 
195   lookaheads = NEW2(nstates + 1, short);
196 
197   k = 0;
198   for (i = 0; i < nstates; i++)
199     {
200       lookaheads[i] = k;
201       rp = reduction_table[i];
202       if (rp)
203 	k += rp->nreds;
204     }
205   lookaheads[nstates] = k;
206 
207   LA = NEW2(k * tokensetsize, unsigned);
208   LAruleno = NEW2(k, short);
209   lookback = NEW2(k, shorts *);
210 
211   k = 0;
212   for (i = 0; i < nstates; i++)
213     {
214       rp = reduction_table[i];
215       if (rp)
216 	{
217 	  for (j = 0; j < rp->nreds; j++)
218 	    {
219 	      LAruleno[k] = rp->rules[j];
220 	      k++;
221 	    }
222 	}
223     }
224 }
225 
226 void
227 set_goto_map()
228 {
229   register shifts *sp;
230   register int i;
231   register int symbol;
232   register int k;
233   register short *temp_map;
234   register int state2;
235   register int state1;
236 
237   goto_map = NEW2(nvars + 1, short) - ntokens;
238   temp_map = NEW2(nvars + 1, short) - ntokens;
239 
240   ngotos = 0;
241   for (sp = first_shift; sp; sp = sp->next)
242     {
243       for (i = sp->nshifts - 1; i >= 0; i--)
244 	{
245 	  symbol = accessing_symbol[sp->shift[i]];
246 
247 	  if (ISTOKEN(symbol)) break;
248 
249 	  if (ngotos == MAXSHORT)
250 	    fatal("too many gotos");
251 
252 	  ngotos++;
253 	  goto_map[symbol]++;
254         }
255     }
256 
257   k = 0;
258   for (i = ntokens; i < nsyms; i++)
259     {
260       temp_map[i] = k;
261       k += goto_map[i];
262     }
263 
264   for (i = ntokens; i < nsyms; i++)
265     goto_map[i] = temp_map[i];
266 
267   goto_map[nsyms] = ngotos;
268   temp_map[nsyms] = ngotos;
269 
270   from_state = NEW2(ngotos, short);
271   to_state = NEW2(ngotos, short);
272 
273   for (sp = first_shift; sp; sp = sp->next)
274     {
275       state1 = sp->number;
276       for (i = sp->nshifts - 1; i >= 0; i--)
277 	{
278 	  state2 = sp->shift[i];
279 	  symbol = accessing_symbol[state2];
280 
281 	  if (ISTOKEN(symbol)) break;
282 
283 	  k = temp_map[symbol]++;
284 	  from_state[k] = state1;
285 	  to_state[k] = state2;
286 	}
287     }
288 
289   FREE(temp_map + ntokens);
290 }
291 
292 
293 
294 /*  Map_goto maps a state/symbol pair into its numeric representation.	*/
295 
296 int
297 map_goto(state, symbol)
298 int state;
299 int symbol;
300 {
301     register int high;
302     register int low;
303     register int middle;
304     register int s;
305 
306     low = goto_map[symbol];
307     high = goto_map[symbol + 1];
308 
309     for (;;)
310     {
311 	assert(low <= high);
312 	middle = (low + high) >> 1;
313 	s = from_state[middle];
314 	if (s == state)
315 	    return (middle);
316 	else if (s < state)
317 	    low = middle + 1;
318 	else
319 	    high = middle - 1;
320     }
321 }
322 
323 
324 void
325 initialize_F()
326 {
327   register int i;
328   register int j;
329   register int k;
330   register shifts *sp;
331   register short *edge;
332   register unsigned *rowp;
333   register short *rp;
334   register short **reads;
335   register int nedges;
336   register int stateno;
337   register int symbol;
338   register int nwords;
339 
340   nwords = ngotos * tokensetsize;
341   F = NEW2(nwords, unsigned);
342 
343   reads = NEW2(ngotos, short *);
344   edge = NEW2(ngotos + 1, short);
345   nedges = 0;
346 
347   rowp = F;
348   for (i = 0; i < ngotos; i++)
349     {
350       stateno = to_state[i];
351       sp = shift_table[stateno];
352 
353       if (sp)
354 	{
355 	  k = sp->nshifts;
356 
357 	  for (j = 0; j < k; j++)
358 	    {
359 	      symbol = accessing_symbol[sp->shift[j]];
360 	      if (ISVAR(symbol))
361 		break;
362 	      SETBIT(rowp, symbol);
363 	    }
364 
365 	  for (; j < k; j++)
366 	    {
367 	      symbol = accessing_symbol[sp->shift[j]];
368 	      if (nullable[symbol])
369 		edge[nedges++] = map_goto(stateno, symbol);
370 	    }
371 
372 	  if (nedges)
373 	    {
374 	      reads[i] = rp = NEW2(nedges + 1, short);
375 
376 	      for (j = 0; j < nedges; j++)
377 		rp[j] = edge[j];
378 
379 	      rp[nedges] = -1;
380 	      nedges = 0;
381 	    }
382 	}
383 
384       rowp += tokensetsize;
385     }
386 
387   SETBIT(F, 0);
388   digraph(reads);
389 
390   for (i = 0; i < ngotos; i++)
391     {
392       if (reads[i])
393 	FREE(reads[i]);
394     }
395 
396   FREE(reads);
397   FREE(edge);
398 }
399 
400 
401 void
402 build_relations()
403 {
404   register int i;
405   register int j;
406   register int k;
407   register short *rulep;
408   register short *rp;
409   register shifts *sp;
410   register int length;
411   register int nedges;
412   register int done;
413   register int state1;
414   register int stateno;
415   register int symbol1;
416   register int symbol2;
417   register short *shortp;
418   register short *edge;
419   register short *states;
420   register short **new_includes;
421 
422   includes = NEW2(ngotos, short *);
423   edge = NEW2(ngotos + 1, short);
424   states = NEW2(maxrhs + 1, short);
425 
426   for (i = 0; i < ngotos; i++)
427     {
428       nedges = 0;
429       state1 = from_state[i];
430       symbol1 = accessing_symbol[to_state[i]];
431 
432       for (rulep = derives[symbol1]; *rulep >= 0; rulep++)
433 	{
434 	  length = 1;
435 	  states[0] = state1;
436 	  stateno = state1;
437 
438 	  for (rp = ritem + rrhs[*rulep]; *rp >= 0; rp++)
439 	    {
440 	      symbol2 = *rp;
441 	      sp = shift_table[stateno];
442 	      k = sp->nshifts;
443 
444 	      for (j = 0; j < k; j++)
445 		{
446 		  stateno = sp->shift[j];
447 		  if (accessing_symbol[stateno] == symbol2) break;
448 		}
449 
450 	      states[length++] = stateno;
451 	    }
452 
453 	  add_lookback_edge(stateno, *rulep, i);
454 
455 	  length--;
456 	  done = 0;
457 	  while (!done)
458 	    {
459 	      done = 1;
460 	      rp--;
461 	      if (ISVAR(*rp))
462 		{
463 		  stateno = states[--length];
464 		  edge[nedges++] = map_goto(stateno, *rp);
465 		  if (nullable[*rp] && length > 0) done = 0;
466 		}
467 	    }
468 	}
469 
470       if (nedges)
471 	{
472 	  includes[i] = shortp = NEW2(nedges + 1, short);
473 	  for (j = 0; j < nedges; j++)
474 	    shortp[j] = edge[j];
475 	  shortp[nedges] = -1;
476 	}
477     }
478 
479   new_includes = transpose(includes, ngotos);
480 
481   for (i = 0; i < ngotos; i++)
482     if (includes[i])
483       FREE(includes[i]);
484 
485   FREE(includes);
486 
487   includes = new_includes;
488 
489   FREE(edge);
490   FREE(states);
491 }
492 
493 void
494 add_lookback_edge(stateno, ruleno, gotono)
495 int stateno, ruleno, gotono;
496 {
497     register int i, k;
498     register int found;
499     register shorts *sp;
500 
501     i = lookaheads[stateno];
502     k = lookaheads[stateno + 1];
503     found = 0;
504     while (!found && i < k)
505     {
506 	if (LAruleno[i] == ruleno)
507 	    found = 1;
508 	else
509 	    ++i;
510     }
511     assert(found);
512 
513     sp = NEW(shorts);
514     sp->next = lookback[i];
515     sp->value = gotono;
516     lookback[i] = sp;
517 }
518 
519 
520 
521 short **
522 transpose(R, n)
523 short **R;
524 int n;
525 {
526   register short **new_R;
527   register short **temp_R;
528   register short *nedges;
529   register short *sp;
530   register int i;
531   register int k;
532 
533   nedges = NEW2(n, short);
534 
535   for (i = 0; i < n; i++)
536     {
537       sp = R[i];
538       if (sp)
539 	{
540 	  while (*sp >= 0)
541 	    nedges[*sp++]++;
542 	}
543     }
544 
545   new_R = NEW2(n, short *);
546   temp_R = NEW2(n, short *);
547 
548   for (i = 0; i < n; i++)
549     {
550       k = nedges[i];
551       if (k > 0)
552 	{
553 	  sp = NEW2(k + 1, short);
554 	  new_R[i] = sp;
555 	  temp_R[i] = sp;
556 	  sp[k] = -1;
557 	}
558     }
559 
560   FREE(nedges);
561 
562   for (i = 0; i < n; i++)
563     {
564       sp = R[i];
565       if (sp)
566 	{
567 	  while (*sp >= 0)
568 	    *temp_R[*sp++]++ = i;
569 	}
570     }
571 
572   FREE(temp_R);
573 
574   return (new_R);
575 }
576 
577 
578 void
579 compute_FOLLOWS()
580 {
581   digraph(includes);
582 }
583 
584 void
585 compute_lookaheads()
586 {
587   register int i, n;
588   register unsigned *fp1, *fp2, *fp3;
589   register shorts *sp, *next;
590   register unsigned *rowp;
591 
592   rowp = LA;
593   n = lookaheads[nstates];
594   for (i = 0; i < n; i++)
595     {
596       fp3 = rowp + tokensetsize;
597       for (sp = lookback[i]; sp; sp = sp->next)
598 	{
599 	  fp1 = rowp;
600 	  fp2 = F + tokensetsize * sp->value;
601 	  while (fp1 < fp3)
602 	    *fp1++ |= *fp2++;
603 	}
604       rowp = fp3;
605     }
606 
607   for (i = 0; i < n; i++)
608     for (sp = lookback[i]; sp; sp = next)
609       {
610         next = sp->next;
611         FREE(sp);
612       }
613 
614   FREE(lookback);
615   FREE(F);
616 }
617 
618 void
619 digraph(relation)
620 short **relation;
621 {
622   register int i;
623 
624   infinity = ngotos + 2;
625   INDEX = NEW2(ngotos + 1, short);
626   VERTICES = NEW2(ngotos + 1, short);
627   top = 0;
628 
629   R = relation;
630 
631   for (i = 0; i < ngotos; i++)
632     INDEX[i] = 0;
633 
634   for (i = 0; i < ngotos; i++)
635     {
636       if (INDEX[i] == 0 && R[i])
637 	traverse(i);
638     }
639 
640   FREE(INDEX);
641   FREE(VERTICES);
642 }
643 
644 
645 void
646 traverse(i)
647 register int i;
648 {
649   register unsigned *fp1;
650   register unsigned *fp2;
651   register unsigned *fp3;
652   register int j;
653   register short *rp;
654 
655   int height;
656   unsigned *base;
657 
658   VERTICES[++top] = i;
659   INDEX[i] = height = top;
660 
661   base = F + i * tokensetsize;
662   fp3 = base + tokensetsize;
663 
664   rp = R[i];
665   if (rp)
666     {
667       while ((j = *rp++) >= 0)
668 	{
669 	  if (INDEX[j] == 0)
670 	    traverse(j);
671 
672 	  if (INDEX[i] > INDEX[j])
673 	    INDEX[i] = INDEX[j];
674 
675 	  fp1 = base;
676 	  fp2 = F + j * tokensetsize;
677 
678 	  while (fp1 < fp3)
679 	    *fp1++ |= *fp2++;
680 	}
681     }
682 
683   if (INDEX[i] == height)
684     {
685       for (;;)
686 	{
687 	  j = VERTICES[top--];
688 	  INDEX[j] = infinity;
689 
690 	  if (i == j)
691 	    break;
692 
693 	  fp1 = base;
694 	  fp2 = F + j * tokensetsize;
695 
696 	  while (fp1 < fp3)
697 	    *fp2++ = *fp1++;
698 	}
699     }
700 }
701