xref: /openbsd-src/usr.bin/yacc/lr0.c (revision 4c1e55dc91edd6e69ccc60ce855900fbc12cf34f)
1 /*	$OpenBSD: lr0.c,v 1.12 2012/03/03 19:15:00 nicm Exp $	*/
2 /*	$NetBSD: lr0.c,v 1.4 1996/03/19 03:21:35 jtc Exp $	*/
3 
4 /*
5  * Copyright (c) 1989 The Regents of the University of California.
6  * All rights reserved.
7  *
8  * This code is derived from software contributed to Berkeley by
9  * Robert Paul Corbett.
10  *
11  * Redistribution and use in source and binary forms, with or without
12  * modification, are permitted provided that the following conditions
13  * are met:
14  * 1. Redistributions of source code must retain the above copyright
15  *    notice, this list of conditions and the following disclaimer.
16  * 2. Redistributions in binary form must reproduce the above copyright
17  *    notice, this list of conditions and the following disclaimer in the
18  *    documentation and/or other materials provided with the distribution.
19  * 3. Neither the name of the University nor the names of its contributors
20  *    may be used to endorse or promote products derived from this software
21  *    without specific prior written permission.
22  *
23  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
24  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
25  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
26  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
27  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
28  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
29  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
30  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
31  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
32  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
33  * SUCH DAMAGE.
34  */
35 
36 #include "defs.h"
37 
38 extern short *itemset;
39 extern short *itemsetend;
40 extern unsigned *ruleset;
41 
42 int nstates;
43 core *first_state;
44 shifts *first_shift;
45 reductions *first_reduction;
46 
47 int get_state(int);
48 core *new_state(int);
49 
50 void allocate_itemsets(void);
51 void allocate_storage(void);
52 void append_states(void);
53 void free_storage(void);
54 void generate_states(void);
55 void initialize_states(void);
56 void new_itemsets(void);
57 void save_shifts(void);
58 void save_reductions(void);
59 void set_derives(void);
60 void print_derives(void);
61 void set_nullable(void);
62 
63 static core **state_set;
64 static core *this_state;
65 static core *last_state;
66 static shifts *last_shift;
67 static reductions *last_reduction;
68 
69 static int nshifts;
70 static short *shift_symbol;
71 
72 static short *redset;
73 static short *shiftset;
74 
75 static short **kernel_base;
76 static short **kernel_end;
77 static short *kernel_items;
78 
79 void
80 allocate_itemsets(void)
81 {
82     short *itemp;
83     short *item_end;
84     int symbol;
85     int i;
86     int count;
87     int max;
88     short *symbol_count;
89 
90     count = 0;
91     symbol_count = NEW2(nsyms, short);
92 
93     item_end = ritem + nitems;
94     for (itemp = ritem; itemp < item_end; itemp++)
95     {
96 	symbol = *itemp;
97 	if (symbol >= 0)
98 	{
99 	    count++;
100 	    symbol_count[symbol]++;
101 	}
102     }
103 
104     kernel_base = NEW2(nsyms, short *);
105     kernel_items = NEW2(count, short);
106 
107     count = 0;
108     max = 0;
109     for (i = 0; i < nsyms; i++)
110     {
111 	kernel_base[i] = kernel_items + count;
112 	count += symbol_count[i];
113 	if (max < symbol_count[i])
114 	    max = symbol_count[i];
115     }
116 
117     shift_symbol = symbol_count;
118     kernel_end = NEW2(nsyms, short *);
119 }
120 
121 void
122 allocate_storage(void)
123 {
124     allocate_itemsets();
125     shiftset = NEW2(nsyms, short);
126     redset = NEW2(nrules + 1, short);
127     state_set = NEW2(nitems, core *);
128 }
129 
130 void
131 append_states(void)
132 {
133     int i;
134     int j;
135     int symbol;
136 
137 #ifdef	TRACE
138     fprintf(stderr, "Entering append_states()\n");
139 #endif
140     for (i = 1; i < nshifts; i++)
141     {
142 	symbol = shift_symbol[i];
143 	j = i;
144 	while (j > 0 && shift_symbol[j - 1] > symbol)
145 	{
146 	    shift_symbol[j] = shift_symbol[j - 1];
147 	    j--;
148 	}
149 	shift_symbol[j] = symbol;
150     }
151 
152     for (i = 0; i < nshifts; i++)
153     {
154 	symbol = shift_symbol[i];
155 	shiftset[i] = get_state(symbol);
156     }
157 }
158 
159 void
160 free_storage(void)
161 {
162     FREE(shift_symbol);
163     FREE(redset);
164     FREE(shiftset);
165     FREE(kernel_base);
166     FREE(kernel_end);
167     FREE(kernel_items);
168     FREE(state_set);
169 }
170 
171 
172 void
173 generate_states(void)
174 {
175     allocate_storage();
176     itemset = NEW2(nitems, short);
177     ruleset = NEW2(WORDSIZE(nrules), unsigned);
178     set_first_derives();
179     initialize_states();
180 
181     while (this_state)
182     {
183 	closure(this_state->items, this_state->nitems);
184 	save_reductions();
185 	new_itemsets();
186 	append_states();
187 
188 	if (nshifts > 0)
189 	    save_shifts();
190 
191 	this_state = this_state->next;
192     }
193 
194     finalize_closure();
195     free_storage();
196 }
197 
198 
199 
200 int
201 get_state(int symbol)
202 {
203     int key;
204     short *isp1;
205     short *isp2;
206     short *iend;
207     core *sp;
208     int found;
209     int n;
210 
211 #ifdef	TRACE
212     fprintf(stderr, "Entering get_state(%d)\n", symbol);
213 #endif
214 
215     isp1 = kernel_base[symbol];
216     iend = kernel_end[symbol];
217     n = iend - isp1;
218 
219     key = *isp1;
220     assert(0 <= key && key < nitems);
221     sp = state_set[key];
222     if (sp)
223     {
224 	found = 0;
225 	while (!found)
226 	{
227 	    if (sp->nitems == n)
228 	    {
229 		found = 1;
230 		isp1 = kernel_base[symbol];
231 		isp2 = sp->items;
232 
233 		while (found && isp1 < iend)
234 		{
235 		    if (*isp1++ != *isp2++)
236 			found = 0;
237 		}
238 	    }
239 
240 	    if (!found)
241 	    {
242 		if (sp->link)
243 		{
244 		    sp = sp->link;
245 		}
246 		else
247 		{
248 		    sp = sp->link = new_state(symbol);
249 		    found = 1;
250 		}
251 	    }
252 	}
253     }
254     else
255     {
256 	state_set[key] = sp = new_state(symbol);
257     }
258 
259     return (sp->number);
260 }
261 
262 
263 void
264 initialize_states(void)
265 {
266     int i;
267     short *start_derives;
268     core *p;
269 
270     start_derives = derives[start_symbol];
271     for (i = 0; start_derives[i] >= 0; ++i)
272 	continue;
273 
274     p = (core *) MALLOC(sizeof(core) + i*sizeof(short));
275     if (p == 0) no_space();
276 
277     p->next = 0;
278     p->link = 0;
279     p->number = 0;
280     p->accessing_symbol = 0;
281     p->nitems = i;
282 
283     for (i = 0;  start_derives[i] >= 0; ++i)
284 	p->items[i] = rrhs[start_derives[i]];
285 
286     first_state = last_state = this_state = p;
287     nstates = 1;
288 }
289 
290 void
291 new_itemsets(void)
292 {
293     int i;
294     int shiftcount;
295     short *isp;
296     short *ksp;
297     int symbol;
298 
299     memset(kernel_end, 0, nsyms * sizeof(short *));
300 
301     shiftcount = 0;
302     isp = itemset;
303     while (isp < itemsetend)
304     {
305 	i = *isp++;
306 	symbol = ritem[i];
307 	if (symbol > 0)
308 	{
309 	    ksp = kernel_end[symbol];
310 	    if (!ksp)
311 	    {
312 		shift_symbol[shiftcount++] = symbol;
313 		ksp = kernel_base[symbol];
314 	    }
315 
316 	    *ksp++ = i + 1;
317 	    kernel_end[symbol] = ksp;
318 	}
319     }
320 
321     nshifts = shiftcount;
322 }
323 
324 
325 
326 core *
327 new_state(int symbol)
328 {
329     int n;
330     core *p;
331     short *isp1;
332     short *isp2;
333     short *iend;
334 
335 #ifdef	TRACE
336     fprintf(stderr, "Entering new_state(%d)\n", symbol);
337 #endif
338 
339     if (nstates >= MAXSHORT)
340 	fatal("too many states");
341 
342     isp1 = kernel_base[symbol];
343     iend = kernel_end[symbol];
344     n = iend - isp1;
345 
346     p = (core *) allocate((unsigned) (sizeof(core) + (n - 1) * sizeof(short)));
347     p->accessing_symbol = symbol;
348     p->number = nstates;
349     p->nitems = n;
350 
351     isp2 = p->items;
352     while (isp1 < iend)
353 	*isp2++ = *isp1++;
354 
355     last_state->next = p;
356     last_state = p;
357 
358     nstates++;
359 
360     return (p);
361 }
362 
363 
364 void
365 save_shifts(void)
366 {
367     shifts *p;
368     short *sp1;
369     short *sp2;
370     short *send;
371 
372     p = (shifts *) allocate((unsigned) (sizeof(shifts) +
373 			(nshifts - 1) * sizeof(short)));
374 
375     p->number = this_state->number;
376     p->nshifts = nshifts;
377 
378     sp1 = shiftset;
379     sp2 = p->shift;
380     send = shiftset + nshifts;
381 
382     while (sp1 < send)
383 	*sp2++ = *sp1++;
384 
385     if (last_shift)
386     {
387 	last_shift->next = p;
388 	last_shift = p;
389     }
390     else
391     {
392 	first_shift = p;
393 	last_shift = p;
394     }
395 }
396 
397 
398 void
399 save_reductions(void)
400 {
401     short *isp;
402     short *rp1;
403     short *rp2;
404     int item;
405     int count;
406     reductions *p;
407     short *rend;
408 
409     count = 0;
410     for (isp = itemset; isp < itemsetend; isp++)
411     {
412 	item = ritem[*isp];
413 	if (item < 0)
414 	{
415 	    redset[count++] = -item;
416 	}
417     }
418 
419     if (count)
420     {
421 	p = (reductions *) allocate((unsigned) (sizeof(reductions) +
422 					(count - 1) * sizeof(short)));
423 
424 	p->number = this_state->number;
425 	p->nreds = count;
426 
427 	rp1 = redset;
428 	rp2 = p->rules;
429 	rend = rp1 + count;
430 
431 	while (rp1 < rend)
432 	    *rp2++ = *rp1++;
433 
434 	if (last_reduction)
435 	{
436 	    last_reduction->next = p;
437 	    last_reduction = p;
438 	}
439 	else
440 	{
441 	    first_reduction = p;
442 	    last_reduction = p;
443 	}
444     }
445 }
446 
447 void
448 set_derives(void)
449 {
450     int i, k;
451     int lhs;
452     short *rules;
453 
454     derives = NEW2(nsyms, short *);
455     rules = NEW2(nvars + nrules, short);
456 
457     k = 0;
458     for (lhs = start_symbol; lhs < nsyms; lhs++)
459     {
460 	derives[lhs] = rules + k;
461 	for (i = 0; i < nrules; i++)
462 	{
463 	    if (rlhs[i] == lhs)
464 	    {
465 		rules[k] = i;
466 		k++;
467 	    }
468 	}
469 	rules[k] = -1;
470 	k++;
471     }
472 
473 #ifdef	DEBUG
474     print_derives();
475 #endif
476 }
477 
478 void
479 free_derives(void)
480 {
481     FREE(derives[start_symbol]);
482     FREE(derives);
483 }
484 
485 #ifdef	DEBUG
486 void
487 print_derives(void)
488 {
489     int i;
490     short *sp;
491 
492     printf("\nDERIVES\n\n");
493 
494     for (i = start_symbol; i < nsyms; i++)
495     {
496 	printf("%s derives ", symbol_name[i]);
497 	for (sp = derives[i]; *sp >= 0; sp++)
498 	{
499 	    printf("  %d", *sp);
500 	}
501 	putchar('\n');
502     }
503 
504     putchar('\n');
505 }
506 #endif
507 
508 void
509 set_nullable(void)
510 {
511     int i, j;
512     int empty;
513     int done;
514 
515     nullable = MALLOC(nsyms);
516     if (nullable == 0) no_space();
517 
518     memset(nullable, 0, nsyms);
519 
520     done = 0;
521     while (!done)
522     {
523 	done = 1;
524 	for (i = 1; i < nitems; i++)
525 	{
526 	    empty = 1;
527 	    while ((j = ritem[i]) >= 0)
528 	    {
529 		if (!nullable[j])
530 		    empty = 0;
531 		++i;
532 	    }
533 	    if (empty)
534 	    {
535 		j = rlhs[-j];
536 		if (!nullable[j])
537 		{
538 		    nullable[j] = 1;
539 		    done = 0;
540 		}
541 	    }
542 	}
543     }
544 
545 #ifdef DEBUG
546     for (i = 0; i < nsyms; i++)
547     {
548 	if (nullable[i])
549 	    printf("%s is nullable\n", symbol_name[i]);
550 	else
551 	    printf("%s is not nullable\n", symbol_name[i]);
552     }
553 #endif
554 }
555 
556 void
557 free_nullable(void)
558 {
559     FREE(nullable);
560 }
561 
562 void
563 lr0(void)
564 {
565     set_derives();
566     set_nullable();
567     generate_states();
568 }
569