1 /* $OpenBSD: lalr.c,v 1.8 2003/06/19 16:34:53 pvalchev Exp $ */ 2 /* $NetBSD: lalr.c,v 1.4 1996/03/19 03:21:33 jtc Exp $ */ 3 4 /* 5 * Copyright (c) 1989 The Regents of the University of California. 6 * All rights reserved. 7 * 8 * This code is derived from software contributed to Berkeley by 9 * Robert Paul Corbett. 10 * 11 * Redistribution and use in source and binary forms, with or without 12 * modification, are permitted provided that the following conditions 13 * are met: 14 * 1. Redistributions of source code must retain the above copyright 15 * notice, this list of conditions and the following disclaimer. 16 * 2. Redistributions in binary form must reproduce the above copyright 17 * notice, this list of conditions and the following disclaimer in the 18 * documentation and/or other materials provided with the distribution. 19 * 3. Neither the name of the University nor the names of its contributors 20 * may be used to endorse or promote products derived from this software 21 * without specific prior written permission. 22 * 23 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 24 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 25 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 26 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 27 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 28 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 29 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 30 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 31 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 32 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 33 * SUCH DAMAGE. 34 */ 35 36 #ifndef lint 37 #if 0 38 static char sccsid[] = "@(#)lalr.c 5.3 (Berkeley) 6/1/90"; 39 #else 40 static char rcsid[] = "$OpenBSD: lalr.c,v 1.8 2003/06/19 16:34:53 pvalchev Exp $"; 41 #endif 42 #endif /* not lint */ 43 44 #include "defs.h" 45 46 typedef 47 struct shorts 48 { 49 struct shorts *next; 50 short value; 51 } 52 shorts; 53 54 int tokensetsize; 55 short *lookaheads; 56 short *LAruleno; 57 unsigned *LA; 58 short *accessing_symbol; 59 core **state_table; 60 shifts **shift_table; 61 reductions **reduction_table; 62 short *goto_map; 63 short *from_state; 64 short *to_state; 65 66 short **transpose(); 67 void set_state_table(void); 68 void set_accessing_symbol(void); 69 void set_shift_table(void); 70 void set_reduction_table(void); 71 void set_maxrhs(void); 72 void initialize_LA(void); 73 void set_goto_map(void); 74 void initialize_F(void); 75 void build_relations(void); 76 void compute_FOLLOWS(void); 77 void compute_lookaheads(void); 78 int map_goto(int, int); 79 void digraph(short **); 80 void add_lookback_edge(int, int, int); 81 void traverse(int); 82 83 static int infinity; 84 static int maxrhs; 85 static int ngotos; 86 static unsigned *F; 87 static short **includes; 88 static shorts **lookback; 89 static short **R; 90 static short *INDEX; 91 static short *VERTICES; 92 static int top; 93 94 void 95 lalr(void) 96 { 97 tokensetsize = WORDSIZE(ntokens); 98 99 set_state_table(); 100 set_accessing_symbol(); 101 set_shift_table(); 102 set_reduction_table(); 103 set_maxrhs(); 104 initialize_LA(); 105 set_goto_map(); 106 initialize_F(); 107 build_relations(); 108 compute_FOLLOWS(); 109 compute_lookaheads(); 110 } 111 112 113 void 114 set_state_table(void) 115 { 116 core *sp; 117 118 state_table = NEW2(nstates, core *); 119 for (sp = first_state; sp; sp = sp->next) 120 state_table[sp->number] = sp; 121 } 122 123 124 void 125 set_accessing_symbol(void) 126 { 127 core *sp; 128 129 accessing_symbol = NEW2(nstates, short); 130 for (sp = first_state; sp; sp = sp->next) 131 accessing_symbol[sp->number] = sp->accessing_symbol; 132 } 133 134 135 void 136 set_shift_table(void) 137 { 138 shifts *sp; 139 140 shift_table = NEW2(nstates, shifts *); 141 for (sp = first_shift; sp; sp = sp->next) 142 shift_table[sp->number] = sp; 143 } 144 145 146 void 147 set_reduction_table(void) 148 { 149 reductions *rp; 150 151 reduction_table = NEW2(nstates, reductions *); 152 for (rp = first_reduction; rp; rp = rp->next) 153 reduction_table[rp->number] = rp; 154 } 155 156 157 void 158 set_maxrhs(void) 159 { 160 short *itemp; 161 short *item_end; 162 int length; 163 int max; 164 165 length = 0; 166 max = 0; 167 item_end = ritem + nitems; 168 for (itemp = ritem; itemp < item_end; itemp++) 169 { 170 if (*itemp >= 0) 171 { 172 length++; 173 } 174 else 175 { 176 if (length > max) max = length; 177 length = 0; 178 } 179 } 180 181 maxrhs = max; 182 } 183 184 185 void 186 initialize_LA(void) 187 { 188 int i, j, k; 189 reductions *rp; 190 191 lookaheads = NEW2(nstates + 1, short); 192 193 k = 0; 194 for (i = 0; i < nstates; i++) 195 { 196 lookaheads[i] = k; 197 rp = reduction_table[i]; 198 if (rp) 199 k += rp->nreds; 200 } 201 lookaheads[nstates] = k; 202 203 LA = NEW2(k * tokensetsize, unsigned); 204 LAruleno = NEW2(k, short); 205 lookback = NEW2(k, shorts *); 206 207 k = 0; 208 for (i = 0; i < nstates; i++) 209 { 210 rp = reduction_table[i]; 211 if (rp) 212 { 213 for (j = 0; j < rp->nreds; j++) 214 { 215 LAruleno[k] = rp->rules[j]; 216 k++; 217 } 218 } 219 } 220 } 221 222 void 223 set_goto_map(void) 224 { 225 shifts *sp; 226 int i; 227 int symbol; 228 int k; 229 short *temp_map; 230 int state2; 231 int state1; 232 233 goto_map = NEW2(nvars + 1, short) - ntokens; 234 temp_map = NEW2(nvars + 1, short) - ntokens; 235 236 ngotos = 0; 237 for (sp = first_shift; sp; sp = sp->next) 238 { 239 for (i = sp->nshifts - 1; i >= 0; i--) 240 { 241 symbol = accessing_symbol[sp->shift[i]]; 242 243 if (ISTOKEN(symbol)) break; 244 245 if (ngotos == MAXSHORT) 246 fatal("too many gotos"); 247 248 ngotos++; 249 goto_map[symbol]++; 250 } 251 } 252 253 k = 0; 254 for (i = ntokens; i < nsyms; i++) 255 { 256 temp_map[i] = k; 257 k += goto_map[i]; 258 } 259 260 for (i = ntokens; i < nsyms; i++) 261 goto_map[i] = temp_map[i]; 262 263 goto_map[nsyms] = ngotos; 264 temp_map[nsyms] = ngotos; 265 266 from_state = NEW2(ngotos, short); 267 to_state = NEW2(ngotos, short); 268 269 for (sp = first_shift; sp; sp = sp->next) 270 { 271 state1 = sp->number; 272 for (i = sp->nshifts - 1; i >= 0; i--) 273 { 274 state2 = sp->shift[i]; 275 symbol = accessing_symbol[state2]; 276 277 if (ISTOKEN(symbol)) break; 278 279 k = temp_map[symbol]++; 280 from_state[k] = state1; 281 to_state[k] = state2; 282 } 283 } 284 285 FREE(temp_map + ntokens); 286 } 287 288 289 290 /* Map_goto maps a state/symbol pair into its numeric representation. */ 291 292 int 293 map_goto(int state, int symbol) 294 { 295 int high; 296 int low; 297 int middle; 298 int s; 299 300 low = goto_map[symbol]; 301 high = goto_map[symbol + 1]; 302 303 for (;;) 304 { 305 assert(low <= high); 306 middle = (low + high) >> 1; 307 s = from_state[middle]; 308 if (s == state) 309 return (middle); 310 else if (s < state) 311 low = middle + 1; 312 else 313 high = middle - 1; 314 } 315 } 316 317 318 void 319 initialize_F(void) 320 { 321 int i; 322 int j; 323 int k; 324 shifts *sp; 325 short *edge; 326 unsigned *rowp; 327 short *rp; 328 short **reads; 329 int nedges; 330 int stateno; 331 int symbol; 332 int nwords; 333 334 nwords = ngotos * tokensetsize; 335 F = NEW2(nwords, unsigned); 336 337 reads = NEW2(ngotos, short *); 338 edge = NEW2(ngotos + 1, short); 339 nedges = 0; 340 341 rowp = F; 342 for (i = 0; i < ngotos; i++) 343 { 344 stateno = to_state[i]; 345 sp = shift_table[stateno]; 346 347 if (sp) 348 { 349 k = sp->nshifts; 350 351 for (j = 0; j < k; j++) 352 { 353 symbol = accessing_symbol[sp->shift[j]]; 354 if (ISVAR(symbol)) 355 break; 356 SETBIT(rowp, symbol); 357 } 358 359 for (; j < k; j++) 360 { 361 symbol = accessing_symbol[sp->shift[j]]; 362 if (nullable[symbol]) 363 edge[nedges++] = map_goto(stateno, symbol); 364 } 365 366 if (nedges) 367 { 368 reads[i] = rp = NEW2(nedges + 1, short); 369 370 for (j = 0; j < nedges; j++) 371 rp[j] = edge[j]; 372 373 rp[nedges] = -1; 374 nedges = 0; 375 } 376 } 377 378 rowp += tokensetsize; 379 } 380 381 SETBIT(F, 0); 382 digraph(reads); 383 384 for (i = 0; i < ngotos; i++) 385 { 386 if (reads[i]) 387 FREE(reads[i]); 388 } 389 390 FREE(reads); 391 FREE(edge); 392 } 393 394 395 void 396 build_relations(void) 397 { 398 int i; 399 int j; 400 int k; 401 short *rulep; 402 short *rp; 403 shifts *sp; 404 int length; 405 int nedges; 406 int done; 407 int state1; 408 int stateno; 409 int symbol1; 410 int symbol2; 411 short *shortp; 412 short *edge; 413 short *states; 414 short **new_includes; 415 416 includes = NEW2(ngotos, short *); 417 edge = NEW2(ngotos + 1, short); 418 states = NEW2(maxrhs + 1, short); 419 420 for (i = 0; i < ngotos; i++) 421 { 422 nedges = 0; 423 state1 = from_state[i]; 424 symbol1 = accessing_symbol[to_state[i]]; 425 426 for (rulep = derives[symbol1]; *rulep >= 0; rulep++) 427 { 428 length = 1; 429 states[0] = state1; 430 stateno = state1; 431 432 for (rp = ritem + rrhs[*rulep]; *rp >= 0; rp++) 433 { 434 symbol2 = *rp; 435 sp = shift_table[stateno]; 436 k = sp->nshifts; 437 438 for (j = 0; j < k; j++) 439 { 440 stateno = sp->shift[j]; 441 if (accessing_symbol[stateno] == symbol2) break; 442 } 443 444 states[length++] = stateno; 445 } 446 447 add_lookback_edge(stateno, *rulep, i); 448 449 length--; 450 done = 0; 451 while (!done) 452 { 453 done = 1; 454 rp--; 455 if (ISVAR(*rp)) 456 { 457 stateno = states[--length]; 458 edge[nedges++] = map_goto(stateno, *rp); 459 if (nullable[*rp] && length > 0) done = 0; 460 } 461 } 462 } 463 464 if (nedges) 465 { 466 includes[i] = shortp = NEW2(nedges + 1, short); 467 for (j = 0; j < nedges; j++) 468 shortp[j] = edge[j]; 469 shortp[nedges] = -1; 470 } 471 } 472 473 new_includes = transpose(includes, ngotos); 474 475 for (i = 0; i < ngotos; i++) 476 if (includes[i]) 477 FREE(includes[i]); 478 479 FREE(includes); 480 481 includes = new_includes; 482 483 FREE(edge); 484 FREE(states); 485 } 486 487 void 488 add_lookback_edge(int stateno, int ruleno, int gotono) 489 { 490 int i, k; 491 int found; 492 shorts *sp; 493 494 i = lookaheads[stateno]; 495 k = lookaheads[stateno + 1]; 496 found = 0; 497 while (!found && i < k) 498 { 499 if (LAruleno[i] == ruleno) 500 found = 1; 501 else 502 ++i; 503 } 504 assert(found); 505 506 sp = NEW(shorts); 507 sp->next = lookback[i]; 508 sp->value = gotono; 509 lookback[i] = sp; 510 } 511 512 513 514 short ** 515 transpose(short **R, int n) 516 { 517 short **new_R; 518 short **temp_R; 519 short *nedges; 520 short *sp; 521 int i; 522 int k; 523 524 nedges = NEW2(n, short); 525 526 for (i = 0; i < n; i++) 527 { 528 sp = R[i]; 529 if (sp) 530 { 531 while (*sp >= 0) 532 nedges[*sp++]++; 533 } 534 } 535 536 new_R = NEW2(n, short *); 537 temp_R = NEW2(n, short *); 538 539 for (i = 0; i < n; i++) 540 { 541 k = nedges[i]; 542 if (k > 0) 543 { 544 sp = NEW2(k + 1, short); 545 new_R[i] = sp; 546 temp_R[i] = sp; 547 sp[k] = -1; 548 } 549 } 550 551 FREE(nedges); 552 553 for (i = 0; i < n; i++) 554 { 555 sp = R[i]; 556 if (sp) 557 { 558 while (*sp >= 0) 559 *temp_R[*sp++]++ = i; 560 } 561 } 562 563 FREE(temp_R); 564 565 return (new_R); 566 } 567 568 569 void 570 compute_FOLLOWS(void) 571 { 572 digraph(includes); 573 } 574 575 void 576 compute_lookaheads(void) 577 { 578 int i, n; 579 unsigned *fp1, *fp2, *fp3; 580 shorts *sp, *next; 581 unsigned *rowp; 582 583 rowp = LA; 584 n = lookaheads[nstates]; 585 for (i = 0; i < n; i++) 586 { 587 fp3 = rowp + tokensetsize; 588 for (sp = lookback[i]; sp; sp = sp->next) 589 { 590 fp1 = rowp; 591 fp2 = F + tokensetsize * sp->value; 592 while (fp1 < fp3) 593 *fp1++ |= *fp2++; 594 } 595 rowp = fp3; 596 } 597 598 for (i = 0; i < n; i++) 599 for (sp = lookback[i]; sp; sp = next) 600 { 601 next = sp->next; 602 FREE(sp); 603 } 604 605 FREE(lookback); 606 FREE(F); 607 } 608 609 void 610 digraph(short **relation) 611 { 612 int i; 613 614 infinity = ngotos + 2; 615 INDEX = NEW2(ngotos + 1, short); 616 VERTICES = NEW2(ngotos + 1, short); 617 top = 0; 618 619 R = relation; 620 621 for (i = 0; i < ngotos; i++) 622 INDEX[i] = 0; 623 624 for (i = 0; i < ngotos; i++) 625 { 626 if (INDEX[i] == 0 && R[i]) 627 traverse(i); 628 } 629 630 FREE(INDEX); 631 FREE(VERTICES); 632 } 633 634 635 void 636 traverse(int i) 637 { 638 unsigned *fp1; 639 unsigned *fp2; 640 unsigned *fp3; 641 int j; 642 short *rp; 643 644 int height; 645 unsigned *base; 646 647 VERTICES[++top] = i; 648 INDEX[i] = height = top; 649 650 base = F + i * tokensetsize; 651 fp3 = base + tokensetsize; 652 653 rp = R[i]; 654 if (rp) 655 { 656 while ((j = *rp++) >= 0) 657 { 658 if (INDEX[j] == 0) 659 traverse(j); 660 661 if (INDEX[i] > INDEX[j]) 662 INDEX[i] = INDEX[j]; 663 664 fp1 = base; 665 fp2 = F + j * tokensetsize; 666 667 while (fp1 < fp3) 668 *fp1++ |= *fp2++; 669 } 670 } 671 672 if (INDEX[i] == height) 673 { 674 for (;;) 675 { 676 j = VERTICES[top--]; 677 INDEX[j] = infinity; 678 679 if (i == j) 680 break; 681 682 fp1 = base; 683 fp2 = F + j * tokensetsize; 684 685 while (fp1 < fp3) 686 *fp2++ = *fp1++; 687 } 688 } 689 } 690