1 /* dfa - DFA construction routines */ 2 3 /* Copyright (c) 1990 The Regents of the University of California. */ 4 /* All rights reserved. */ 5 6 /* This code is derived from software contributed to Berkeley by */ 7 /* Vern Paxson. */ 8 9 /* The United States Government has rights in this work pursuant */ 10 /* to contract no. DE-AC03-76SF00098 between the United States */ 11 /* Department of Energy and the University of California. */ 12 13 /* Redistribution and use in source and binary forms, with or without */ 14 /* modification, are permitted provided that the following conditions */ 15 /* are met: */ 16 17 /* 1. Redistributions of source code must retain the above copyright */ 18 /* notice, this list of conditions and the following disclaimer. */ 19 /* 2. Redistributions in binary form must reproduce the above copyright */ 20 /* notice, this list of conditions and the following disclaimer in the */ 21 /* documentation and/or other materials provided with the distribution. */ 22 23 /* Neither the name of the University nor the names of its contributors */ 24 /* may be used to endorse or promote products derived from this software */ 25 /* without specific prior written permission. */ 26 27 /* THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR */ 28 /* IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED */ 29 /* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR */ 30 /* PURPOSE. */ 31 #include "flexdef.h" 32 __RCSID("$NetBSD: dfa.c,v 1.2 2016/01/09 17:38:57 christos Exp $"); 33 34 #include "tables.h" 35 36 /* declare functions that have forward references */ 37 38 void dump_associated_rules PROTO ((FILE *, int)); 39 void dump_transitions PROTO ((FILE *, int[])); 40 void sympartition PROTO ((int[], int, int[], int[])); 41 int symfollowset PROTO ((int[], int, int, int[])); 42 43 44 /* check_for_backing_up - check a DFA state for backing up 45 * 46 * synopsis 47 * void check_for_backing_up( int ds, int state[numecs] ); 48 * 49 * ds is the number of the state to check and state[] is its out-transitions, 50 * indexed by equivalence class. 51 */ 52 53 void check_for_backing_up (ds, state) 54 int ds; 55 int state[]; 56 { 57 if ((reject && !dfaacc[ds].dfaacc_set) || (!reject && !dfaacc[ds].dfaacc_state)) { /* state is non-accepting */ 58 ++num_backing_up; 59 60 if (backing_up_report) { 61 fprintf (backing_up_file, 62 _("State #%d is non-accepting -\n"), ds); 63 64 /* identify the state */ 65 dump_associated_rules (backing_up_file, ds); 66 67 /* Now identify it further using the out- and 68 * jam-transitions. 69 */ 70 dump_transitions (backing_up_file, state); 71 72 putc ('\n', backing_up_file); 73 } 74 } 75 } 76 77 78 /* check_trailing_context - check to see if NFA state set constitutes 79 * "dangerous" trailing context 80 * 81 * synopsis 82 * void check_trailing_context( int nfa_states[num_states+1], int num_states, 83 * int accset[nacc+1], int nacc ); 84 * 85 * NOTES 86 * Trailing context is "dangerous" if both the head and the trailing 87 * part are of variable size \and/ there's a DFA state which contains 88 * both an accepting state for the head part of the rule and NFA states 89 * which occur after the beginning of the trailing context. 90 * 91 * When such a rule is matched, it's impossible to tell if having been 92 * in the DFA state indicates the beginning of the trailing context or 93 * further-along scanning of the pattern. In these cases, a warning 94 * message is issued. 95 * 96 * nfa_states[1 .. num_states] is the list of NFA states in the DFA. 97 * accset[1 .. nacc] is the list of accepting numbers for the DFA state. 98 */ 99 100 void check_trailing_context (nfa_states, num_states, accset, nacc) 101 int *nfa_states, num_states; 102 int *accset; 103 int nacc; 104 { 105 int i, j; 106 107 for (i = 1; i <= num_states; ++i) { 108 int ns = nfa_states[i]; 109 int type = state_type[ns]; 110 int ar = assoc_rule[ns]; 111 112 if (type == STATE_NORMAL || rule_type[ar] != RULE_VARIABLE) { /* do nothing */ 113 } 114 115 else if (type == STATE_TRAILING_CONTEXT) { 116 /* Potential trouble. Scan set of accepting numbers 117 * for the one marking the end of the "head". We 118 * assume that this looping will be fairly cheap 119 * since it's rare that an accepting number set 120 * is large. 121 */ 122 for (j = 1; j <= nacc; ++j) 123 if (accset[j] & YY_TRAILING_HEAD_MASK) { 124 line_warning (_ 125 ("dangerous trailing context"), 126 rule_linenum[ar]); 127 return; 128 } 129 } 130 } 131 } 132 133 134 /* dump_associated_rules - list the rules associated with a DFA state 135 * 136 * Goes through the set of NFA states associated with the DFA and 137 * extracts the first MAX_ASSOC_RULES unique rules, sorts them, 138 * and writes a report to the given file. 139 */ 140 141 void dump_associated_rules (file, ds) 142 FILE *file; 143 int ds; 144 { 145 int i, j; 146 int num_associated_rules = 0; 147 int rule_set[MAX_ASSOC_RULES + 1]; 148 int *dset = dss[ds]; 149 int size = dfasiz[ds]; 150 151 for (i = 1; i <= size; ++i) { 152 int rule_num = rule_linenum[assoc_rule[dset[i]]]; 153 154 for (j = 1; j <= num_associated_rules; ++j) 155 if (rule_num == rule_set[j]) 156 break; 157 158 if (j > num_associated_rules) { /* new rule */ 159 if (num_associated_rules < MAX_ASSOC_RULES) 160 rule_set[++num_associated_rules] = 161 rule_num; 162 } 163 } 164 165 qsort (&rule_set [1], num_associated_rules, sizeof (rule_set [1]), intcmp); 166 167 fprintf (file, _(" associated rule line numbers:")); 168 169 for (i = 1; i <= num_associated_rules; ++i) { 170 if (i % 8 == 1) 171 putc ('\n', file); 172 173 fprintf (file, "\t%d", rule_set[i]); 174 } 175 176 putc ('\n', file); 177 } 178 179 180 /* dump_transitions - list the transitions associated with a DFA state 181 * 182 * synopsis 183 * dump_transitions( FILE *file, int state[numecs] ); 184 * 185 * Goes through the set of out-transitions and lists them in human-readable 186 * form (i.e., not as equivalence classes); also lists jam transitions 187 * (i.e., all those which are not out-transitions, plus EOF). The dump 188 * is done to the given file. 189 */ 190 191 void dump_transitions (file, state) 192 FILE *file; 193 int state[]; 194 { 195 int i, ec; 196 int out_char_set[CSIZE]; 197 198 for (i = 0; i < csize; ++i) { 199 ec = ABS (ecgroup[i]); 200 out_char_set[i] = state[ec]; 201 } 202 203 fprintf (file, _(" out-transitions: ")); 204 205 list_character_set (file, out_char_set); 206 207 /* now invert the members of the set to get the jam transitions */ 208 for (i = 0; i < csize; ++i) 209 out_char_set[i] = !out_char_set[i]; 210 211 fprintf (file, _("\n jam-transitions: EOF ")); 212 213 list_character_set (file, out_char_set); 214 215 putc ('\n', file); 216 } 217 218 219 /* epsclosure - construct the epsilon closure of a set of ndfa states 220 * 221 * synopsis 222 * int *epsclosure( int t[num_states], int *numstates_addr, 223 * int accset[num_rules+1], int *nacc_addr, 224 * int *hashval_addr ); 225 * 226 * NOTES 227 * The epsilon closure is the set of all states reachable by an arbitrary 228 * number of epsilon transitions, which themselves do not have epsilon 229 * transitions going out, unioned with the set of states which have non-null 230 * accepting numbers. t is an array of size numstates of nfa state numbers. 231 * Upon return, t holds the epsilon closure and *numstates_addr is updated. 232 * accset holds a list of the accepting numbers, and the size of accset is 233 * given by *nacc_addr. t may be subjected to reallocation if it is not 234 * large enough to hold the epsilon closure. 235 * 236 * hashval is the hash value for the dfa corresponding to the state set. 237 */ 238 239 int *epsclosure (t, ns_addr, accset, nacc_addr, hv_addr) 240 int *t, *ns_addr, accset[], *nacc_addr, *hv_addr; 241 { 242 int stkpos, ns, tsp; 243 int numstates = *ns_addr, nacc, hashval, transsym, nfaccnum; 244 int stkend, nstate; 245 static int did_stk_init = false, *stk; 246 247 #define MARK_STATE(state) \ 248 do{ trans1[state] = trans1[state] - MARKER_DIFFERENCE;} while(0) 249 250 #define IS_MARKED(state) (trans1[state] < 0) 251 252 #define UNMARK_STATE(state) \ 253 do{ trans1[state] = trans1[state] + MARKER_DIFFERENCE;} while(0) 254 255 #define CHECK_ACCEPT(state) \ 256 do{ \ 257 nfaccnum = accptnum[state]; \ 258 if ( nfaccnum != NIL ) \ 259 accset[++nacc] = nfaccnum; \ 260 }while(0) 261 262 #define DO_REALLOCATION() \ 263 do { \ 264 current_max_dfa_size += MAX_DFA_SIZE_INCREMENT; \ 265 ++num_reallocs; \ 266 t = reallocate_integer_array( t, current_max_dfa_size ); \ 267 stk = reallocate_integer_array( stk, current_max_dfa_size ); \ 268 }while(0) \ 269 270 #define PUT_ON_STACK(state) \ 271 do { \ 272 if ( ++stkend >= current_max_dfa_size ) \ 273 DO_REALLOCATION(); \ 274 stk[stkend] = state; \ 275 MARK_STATE(state); \ 276 }while(0) 277 278 #define ADD_STATE(state) \ 279 do { \ 280 if ( ++numstates >= current_max_dfa_size ) \ 281 DO_REALLOCATION(); \ 282 t[numstates] = state; \ 283 hashval += state; \ 284 }while(0) 285 286 #define STACK_STATE(state) \ 287 do { \ 288 PUT_ON_STACK(state); \ 289 CHECK_ACCEPT(state); \ 290 if ( nfaccnum != NIL || transchar[state] != SYM_EPSILON ) \ 291 ADD_STATE(state); \ 292 }while(0) 293 294 295 if (!did_stk_init) { 296 stk = allocate_integer_array (current_max_dfa_size); 297 did_stk_init = true; 298 } 299 300 nacc = stkend = hashval = 0; 301 302 for (nstate = 1; nstate <= numstates; ++nstate) { 303 ns = t[nstate]; 304 305 /* The state could be marked if we've already pushed it onto 306 * the stack. 307 */ 308 if (!IS_MARKED (ns)) { 309 PUT_ON_STACK (ns); 310 CHECK_ACCEPT (ns); 311 hashval += ns; 312 } 313 } 314 315 for (stkpos = 1; stkpos <= stkend; ++stkpos) { 316 ns = stk[stkpos]; 317 transsym = transchar[ns]; 318 319 if (transsym == SYM_EPSILON) { 320 tsp = trans1[ns] + MARKER_DIFFERENCE; 321 322 if (tsp != NO_TRANSITION) { 323 if (!IS_MARKED (tsp)) 324 STACK_STATE (tsp); 325 326 tsp = trans2[ns]; 327 328 if (tsp != NO_TRANSITION 329 && !IS_MARKED (tsp)) 330 STACK_STATE (tsp); 331 } 332 } 333 } 334 335 /* Clear out "visit" markers. */ 336 337 for (stkpos = 1; stkpos <= stkend; ++stkpos) { 338 if (IS_MARKED (stk[stkpos])) 339 UNMARK_STATE (stk[stkpos]); 340 else 341 flexfatal (_ 342 ("consistency check failed in epsclosure()")); 343 } 344 345 *ns_addr = numstates; 346 *hv_addr = hashval; 347 *nacc_addr = nacc; 348 349 return t; 350 } 351 352 353 /* increase_max_dfas - increase the maximum number of DFAs */ 354 355 void increase_max_dfas () 356 { 357 current_max_dfas += MAX_DFAS_INCREMENT; 358 359 ++num_reallocs; 360 361 base = reallocate_integer_array (base, current_max_dfas); 362 def = reallocate_integer_array (def, current_max_dfas); 363 dfasiz = reallocate_integer_array (dfasiz, current_max_dfas); 364 accsiz = reallocate_integer_array (accsiz, current_max_dfas); 365 dhash = reallocate_integer_array (dhash, current_max_dfas); 366 dss = reallocate_int_ptr_array (dss, current_max_dfas); 367 dfaacc = reallocate_dfaacc_union (dfaacc, current_max_dfas); 368 369 if (nultrans) 370 nultrans = 371 reallocate_integer_array (nultrans, 372 current_max_dfas); 373 } 374 375 376 /* ntod - convert an ndfa to a dfa 377 * 378 * Creates the dfa corresponding to the ndfa we've constructed. The 379 * dfa starts out in state #1. 380 */ 381 382 void ntod () 383 { 384 int *accset, ds, nacc, newds; 385 int sym, hashval, numstates, dsize; 386 int num_full_table_rows=0; /* used only for -f */ 387 int *nset, *dset; 388 int targptr, totaltrans, i, comstate, comfreq, targ; 389 int symlist[CSIZE + 1]; 390 int num_start_states; 391 int todo_head, todo_next; 392 393 struct yytbl_data *yynxt_tbl = 0; 394 flex_int32_t *yynxt_data = 0, yynxt_curr = 0; 395 396 /* Note that the following are indexed by *equivalence classes* 397 * and not by characters. Since equivalence classes are indexed 398 * beginning with 1, even if the scanner accepts NUL's, this 399 * means that (since every character is potentially in its own 400 * equivalence class) these arrays must have room for indices 401 * from 1 to CSIZE, so their size must be CSIZE + 1. 402 */ 403 int duplist[CSIZE + 1], state[CSIZE + 1]; 404 int targfreq[CSIZE + 1] = {0}, targstate[CSIZE + 1]; 405 406 /* accset needs to be large enough to hold all of the rules present 407 * in the input, *plus* their YY_TRAILING_HEAD_MASK variants. 408 */ 409 accset = allocate_integer_array ((num_rules + 1) * 2); 410 nset = allocate_integer_array (current_max_dfa_size); 411 412 /* The "todo" queue is represented by the head, which is the DFA 413 * state currently being processed, and the "next", which is the 414 * next DFA state number available (not in use). We depend on the 415 * fact that snstods() returns DFA's \in increasing order/, and thus 416 * need only know the bounds of the dfas to be processed. 417 */ 418 todo_head = todo_next = 0; 419 420 for (i = 0; i <= csize; ++i) { 421 duplist[i] = NIL; 422 symlist[i] = false; 423 } 424 425 for (i = 0; i <= num_rules; ++i) 426 accset[i] = NIL; 427 428 if (trace) { 429 dumpnfa (scset[1]); 430 fputs (_("\n\nDFA Dump:\n\n"), stderr); 431 } 432 433 inittbl (); 434 435 /* Check to see whether we should build a separate table for 436 * transitions on NUL characters. We don't do this for full-speed 437 * (-F) scanners, since for them we don't have a simple state 438 * number lying around with which to index the table. We also 439 * don't bother doing it for scanners unless (1) NUL is in its own 440 * equivalence class (indicated by a positive value of 441 * ecgroup[NUL]), (2) NUL's equivalence class is the last 442 * equivalence class, and (3) the number of equivalence classes is 443 * the same as the number of characters. This latter case comes 444 * about when useecs is false or when it's true but every character 445 * still manages to land in its own class (unlikely, but it's 446 * cheap to check for). If all these things are true then the 447 * character code needed to represent NUL's equivalence class for 448 * indexing the tables is going to take one more bit than the 449 * number of characters, and therefore we won't be assured of 450 * being able to fit it into a YY_CHAR variable. This rules out 451 * storing the transitions in a compressed table, since the code 452 * for interpreting them uses a YY_CHAR variable (perhaps it 453 * should just use an integer, though; this is worth pondering ... 454 * ###). 455 * 456 * Finally, for full tables, we want the number of entries in the 457 * table to be a power of two so the array references go fast (it 458 * will just take a shift to compute the major index). If 459 * encoding NUL's transitions in the table will spoil this, we 460 * give it its own table (note that this will be the case if we're 461 * not using equivalence classes). 462 */ 463 464 /* Note that the test for ecgroup[0] == numecs below accomplishes 465 * both (1) and (2) above 466 */ 467 if (!fullspd && ecgroup[0] == numecs) { 468 /* NUL is alone in its equivalence class, which is the 469 * last one. 470 */ 471 int use_NUL_table = (numecs == csize); 472 473 if (fulltbl && !use_NUL_table) { 474 /* We still may want to use the table if numecs 475 * is a power of 2. 476 */ 477 int power_of_two; 478 479 for (power_of_two = 1; power_of_two <= csize; 480 power_of_two *= 2) 481 if (numecs == power_of_two) { 482 use_NUL_table = true; 483 break; 484 } 485 } 486 487 if (use_NUL_table) 488 nultrans = 489 allocate_integer_array (current_max_dfas); 490 491 /* From now on, nultrans != nil indicates that we're 492 * saving null transitions for later, separate encoding. 493 */ 494 } 495 496 497 if (fullspd) { 498 for (i = 0; i <= numecs; ++i) 499 state[i] = 0; 500 501 place_state (state, 0, 0); 502 dfaacc[0].dfaacc_state = 0; 503 } 504 505 else if (fulltbl) { 506 if (nultrans) 507 /* We won't be including NUL's transitions in the 508 * table, so build it for entries from 0 .. numecs - 1. 509 */ 510 num_full_table_rows = numecs; 511 512 else 513 /* Take into account the fact that we'll be including 514 * the NUL entries in the transition table. Build it 515 * from 0 .. numecs. 516 */ 517 num_full_table_rows = numecs + 1; 518 519 /* Begin generating yy_nxt[][] 520 * This spans the entire LONG function. 521 * This table is tricky because we don't know how big it will be. 522 * So we'll have to realloc() on the way... 523 * we'll wait until we can calculate yynxt_tbl->td_hilen. 524 */ 525 yynxt_tbl = 526 (struct yytbl_data *) calloc (1, 527 sizeof (struct 528 yytbl_data)); 529 yytbl_data_init (yynxt_tbl, YYTD_ID_NXT); 530 yynxt_tbl->td_hilen = 1; 531 yynxt_tbl->td_lolen = num_full_table_rows; 532 yynxt_tbl->td_data = yynxt_data = 533 (flex_int32_t *) calloc (yynxt_tbl->td_lolen * 534 yynxt_tbl->td_hilen, 535 sizeof (flex_int32_t)); 536 yynxt_curr = 0; 537 538 buf_prints (&yydmap_buf, 539 "\t{YYTD_ID_NXT, (void**)&yy_nxt, sizeof(%s)},\n", 540 long_align ? "flex_int32_t" : "flex_int16_t"); 541 542 /* Unless -Ca, declare it "short" because it's a real 543 * long-shot that that won't be large enough. 544 */ 545 if (gentables) 546 out_str_dec 547 ("static yyconst %s yy_nxt[][%d] =\n {\n", 548 long_align ? "flex_int32_t" : "flex_int16_t", 549 num_full_table_rows); 550 else { 551 out_dec ("#undef YY_NXT_LOLEN\n#define YY_NXT_LOLEN (%d)\n", num_full_table_rows); 552 out_str ("static yyconst %s *yy_nxt =0;\n", 553 long_align ? "flex_int32_t" : "flex_int16_t"); 554 } 555 556 557 if (gentables) 558 outn (" {"); 559 560 /* Generate 0 entries for state #0. */ 561 for (i = 0; i < num_full_table_rows; ++i) { 562 mk2data (0); 563 yynxt_data[yynxt_curr++] = 0; 564 } 565 566 dataflush (); 567 if (gentables) 568 outn (" },\n"); 569 } 570 571 /* Create the first states. */ 572 573 num_start_states = lastsc * 2; 574 575 for (i = 1; i <= num_start_states; ++i) { 576 numstates = 1; 577 578 /* For each start condition, make one state for the case when 579 * we're at the beginning of the line (the '^' operator) and 580 * one for the case when we're not. 581 */ 582 if (i % 2 == 1) 583 nset[numstates] = scset[(i / 2) + 1]; 584 else 585 nset[numstates] = 586 mkbranch (scbol[i / 2], scset[i / 2]); 587 588 nset = epsclosure (nset, &numstates, accset, &nacc, 589 &hashval); 590 591 if (snstods (nset, numstates, accset, nacc, hashval, &ds)) { 592 numas += nacc; 593 totnst += numstates; 594 ++todo_next; 595 596 if (variable_trailing_context_rules && nacc > 0) 597 check_trailing_context (nset, numstates, 598 accset, nacc); 599 } 600 } 601 602 if (!fullspd) { 603 if (!snstods (nset, 0, accset, 0, 0, &end_of_buffer_state)) 604 flexfatal (_ 605 ("could not create unique end-of-buffer state")); 606 607 ++numas; 608 ++num_start_states; 609 ++todo_next; 610 } 611 612 613 while (todo_head < todo_next) { 614 targptr = 0; 615 totaltrans = 0; 616 617 for (i = 1; i <= numecs; ++i) 618 state[i] = 0; 619 620 ds = ++todo_head; 621 622 dset = dss[ds]; 623 dsize = dfasiz[ds]; 624 625 if (trace) 626 fprintf (stderr, _("state # %d:\n"), ds); 627 628 sympartition (dset, dsize, symlist, duplist); 629 630 for (sym = 1; sym <= numecs; ++sym) { 631 if (symlist[sym]) { 632 symlist[sym] = 0; 633 634 if (duplist[sym] == NIL) { 635 /* Symbol has unique out-transitions. */ 636 numstates = 637 symfollowset (dset, dsize, 638 sym, nset); 639 nset = epsclosure (nset, 640 &numstates, 641 accset, &nacc, 642 &hashval); 643 644 if (snstods 645 (nset, numstates, accset, nacc, 646 hashval, &newds)) { 647 totnst = totnst + 648 numstates; 649 ++todo_next; 650 numas += nacc; 651 652 if (variable_trailing_context_rules && nacc > 0) 653 check_trailing_context 654 (nset, 655 numstates, 656 accset, 657 nacc); 658 } 659 660 state[sym] = newds; 661 662 if (trace) 663 fprintf (stderr, 664 "\t%d\t%d\n", sym, 665 newds); 666 667 targfreq[++targptr] = 1; 668 targstate[targptr] = newds; 669 ++numuniq; 670 } 671 672 else { 673 /* sym's equivalence class has the same 674 * transitions as duplist(sym)'s 675 * equivalence class. 676 */ 677 targ = state[duplist[sym]]; 678 state[sym] = targ; 679 680 if (trace) 681 fprintf (stderr, 682 "\t%d\t%d\n", sym, 683 targ); 684 685 /* Update frequency count for 686 * destination state. 687 */ 688 689 i = 0; 690 while (targstate[++i] != targ) ; 691 692 ++targfreq[i]; 693 ++numdup; 694 } 695 696 ++totaltrans; 697 duplist[sym] = NIL; 698 } 699 } 700 701 702 numsnpairs += totaltrans; 703 704 if (ds > num_start_states) 705 check_for_backing_up (ds, state); 706 707 if (nultrans) { 708 nultrans[ds] = state[NUL_ec]; 709 state[NUL_ec] = 0; /* remove transition */ 710 } 711 712 if (fulltbl) { 713 714 /* Each time we hit here, it's another td_hilen, so we realloc. */ 715 yynxt_tbl->td_hilen++; 716 yynxt_tbl->td_data = yynxt_data = 717 (flex_int32_t *) realloc (yynxt_data, 718 yynxt_tbl->td_hilen * 719 yynxt_tbl->td_lolen * 720 sizeof (flex_int32_t)); 721 722 723 if (gentables) 724 outn (" {"); 725 726 /* Supply array's 0-element. */ 727 if (ds == end_of_buffer_state) { 728 mk2data (-end_of_buffer_state); 729 yynxt_data[yynxt_curr++] = 730 -end_of_buffer_state; 731 } 732 else { 733 mk2data (end_of_buffer_state); 734 yynxt_data[yynxt_curr++] = 735 end_of_buffer_state; 736 } 737 738 for (i = 1; i < num_full_table_rows; ++i) { 739 /* Jams are marked by negative of state 740 * number. 741 */ 742 mk2data (state[i] ? state[i] : -ds); 743 yynxt_data[yynxt_curr++] = 744 state[i] ? state[i] : -ds; 745 } 746 747 dataflush (); 748 if (gentables) 749 outn (" },\n"); 750 } 751 752 else if (fullspd) 753 place_state (state, ds, totaltrans); 754 755 else if (ds == end_of_buffer_state) 756 /* Special case this state to make sure it does what 757 * it's supposed to, i.e., jam on end-of-buffer. 758 */ 759 stack1 (ds, 0, 0, JAMSTATE); 760 761 else { /* normal, compressed state */ 762 763 /* Determine which destination state is the most 764 * common, and how many transitions to it there are. 765 */ 766 767 comfreq = 0; 768 comstate = 0; 769 770 for (i = 1; i <= targptr; ++i) 771 if (targfreq[i] > comfreq) { 772 comfreq = targfreq[i]; 773 comstate = targstate[i]; 774 } 775 776 bldtbl (state, ds, totaltrans, comstate, comfreq); 777 } 778 } 779 780 if (fulltbl) { 781 dataend (); 782 if (tablesext) { 783 yytbl_data_compress (yynxt_tbl); 784 if (yytbl_data_fwrite (&tableswr, yynxt_tbl) < 0) 785 flexerror (_ 786 ("Could not write yynxt_tbl[][]")); 787 } 788 if (yynxt_tbl) { 789 yytbl_data_destroy (yynxt_tbl); 790 yynxt_tbl = 0; 791 } 792 } 793 794 else if (!fullspd) { 795 cmptmps (); /* create compressed template entries */ 796 797 /* Create tables for all the states with only one 798 * out-transition. 799 */ 800 while (onesp > 0) { 801 mk1tbl (onestate[onesp], onesym[onesp], 802 onenext[onesp], onedef[onesp]); 803 --onesp; 804 } 805 806 mkdeftbl (); 807 } 808 809 flex_free ((void *) accset); 810 flex_free ((void *) nset); 811 } 812 813 814 /* snstods - converts a set of ndfa states into a dfa state 815 * 816 * synopsis 817 * is_new_state = snstods( int sns[numstates], int numstates, 818 * int accset[num_rules+1], int nacc, 819 * int hashval, int *newds_addr ); 820 * 821 * On return, the dfa state number is in newds. 822 */ 823 824 int snstods (sns, numstates, accset, nacc, hashval, newds_addr) 825 int sns[], numstates, accset[], nacc, hashval, *newds_addr; 826 { 827 int didsort = 0; 828 int i, j; 829 int newds, *oldsns; 830 831 for (i = 1; i <= lastdfa; ++i) 832 if (hashval == dhash[i]) { 833 if (numstates == dfasiz[i]) { 834 oldsns = dss[i]; 835 836 if (!didsort) { 837 /* We sort the states in sns so we 838 * can compare it to oldsns quickly. 839 */ 840 qsort (&sns [1], numstates, sizeof (sns [1]), intcmp); 841 didsort = 1; 842 } 843 844 for (j = 1; j <= numstates; ++j) 845 if (sns[j] != oldsns[j]) 846 break; 847 848 if (j > numstates) { 849 ++dfaeql; 850 *newds_addr = i; 851 return 0; 852 } 853 854 ++hshcol; 855 } 856 857 else 858 ++hshsave; 859 } 860 861 /* Make a new dfa. */ 862 863 if (++lastdfa >= current_max_dfas) 864 increase_max_dfas (); 865 866 newds = lastdfa; 867 868 dss[newds] = allocate_integer_array (numstates + 1); 869 870 /* If we haven't already sorted the states in sns, we do so now, 871 * so that future comparisons with it can be made quickly. 872 */ 873 874 if (!didsort) 875 qsort (&sns [1], numstates, sizeof (sns [1]), intcmp); 876 877 for (i = 1; i <= numstates; ++i) 878 dss[newds][i] = sns[i]; 879 880 dfasiz[newds] = numstates; 881 dhash[newds] = hashval; 882 883 if (nacc == 0) { 884 if (reject) 885 dfaacc[newds].dfaacc_set = (int *) 0; 886 else 887 dfaacc[newds].dfaacc_state = 0; 888 889 accsiz[newds] = 0; 890 } 891 892 else if (reject) { 893 /* We sort the accepting set in increasing order so the 894 * disambiguating rule that the first rule listed is considered 895 * match in the event of ties will work. 896 */ 897 898 qsort (&accset [1], nacc, sizeof (accset [1]), intcmp); 899 900 dfaacc[newds].dfaacc_set = 901 allocate_integer_array (nacc + 1); 902 903 /* Save the accepting set for later */ 904 for (i = 1; i <= nacc; ++i) { 905 dfaacc[newds].dfaacc_set[i] = accset[i]; 906 907 if (accset[i] <= num_rules) 908 /* Who knows, perhaps a REJECT can yield 909 * this rule. 910 */ 911 rule_useful[accset[i]] = true; 912 } 913 914 accsiz[newds] = nacc; 915 } 916 917 else { 918 /* Find lowest numbered rule so the disambiguating rule 919 * will work. 920 */ 921 j = num_rules + 1; 922 923 for (i = 1; i <= nacc; ++i) 924 if (accset[i] < j) 925 j = accset[i]; 926 927 dfaacc[newds].dfaacc_state = j; 928 929 if (j <= num_rules) 930 rule_useful[j] = true; 931 } 932 933 *newds_addr = newds; 934 935 return 1; 936 } 937 938 939 /* symfollowset - follow the symbol transitions one step 940 * 941 * synopsis 942 * numstates = symfollowset( int ds[current_max_dfa_size], int dsize, 943 * int transsym, int nset[current_max_dfa_size] ); 944 */ 945 946 int symfollowset (ds, dsize, transsym, nset) 947 int ds[], dsize, transsym, nset[]; 948 { 949 int ns, tsp, sym, i, j, lenccl, ch, numstates, ccllist; 950 951 numstates = 0; 952 953 for (i = 1; i <= dsize; ++i) { /* for each nfa state ns in the state set of ds */ 954 ns = ds[i]; 955 sym = transchar[ns]; 956 tsp = trans1[ns]; 957 958 if (sym < 0) { /* it's a character class */ 959 sym = -sym; 960 ccllist = cclmap[sym]; 961 lenccl = ccllen[sym]; 962 963 if (cclng[sym]) { 964 for (j = 0; j < lenccl; ++j) { 965 /* Loop through negated character 966 * class. 967 */ 968 ch = ccltbl[ccllist + j]; 969 970 if (ch == 0) 971 ch = NUL_ec; 972 973 if (ch > transsym) 974 /* Transsym isn't in negated 975 * ccl. 976 */ 977 break; 978 979 else if (ch == transsym) 980 /* next 2 */ 981 goto bottom; 982 } 983 984 /* Didn't find transsym in ccl. */ 985 nset[++numstates] = tsp; 986 } 987 988 else 989 for (j = 0; j < lenccl; ++j) { 990 ch = ccltbl[ccllist + j]; 991 992 if (ch == 0) 993 ch = NUL_ec; 994 995 if (ch > transsym) 996 break; 997 else if (ch == transsym) { 998 nset[++numstates] = tsp; 999 break; 1000 } 1001 } 1002 } 1003 1004 else if (sym == SYM_EPSILON) { /* do nothing */ 1005 } 1006 1007 else if (ABS (ecgroup[sym]) == transsym) 1008 nset[++numstates] = tsp; 1009 1010 bottom:; 1011 } 1012 1013 return numstates; 1014 } 1015 1016 1017 /* sympartition - partition characters with same out-transitions 1018 * 1019 * synopsis 1020 * sympartition( int ds[current_max_dfa_size], int numstates, 1021 * int symlist[numecs], int duplist[numecs] ); 1022 */ 1023 1024 void sympartition (ds, numstates, symlist, duplist) 1025 int ds[], numstates; 1026 int symlist[], duplist[]; 1027 { 1028 int tch, i, j, k, ns, dupfwd[CSIZE + 1], lenccl, cclp, ich; 1029 1030 /* Partitioning is done by creating equivalence classes for those 1031 * characters which have out-transitions from the given state. Thus 1032 * we are really creating equivalence classes of equivalence classes. 1033 */ 1034 1035 for (i = 1; i <= numecs; ++i) { /* initialize equivalence class list */ 1036 duplist[i] = i - 1; 1037 dupfwd[i] = i + 1; 1038 } 1039 1040 duplist[1] = NIL; 1041 dupfwd[numecs] = NIL; 1042 1043 for (i = 1; i <= numstates; ++i) { 1044 ns = ds[i]; 1045 tch = transchar[ns]; 1046 1047 if (tch != SYM_EPSILON) { 1048 if (tch < -lastccl || tch >= csize) { 1049 flexfatal (_ 1050 ("bad transition character detected in sympartition()")); 1051 } 1052 1053 if (tch >= 0) { /* character transition */ 1054 int ec = ecgroup[tch]; 1055 1056 mkechar (ec, dupfwd, duplist); 1057 symlist[ec] = 1; 1058 } 1059 1060 else { /* character class */ 1061 tch = -tch; 1062 1063 lenccl = ccllen[tch]; 1064 cclp = cclmap[tch]; 1065 mkeccl (ccltbl + cclp, lenccl, dupfwd, 1066 duplist, numecs, NUL_ec); 1067 1068 if (cclng[tch]) { 1069 j = 0; 1070 1071 for (k = 0; k < lenccl; ++k) { 1072 ich = ccltbl[cclp + k]; 1073 1074 if (ich == 0) 1075 ich = NUL_ec; 1076 1077 for (++j; j < ich; ++j) 1078 symlist[j] = 1; 1079 } 1080 1081 for (++j; j <= numecs; ++j) 1082 symlist[j] = 1; 1083 } 1084 1085 else 1086 for (k = 0; k < lenccl; ++k) { 1087 ich = ccltbl[cclp + k]; 1088 1089 if (ich == 0) 1090 ich = NUL_ec; 1091 1092 symlist[ich] = 1; 1093 } 1094 } 1095 } 1096 } 1097 } 1098