1 /* dfa - DFA construction routines */ 2 3 /* Copyright (c) 1990 The Regents of the University of California. */ 4 /* All rights reserved. */ 5 6 /* This code is derived from software contributed to Berkeley by */ 7 /* Vern Paxson. */ 8 9 /* The United States Government has rights in this work pursuant */ 10 /* to contract no. DE-AC03-76SF00098 between the United States */ 11 /* Department of Energy and the University of California. */ 12 13 /* Redistribution and use in source and binary forms, with or without */ 14 /* modification, are permitted provided that the following conditions */ 15 /* are met: */ 16 17 /* 1. Redistributions of source code must retain the above copyright */ 18 /* notice, this list of conditions and the following disclaimer. */ 19 /* 2. Redistributions in binary form must reproduce the above copyright */ 20 /* notice, this list of conditions and the following disclaimer in the */ 21 /* documentation and/or other materials provided with the distribution. */ 22 23 /* Neither the name of the University nor the names of its contributors */ 24 /* may be used to endorse or promote products derived from this software */ 25 /* without specific prior written permission. */ 26 27 /* THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR */ 28 /* IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED */ 29 /* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR */ 30 /* PURPOSE. */ 31 #include "flexdef.h" 32 __RCSID("$NetBSD: dfa.c,v 1.3 2017/01/02 17:45:27 christos Exp $"); 33 34 #include "tables.h" 35 36 /* declare functions that have forward references */ 37 38 void dump_associated_rules(FILE *, int); 39 void dump_transitions(FILE *, int[]); 40 void sympartition(int[], int, int[], int[]); 41 int symfollowset(int[], int, int, int[]); 42 43 44 /* check_for_backing_up - check a DFA state for backing up 45 * 46 * synopsis 47 * void check_for_backing_up( int ds, int state[numecs] ); 48 * 49 * ds is the number of the state to check and state[] is its out-transitions, 50 * indexed by equivalence class. 51 */ 52 53 void check_for_backing_up (int ds, int state[]) 54 { 55 if ((reject && !dfaacc[ds].dfaacc_set) || (!reject && !dfaacc[ds].dfaacc_state)) { /* state is non-accepting */ 56 ++num_backing_up; 57 58 if (backing_up_report) { 59 fprintf (backing_up_file, 60 _("State #%d is non-accepting -\n"), ds); 61 62 /* identify the state */ 63 dump_associated_rules (backing_up_file, ds); 64 65 /* Now identify it further using the out- and 66 * jam-transitions. 67 */ 68 dump_transitions (backing_up_file, state); 69 70 putc ('\n', backing_up_file); 71 } 72 } 73 } 74 75 76 /* check_trailing_context - check to see if NFA state set constitutes 77 * "dangerous" trailing context 78 * 79 * synopsis 80 * void check_trailing_context( int nfa_states[num_states+1], int num_states, 81 * int accset[nacc+1], int nacc ); 82 * 83 * NOTES 84 * Trailing context is "dangerous" if both the head and the trailing 85 * part are of variable size \and/ there's a DFA state which contains 86 * both an accepting state for the head part of the rule and NFA states 87 * which occur after the beginning of the trailing context. 88 * 89 * When such a rule is matched, it's impossible to tell if having been 90 * in the DFA state indicates the beginning of the trailing context or 91 * further-along scanning of the pattern. In these cases, a warning 92 * message is issued. 93 * 94 * nfa_states[1 .. num_states] is the list of NFA states in the DFA. 95 * accset[1 .. nacc] is the list of accepting numbers for the DFA state. 96 */ 97 98 void check_trailing_context (int *nfa_states, int num_states, int *accset, int nacc) 99 { 100 int i, j; 101 102 for (i = 1; i <= num_states; ++i) { 103 int ns = nfa_states[i]; 104 int type = state_type[ns]; 105 int ar = assoc_rule[ns]; 106 107 if (type == STATE_NORMAL || rule_type[ar] != RULE_VARIABLE) { /* do nothing */ 108 } 109 110 else if (type == STATE_TRAILING_CONTEXT) { 111 /* Potential trouble. Scan set of accepting numbers 112 * for the one marking the end of the "head". We 113 * assume that this looping will be fairly cheap 114 * since it's rare that an accepting number set 115 * is large. 116 */ 117 for (j = 1; j <= nacc; ++j) 118 if (accset[j] & YY_TRAILING_HEAD_MASK) { 119 line_warning (_ 120 ("dangerous trailing context"), 121 rule_linenum[ar]); 122 return; 123 } 124 } 125 } 126 } 127 128 129 /* dump_associated_rules - list the rules associated with a DFA state 130 * 131 * Goes through the set of NFA states associated with the DFA and 132 * extracts the first MAX_ASSOC_RULES unique rules, sorts them, 133 * and writes a report to the given file. 134 */ 135 136 void dump_associated_rules (FILE *file, int ds) 137 { 138 int i, j; 139 int num_associated_rules = 0; 140 int rule_set[MAX_ASSOC_RULES + 1]; 141 int *dset = dss[ds]; 142 int size = dfasiz[ds]; 143 144 for (i = 1; i <= size; ++i) { 145 int rule_num = rule_linenum[assoc_rule[dset[i]]]; 146 147 for (j = 1; j <= num_associated_rules; ++j) 148 if (rule_num == rule_set[j]) 149 break; 150 151 if (j > num_associated_rules) { /* new rule */ 152 if (num_associated_rules < MAX_ASSOC_RULES) 153 rule_set[++num_associated_rules] = 154 rule_num; 155 } 156 } 157 158 qsort (&rule_set [1], (size_t) num_associated_rules, sizeof (rule_set [1]), intcmp); 159 160 fprintf (file, _(" associated rule line numbers:")); 161 162 for (i = 1; i <= num_associated_rules; ++i) { 163 if (i % 8 == 1) 164 putc ('\n', file); 165 166 fprintf (file, "\t%d", rule_set[i]); 167 } 168 169 putc ('\n', file); 170 } 171 172 173 /* dump_transitions - list the transitions associated with a DFA state 174 * 175 * synopsis 176 * dump_transitions( FILE *file, int state[numecs] ); 177 * 178 * Goes through the set of out-transitions and lists them in human-readable 179 * form (i.e., not as equivalence classes); also lists jam transitions 180 * (i.e., all those which are not out-transitions, plus EOF). The dump 181 * is done to the given file. 182 */ 183 184 void dump_transitions (FILE *file, int state[]) 185 { 186 int i, ec; 187 int out_char_set[CSIZE]; 188 189 for (i = 0; i < csize; ++i) { 190 ec = ABS (ecgroup[i]); 191 out_char_set[i] = state[ec]; 192 } 193 194 fprintf (file, _(" out-transitions: ")); 195 196 list_character_set (file, out_char_set); 197 198 /* now invert the members of the set to get the jam transitions */ 199 for (i = 0; i < csize; ++i) 200 out_char_set[i] = !out_char_set[i]; 201 202 fprintf (file, _("\n jam-transitions: EOF ")); 203 204 list_character_set (file, out_char_set); 205 206 putc ('\n', file); 207 } 208 209 210 /* epsclosure - construct the epsilon closure of a set of ndfa states 211 * 212 * synopsis 213 * int *epsclosure( int t[num_states], int *numstates_addr, 214 * int accset[num_rules+1], int *nacc_addr, 215 * int *hashval_addr ); 216 * 217 * NOTES 218 * The epsilon closure is the set of all states reachable by an arbitrary 219 * number of epsilon transitions, which themselves do not have epsilon 220 * transitions going out, unioned with the set of states which have non-null 221 * accepting numbers. t is an array of size numstates of nfa state numbers. 222 * Upon return, t holds the epsilon closure and *numstates_addr is updated. 223 * accset holds a list of the accepting numbers, and the size of accset is 224 * given by *nacc_addr. t may be subjected to reallocation if it is not 225 * large enough to hold the epsilon closure. 226 * 227 * hashval is the hash value for the dfa corresponding to the state set. 228 */ 229 230 int *epsclosure (int *t, int *ns_addr, int accset[], int *nacc_addr, int *hv_addr) 231 { 232 int stkpos, ns, tsp; 233 int numstates = *ns_addr, nacc, hashval, transsym, nfaccnum; 234 int stkend, nstate; 235 static int did_stk_init = false, *stk; 236 237 #define MARK_STATE(state) \ 238 do{ trans1[state] = trans1[state] - MARKER_DIFFERENCE;} while(0) 239 240 #define IS_MARKED(state) (trans1[state] < 0) 241 242 #define UNMARK_STATE(state) \ 243 do{ trans1[state] = trans1[state] + MARKER_DIFFERENCE;} while(0) 244 245 #define CHECK_ACCEPT(state) \ 246 do{ \ 247 nfaccnum = accptnum[state]; \ 248 if ( nfaccnum != NIL ) \ 249 accset[++nacc] = nfaccnum; \ 250 }while(0) 251 252 #define DO_REALLOCATION() \ 253 do { \ 254 current_max_dfa_size += MAX_DFA_SIZE_INCREMENT; \ 255 ++num_reallocs; \ 256 t = reallocate_integer_array( t, current_max_dfa_size ); \ 257 stk = reallocate_integer_array( stk, current_max_dfa_size ); \ 258 }while(0) \ 259 260 #define PUT_ON_STACK(state) \ 261 do { \ 262 if ( ++stkend >= current_max_dfa_size ) \ 263 DO_REALLOCATION(); \ 264 stk[stkend] = state; \ 265 MARK_STATE(state); \ 266 }while(0) 267 268 #define ADD_STATE(state) \ 269 do { \ 270 if ( ++numstates >= current_max_dfa_size ) \ 271 DO_REALLOCATION(); \ 272 t[numstates] = state; \ 273 hashval += state; \ 274 }while(0) 275 276 #define STACK_STATE(state) \ 277 do { \ 278 PUT_ON_STACK(state); \ 279 CHECK_ACCEPT(state); \ 280 if ( nfaccnum != NIL || transchar[state] != SYM_EPSILON ) \ 281 ADD_STATE(state); \ 282 }while(0) 283 284 285 if (!did_stk_init) { 286 stk = allocate_integer_array (current_max_dfa_size); 287 did_stk_init = true; 288 } 289 290 nacc = stkend = hashval = 0; 291 292 for (nstate = 1; nstate <= numstates; ++nstate) { 293 ns = t[nstate]; 294 295 /* The state could be marked if we've already pushed it onto 296 * the stack. 297 */ 298 if (!IS_MARKED (ns)) { 299 PUT_ON_STACK (ns); 300 CHECK_ACCEPT (ns); 301 hashval += ns; 302 } 303 } 304 305 for (stkpos = 1; stkpos <= stkend; ++stkpos) { 306 ns = stk[stkpos]; 307 transsym = transchar[ns]; 308 309 if (transsym == SYM_EPSILON) { 310 tsp = trans1[ns] + MARKER_DIFFERENCE; 311 312 if (tsp != NO_TRANSITION) { 313 if (!IS_MARKED (tsp)) 314 STACK_STATE (tsp); 315 316 tsp = trans2[ns]; 317 318 if (tsp != NO_TRANSITION 319 && !IS_MARKED (tsp)) 320 STACK_STATE (tsp); 321 } 322 } 323 } 324 325 /* Clear out "visit" markers. */ 326 327 for (stkpos = 1; stkpos <= stkend; ++stkpos) { 328 if (IS_MARKED (stk[stkpos])) 329 UNMARK_STATE (stk[stkpos]); 330 else 331 flexfatal (_ 332 ("consistency check failed in epsclosure()")); 333 } 334 335 *ns_addr = numstates; 336 *hv_addr = hashval; 337 *nacc_addr = nacc; 338 339 return t; 340 } 341 342 343 /* increase_max_dfas - increase the maximum number of DFAs */ 344 345 void increase_max_dfas (void) 346 { 347 current_max_dfas += MAX_DFAS_INCREMENT; 348 349 ++num_reallocs; 350 351 base = reallocate_integer_array (base, current_max_dfas); 352 def = reallocate_integer_array (def, current_max_dfas); 353 dfasiz = reallocate_integer_array (dfasiz, current_max_dfas); 354 accsiz = reallocate_integer_array (accsiz, current_max_dfas); 355 dhash = reallocate_integer_array (dhash, current_max_dfas); 356 dss = reallocate_int_ptr_array (dss, current_max_dfas); 357 dfaacc = reallocate_dfaacc_union (dfaacc, current_max_dfas); 358 359 if (nultrans) 360 nultrans = 361 reallocate_integer_array (nultrans, 362 current_max_dfas); 363 } 364 365 366 /* ntod - convert an ndfa to a dfa 367 * 368 * Creates the dfa corresponding to the ndfa we've constructed. The 369 * dfa starts out in state #1. 370 */ 371 372 void ntod (void) 373 { 374 int *accset, ds, nacc, newds; 375 int sym, hashval, numstates, dsize; 376 int num_full_table_rows=0; /* used only for -f */ 377 int *nset, *dset; 378 int targptr, totaltrans, i, comstate, comfreq, targ; 379 int symlist[CSIZE + 1]; 380 int num_start_states; 381 int todo_head, todo_next; 382 383 struct yytbl_data *yynxt_tbl = 0; 384 flex_int32_t *yynxt_data = 0, yynxt_curr = 0; 385 386 /* Note that the following are indexed by *equivalence classes* 387 * and not by characters. Since equivalence classes are indexed 388 * beginning with 1, even if the scanner accepts NUL's, this 389 * means that (since every character is potentially in its own 390 * equivalence class) these arrays must have room for indices 391 * from 1 to CSIZE, so their size must be CSIZE + 1. 392 */ 393 int duplist[CSIZE + 1], state[CSIZE + 1]; 394 int targfreq[CSIZE + 1] = {0}, targstate[CSIZE + 1]; 395 396 /* accset needs to be large enough to hold all of the rules present 397 * in the input, *plus* their YY_TRAILING_HEAD_MASK variants. 398 */ 399 accset = allocate_integer_array ((num_rules + 1) * 2); 400 nset = allocate_integer_array (current_max_dfa_size); 401 402 /* The "todo" queue is represented by the head, which is the DFA 403 * state currently being processed, and the "next", which is the 404 * next DFA state number available (not in use). We depend on the 405 * fact that snstods() returns DFA's \in increasing order/, and thus 406 * need only know the bounds of the dfas to be processed. 407 */ 408 todo_head = todo_next = 0; 409 410 for (i = 0; i <= csize; ++i) { 411 duplist[i] = NIL; 412 symlist[i] = false; 413 } 414 415 for (i = 0; i <= num_rules; ++i) 416 accset[i] = NIL; 417 418 if (trace) { 419 dumpnfa (scset[1]); 420 fputs (_("\n\nDFA Dump:\n\n"), stderr); 421 } 422 423 inittbl (); 424 425 /* Check to see whether we should build a separate table for 426 * transitions on NUL characters. We don't do this for full-speed 427 * (-F) scanners, since for them we don't have a simple state 428 * number lying around with which to index the table. We also 429 * don't bother doing it for scanners unless (1) NUL is in its own 430 * equivalence class (indicated by a positive value of 431 * ecgroup[NUL]), (2) NUL's equivalence class is the last 432 * equivalence class, and (3) the number of equivalence classes is 433 * the same as the number of characters. This latter case comes 434 * about when useecs is false or when it's true but every character 435 * still manages to land in its own class (unlikely, but it's 436 * cheap to check for). If all these things are true then the 437 * character code needed to represent NUL's equivalence class for 438 * indexing the tables is going to take one more bit than the 439 * number of characters, and therefore we won't be assured of 440 * being able to fit it into a YY_CHAR variable. This rules out 441 * storing the transitions in a compressed table, since the code 442 * for interpreting them uses a YY_CHAR variable (perhaps it 443 * should just use an integer, though; this is worth pondering ... 444 * ###). 445 * 446 * Finally, for full tables, we want the number of entries in the 447 * table to be a power of two so the array references go fast (it 448 * will just take a shift to compute the major index). If 449 * encoding NUL's transitions in the table will spoil this, we 450 * give it its own table (note that this will be the case if we're 451 * not using equivalence classes). 452 */ 453 454 /* Note that the test for ecgroup[0] == numecs below accomplishes 455 * both (1) and (2) above 456 */ 457 if (!fullspd && ecgroup[0] == numecs) { 458 /* NUL is alone in its equivalence class, which is the 459 * last one. 460 */ 461 int use_NUL_table = (numecs == csize); 462 463 if (fulltbl && !use_NUL_table) { 464 /* We still may want to use the table if numecs 465 * is a power of 2. 466 */ 467 int power_of_two; 468 469 for (power_of_two = 1; power_of_two <= csize; 470 power_of_two *= 2) 471 if (numecs == power_of_two) { 472 use_NUL_table = true; 473 break; 474 } 475 } 476 477 if (use_NUL_table) 478 nultrans = 479 allocate_integer_array (current_max_dfas); 480 481 /* From now on, nultrans != nil indicates that we're 482 * saving null transitions for later, separate encoding. 483 */ 484 } 485 486 487 if (fullspd) { 488 for (i = 0; i <= numecs; ++i) 489 state[i] = 0; 490 491 place_state (state, 0, 0); 492 dfaacc[0].dfaacc_state = 0; 493 } 494 495 else if (fulltbl) { 496 if (nultrans) 497 /* We won't be including NUL's transitions in the 498 * table, so build it for entries from 0 .. numecs - 1. 499 */ 500 num_full_table_rows = numecs; 501 502 else 503 /* Take into account the fact that we'll be including 504 * the NUL entries in the transition table. Build it 505 * from 0 .. numecs. 506 */ 507 num_full_table_rows = numecs + 1; 508 509 /* Begin generating yy_nxt[][] 510 * This spans the entire LONG function. 511 * This table is tricky because we don't know how big it will be. 512 * So we'll have to realloc() on the way... 513 * we'll wait until we can calculate yynxt_tbl->td_hilen. 514 */ 515 yynxt_tbl = calloc(1, sizeof (struct yytbl_data)); 516 517 yytbl_data_init (yynxt_tbl, YYTD_ID_NXT); 518 yynxt_tbl->td_hilen = 1; 519 yynxt_tbl->td_lolen = (flex_uint32_t) num_full_table_rows; 520 yynxt_tbl->td_data = yynxt_data = 521 calloc(yynxt_tbl->td_lolen * 522 yynxt_tbl->td_hilen, 523 sizeof (flex_int32_t)); 524 yynxt_curr = 0; 525 526 buf_prints (&yydmap_buf, 527 "\t{YYTD_ID_NXT, (void**)&yy_nxt, sizeof(%s)},\n", 528 long_align ? "flex_int32_t" : "flex_int16_t"); 529 530 /* Unless -Ca, declare it "short" because it's a real 531 * long-shot that that won't be large enough. 532 */ 533 if (gentables) 534 out_str_dec 535 ("static const %s yy_nxt[][%d] =\n {\n", 536 long_align ? "flex_int32_t" : "flex_int16_t", 537 num_full_table_rows); 538 else { 539 out_dec ("#undef YY_NXT_LOLEN\n#define YY_NXT_LOLEN (%d)\n", num_full_table_rows); 540 out_str ("static const %s *yy_nxt =0;\n", 541 long_align ? "flex_int32_t" : "flex_int16_t"); 542 } 543 544 545 if (gentables) 546 outn (" {"); 547 548 /* Generate 0 entries for state #0. */ 549 for (i = 0; i < num_full_table_rows; ++i) { 550 mk2data (0); 551 yynxt_data[yynxt_curr++] = 0; 552 } 553 554 dataflush (); 555 if (gentables) 556 outn (" },\n"); 557 } 558 559 /* Create the first states. */ 560 561 num_start_states = lastsc * 2; 562 563 for (i = 1; i <= num_start_states; ++i) { 564 numstates = 1; 565 566 /* For each start condition, make one state for the case when 567 * we're at the beginning of the line (the '^' operator) and 568 * one for the case when we're not. 569 */ 570 if (i % 2 == 1) 571 nset[numstates] = scset[(i / 2) + 1]; 572 else 573 nset[numstates] = 574 mkbranch (scbol[i / 2], scset[i / 2]); 575 576 nset = epsclosure (nset, &numstates, accset, &nacc, 577 &hashval); 578 579 if (snstods (nset, numstates, accset, nacc, hashval, &ds)) { 580 numas += nacc; 581 totnst += numstates; 582 ++todo_next; 583 584 if (variable_trailing_context_rules && nacc > 0) 585 check_trailing_context (nset, numstates, 586 accset, nacc); 587 } 588 } 589 590 if (!fullspd) { 591 if (!snstods (nset, 0, accset, 0, 0, &end_of_buffer_state)) 592 flexfatal (_ 593 ("could not create unique end-of-buffer state")); 594 595 ++numas; 596 ++num_start_states; 597 ++todo_next; 598 } 599 600 601 while (todo_head < todo_next) { 602 targptr = 0; 603 totaltrans = 0; 604 605 for (i = 1; i <= numecs; ++i) 606 state[i] = 0; 607 608 ds = ++todo_head; 609 610 dset = dss[ds]; 611 dsize = dfasiz[ds]; 612 613 if (trace) 614 fprintf (stderr, _("state # %d:\n"), ds); 615 616 sympartition (dset, dsize, symlist, duplist); 617 618 for (sym = 1; sym <= numecs; ++sym) { 619 if (symlist[sym]) { 620 symlist[sym] = 0; 621 622 if (duplist[sym] == NIL) { 623 /* Symbol has unique out-transitions. */ 624 numstates = 625 symfollowset (dset, dsize, 626 sym, nset); 627 nset = epsclosure (nset, 628 &numstates, 629 accset, &nacc, 630 &hashval); 631 632 if (snstods 633 (nset, numstates, accset, nacc, 634 hashval, &newds)) { 635 totnst = totnst + 636 numstates; 637 ++todo_next; 638 numas += nacc; 639 640 if (variable_trailing_context_rules && nacc > 0) 641 check_trailing_context 642 (nset, 643 numstates, 644 accset, 645 nacc); 646 } 647 648 state[sym] = newds; 649 650 if (trace) 651 fprintf (stderr, 652 "\t%d\t%d\n", sym, 653 newds); 654 655 targfreq[++targptr] = 1; 656 targstate[targptr] = newds; 657 ++numuniq; 658 } 659 660 else { 661 /* sym's equivalence class has the same 662 * transitions as duplist(sym)'s 663 * equivalence class. 664 */ 665 targ = state[duplist[sym]]; 666 state[sym] = targ; 667 668 if (trace) 669 fprintf (stderr, 670 "\t%d\t%d\n", sym, 671 targ); 672 673 /* Update frequency count for 674 * destination state. 675 */ 676 677 i = 0; 678 while (targstate[++i] != targ) ; 679 680 ++targfreq[i]; 681 ++numdup; 682 } 683 684 ++totaltrans; 685 duplist[sym] = NIL; 686 } 687 } 688 689 690 numsnpairs += totaltrans; 691 692 if (ds > num_start_states) 693 check_for_backing_up (ds, state); 694 695 if (nultrans) { 696 nultrans[ds] = state[NUL_ec]; 697 state[NUL_ec] = 0; /* remove transition */ 698 } 699 700 if (fulltbl) { 701 702 /* Each time we hit here, it's another td_hilen, so we realloc. */ 703 yynxt_tbl->td_hilen++; 704 yynxt_tbl->td_data = yynxt_data = 705 realloc (yynxt_data, 706 yynxt_tbl->td_hilen * 707 yynxt_tbl->td_lolen * 708 sizeof (flex_int32_t)); 709 710 711 if (gentables) 712 outn (" {"); 713 714 /* Supply array's 0-element. */ 715 if (ds == end_of_buffer_state) { 716 mk2data (-end_of_buffer_state); 717 yynxt_data[yynxt_curr++] = 718 -end_of_buffer_state; 719 } 720 else { 721 mk2data (end_of_buffer_state); 722 yynxt_data[yynxt_curr++] = 723 end_of_buffer_state; 724 } 725 726 for (i = 1; i < num_full_table_rows; ++i) { 727 /* Jams are marked by negative of state 728 * number. 729 */ 730 mk2data (state[i] ? state[i] : -ds); 731 yynxt_data[yynxt_curr++] = 732 state[i] ? state[i] : -ds; 733 } 734 735 dataflush (); 736 if (gentables) 737 outn (" },\n"); 738 } 739 740 else if (fullspd) 741 place_state (state, ds, totaltrans); 742 743 else if (ds == end_of_buffer_state) 744 /* Special case this state to make sure it does what 745 * it's supposed to, i.e., jam on end-of-buffer. 746 */ 747 stack1 (ds, 0, 0, JAMSTATE); 748 749 else { /* normal, compressed state */ 750 751 /* Determine which destination state is the most 752 * common, and how many transitions to it there are. 753 */ 754 755 comfreq = 0; 756 comstate = 0; 757 758 for (i = 1; i <= targptr; ++i) 759 if (targfreq[i] > comfreq) { 760 comfreq = targfreq[i]; 761 comstate = targstate[i]; 762 } 763 764 bldtbl (state, ds, totaltrans, comstate, comfreq); 765 } 766 } 767 768 if (fulltbl) { 769 dataend (); 770 if (tablesext) { 771 yytbl_data_compress (yynxt_tbl); 772 if (yytbl_data_fwrite (&tableswr, yynxt_tbl) < 0) 773 flexerror (_ 774 ("Could not write yynxt_tbl[][]")); 775 } 776 if (yynxt_tbl) { 777 yytbl_data_destroy (yynxt_tbl); 778 yynxt_tbl = 0; 779 } 780 } 781 782 else if (!fullspd) { 783 cmptmps (); /* create compressed template entries */ 784 785 /* Create tables for all the states with only one 786 * out-transition. 787 */ 788 while (onesp > 0) { 789 mk1tbl (onestate[onesp], onesym[onesp], 790 onenext[onesp], onedef[onesp]); 791 --onesp; 792 } 793 794 mkdeftbl (); 795 } 796 797 free(accset); 798 free(nset); 799 } 800 801 802 /* snstods - converts a set of ndfa states into a dfa state 803 * 804 * synopsis 805 * is_new_state = snstods( int sns[numstates], int numstates, 806 * int accset[num_rules+1], int nacc, 807 * int hashval, int *newds_addr ); 808 * 809 * On return, the dfa state number is in newds. 810 */ 811 812 int snstods (int sns[], int numstates, int accset[], int nacc, int hashval, int *newds_addr) 813 { 814 int didsort = 0; 815 int i, j; 816 int newds, *oldsns; 817 818 for (i = 1; i <= lastdfa; ++i) 819 if (hashval == dhash[i]) { 820 if (numstates == dfasiz[i]) { 821 oldsns = dss[i]; 822 823 if (!didsort) { 824 /* We sort the states in sns so we 825 * can compare it to oldsns quickly. 826 */ 827 qsort (&sns [1], (size_t) numstates, sizeof (sns [1]), intcmp); 828 didsort = 1; 829 } 830 831 for (j = 1; j <= numstates; ++j) 832 if (sns[j] != oldsns[j]) 833 break; 834 835 if (j > numstates) { 836 ++dfaeql; 837 *newds_addr = i; 838 return 0; 839 } 840 841 ++hshcol; 842 } 843 844 else 845 ++hshsave; 846 } 847 848 /* Make a new dfa. */ 849 850 if (++lastdfa >= current_max_dfas) 851 increase_max_dfas (); 852 853 newds = lastdfa; 854 855 dss[newds] = allocate_integer_array (numstates + 1); 856 857 /* If we haven't already sorted the states in sns, we do so now, 858 * so that future comparisons with it can be made quickly. 859 */ 860 861 if (!didsort) 862 qsort (&sns [1], (size_t) numstates, sizeof (sns [1]), intcmp); 863 864 for (i = 1; i <= numstates; ++i) 865 dss[newds][i] = sns[i]; 866 867 dfasiz[newds] = numstates; 868 dhash[newds] = hashval; 869 870 if (nacc == 0) { 871 if (reject) 872 dfaacc[newds].dfaacc_set = NULL; 873 else 874 dfaacc[newds].dfaacc_state = 0; 875 876 accsiz[newds] = 0; 877 } 878 879 else if (reject) { 880 /* We sort the accepting set in increasing order so the 881 * disambiguating rule that the first rule listed is considered 882 * match in the event of ties will work. 883 */ 884 885 qsort (&accset [1], (size_t) nacc, sizeof (accset [1]), intcmp); 886 887 dfaacc[newds].dfaacc_set = 888 allocate_integer_array (nacc + 1); 889 890 /* Save the accepting set for later */ 891 for (i = 1; i <= nacc; ++i) { 892 dfaacc[newds].dfaacc_set[i] = accset[i]; 893 894 if (accset[i] <= num_rules) 895 /* Who knows, perhaps a REJECT can yield 896 * this rule. 897 */ 898 rule_useful[accset[i]] = true; 899 } 900 901 accsiz[newds] = nacc; 902 } 903 904 else { 905 /* Find lowest numbered rule so the disambiguating rule 906 * will work. 907 */ 908 j = num_rules + 1; 909 910 for (i = 1; i <= nacc; ++i) 911 if (accset[i] < j) 912 j = accset[i]; 913 914 dfaacc[newds].dfaacc_state = j; 915 916 if (j <= num_rules) 917 rule_useful[j] = true; 918 } 919 920 *newds_addr = newds; 921 922 return 1; 923 } 924 925 926 /* symfollowset - follow the symbol transitions one step 927 * 928 * synopsis 929 * numstates = symfollowset( int ds[current_max_dfa_size], int dsize, 930 * int transsym, int nset[current_max_dfa_size] ); 931 */ 932 933 int symfollowset (int ds[], int dsize, int transsym, int nset[]) 934 { 935 int ns, tsp, sym, i, j, lenccl, ch, numstates, ccllist; 936 937 numstates = 0; 938 939 for (i = 1; i <= dsize; ++i) { /* for each nfa state ns in the state set of ds */ 940 ns = ds[i]; 941 sym = transchar[ns]; 942 tsp = trans1[ns]; 943 944 if (sym < 0) { /* it's a character class */ 945 sym = -sym; 946 ccllist = cclmap[sym]; 947 lenccl = ccllen[sym]; 948 949 if (cclng[sym]) { 950 for (j = 0; j < lenccl; ++j) { 951 /* Loop through negated character 952 * class. 953 */ 954 ch = ccltbl[ccllist + j]; 955 956 if (ch == 0) 957 ch = NUL_ec; 958 959 if (ch > transsym) 960 /* Transsym isn't in negated 961 * ccl. 962 */ 963 break; 964 965 else if (ch == transsym) 966 /* next 2 */ 967 goto bottom; 968 } 969 970 /* Didn't find transsym in ccl. */ 971 nset[++numstates] = tsp; 972 } 973 974 else 975 for (j = 0; j < lenccl; ++j) { 976 ch = ccltbl[ccllist + j]; 977 978 if (ch == 0) 979 ch = NUL_ec; 980 981 if (ch > transsym) 982 break; 983 else if (ch == transsym) { 984 nset[++numstates] = tsp; 985 break; 986 } 987 } 988 } 989 990 else if (sym == SYM_EPSILON) { /* do nothing */ 991 } 992 993 else if (ABS (ecgroup[sym]) == transsym) 994 nset[++numstates] = tsp; 995 996 bottom:; 997 } 998 999 return numstates; 1000 } 1001 1002 1003 /* sympartition - partition characters with same out-transitions 1004 * 1005 * synopsis 1006 * sympartition( int ds[current_max_dfa_size], int numstates, 1007 * int symlist[numecs], int duplist[numecs] ); 1008 */ 1009 1010 void sympartition (int ds[], int numstates, int symlist[], int duplist[]) 1011 { 1012 int tch, i, j, k, ns, dupfwd[CSIZE + 1], lenccl, cclp, ich; 1013 1014 /* Partitioning is done by creating equivalence classes for those 1015 * characters which have out-transitions from the given state. Thus 1016 * we are really creating equivalence classes of equivalence classes. 1017 */ 1018 1019 for (i = 1; i <= numecs; ++i) { /* initialize equivalence class list */ 1020 duplist[i] = i - 1; 1021 dupfwd[i] = i + 1; 1022 } 1023 1024 duplist[1] = NIL; 1025 dupfwd[numecs] = NIL; 1026 1027 for (i = 1; i <= numstates; ++i) { 1028 ns = ds[i]; 1029 tch = transchar[ns]; 1030 1031 if (tch != SYM_EPSILON) { 1032 if (tch < -lastccl || tch >= csize) { 1033 flexfatal (_ 1034 ("bad transition character detected in sympartition()")); 1035 } 1036 1037 if (tch >= 0) { /* character transition */ 1038 int ec = ecgroup[tch]; 1039 1040 mkechar (ec, dupfwd, duplist); 1041 symlist[ec] = 1; 1042 } 1043 1044 else { /* character class */ 1045 tch = -tch; 1046 1047 lenccl = ccllen[tch]; 1048 cclp = cclmap[tch]; 1049 mkeccl (ccltbl + cclp, lenccl, dupfwd, 1050 duplist, numecs, NUL_ec); 1051 1052 if (cclng[tch]) { 1053 j = 0; 1054 1055 for (k = 0; k < lenccl; ++k) { 1056 ich = ccltbl[cclp + k]; 1057 1058 if (ich == 0) 1059 ich = NUL_ec; 1060 1061 for (++j; j < ich; ++j) 1062 symlist[j] = 1; 1063 } 1064 1065 for (++j; j <= numecs; ++j) 1066 symlist[j] = 1; 1067 } 1068 1069 else 1070 for (k = 0; k < lenccl; ++k) { 1071 ich = ccltbl[cclp + k]; 1072 1073 if (ich == 0) 1074 ich = NUL_ec; 1075 1076 symlist[ich] = 1; 1077 } 1078 } 1079 } 1080 } 1081 } 1082