1 /* dfa - DFA construction routines */ 2 3 /* Copyright (c) 1990 The Regents of the University of California. */ 4 /* All rights reserved. */ 5 6 /* This code is derived from software contributed to Berkeley by */ 7 /* Vern Paxson. */ 8 9 /* The United States Government has rights in this work pursuant */ 10 /* to contract no. DE-AC03-76SF00098 between the United States */ 11 /* Department of Energy and the University of California. */ 12 13 /* Redistribution and use in source and binary forms, with or without */ 14 /* modification, are permitted provided that the following conditions */ 15 /* are met: */ 16 17 /* 1. Redistributions of source code must retain the above copyright */ 18 /* notice, this list of conditions and the following disclaimer. */ 19 /* 2. Redistributions in binary form must reproduce the above copyright */ 20 /* notice, this list of conditions and the following disclaimer in the */ 21 /* documentation and/or other materials provided with the distribution. */ 22 23 /* Neither the name of the University nor the names of its contributors */ 24 /* may be used to endorse or promote products derived from this software */ 25 /* without specific prior written permission. */ 26 27 /* THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR */ 28 /* IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED */ 29 /* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR */ 30 /* PURPOSE. */ 31 #include "flexdef.h" 32 __RCSID("$NetBSD: dfa.c,v 1.4 2018/12/23 16:27:17 christos Exp $"); 33 34 #include "tables.h" 35 36 /* declare functions that have forward references */ 37 38 void dump_associated_rules(FILE *, int); 39 void dump_transitions(FILE *, int[]); 40 void sympartition(int[], int, int[], int[]); 41 int symfollowset(int[], int, int, int[]); 42 43 44 /* check_for_backing_up - check a DFA state for backing up 45 * 46 * synopsis 47 * void check_for_backing_up( int ds, int state[numecs] ); 48 * 49 * ds is the number of the state to check and state[] is its out-transitions, 50 * indexed by equivalence class. 51 */ 52 53 void check_for_backing_up (int ds, int state[]) 54 { 55 if ((reject && !dfaacc[ds].dfaacc_set) || (!reject && !dfaacc[ds].dfaacc_state)) { /* state is non-accepting */ 56 ++num_backing_up; 57 58 if (backing_up_report) { 59 fprintf (backing_up_file, 60 _("State #%d is non-accepting -\n"), ds); 61 62 /* identify the state */ 63 dump_associated_rules (backing_up_file, ds); 64 65 /* Now identify it further using the out- and 66 * jam-transitions. 67 */ 68 dump_transitions (backing_up_file, state); 69 70 putc ('\n', backing_up_file); 71 } 72 } 73 } 74 75 76 /* check_trailing_context - check to see if NFA state set constitutes 77 * "dangerous" trailing context 78 * 79 * synopsis 80 * void check_trailing_context( int nfa_states[num_states+1], int num_states, 81 * int accset[nacc+1], int nacc ); 82 * 83 * NOTES 84 * Trailing context is "dangerous" if both the head and the trailing 85 * part are of variable size \and/ there's a DFA state which contains 86 * both an accepting state for the head part of the rule and NFA states 87 * which occur after the beginning of the trailing context. 88 * 89 * When such a rule is matched, it's impossible to tell if having been 90 * in the DFA state indicates the beginning of the trailing context or 91 * further-along scanning of the pattern. In these cases, a warning 92 * message is issued. 93 * 94 * nfa_states[1 .. num_states] is the list of NFA states in the DFA. 95 * accset[1 .. nacc] is the list of accepting numbers for the DFA state. 96 */ 97 98 void check_trailing_context (int *nfa_states, int num_states, int *accset, int nacc) 99 { 100 int i, j; 101 102 for (i = 1; i <= num_states; ++i) { 103 int ns = nfa_states[i]; 104 int type = state_type[ns]; 105 int ar = assoc_rule[ns]; 106 107 if (type == STATE_NORMAL || rule_type[ar] != RULE_VARIABLE) { /* do nothing */ 108 } 109 110 else if (type == STATE_TRAILING_CONTEXT) { 111 /* Potential trouble. Scan set of accepting numbers 112 * for the one marking the end of the "head". We 113 * assume that this looping will be fairly cheap 114 * since it's rare that an accepting number set 115 * is large. 116 */ 117 for (j = 1; j <= nacc; ++j) 118 if (accset[j] & YY_TRAILING_HEAD_MASK) { 119 line_warning (_ 120 ("dangerous trailing context"), 121 rule_linenum[ar]); 122 return; 123 } 124 } 125 } 126 } 127 128 129 /* dump_associated_rules - list the rules associated with a DFA state 130 * 131 * Goes through the set of NFA states associated with the DFA and 132 * extracts the first MAX_ASSOC_RULES unique rules, sorts them, 133 * and writes a report to the given file. 134 */ 135 136 void dump_associated_rules (FILE *file, int ds) 137 { 138 int i, j; 139 int num_associated_rules = 0; 140 int rule_set[MAX_ASSOC_RULES + 1]; 141 int *dset = dss[ds]; 142 int size = dfasiz[ds]; 143 144 for (i = 1; i <= size; ++i) { 145 int rule_num = rule_linenum[assoc_rule[dset[i]]]; 146 147 for (j = 1; j <= num_associated_rules; ++j) 148 if (rule_num == rule_set[j]) 149 break; 150 151 if (j > num_associated_rules) { /* new rule */ 152 if (num_associated_rules < MAX_ASSOC_RULES) 153 rule_set[++num_associated_rules] = 154 rule_num; 155 } 156 } 157 158 qsort (&rule_set [1], (size_t) num_associated_rules, sizeof (rule_set [1]), intcmp); 159 160 fprintf (file, _(" associated rule line numbers:")); 161 162 for (i = 1; i <= num_associated_rules; ++i) { 163 if (i % 8 == 1) 164 putc ('\n', file); 165 166 fprintf (file, "\t%d", rule_set[i]); 167 } 168 169 putc ('\n', file); 170 } 171 172 173 /* dump_transitions - list the transitions associated with a DFA state 174 * 175 * synopsis 176 * dump_transitions( FILE *file, int state[numecs] ); 177 * 178 * Goes through the set of out-transitions and lists them in human-readable 179 * form (i.e., not as equivalence classes); also lists jam transitions 180 * (i.e., all those which are not out-transitions, plus EOF). The dump 181 * is done to the given file. 182 */ 183 184 void dump_transitions (FILE *file, int state[]) 185 { 186 int i, ec; 187 int out_char_set[CSIZE]; 188 189 for (i = 0; i < csize; ++i) { 190 ec = ABS (ecgroup[i]); 191 out_char_set[i] = state[ec]; 192 } 193 194 fprintf (file, _(" out-transitions: ")); 195 196 list_character_set (file, out_char_set); 197 198 /* now invert the members of the set to get the jam transitions */ 199 for (i = 0; i < csize; ++i) 200 out_char_set[i] = !out_char_set[i]; 201 202 fprintf (file, _("\n jam-transitions: EOF ")); 203 204 list_character_set (file, out_char_set); 205 206 putc ('\n', file); 207 } 208 209 210 /* epsclosure - construct the epsilon closure of a set of ndfa states 211 * 212 * synopsis 213 * int *epsclosure( int t[num_states], int *numstates_addr, 214 * int accset[num_rules+1], int *nacc_addr, 215 * int *hashval_addr ); 216 * 217 * NOTES 218 * The epsilon closure is the set of all states reachable by an arbitrary 219 * number of epsilon transitions, which themselves do not have epsilon 220 * transitions going out, unioned with the set of states which have non-null 221 * accepting numbers. t is an array of size numstates of nfa state numbers. 222 * Upon return, t holds the epsilon closure and *numstates_addr is updated. 223 * accset holds a list of the accepting numbers, and the size of accset is 224 * given by *nacc_addr. t may be subjected to reallocation if it is not 225 * large enough to hold the epsilon closure. 226 * 227 * hashval is the hash value for the dfa corresponding to the state set. 228 */ 229 230 int *epsclosure (int *t, int *ns_addr, int accset[], int *nacc_addr, int *hv_addr) 231 { 232 int stkpos, ns, tsp; 233 int numstates = *ns_addr, nacc, hashval, transsym, nfaccnum; 234 int stkend, nstate; 235 static int did_stk_init = false, *stk; 236 237 #define MARK_STATE(state) \ 238 do{ trans1[state] = trans1[state] - MARKER_DIFFERENCE;} while(0) 239 240 #define IS_MARKED(state) (trans1[state] < 0) 241 242 #define UNMARK_STATE(state) \ 243 do{ trans1[state] = trans1[state] + MARKER_DIFFERENCE;} while(0) 244 245 #define CHECK_ACCEPT(state) \ 246 do{ \ 247 nfaccnum = accptnum[state]; \ 248 if ( nfaccnum != NIL ) \ 249 accset[++nacc] = nfaccnum; \ 250 }while(0) 251 252 #define DO_REALLOCATION() \ 253 do { \ 254 current_max_dfa_size += MAX_DFA_SIZE_INCREMENT; \ 255 ++num_reallocs; \ 256 t = reallocate_integer_array( t, current_max_dfa_size ); \ 257 stk = reallocate_integer_array( stk, current_max_dfa_size ); \ 258 }while(0) \ 259 260 #define PUT_ON_STACK(state) \ 261 do { \ 262 if ( ++stkend >= current_max_dfa_size ) \ 263 DO_REALLOCATION(); \ 264 stk[stkend] = state; \ 265 MARK_STATE(state); \ 266 }while(0) 267 268 #define ADD_STATE(state) \ 269 do { \ 270 if ( ++numstates >= current_max_dfa_size ) \ 271 DO_REALLOCATION(); \ 272 t[numstates] = state; \ 273 hashval += state; \ 274 }while(0) 275 276 #define STACK_STATE(state) \ 277 do { \ 278 PUT_ON_STACK(state); \ 279 CHECK_ACCEPT(state); \ 280 if ( nfaccnum != NIL || transchar[state] != SYM_EPSILON ) \ 281 ADD_STATE(state); \ 282 }while(0) 283 284 285 if (!did_stk_init) { 286 stk = allocate_integer_array (current_max_dfa_size); 287 did_stk_init = true; 288 } 289 290 nacc = stkend = hashval = 0; 291 292 for (nstate = 1; nstate <= numstates; ++nstate) { 293 ns = t[nstate]; 294 295 /* The state could be marked if we've already pushed it onto 296 * the stack. 297 */ 298 if (!IS_MARKED (ns)) { 299 PUT_ON_STACK (ns); 300 CHECK_ACCEPT (ns); 301 hashval += ns; 302 } 303 } 304 305 for (stkpos = 1; stkpos <= stkend; ++stkpos) { 306 ns = stk[stkpos]; 307 transsym = transchar[ns]; 308 309 if (transsym == SYM_EPSILON) { 310 tsp = trans1[ns] + MARKER_DIFFERENCE; 311 312 if (tsp != NO_TRANSITION) { 313 if (!IS_MARKED (tsp)) 314 STACK_STATE (tsp); 315 316 tsp = trans2[ns]; 317 318 if (tsp != NO_TRANSITION 319 && !IS_MARKED (tsp)) 320 STACK_STATE (tsp); 321 } 322 } 323 } 324 325 /* Clear out "visit" markers. */ 326 327 for (stkpos = 1; stkpos <= stkend; ++stkpos) { 328 if (IS_MARKED (stk[stkpos])) 329 UNMARK_STATE (stk[stkpos]); 330 else 331 flexfatal (_ 332 ("consistency check failed in epsclosure()")); 333 } 334 335 *ns_addr = numstates; 336 *hv_addr = hashval; 337 *nacc_addr = nacc; 338 339 return t; 340 } 341 342 343 /* increase_max_dfas - increase the maximum number of DFAs */ 344 345 void increase_max_dfas (void) 346 { 347 current_max_dfas += MAX_DFAS_INCREMENT; 348 349 ++num_reallocs; 350 351 base = reallocate_integer_array (base, current_max_dfas); 352 def = reallocate_integer_array (def, current_max_dfas); 353 dfasiz = reallocate_integer_array (dfasiz, current_max_dfas); 354 accsiz = reallocate_integer_array (accsiz, current_max_dfas); 355 dhash = reallocate_integer_array (dhash, current_max_dfas); 356 dss = reallocate_int_ptr_array (dss, current_max_dfas); 357 dfaacc = reallocate_dfaacc_union (dfaacc, current_max_dfas); 358 359 if (nultrans) 360 nultrans = 361 reallocate_integer_array (nultrans, 362 current_max_dfas); 363 } 364 365 366 /* ntod - convert an ndfa to a dfa 367 * 368 * Creates the dfa corresponding to the ndfa we've constructed. The 369 * dfa starts out in state #1. 370 */ 371 372 void ntod (void) 373 { 374 int *accset, ds, nacc, newds; 375 int sym, hashval, numstates, dsize; 376 int num_full_table_rows=0; /* used only for -f */ 377 int *nset, *dset; 378 int targptr, totaltrans, i, comstate, comfreq, targ; 379 int symlist[CSIZE + 1]; 380 int num_start_states; 381 int todo_head, todo_next; 382 383 struct yytbl_data *yynxt_tbl = 0; 384 flex_int32_t *yynxt_data = 0, yynxt_curr = 0; 385 386 /* Note that the following are indexed by *equivalence classes* 387 * and not by characters. Since equivalence classes are indexed 388 * beginning with 1, even if the scanner accepts NUL's, this 389 * means that (since every character is potentially in its own 390 * equivalence class) these arrays must have room for indices 391 * from 1 to CSIZE, so their size must be CSIZE + 1. 392 */ 393 int duplist[CSIZE + 1], state[CSIZE + 1]; 394 int targfreq[CSIZE + 1] = {0}, targstate[CSIZE + 1]; 395 396 /* accset needs to be large enough to hold all of the rules present 397 * in the input, *plus* their YY_TRAILING_HEAD_MASK variants. 398 */ 399 accset = allocate_integer_array ((num_rules + 1) * 2); 400 nset = allocate_integer_array (current_max_dfa_size); 401 402 /* The "todo" queue is represented by the head, which is the DFA 403 * state currently being processed, and the "next", which is the 404 * next DFA state number available (not in use). We depend on the 405 * fact that snstods() returns DFA's \in increasing order/, and thus 406 * need only know the bounds of the dfas to be processed. 407 */ 408 todo_head = todo_next = 0; 409 410 for (i = 0; i <= csize; ++i) { 411 duplist[i] = NIL; 412 symlist[i] = false; 413 } 414 415 for (i = 0; i <= num_rules; ++i) 416 accset[i] = NIL; 417 418 if (trace) { 419 dumpnfa (scset[1]); 420 fputs (_("\n\nDFA Dump:\n\n"), stderr); 421 } 422 423 inittbl (); 424 425 /* Check to see whether we should build a separate table for 426 * transitions on NUL characters. We don't do this for full-speed 427 * (-F) scanners, since for them we don't have a simple state 428 * number lying around with which to index the table. We also 429 * don't bother doing it for scanners unless (1) NUL is in its own 430 * equivalence class (indicated by a positive value of 431 * ecgroup[NUL]), (2) NUL's equivalence class is the last 432 * equivalence class, and (3) the number of equivalence classes is 433 * the same as the number of characters. This latter case comes 434 * about when useecs is false or when it's true but every character 435 * still manages to land in its own class (unlikely, but it's 436 * cheap to check for). If all these things are true then the 437 * character code needed to represent NUL's equivalence class for 438 * indexing the tables is going to take one more bit than the 439 * number of characters, and therefore we won't be assured of 440 * being able to fit it into a YY_CHAR variable. This rules out 441 * storing the transitions in a compressed table, since the code 442 * for interpreting them uses a YY_CHAR variable (perhaps it 443 * should just use an integer, though; this is worth pondering ... 444 * ###). 445 * 446 * Finally, for full tables, we want the number of entries in the 447 * table to be a power of two so the array references go fast (it 448 * will just take a shift to compute the major index). If 449 * encoding NUL's transitions in the table will spoil this, we 450 * give it its own table (note that this will be the case if we're 451 * not using equivalence classes). 452 */ 453 454 /* Note that the test for ecgroup[0] == numecs below accomplishes 455 * both (1) and (2) above 456 */ 457 if (!fullspd && ecgroup[0] == numecs) { 458 /* NUL is alone in its equivalence class, which is the 459 * last one. 460 */ 461 int use_NUL_table = (numecs == csize); 462 463 if (fulltbl && !use_NUL_table) { 464 /* We still may want to use the table if numecs 465 * is a power of 2. 466 */ 467 if (numecs <= csize && is_power_of_2(numecs)) { 468 use_NUL_table = true; 469 } 470 } 471 472 if (use_NUL_table) 473 nultrans = 474 allocate_integer_array (current_max_dfas); 475 476 /* From now on, nultrans != nil indicates that we're 477 * saving null transitions for later, separate encoding. 478 */ 479 } 480 481 482 if (fullspd) { 483 for (i = 0; i <= numecs; ++i) 484 state[i] = 0; 485 486 place_state (state, 0, 0); 487 dfaacc[0].dfaacc_state = 0; 488 } 489 490 else if (fulltbl) { 491 if (nultrans) 492 /* We won't be including NUL's transitions in the 493 * table, so build it for entries from 0 .. numecs - 1. 494 */ 495 num_full_table_rows = numecs; 496 497 else 498 /* Take into account the fact that we'll be including 499 * the NUL entries in the transition table. Build it 500 * from 0 .. numecs. 501 */ 502 num_full_table_rows = numecs + 1; 503 504 /* Begin generating yy_nxt[][] 505 * This spans the entire LONG function. 506 * This table is tricky because we don't know how big it will be. 507 * So we'll have to realloc() on the way... 508 * we'll wait until we can calculate yynxt_tbl->td_hilen. 509 */ 510 yynxt_tbl = calloc(1, sizeof (struct yytbl_data)); 511 512 yytbl_data_init (yynxt_tbl, YYTD_ID_NXT); 513 yynxt_tbl->td_hilen = 1; 514 yynxt_tbl->td_lolen = (flex_uint32_t) num_full_table_rows; 515 yynxt_tbl->td_data = yynxt_data = 516 calloc(yynxt_tbl->td_lolen * 517 yynxt_tbl->td_hilen, 518 sizeof (flex_int32_t)); 519 yynxt_curr = 0; 520 521 buf_prints (&yydmap_buf, 522 "\t{YYTD_ID_NXT, (void**)&yy_nxt, sizeof(%s)},\n", 523 long_align ? "flex_int32_t" : "flex_int16_t"); 524 525 /* Unless -Ca, declare it "short" because it's a real 526 * long-shot that that won't be large enough. 527 */ 528 if (gentables) 529 out_str_dec 530 ("static const %s yy_nxt[][%d] =\n {\n", 531 long_align ? "flex_int32_t" : "flex_int16_t", 532 num_full_table_rows); 533 else { 534 out_dec ("#undef YY_NXT_LOLEN\n#define YY_NXT_LOLEN (%d)\n", num_full_table_rows); 535 out_str ("static const %s *yy_nxt =0;\n", 536 long_align ? "flex_int32_t" : "flex_int16_t"); 537 } 538 539 540 if (gentables) 541 outn (" {"); 542 543 /* Generate 0 entries for state #0. */ 544 for (i = 0; i < num_full_table_rows; ++i) { 545 mk2data (0); 546 yynxt_data[yynxt_curr++] = 0; 547 } 548 549 dataflush (); 550 if (gentables) 551 outn (" },\n"); 552 } 553 554 /* Create the first states. */ 555 556 num_start_states = lastsc * 2; 557 558 for (i = 1; i <= num_start_states; ++i) { 559 numstates = 1; 560 561 /* For each start condition, make one state for the case when 562 * we're at the beginning of the line (the '^' operator) and 563 * one for the case when we're not. 564 */ 565 if (i % 2 == 1) 566 nset[numstates] = scset[(i / 2) + 1]; 567 else 568 nset[numstates] = 569 mkbranch (scbol[i / 2], scset[i / 2]); 570 571 nset = epsclosure (nset, &numstates, accset, &nacc, 572 &hashval); 573 574 if (snstods (nset, numstates, accset, nacc, hashval, &ds)) { 575 numas += nacc; 576 totnst += numstates; 577 ++todo_next; 578 579 if (variable_trailing_context_rules && nacc > 0) 580 check_trailing_context (nset, numstates, 581 accset, nacc); 582 } 583 } 584 585 if (!fullspd) { 586 if (!snstods (nset, 0, accset, 0, 0, &end_of_buffer_state)) 587 flexfatal (_ 588 ("could not create unique end-of-buffer state")); 589 590 ++numas; 591 ++num_start_states; 592 ++todo_next; 593 } 594 595 596 while (todo_head < todo_next) { 597 targptr = 0; 598 totaltrans = 0; 599 600 for (i = 1; i <= numecs; ++i) 601 state[i] = 0; 602 603 ds = ++todo_head; 604 605 dset = dss[ds]; 606 dsize = dfasiz[ds]; 607 608 if (trace) 609 fprintf (stderr, _("state # %d:\n"), ds); 610 611 sympartition (dset, dsize, symlist, duplist); 612 613 for (sym = 1; sym <= numecs; ++sym) { 614 if (symlist[sym]) { 615 symlist[sym] = 0; 616 617 if (duplist[sym] == NIL) { 618 /* Symbol has unique out-transitions. */ 619 numstates = 620 symfollowset (dset, dsize, 621 sym, nset); 622 nset = epsclosure (nset, 623 &numstates, 624 accset, &nacc, 625 &hashval); 626 627 if (snstods 628 (nset, numstates, accset, nacc, 629 hashval, &newds)) { 630 totnst = totnst + 631 numstates; 632 ++todo_next; 633 numas += nacc; 634 635 if (variable_trailing_context_rules && nacc > 0) 636 check_trailing_context 637 (nset, 638 numstates, 639 accset, 640 nacc); 641 } 642 643 state[sym] = newds; 644 645 if (trace) 646 fprintf (stderr, 647 "\t%d\t%d\n", sym, 648 newds); 649 650 targfreq[++targptr] = 1; 651 targstate[targptr] = newds; 652 ++numuniq; 653 } 654 655 else { 656 /* sym's equivalence class has the same 657 * transitions as duplist(sym)'s 658 * equivalence class. 659 */ 660 targ = state[duplist[sym]]; 661 state[sym] = targ; 662 663 if (trace) 664 fprintf (stderr, 665 "\t%d\t%d\n", sym, 666 targ); 667 668 /* Update frequency count for 669 * destination state. 670 */ 671 672 i = 0; 673 while (targstate[++i] != targ) ; 674 675 ++targfreq[i]; 676 ++numdup; 677 } 678 679 ++totaltrans; 680 duplist[sym] = NIL; 681 } 682 } 683 684 685 numsnpairs += totaltrans; 686 687 if (ds > num_start_states) 688 check_for_backing_up (ds, state); 689 690 if (nultrans) { 691 nultrans[ds] = state[NUL_ec]; 692 state[NUL_ec] = 0; /* remove transition */ 693 } 694 695 if (fulltbl) { 696 697 /* Each time we hit here, it's another td_hilen, so we realloc. */ 698 yynxt_tbl->td_hilen++; 699 yynxt_tbl->td_data = yynxt_data = 700 realloc (yynxt_data, 701 yynxt_tbl->td_hilen * 702 yynxt_tbl->td_lolen * 703 sizeof (flex_int32_t)); 704 705 706 if (gentables) 707 outn (" {"); 708 709 /* Supply array's 0-element. */ 710 if (ds == end_of_buffer_state) { 711 mk2data (-end_of_buffer_state); 712 yynxt_data[yynxt_curr++] = 713 -end_of_buffer_state; 714 } 715 else { 716 mk2data (end_of_buffer_state); 717 yynxt_data[yynxt_curr++] = 718 end_of_buffer_state; 719 } 720 721 for (i = 1; i < num_full_table_rows; ++i) { 722 /* Jams are marked by negative of state 723 * number. 724 */ 725 mk2data (state[i] ? state[i] : -ds); 726 yynxt_data[yynxt_curr++] = 727 state[i] ? state[i] : -ds; 728 } 729 730 dataflush (); 731 if (gentables) 732 outn (" },\n"); 733 } 734 735 else if (fullspd) 736 place_state (state, ds, totaltrans); 737 738 else if (ds == end_of_buffer_state) 739 /* Special case this state to make sure it does what 740 * it's supposed to, i.e., jam on end-of-buffer. 741 */ 742 stack1 (ds, 0, 0, JAMSTATE); 743 744 else { /* normal, compressed state */ 745 746 /* Determine which destination state is the most 747 * common, and how many transitions to it there are. 748 */ 749 750 comfreq = 0; 751 comstate = 0; 752 753 for (i = 1; i <= targptr; ++i) 754 if (targfreq[i] > comfreq) { 755 comfreq = targfreq[i]; 756 comstate = targstate[i]; 757 } 758 759 bldtbl (state, ds, totaltrans, comstate, comfreq); 760 } 761 } 762 763 if (fulltbl) { 764 dataend (); 765 if (tablesext) { 766 yytbl_data_compress (yynxt_tbl); 767 if (yytbl_data_fwrite (&tableswr, yynxt_tbl) < 0) 768 flexerror (_ 769 ("Could not write yynxt_tbl[][]")); 770 } 771 if (yynxt_tbl) { 772 yytbl_data_destroy (yynxt_tbl); 773 yynxt_tbl = 0; 774 } 775 } 776 777 else if (!fullspd) { 778 cmptmps (); /* create compressed template entries */ 779 780 /* Create tables for all the states with only one 781 * out-transition. 782 */ 783 while (onesp > 0) { 784 mk1tbl (onestate[onesp], onesym[onesp], 785 onenext[onesp], onedef[onesp]); 786 --onesp; 787 } 788 789 mkdeftbl (); 790 } 791 792 free(accset); 793 free(nset); 794 } 795 796 797 /* snstods - converts a set of ndfa states into a dfa state 798 * 799 * synopsis 800 * is_new_state = snstods( int sns[numstates], int numstates, 801 * int accset[num_rules+1], int nacc, 802 * int hashval, int *newds_addr ); 803 * 804 * On return, the dfa state number is in newds. 805 */ 806 807 int snstods (int sns[], int numstates, int accset[], int nacc, int hashval, int *newds_addr) 808 { 809 int didsort = 0; 810 int i, j; 811 int newds, *oldsns; 812 813 for (i = 1; i <= lastdfa; ++i) 814 if (hashval == dhash[i]) { 815 if (numstates == dfasiz[i]) { 816 oldsns = dss[i]; 817 818 if (!didsort) { 819 /* We sort the states in sns so we 820 * can compare it to oldsns quickly. 821 */ 822 qsort (&sns [1], (size_t) numstates, sizeof (sns [1]), intcmp); 823 didsort = 1; 824 } 825 826 for (j = 1; j <= numstates; ++j) 827 if (sns[j] != oldsns[j]) 828 break; 829 830 if (j > numstates) { 831 ++dfaeql; 832 *newds_addr = i; 833 return 0; 834 } 835 836 ++hshcol; 837 } 838 839 else 840 ++hshsave; 841 } 842 843 /* Make a new dfa. */ 844 845 if (++lastdfa >= current_max_dfas) 846 increase_max_dfas (); 847 848 newds = lastdfa; 849 850 dss[newds] = allocate_integer_array (numstates + 1); 851 852 /* If we haven't already sorted the states in sns, we do so now, 853 * so that future comparisons with it can be made quickly. 854 */ 855 856 if (!didsort) 857 qsort (&sns [1], (size_t) numstates, sizeof (sns [1]), intcmp); 858 859 for (i = 1; i <= numstates; ++i) 860 dss[newds][i] = sns[i]; 861 862 dfasiz[newds] = numstates; 863 dhash[newds] = hashval; 864 865 if (nacc == 0) { 866 if (reject) 867 dfaacc[newds].dfaacc_set = NULL; 868 else 869 dfaacc[newds].dfaacc_state = 0; 870 871 accsiz[newds] = 0; 872 } 873 874 else if (reject) { 875 /* We sort the accepting set in increasing order so the 876 * disambiguating rule that the first rule listed is considered 877 * match in the event of ties will work. 878 */ 879 880 qsort (&accset [1], (size_t) nacc, sizeof (accset [1]), intcmp); 881 882 dfaacc[newds].dfaacc_set = 883 allocate_integer_array (nacc + 1); 884 885 /* Save the accepting set for later */ 886 for (i = 1; i <= nacc; ++i) { 887 dfaacc[newds].dfaacc_set[i] = accset[i]; 888 889 if (accset[i] <= num_rules) 890 /* Who knows, perhaps a REJECT can yield 891 * this rule. 892 */ 893 rule_useful[accset[i]] = true; 894 } 895 896 accsiz[newds] = nacc; 897 } 898 899 else { 900 /* Find lowest numbered rule so the disambiguating rule 901 * will work. 902 */ 903 j = num_rules + 1; 904 905 for (i = 1; i <= nacc; ++i) 906 if (accset[i] < j) 907 j = accset[i]; 908 909 dfaacc[newds].dfaacc_state = j; 910 911 if (j <= num_rules) 912 rule_useful[j] = true; 913 } 914 915 *newds_addr = newds; 916 917 return 1; 918 } 919 920 921 /* symfollowset - follow the symbol transitions one step 922 * 923 * synopsis 924 * numstates = symfollowset( int ds[current_max_dfa_size], int dsize, 925 * int transsym, int nset[current_max_dfa_size] ); 926 */ 927 928 int symfollowset (int ds[], int dsize, int transsym, int nset[]) 929 { 930 int ns, tsp, sym, i, j, lenccl, ch, numstates, ccllist; 931 932 numstates = 0; 933 934 for (i = 1; i <= dsize; ++i) { /* for each nfa state ns in the state set of ds */ 935 ns = ds[i]; 936 sym = transchar[ns]; 937 tsp = trans1[ns]; 938 939 if (sym < 0) { /* it's a character class */ 940 sym = -sym; 941 ccllist = cclmap[sym]; 942 lenccl = ccllen[sym]; 943 944 if (cclng[sym]) { 945 for (j = 0; j < lenccl; ++j) { 946 /* Loop through negated character 947 * class. 948 */ 949 ch = ccltbl[ccllist + j]; 950 951 if (ch == 0) 952 ch = NUL_ec; 953 954 if (ch > transsym) 955 /* Transsym isn't in negated 956 * ccl. 957 */ 958 break; 959 960 else if (ch == transsym) 961 /* next 2 */ 962 goto bottom; 963 } 964 965 /* Didn't find transsym in ccl. */ 966 nset[++numstates] = tsp; 967 } 968 969 else 970 for (j = 0; j < lenccl; ++j) { 971 ch = ccltbl[ccllist + j]; 972 973 if (ch == 0) 974 ch = NUL_ec; 975 976 if (ch > transsym) 977 break; 978 else if (ch == transsym) { 979 nset[++numstates] = tsp; 980 break; 981 } 982 } 983 } 984 985 else if (sym == SYM_EPSILON) { /* do nothing */ 986 } 987 988 else if (ABS (ecgroup[sym]) == transsym) 989 nset[++numstates] = tsp; 990 991 bottom:; 992 } 993 994 return numstates; 995 } 996 997 998 /* sympartition - partition characters with same out-transitions 999 * 1000 * synopsis 1001 * sympartition( int ds[current_max_dfa_size], int numstates, 1002 * int symlist[numecs], int duplist[numecs] ); 1003 */ 1004 1005 void sympartition (int ds[], int numstates, int symlist[], int duplist[]) 1006 { 1007 int tch, i, j, k, ns, dupfwd[CSIZE + 1], lenccl, cclp, ich; 1008 1009 /* Partitioning is done by creating equivalence classes for those 1010 * characters which have out-transitions from the given state. Thus 1011 * we are really creating equivalence classes of equivalence classes. 1012 */ 1013 1014 for (i = 1; i <= numecs; ++i) { /* initialize equivalence class list */ 1015 duplist[i] = i - 1; 1016 dupfwd[i] = i + 1; 1017 } 1018 1019 duplist[1] = NIL; 1020 dupfwd[numecs] = NIL; 1021 1022 for (i = 1; i <= numstates; ++i) { 1023 ns = ds[i]; 1024 tch = transchar[ns]; 1025 1026 if (tch != SYM_EPSILON) { 1027 if (tch < -lastccl || tch >= csize) { 1028 flexfatal (_ 1029 ("bad transition character detected in sympartition()")); 1030 } 1031 1032 if (tch >= 0) { /* character transition */ 1033 int ec = ecgroup[tch]; 1034 1035 mkechar (ec, dupfwd, duplist); 1036 symlist[ec] = 1; 1037 } 1038 1039 else { /* character class */ 1040 tch = -tch; 1041 1042 lenccl = ccllen[tch]; 1043 cclp = cclmap[tch]; 1044 mkeccl (ccltbl + cclp, lenccl, dupfwd, 1045 duplist, numecs, NUL_ec); 1046 1047 if (cclng[tch]) { 1048 j = 0; 1049 1050 for (k = 0; k < lenccl; ++k) { 1051 ich = ccltbl[cclp + k]; 1052 1053 if (ich == 0) 1054 ich = NUL_ec; 1055 1056 for (++j; j < ich; ++j) 1057 symlist[j] = 1; 1058 } 1059 1060 for (++j; j <= numecs; ++j) 1061 symlist[j] = 1; 1062 } 1063 1064 else 1065 for (k = 0; k < lenccl; ++k) { 1066 ich = ccltbl[cclp + k]; 1067 1068 if (ich == 0) 1069 ich = NUL_ec; 1070 1071 symlist[ich] = 1; 1072 } 1073 } 1074 } 1075 } 1076 } 1077