1 /* 2 tre-match-approx.c - TRE approximate regex matching engine 3 4 This software is released under a BSD-style license. 5 See the file LICENSE for details and copyright. 6 7 */ 8 #ifdef HAVE_CONFIG_H 9 #include <config.h> 10 #endif /* HAVE_CONFIG_H */ 11 12 #include "tre-alloca.h" 13 14 #define __USE_STRING_INLINES 15 #undef __NO_INLINE__ 16 17 #include <assert.h> 18 #include <stdlib.h> 19 #include <string.h> 20 #include <limits.h> 21 #ifdef HAVE_WCHAR_H 22 #include <wchar.h> 23 #endif /* HAVE_WCHAR_H */ 24 #ifdef HAVE_WCTYPE_H 25 #include <wctype.h> 26 #endif /* HAVE_WCTYPE_H */ 27 #ifndef TRE_WCHAR 28 #include <ctype.h> 29 #endif /* !TRE_WCHAR */ 30 #ifdef HAVE_MALLOC_H 31 #include <malloc.h> 32 #endif /* HAVE_MALLOC_H */ 33 #include <stdint.h> 34 35 #include "tre-internal.h" 36 #include "tre-match-utils.h" 37 #include "tre.h" 38 #include "xmalloc.h" 39 40 #define TRE_M_COST 0 41 #define TRE_M_NUM_INS 1 42 #define TRE_M_NUM_DEL 2 43 #define TRE_M_NUM_SUBST 3 44 #define TRE_M_NUM_ERR 4 45 #define TRE_M_LAST 5 46 47 #define TRE_M_MAX_DEPTH 3 48 49 typedef struct { 50 /* State in the TNFA transition table. */ 51 tre_tnfa_transition_t *state; 52 /* Position in input string. */ 53 int pos; 54 /* Tag values. */ 55 int *tags; 56 /* Matching parameters. */ 57 regaparams_t params; 58 /* Nesting depth of parameters. This is used as an index in 59 the `costs' array. */ 60 int depth; 61 /* Costs and counter values for different parameter nesting depths. */ 62 int costs[TRE_M_MAX_DEPTH + 1][TRE_M_LAST]; 63 } tre_tnfa_approx_reach_t; 64 65 66 #ifdef TRE_DEBUG 67 /* Prints the `reach' array in a readable fashion with DPRINT. */ 68 static void 69 tre_print_reach(const tre_tnfa_t *tnfa, tre_tnfa_approx_reach_t *reach, 70 int pos, size_t num_tags) 71 { 72 int id; 73 74 /* Print each state on one line. */ 75 DPRINT((" reach:\n")); 76 for (id = 0; id < tnfa->num_states; id++) 77 { 78 int i, j; 79 if (reach[id].pos < pos) 80 continue; /* Not reached. */ 81 DPRINT((" %03d, costs ", id)); 82 for (i = 0; i <= reach[id].depth; i++) 83 { 84 DPRINT(("[")); 85 for (j = 0; j < TRE_M_LAST; j++) 86 { 87 DPRINT(("%2d", reach[id].costs[i][j])); 88 if (j + 1 < TRE_M_LAST) 89 DPRINT((",")); 90 } 91 DPRINT(("]")); 92 if (i + 1 <= reach[id].depth) 93 DPRINT((", ")); 94 } 95 DPRINT(("\n tags ")); 96 for (i = 0; i < num_tags; i++) 97 { 98 DPRINT(("%02d", reach[id].tags[i])); 99 if (i + 1 < num_tags) 100 DPRINT((",")); 101 } 102 DPRINT(("\n")); 103 } 104 DPRINT(("\n")); 105 } 106 #endif /* TRE_DEBUG */ 107 108 109 /* Sets the matching parameters in `reach' to the ones defined in the `pa' 110 array. If `pa' specifies default values, they are taken from 111 `default_params'. */ 112 inline static void 113 tre_set_params(tre_tnfa_approx_reach_t *reach, 114 int *pa, regaparams_t default_params) 115 { 116 int value; 117 118 /* If depth is increased reset costs and counters to zero for the 119 new levels. */ 120 value = pa[TRE_PARAM_DEPTH]; 121 assert(value <= TRE_M_MAX_DEPTH); 122 if (value > reach->depth) 123 { 124 int i, j; 125 for (i = reach->depth + 1; i <= value; i++) 126 for (j = 0; j < TRE_M_LAST; j++) 127 reach->costs[i][j] = 0; 128 } 129 reach->depth = value; 130 131 /* Set insert cost. */ 132 value = pa[TRE_PARAM_COST_INS]; 133 if (value == TRE_PARAM_DEFAULT) 134 reach->params.cost_ins = default_params.cost_ins; 135 else if (value != TRE_PARAM_UNSET) 136 reach->params.cost_ins = value; 137 138 /* Set delete cost. */ 139 value = pa[TRE_PARAM_COST_DEL]; 140 if (value == TRE_PARAM_DEFAULT) 141 reach->params.cost_del = default_params.cost_del; 142 else if (value != TRE_PARAM_UNSET) 143 reach->params.cost_del = value; 144 145 /* Set substitute cost. */ 146 value = pa[TRE_PARAM_COST_SUBST]; 147 if (value == TRE_PARAM_DEFAULT) 148 reach->params.cost_subst = default_params.cost_subst; 149 else 150 reach->params.cost_subst = value; 151 152 /* Set maximum cost. */ 153 value = pa[TRE_PARAM_COST_MAX]; 154 if (value == TRE_PARAM_DEFAULT) 155 reach->params.max_cost = default_params.max_cost; 156 else if (value != TRE_PARAM_UNSET) 157 reach->params.max_cost = value; 158 159 /* Set maximum inserts. */ 160 value = pa[TRE_PARAM_MAX_INS]; 161 if (value == TRE_PARAM_DEFAULT) 162 reach->params.max_ins = default_params.max_ins; 163 else if (value != TRE_PARAM_UNSET) 164 reach->params.max_ins = value; 165 166 /* Set maximum deletes. */ 167 value = pa[TRE_PARAM_MAX_DEL]; 168 if (value == TRE_PARAM_DEFAULT) 169 reach->params.max_del = default_params.max_del; 170 else if (value != TRE_PARAM_UNSET) 171 reach->params.max_del = value; 172 173 /* Set maximum substitutes. */ 174 value = pa[TRE_PARAM_MAX_SUBST]; 175 if (value == TRE_PARAM_DEFAULT) 176 reach->params.max_subst = default_params.max_subst; 177 else if (value != TRE_PARAM_UNSET) 178 reach->params.max_subst = value; 179 180 /* Set maximum number of errors. */ 181 value = pa[TRE_PARAM_MAX_ERR]; 182 if (value == TRE_PARAM_DEFAULT) 183 reach->params.max_err = default_params.max_err; 184 else if (value != TRE_PARAM_UNSET) 185 reach->params.max_err = value; 186 } 187 188 reg_errcode_t 189 tre_tnfa_run_approx(const tre_tnfa_t *tnfa, const void *string, int len, 190 tre_str_type_t type, int *match_tags, 191 regamatch_t *match, regaparams_t default_params, 192 int eflags, int *match_end_ofs) 193 { 194 /* State variables required by GET_NEXT_WCHAR. */ 195 tre_char_t prev_c = 0, next_c = 0; 196 const char *str_byte = string; 197 int pos = -1; 198 unsigned int pos_add_next = 1; 199 #ifdef TRE_WCHAR 200 const wchar_t *str_wide = string; 201 #ifdef TRE_MBSTATE 202 mbstate_t mbstate; 203 #endif /* !TRE_WCHAR */ 204 #endif /* TRE_WCHAR */ 205 int reg_notbol = eflags & REG_NOTBOL; 206 int reg_noteol = eflags & REG_NOTEOL; 207 int reg_newline = tnfa->cflags & REG_NEWLINE; 208 size_t str_user_end = 0; 209 210 int prev_pos; 211 212 /* Number of tags. */ 213 size_t num_tags; 214 /* The reach tables. */ 215 tre_tnfa_approx_reach_t *reach, *reach_next; 216 /* Tag array for temporary use. */ 217 int *tmp_tags; 218 219 /* End offset of best match so far, or -1 if no match found yet. */ 220 int match_eo = -1; 221 /* Costs of the match. */ 222 int match_costs[TRE_M_LAST]; 223 224 /* Space for temporary data required for matching. */ 225 unsigned char *buf; 226 227 size_t i, id; 228 229 reg_errcode_t ret; 230 231 if (!match_tags) 232 num_tags = 0; 233 else 234 num_tags = tnfa->num_tags; 235 236 #ifdef TRE_MBSTATE 237 memset(&mbstate, '\0', sizeof(mbstate)); 238 #endif /* TRE_MBSTATE */ 239 240 DPRINT(("tre_tnfa_run_approx, input type %d, len %d, eflags %d, " 241 "match_tags %p\n", 242 type, len, eflags, 243 match_tags)); 244 DPRINT(("max cost %d, ins %d, del %d, subst %d\n", 245 default_params.max_cost, 246 default_params.cost_ins, 247 default_params.cost_del, 248 default_params.cost_subst)); 249 250 /* Allocate memory for temporary data required for matching. This needs to 251 be done for every matching operation to be thread safe. This allocates 252 everything in a single large block from the stack frame using alloca() 253 or with malloc() if alloca is unavailable. */ 254 { 255 unsigned char *buf_cursor; 256 257 /* Ensure that tag_bytes*num_states cannot overflow, and that it don't 258 * contribute more than 1/8 of SIZE_MAX to total_bytes. */ 259 if (num_tags > SIZE_MAX/(8 * sizeof(*tmp_tags) * tnfa->num_states)) 260 return REG_ESPACE; 261 262 /* Likewise check reach_bytes. */ 263 if (tnfa->num_states > SIZE_MAX/(8 * sizeof(*reach_next))) 264 return REG_ESPACE; 265 266 /* Space needed for one array of tags. */ 267 size_t tag_bytes = sizeof(*tmp_tags) * num_tags; 268 /* Space needed for one reach table. */ 269 size_t reach_bytes = sizeof(*reach_next) * tnfa->num_states; 270 /* Total space needed. */ 271 size_t total_bytes = reach_bytes * 2 + (tnfa->num_states * 2 + 1 ) * tag_bytes; 272 /* Add some extra to make sure we can align the pointers. The multiplier 273 used here must be equal to the number of ALIGN calls below. */ 274 total_bytes += (sizeof(long) - 1) * 3; 275 276 /* Allocate the memory. */ 277 #ifdef TRE_USE_ALLOCA 278 buf = alloca(total_bytes); 279 #else /* !TRE_USE_ALLOCA */ 280 buf = xmalloc((unsigned)total_bytes); 281 #endif /* !TRE_USE_ALLOCA */ 282 if (!buf) 283 return REG_ESPACE; 284 memset(buf, 0, (size_t)total_bytes); 285 286 /* Allocate `tmp_tags' from `buf'. */ 287 tmp_tags = (void *)buf; 288 buf_cursor = buf + tag_bytes; 289 buf_cursor += ALIGN(buf_cursor, long); 290 291 /* Allocate `reach' from `buf'. */ 292 reach = (void *)buf_cursor; 293 buf_cursor += reach_bytes; 294 buf_cursor += ALIGN(buf_cursor, long); 295 296 /* Allocate `reach_next' from `buf'. */ 297 reach_next = (void *)buf_cursor; 298 buf_cursor += reach_bytes; 299 buf_cursor += ALIGN(buf_cursor, long); 300 301 /* Allocate tag arrays for `reach' and `reach_next' from `buf'. */ 302 for (i = 0; i < tnfa->num_states; i++) 303 { 304 reach[i].tags = (void *)buf_cursor; 305 buf_cursor += tag_bytes; 306 reach_next[i].tags = (void *)buf_cursor; 307 buf_cursor += tag_bytes; 308 } 309 assert(buf_cursor <= buf + total_bytes); 310 } 311 312 for (i = 0; i < TRE_M_LAST; i++) 313 match_costs[i] = INT_MAX; 314 315 /* Mark the reach arrays empty. */ 316 for (i = 0; i < tnfa->num_states; i++) 317 reach[i].pos = reach_next[i].pos = -2; 318 319 prev_pos = pos; 320 GET_NEXT_WCHAR(); 321 pos = 0; 322 323 while (/*CONSTCOND*/(void)1,1) 324 { 325 DPRINT(("%03d:%2lc/%05d\n", pos, (tre_cint_t)next_c, (int)next_c)); 326 327 /* Add initial states to `reach_next' if an exact match has not yet 328 been found. */ 329 if (match_costs[TRE_M_COST] > 0) 330 { 331 tre_tnfa_transition_t *trans; 332 DPRINT((" init")); 333 for (trans = tnfa->initial; trans->state; trans++) 334 { 335 int stateid = trans->state_id; 336 337 /* If this state is not currently in `reach_next', add it 338 there. */ 339 if (reach_next[stateid].pos < pos) 340 { 341 if (trans->assertions && CHECK_ASSERTIONS(trans->assertions)) 342 { 343 /* Assertions failed, don't add this state. */ 344 DPRINT((" !%d (assert)", stateid)); 345 continue; 346 } 347 DPRINT((" %d", stateid)); 348 reach_next[stateid].state = trans->state; 349 reach_next[stateid].pos = pos; 350 351 /* Compute tag values after this transition. */ 352 for (i = 0; i < num_tags; i++) 353 reach_next[stateid].tags[i] = -1; 354 355 if (trans->tags) 356 for (i = 0; trans->tags[i] >= 0; i++) 357 if ((size_t)trans->tags[i] < num_tags) 358 reach_next[stateid].tags[trans->tags[i]] = pos; 359 360 /* Set the parameters, depth, and costs. */ 361 reach_next[stateid].params = default_params; 362 reach_next[stateid].depth = 0; 363 for (i = 0; i < TRE_M_LAST; i++) 364 reach_next[stateid].costs[0][i] = 0; 365 if (trans->params) 366 tre_set_params(&reach_next[stateid], trans->params, 367 default_params); 368 369 /* If this is the final state, mark the exact match. */ 370 if (trans->state == tnfa->final) 371 { 372 match_eo = pos; 373 for (i = 0; i < num_tags; i++) 374 match_tags[i] = reach_next[stateid].tags[i]; 375 for (i = 0; i < TRE_M_LAST; i++) 376 match_costs[i] = 0; 377 } 378 } 379 } 380 DPRINT(("\n")); 381 } 382 383 384 /* Handle inserts. This is done by pretending there's an epsilon 385 transition from each state in `reach' back to the same state. 386 We don't need to worry about the final state here; this will never 387 give a better match than what we already have. */ 388 for (id = 0; id < tnfa->num_states; id++) 389 { 390 int depth; 391 int cost, cost0; 392 393 if (reach[id].pos != prev_pos) 394 { 395 DPRINT((" insert: %d not reached\n", id)); 396 continue; /* Not reached. */ 397 } 398 399 depth = reach[id].depth; 400 401 /* Compute and check cost at current depth. */ 402 cost = reach[id].costs[depth][TRE_M_COST]; 403 if (reach[id].params.cost_ins != TRE_PARAM_UNSET) 404 cost += reach[id].params.cost_ins; 405 if (cost > reach[id].params.max_cost) 406 continue; /* Cost too large. */ 407 408 /* Check number of inserts at current depth. */ 409 if (reach[id].costs[depth][TRE_M_NUM_INS] + 1 410 > reach[id].params.max_ins) 411 continue; /* Too many inserts. */ 412 413 /* Check total number of errors at current depth. */ 414 if (reach[id].costs[depth][TRE_M_NUM_ERR] + 1 415 > reach[id].params.max_err) 416 continue; /* Too many errors. */ 417 418 /* Compute overall cost. */ 419 cost0 = cost; 420 if (depth > 0) 421 { 422 cost0 = reach[id].costs[0][TRE_M_COST]; 423 if (reach[id].params.cost_ins != TRE_PARAM_UNSET) 424 cost0 += reach[id].params.cost_ins; 425 else 426 cost0 += default_params.cost_ins; 427 } 428 429 DPRINT((" insert: from %d to %d, cost %d: ", id, id, 430 reach[id].costs[depth][TRE_M_COST])); 431 if (reach_next[id].pos == pos 432 && (cost0 >= reach_next[id].costs[0][TRE_M_COST])) 433 { 434 DPRINT(("lose\n")); 435 continue; 436 } 437 DPRINT(("win\n")); 438 439 /* Copy state, position, tags, parameters, and depth. */ 440 reach_next[id].state = reach[id].state; 441 reach_next[id].pos = pos; 442 for (i = 0; i < num_tags; i++) 443 reach_next[id].tags[i] = reach[id].tags[i]; 444 reach_next[id].params = reach[id].params; 445 reach_next[id].depth = reach[id].depth; 446 447 /* Set the costs after this transition. */ 448 memcpy(reach_next[id].costs, reach[id].costs, 449 sizeof(reach_next[id].costs[0][0]) 450 * TRE_M_LAST * (depth + 1)); 451 reach_next[id].costs[depth][TRE_M_COST] = cost; 452 reach_next[id].costs[depth][TRE_M_NUM_INS]++; 453 reach_next[id].costs[depth][TRE_M_NUM_ERR]++; 454 if (depth > 0) 455 { 456 reach_next[id].costs[0][TRE_M_COST] = cost0; 457 reach_next[id].costs[0][TRE_M_NUM_INS]++; 458 reach_next[id].costs[0][TRE_M_NUM_ERR]++; 459 } 460 461 } 462 463 464 /* Handle deletes. This is done by traversing through the whole TNFA 465 pretending that all transitions are epsilon transitions, until 466 no more states can be reached with better costs. */ 467 { 468 /* XXX - dynamic ringbuffer size */ 469 tre_tnfa_approx_reach_t *ringbuffer[512]; 470 tre_tnfa_approx_reach_t **deque_start, **deque_end; 471 472 deque_start = deque_end = ringbuffer; 473 474 /* Add all states in `reach_next' to the deque. */ 475 for (id = 0; id < tnfa->num_states; id++) 476 { 477 if (reach_next[id].pos != pos) 478 continue; 479 *deque_end = &reach_next[id]; 480 deque_end++; 481 assert(deque_end != deque_start); 482 } 483 484 /* Repeat until the deque is empty. */ 485 while (deque_end != deque_start) 486 { 487 tre_tnfa_approx_reach_t *reach_p; 488 int depth; 489 int cost, cost0; 490 tre_tnfa_transition_t *trans; 491 492 /* Pop the first item off the deque. */ 493 reach_p = *deque_start; 494 id = reach_p - reach_next; 495 depth = reach_p->depth; 496 497 /* Compute cost at current depth. */ 498 cost = reach_p->costs[depth][TRE_M_COST]; 499 if (reach_p->params.cost_del != TRE_PARAM_UNSET) 500 cost += reach_p->params.cost_del; 501 502 /* Check cost, number of deletes, and total number of errors 503 at current depth. */ 504 if (cost > reach_p->params.max_cost 505 || (reach_p->costs[depth][TRE_M_NUM_DEL] + 1 506 > reach_p->params.max_del) 507 || (reach_p->costs[depth][TRE_M_NUM_ERR] + 1 508 > reach_p->params.max_err)) 509 { 510 /* Too many errors or cost too large. */ 511 DPRINT((" delete: from %03d: cost too large\n", id)); 512 deque_start++; 513 if (deque_start >= (ringbuffer + 512)) 514 deque_start = ringbuffer; 515 continue; 516 } 517 518 /* Compute overall cost. */ 519 cost0 = cost; 520 if (depth > 0) 521 { 522 cost0 = reach_p->costs[0][TRE_M_COST]; 523 if (reach_p->params.cost_del != TRE_PARAM_UNSET) 524 cost0 += reach_p->params.cost_del; 525 else 526 cost0 += default_params.cost_del; 527 } 528 529 for (trans = reach_p->state; trans->state; trans++) 530 { 531 int dest_id = trans->state_id; 532 DPRINT((" delete: from %03d to %03d, cost %d (%d): ", 533 id, dest_id, cost0, reach_p->params.max_cost)); 534 535 if (trans->assertions && CHECK_ASSERTIONS(trans->assertions)) 536 { 537 DPRINT(("assertion failed\n")); 538 continue; 539 } 540 541 /* Compute tag values after this transition. */ 542 for (i = 0; i < num_tags; i++) 543 tmp_tags[i] = reach_p->tags[i]; 544 if (trans->tags) 545 for (i = 0; trans->tags[i] >= 0; i++) 546 if ((size_t)trans->tags[i] < num_tags) 547 tmp_tags[trans->tags[i]] = pos; 548 549 /* If another path has also reached this state, choose the one 550 with the smallest cost or best tags if costs are equal. */ 551 if (reach_next[dest_id].pos == pos 552 && (cost0 > reach_next[dest_id].costs[0][TRE_M_COST] 553 || (cost0 == reach_next[dest_id].costs[0][TRE_M_COST] 554 && (!match_tags 555 || !tre_tag_order(num_tags, 556 tnfa->tag_directions, 557 tmp_tags, 558 reach_next[dest_id].tags))))) 559 { 560 DPRINT(("lose, cost0 %d, have %d\n", 561 cost0, reach_next[dest_id].costs[0][TRE_M_COST])); 562 continue; 563 } 564 DPRINT(("win\n")); 565 566 /* Set state, position, tags, parameters, depth, and costs. */ 567 reach_next[dest_id].state = trans->state; 568 reach_next[dest_id].pos = pos; 569 for (i = 0; i < num_tags; i++) 570 reach_next[dest_id].tags[i] = tmp_tags[i]; 571 572 reach_next[dest_id].params = reach_p->params; 573 if (trans->params) 574 tre_set_params(&reach_next[dest_id], trans->params, 575 default_params); 576 577 reach_next[dest_id].depth = reach_p->depth; 578 memcpy(&reach_next[dest_id].costs, 579 reach_p->costs, 580 sizeof(reach_p->costs[0][0]) 581 * TRE_M_LAST * (depth + 1)); 582 reach_next[dest_id].costs[depth][TRE_M_COST] = cost; 583 reach_next[dest_id].costs[depth][TRE_M_NUM_DEL]++; 584 reach_next[dest_id].costs[depth][TRE_M_NUM_ERR]++; 585 if (depth > 0) 586 { 587 reach_next[dest_id].costs[0][TRE_M_COST] = cost0; 588 reach_next[dest_id].costs[0][TRE_M_NUM_DEL]++; 589 reach_next[dest_id].costs[0][TRE_M_NUM_ERR]++; 590 } 591 592 if (trans->state == tnfa->final 593 && (match_eo < 0 594 || match_costs[TRE_M_COST] > cost0 595 || (match_costs[TRE_M_COST] == cost0 596 && (num_tags > 0 597 && tmp_tags[0] <= match_tags[0])))) 598 { 599 DPRINT((" setting new match at %d, cost %d\n", 600 pos, cost0)); 601 match_eo = pos; 602 memcpy(match_costs, reach_next[dest_id].costs[0], 603 sizeof(match_costs[0]) * TRE_M_LAST); 604 for (i = 0; i < num_tags; i++) 605 match_tags[i] = tmp_tags[i]; 606 } 607 608 /* Add to the end of the deque. */ 609 *deque_end = &reach_next[dest_id]; 610 deque_end++; 611 if (deque_end >= (ringbuffer + 512)) 612 deque_end = ringbuffer; 613 assert(deque_end != deque_start); 614 } 615 deque_start++; 616 if (deque_start >= (ringbuffer + 512)) 617 deque_start = ringbuffer; 618 } 619 620 } 621 622 #ifdef TRE_DEBUG 623 tre_print_reach(tnfa, reach_next, pos, num_tags); 624 #endif /* TRE_DEBUG */ 625 626 /* Check for end of string. */ 627 if (len < 0) 628 { 629 if (type == STR_USER) 630 { 631 if (str_user_end) 632 break; 633 } 634 else if (next_c == L'\0') 635 break; 636 } 637 else 638 { 639 if (pos >= len) 640 break; 641 } 642 643 prev_pos = pos; 644 GET_NEXT_WCHAR(); 645 646 /* Swap `reach' and `reach_next'. */ 647 { 648 tre_tnfa_approx_reach_t *tmp; 649 tmp = reach; 650 reach = reach_next; 651 reach_next = tmp; 652 } 653 654 /* Handle exact matches and substitutions. */ 655 for (id = 0; id < tnfa->num_states; id++) 656 { 657 tre_tnfa_transition_t *trans; 658 659 if (reach[id].pos < prev_pos) 660 continue; /* Not reached. */ 661 for (trans = reach[id].state; trans->state; trans++) 662 { 663 int dest_id; 664 int depth; 665 int cost, cost0, err; 666 667 if (trans->assertions 668 && (CHECK_ASSERTIONS(trans->assertions) 669 || CHECK_CHAR_CLASSES(trans, tnfa, eflags))) 670 { 671 DPRINT((" exact, from %d: assert failed\n", id)); 672 continue; 673 } 674 675 depth = reach[id].depth; 676 dest_id = trans->state_id; 677 678 cost = reach[id].costs[depth][TRE_M_COST]; 679 cost0 = reach[id].costs[0][TRE_M_COST]; 680 err = 0; 681 682 if (trans->code_min > (tre_cint_t)prev_c 683 || trans->code_max < (tre_cint_t)prev_c) 684 { 685 /* Handle substitutes. The required character was not in 686 the string, so match it in place of whatever was supposed 687 to be there and increase costs accordingly. */ 688 err = 1; 689 690 /* Compute and check cost at current depth. */ 691 cost = reach[id].costs[depth][TRE_M_COST]; 692 if (reach[id].params.cost_subst != TRE_PARAM_UNSET) 693 cost += reach[id].params.cost_subst; 694 if (cost > reach[id].params.max_cost) 695 continue; /* Cost too large. */ 696 697 /* Check number of substitutes at current depth. */ 698 if (reach[id].costs[depth][TRE_M_NUM_SUBST] + 1 699 > reach[id].params.max_subst) 700 continue; /* Too many substitutes. */ 701 702 /* Check total number of errors at current depth. */ 703 if (reach[id].costs[depth][TRE_M_NUM_ERR] + 1 704 > reach[id].params.max_err) 705 continue; /* Too many errors. */ 706 707 /* Compute overall cost. */ 708 cost0 = cost; 709 if (depth > 0) 710 { 711 cost0 = reach[id].costs[0][TRE_M_COST]; 712 if (reach[id].params.cost_subst != TRE_PARAM_UNSET) 713 cost0 += reach[id].params.cost_subst; 714 else 715 cost0 += default_params.cost_subst; 716 } 717 DPRINT((" subst, from %03d to %03d, cost %d: ", 718 id, dest_id, cost0)); 719 } 720 else 721 DPRINT((" exact, from %03d to %03d, cost %d: ", 722 id, dest_id, cost0)); 723 724 /* Compute tag values after this transition. */ 725 for (i = 0; i < num_tags; i++) 726 tmp_tags[i] = reach[id].tags[i]; 727 if (trans->tags) 728 for (i = 0; trans->tags[i] >= 0; i++) 729 if ((size_t)trans->tags[i] < num_tags) 730 tmp_tags[trans->tags[i]] = pos; 731 732 /* If another path has also reached this state, choose the 733 one with the smallest cost or best tags if costs are equal. */ 734 if (reach_next[dest_id].pos == pos 735 && (cost0 > reach_next[dest_id].costs[0][TRE_M_COST] 736 || (cost0 == reach_next[dest_id].costs[0][TRE_M_COST] 737 && !tre_tag_order(num_tags, tnfa->tag_directions, 738 tmp_tags, 739 reach_next[dest_id].tags)))) 740 { 741 DPRINT(("lose\n")); 742 continue; 743 } 744 DPRINT(("win %d %d\n", 745 reach_next[dest_id].pos, 746 reach_next[dest_id].costs[0][TRE_M_COST])); 747 748 /* Set state, position, tags, and depth. */ 749 reach_next[dest_id].state = trans->state; 750 reach_next[dest_id].pos = pos; 751 for (i = 0; i < num_tags; i++) 752 reach_next[dest_id].tags[i] = tmp_tags[i]; 753 reach_next[dest_id].depth = reach[id].depth; 754 755 /* Set parameters. */ 756 reach_next[dest_id].params = reach[id].params; 757 if (trans->params) 758 tre_set_params(&reach_next[dest_id], trans->params, 759 default_params); 760 761 /* Set the costs after this transition. */ 762 memcpy(&reach_next[dest_id].costs, 763 reach[id].costs, 764 sizeof(reach[id].costs[0][0]) 765 * TRE_M_LAST * (depth + 1)); 766 reach_next[dest_id].costs[depth][TRE_M_COST] = cost; 767 reach_next[dest_id].costs[depth][TRE_M_NUM_SUBST] += err; 768 reach_next[dest_id].costs[depth][TRE_M_NUM_ERR] += err; 769 if (depth > 0) 770 { 771 reach_next[dest_id].costs[0][TRE_M_COST] = cost0; 772 reach_next[dest_id].costs[0][TRE_M_NUM_SUBST] += err; 773 reach_next[dest_id].costs[0][TRE_M_NUM_ERR] += err; 774 } 775 776 if (trans->state == tnfa->final 777 && (match_eo < 0 778 || cost0 < match_costs[TRE_M_COST] 779 || (cost0 == match_costs[TRE_M_COST] 780 && num_tags > 0 && tmp_tags[0] <= match_tags[0]))) 781 { 782 DPRINT((" setting new match at %d, cost %d\n", 783 pos, cost0)); 784 match_eo = pos; 785 for (i = 0; i < TRE_M_LAST; i++) 786 match_costs[i] = reach_next[dest_id].costs[0][i]; 787 for (i = 0; i < num_tags; i++) 788 match_tags[i] = tmp_tags[i]; 789 } 790 } 791 } 792 } 793 794 DPRINT(("match end offset = %d, match cost = %d\n", match_eo, 795 match_costs[TRE_M_COST])); 796 797 match->cost = match_costs[TRE_M_COST]; 798 match->num_ins = match_costs[TRE_M_NUM_INS]; 799 match->num_del = match_costs[TRE_M_NUM_DEL]; 800 match->num_subst = match_costs[TRE_M_NUM_SUBST]; 801 *match_end_ofs = match_eo; 802 803 ret = match_eo >= 0 ? REG_OK : REG_NOMATCH; 804 error_exit: 805 #ifndef TRE_USE_ALLOCA 806 if (buf) 807 xfree(buf); 808 #endif /* !TRE_USE_ALLOCA */ 809 return ret; 810 } 811