1 /*
2 tre-match-approx.c - TRE approximate regex matching engine
3
4 This software is released under a BSD-style license.
5 See the file LICENSE for details and copyright.
6
7 */
8 #ifdef HAVE_CONFIG_H
9 #include <config.h>
10 #endif /* HAVE_CONFIG_H */
11
12 #include "tre-alloca.h"
13
14 #define __USE_STRING_INLINES
15 #undef __NO_INLINE__
16
17 #include <assert.h>
18 #include <stdlib.h>
19 #include <string.h>
20 #include <limits.h>
21 #ifdef HAVE_WCHAR_H
22 #include <wchar.h>
23 #endif /* HAVE_WCHAR_H */
24 #ifdef HAVE_WCTYPE_H
25 #include <wctype.h>
26 #endif /* HAVE_WCTYPE_H */
27 #ifndef TRE_WCHAR
28 #include <ctype.h>
29 #endif /* !TRE_WCHAR */
30 #ifdef HAVE_MALLOC_H
31 #include <malloc.h>
32 #endif /* HAVE_MALLOC_H */
33 #include <stdint.h>
34
35 #include "tre-internal.h"
36 #include "tre-match-utils.h"
37 #include "tre.h"
38 #include "xmalloc.h"
39
40 #define TRE_M_COST 0
41 #define TRE_M_NUM_INS 1
42 #define TRE_M_NUM_DEL 2
43 #define TRE_M_NUM_SUBST 3
44 #define TRE_M_NUM_ERR 4
45 #define TRE_M_LAST 5
46
47 #define TRE_M_MAX_DEPTH 3
48
49 typedef struct {
50 /* State in the TNFA transition table. */
51 tre_tnfa_transition_t *state;
52 /* Position in input string. */
53 int pos;
54 /* Tag values. */
55 int *tags;
56 /* Matching parameters. */
57 regaparams_t params;
58 /* Nesting depth of parameters. This is used as an index in
59 the `costs' array. */
60 int depth;
61 /* Costs and counter values for different parameter nesting depths. */
62 int costs[TRE_M_MAX_DEPTH + 1][TRE_M_LAST];
63 } tre_tnfa_approx_reach_t;
64
65
66 #ifdef TRE_DEBUG
67 /* Prints the `reach' array in a readable fashion with DPRINT. */
68 static void
tre_print_reach(const tre_tnfa_t * tnfa,tre_tnfa_approx_reach_t * reach,int pos,size_t num_tags)69 tre_print_reach(const tre_tnfa_t *tnfa, tre_tnfa_approx_reach_t *reach,
70 int pos, size_t num_tags)
71 {
72 int id;
73
74 /* Print each state on one line. */
75 DPRINT((" reach:\n"));
76 for (id = 0; id < tnfa->num_states; id++)
77 {
78 int i, j;
79 if (reach[id].pos < pos)
80 continue; /* Not reached. */
81 DPRINT((" %03d, costs ", id));
82 for (i = 0; i <= reach[id].depth; i++)
83 {
84 DPRINT(("["));
85 for (j = 0; j < TRE_M_LAST; j++)
86 {
87 DPRINT(("%2d", reach[id].costs[i][j]));
88 if (j + 1 < TRE_M_LAST)
89 DPRINT((","));
90 }
91 DPRINT(("]"));
92 if (i + 1 <= reach[id].depth)
93 DPRINT((", "));
94 }
95 DPRINT(("\n tags "));
96 for (i = 0; i < num_tags; i++)
97 {
98 DPRINT(("%02d", reach[id].tags[i]));
99 if (i + 1 < num_tags)
100 DPRINT((","));
101 }
102 DPRINT(("\n"));
103 }
104 DPRINT(("\n"));
105 }
106 #endif /* TRE_DEBUG */
107
108
109 /* Sets the matching parameters in `reach' to the ones defined in the `pa'
110 array. If `pa' specifies default values, they are taken from
111 `default_params'. */
112 inline static void
tre_set_params(tre_tnfa_approx_reach_t * reach,int * pa,regaparams_t default_params)113 tre_set_params(tre_tnfa_approx_reach_t *reach,
114 int *pa, regaparams_t default_params)
115 {
116 int value;
117
118 /* If depth is increased reset costs and counters to zero for the
119 new levels. */
120 value = pa[TRE_PARAM_DEPTH];
121 assert(value <= TRE_M_MAX_DEPTH);
122 if (value > reach->depth)
123 {
124 int i, j;
125 for (i = reach->depth + 1; i <= value; i++)
126 for (j = 0; j < TRE_M_LAST; j++)
127 reach->costs[i][j] = 0;
128 }
129 reach->depth = value;
130
131 /* Set insert cost. */
132 value = pa[TRE_PARAM_COST_INS];
133 if (value == TRE_PARAM_DEFAULT)
134 reach->params.cost_ins = default_params.cost_ins;
135 else if (value != TRE_PARAM_UNSET)
136 reach->params.cost_ins = value;
137
138 /* Set delete cost. */
139 value = pa[TRE_PARAM_COST_DEL];
140 if (value == TRE_PARAM_DEFAULT)
141 reach->params.cost_del = default_params.cost_del;
142 else if (value != TRE_PARAM_UNSET)
143 reach->params.cost_del = value;
144
145 /* Set substitute cost. */
146 value = pa[TRE_PARAM_COST_SUBST];
147 if (value == TRE_PARAM_DEFAULT)
148 reach->params.cost_subst = default_params.cost_subst;
149 else
150 reach->params.cost_subst = value;
151
152 /* Set maximum cost. */
153 value = pa[TRE_PARAM_COST_MAX];
154 if (value == TRE_PARAM_DEFAULT)
155 reach->params.max_cost = default_params.max_cost;
156 else if (value != TRE_PARAM_UNSET)
157 reach->params.max_cost = value;
158
159 /* Set maximum inserts. */
160 value = pa[TRE_PARAM_MAX_INS];
161 if (value == TRE_PARAM_DEFAULT)
162 reach->params.max_ins = default_params.max_ins;
163 else if (value != TRE_PARAM_UNSET)
164 reach->params.max_ins = value;
165
166 /* Set maximum deletes. */
167 value = pa[TRE_PARAM_MAX_DEL];
168 if (value == TRE_PARAM_DEFAULT)
169 reach->params.max_del = default_params.max_del;
170 else if (value != TRE_PARAM_UNSET)
171 reach->params.max_del = value;
172
173 /* Set maximum substitutes. */
174 value = pa[TRE_PARAM_MAX_SUBST];
175 if (value == TRE_PARAM_DEFAULT)
176 reach->params.max_subst = default_params.max_subst;
177 else if (value != TRE_PARAM_UNSET)
178 reach->params.max_subst = value;
179
180 /* Set maximum number of errors. */
181 value = pa[TRE_PARAM_MAX_ERR];
182 if (value == TRE_PARAM_DEFAULT)
183 reach->params.max_err = default_params.max_err;
184 else if (value != TRE_PARAM_UNSET)
185 reach->params.max_err = value;
186 }
187
188 reg_errcode_t
tre_tnfa_run_approx(const tre_tnfa_t * tnfa,const void * string,int len,tre_str_type_t type,int * match_tags,regamatch_t * match,regaparams_t default_params,int eflags,int * match_end_ofs)189 tre_tnfa_run_approx(const tre_tnfa_t *tnfa, const void *string, int len,
190 tre_str_type_t type, int *match_tags,
191 regamatch_t *match, regaparams_t default_params,
192 int eflags, int *match_end_ofs)
193 {
194 /* State variables required by GET_NEXT_WCHAR. */
195 tre_char_t prev_c = 0, next_c = 0;
196 const char *str_byte = string;
197 int pos = -1;
198 unsigned int pos_add_next = 1;
199 #ifdef TRE_WCHAR
200 const wchar_t *str_wide = string;
201 #ifdef TRE_MBSTATE
202 mbstate_t mbstate;
203 #endif /* !TRE_WCHAR */
204 #endif /* TRE_WCHAR */
205 int reg_notbol = eflags & REG_NOTBOL;
206 int reg_noteol = eflags & REG_NOTEOL;
207 int reg_newline = tnfa->cflags & REG_NEWLINE;
208 size_t str_user_end = 0;
209
210 int prev_pos;
211
212 /* Number of tags. */
213 size_t num_tags;
214 /* The reach tables. */
215 tre_tnfa_approx_reach_t *reach, *reach_next;
216 /* Tag array for temporary use. */
217 int *tmp_tags;
218
219 /* End offset of best match so far, or -1 if no match found yet. */
220 int match_eo = -1;
221 /* Costs of the match. */
222 int match_costs[TRE_M_LAST];
223
224 /* Space for temporary data required for matching. */
225 unsigned char *buf;
226
227 size_t i, id;
228
229 reg_errcode_t ret;
230
231 if (!match_tags)
232 num_tags = 0;
233 else
234 num_tags = tnfa->num_tags;
235
236 #ifdef TRE_MBSTATE
237 memset(&mbstate, '\0', sizeof(mbstate));
238 #endif /* TRE_MBSTATE */
239
240 DPRINT(("tre_tnfa_run_approx, input type %d, len %d, eflags %d, "
241 "match_tags %p\n",
242 type, len, eflags,
243 match_tags));
244 DPRINT(("max cost %d, ins %d, del %d, subst %d\n",
245 default_params.max_cost,
246 default_params.cost_ins,
247 default_params.cost_del,
248 default_params.cost_subst));
249
250 /* Allocate memory for temporary data required for matching. This needs to
251 be done for every matching operation to be thread safe. This allocates
252 everything in a single large block from the stack frame using alloca()
253 or with malloc() if alloca is unavailable. */
254 {
255 unsigned char *buf_cursor;
256
257 /* Ensure that tag_bytes*num_states cannot overflow, and that it don't
258 * contribute more than 1/8 of SIZE_MAX to total_bytes. */
259 if (num_tags > SIZE_MAX/(8 * sizeof(*tmp_tags) * tnfa->num_states))
260 return REG_ESPACE;
261
262 /* Likewise check reach_bytes. */
263 if (tnfa->num_states > SIZE_MAX/(8 * sizeof(*reach_next)))
264 return REG_ESPACE;
265
266 /* Space needed for one array of tags. */
267 size_t tag_bytes = sizeof(*tmp_tags) * num_tags;
268 /* Space needed for one reach table. */
269 size_t reach_bytes = sizeof(*reach_next) * tnfa->num_states;
270 /* Total space needed. */
271 size_t total_bytes = reach_bytes * 2 + (tnfa->num_states * 2 + 1 ) * tag_bytes;
272 /* Add some extra to make sure we can align the pointers. The multiplier
273 used here must be equal to the number of ALIGN calls below. */
274 total_bytes += (sizeof(long) - 1) * 3;
275
276 /* Allocate the memory. */
277 #ifdef TRE_USE_ALLOCA
278 buf = alloca(total_bytes);
279 #else /* !TRE_USE_ALLOCA */
280 buf = xmalloc((unsigned)total_bytes);
281 #endif /* !TRE_USE_ALLOCA */
282 if (!buf)
283 return REG_ESPACE;
284 memset(buf, 0, (size_t)total_bytes);
285
286 /* Allocate `tmp_tags' from `buf'. */
287 tmp_tags = (void *)buf;
288 buf_cursor = buf + tag_bytes;
289 buf_cursor += ALIGN(buf_cursor, long);
290
291 /* Allocate `reach' from `buf'. */
292 reach = (void *)buf_cursor;
293 buf_cursor += reach_bytes;
294 buf_cursor += ALIGN(buf_cursor, long);
295
296 /* Allocate `reach_next' from `buf'. */
297 reach_next = (void *)buf_cursor;
298 buf_cursor += reach_bytes;
299 buf_cursor += ALIGN(buf_cursor, long);
300
301 /* Allocate tag arrays for `reach' and `reach_next' from `buf'. */
302 for (i = 0; i < tnfa->num_states; i++)
303 {
304 reach[i].tags = (void *)buf_cursor;
305 buf_cursor += tag_bytes;
306 reach_next[i].tags = (void *)buf_cursor;
307 buf_cursor += tag_bytes;
308 }
309 assert(buf_cursor <= buf + total_bytes);
310 }
311
312 for (i = 0; i < TRE_M_LAST; i++)
313 match_costs[i] = INT_MAX;
314
315 /* Mark the reach arrays empty. */
316 for (i = 0; i < tnfa->num_states; i++)
317 reach[i].pos = reach_next[i].pos = -2;
318
319 prev_pos = pos;
320 GET_NEXT_WCHAR();
321 pos = 0;
322
323 while (/*CONSTCOND*/(void)1,1)
324 {
325 DPRINT(("%03d:%2lc/%05d\n", pos, (tre_cint_t)next_c, (int)next_c));
326
327 /* Add initial states to `reach_next' if an exact match has not yet
328 been found. */
329 if (match_costs[TRE_M_COST] > 0)
330 {
331 tre_tnfa_transition_t *trans;
332 DPRINT((" init"));
333 for (trans = tnfa->initial; trans->state; trans++)
334 {
335 int stateid = trans->state_id;
336
337 /* If this state is not currently in `reach_next', add it
338 there. */
339 if (reach_next[stateid].pos < pos)
340 {
341 if (trans->assertions && CHECK_ASSERTIONS(trans->assertions))
342 {
343 /* Assertions failed, don't add this state. */
344 DPRINT((" !%d (assert)", stateid));
345 continue;
346 }
347 DPRINT((" %d", stateid));
348 reach_next[stateid].state = trans->state;
349 reach_next[stateid].pos = pos;
350
351 /* Compute tag values after this transition. */
352 for (i = 0; i < num_tags; i++)
353 reach_next[stateid].tags[i] = -1;
354
355 if (trans->tags)
356 for (i = 0; trans->tags[i] >= 0; i++)
357 if ((size_t)trans->tags[i] < num_tags)
358 reach_next[stateid].tags[trans->tags[i]] = pos;
359
360 /* Set the parameters, depth, and costs. */
361 reach_next[stateid].params = default_params;
362 reach_next[stateid].depth = 0;
363 for (i = 0; i < TRE_M_LAST; i++)
364 reach_next[stateid].costs[0][i] = 0;
365 if (trans->params)
366 tre_set_params(&reach_next[stateid], trans->params,
367 default_params);
368
369 /* If this is the final state, mark the exact match. */
370 if (trans->state == tnfa->final)
371 {
372 match_eo = pos;
373 for (i = 0; i < num_tags; i++)
374 match_tags[i] = reach_next[stateid].tags[i];
375 for (i = 0; i < TRE_M_LAST; i++)
376 match_costs[i] = 0;
377 }
378 }
379 }
380 DPRINT(("\n"));
381 }
382
383
384 /* Handle inserts. This is done by pretending there's an epsilon
385 transition from each state in `reach' back to the same state.
386 We don't need to worry about the final state here; this will never
387 give a better match than what we already have. */
388 for (id = 0; id < tnfa->num_states; id++)
389 {
390 int depth;
391 int cost, cost0;
392
393 if (reach[id].pos != prev_pos)
394 {
395 DPRINT((" insert: %d not reached\n", id));
396 continue; /* Not reached. */
397 }
398
399 depth = reach[id].depth;
400
401 /* Compute and check cost at current depth. */
402 cost = reach[id].costs[depth][TRE_M_COST];
403 if (reach[id].params.cost_ins != TRE_PARAM_UNSET)
404 cost += reach[id].params.cost_ins;
405 if (cost > reach[id].params.max_cost)
406 continue; /* Cost too large. */
407
408 /* Check number of inserts at current depth. */
409 if (reach[id].costs[depth][TRE_M_NUM_INS] + 1
410 > reach[id].params.max_ins)
411 continue; /* Too many inserts. */
412
413 /* Check total number of errors at current depth. */
414 if (reach[id].costs[depth][TRE_M_NUM_ERR] + 1
415 > reach[id].params.max_err)
416 continue; /* Too many errors. */
417
418 /* Compute overall cost. */
419 cost0 = cost;
420 if (depth > 0)
421 {
422 cost0 = reach[id].costs[0][TRE_M_COST];
423 if (reach[id].params.cost_ins != TRE_PARAM_UNSET)
424 cost0 += reach[id].params.cost_ins;
425 else
426 cost0 += default_params.cost_ins;
427 }
428
429 DPRINT((" insert: from %d to %d, cost %d: ", id, id,
430 reach[id].costs[depth][TRE_M_COST]));
431 if (reach_next[id].pos == pos
432 && (cost0 >= reach_next[id].costs[0][TRE_M_COST]))
433 {
434 DPRINT(("lose\n"));
435 continue;
436 }
437 DPRINT(("win\n"));
438
439 /* Copy state, position, tags, parameters, and depth. */
440 reach_next[id].state = reach[id].state;
441 reach_next[id].pos = pos;
442 for (i = 0; i < num_tags; i++)
443 reach_next[id].tags[i] = reach[id].tags[i];
444 reach_next[id].params = reach[id].params;
445 reach_next[id].depth = reach[id].depth;
446
447 /* Set the costs after this transition. */
448 memcpy(reach_next[id].costs, reach[id].costs,
449 sizeof(reach_next[id].costs[0][0])
450 * TRE_M_LAST * (depth + 1));
451 reach_next[id].costs[depth][TRE_M_COST] = cost;
452 reach_next[id].costs[depth][TRE_M_NUM_INS]++;
453 reach_next[id].costs[depth][TRE_M_NUM_ERR]++;
454 if (depth > 0)
455 {
456 reach_next[id].costs[0][TRE_M_COST] = cost0;
457 reach_next[id].costs[0][TRE_M_NUM_INS]++;
458 reach_next[id].costs[0][TRE_M_NUM_ERR]++;
459 }
460
461 }
462
463
464 /* Handle deletes. This is done by traversing through the whole TNFA
465 pretending that all transitions are epsilon transitions, until
466 no more states can be reached with better costs. */
467 {
468 /* XXX - dynamic ringbuffer size */
469 tre_tnfa_approx_reach_t *ringbuffer[512];
470 tre_tnfa_approx_reach_t **deque_start, **deque_end;
471
472 deque_start = deque_end = ringbuffer;
473
474 /* Add all states in `reach_next' to the deque. */
475 for (id = 0; id < tnfa->num_states; id++)
476 {
477 if (reach_next[id].pos != pos)
478 continue;
479 *deque_end = &reach_next[id];
480 deque_end++;
481 assert(deque_end != deque_start);
482 }
483
484 /* Repeat until the deque is empty. */
485 while (deque_end != deque_start)
486 {
487 tre_tnfa_approx_reach_t *reach_p;
488 int depth;
489 int cost, cost0;
490 tre_tnfa_transition_t *trans;
491
492 /* Pop the first item off the deque. */
493 reach_p = *deque_start;
494 id = reach_p - reach_next;
495 depth = reach_p->depth;
496
497 /* Compute cost at current depth. */
498 cost = reach_p->costs[depth][TRE_M_COST];
499 if (reach_p->params.cost_del != TRE_PARAM_UNSET)
500 cost += reach_p->params.cost_del;
501
502 /* Check cost, number of deletes, and total number of errors
503 at current depth. */
504 if (cost > reach_p->params.max_cost
505 || (reach_p->costs[depth][TRE_M_NUM_DEL] + 1
506 > reach_p->params.max_del)
507 || (reach_p->costs[depth][TRE_M_NUM_ERR] + 1
508 > reach_p->params.max_err))
509 {
510 /* Too many errors or cost too large. */
511 DPRINT((" delete: from %03d: cost too large\n", id));
512 deque_start++;
513 if (deque_start >= (ringbuffer + 512))
514 deque_start = ringbuffer;
515 continue;
516 }
517
518 /* Compute overall cost. */
519 cost0 = cost;
520 if (depth > 0)
521 {
522 cost0 = reach_p->costs[0][TRE_M_COST];
523 if (reach_p->params.cost_del != TRE_PARAM_UNSET)
524 cost0 += reach_p->params.cost_del;
525 else
526 cost0 += default_params.cost_del;
527 }
528
529 for (trans = reach_p->state; trans->state; trans++)
530 {
531 int dest_id = trans->state_id;
532 DPRINT((" delete: from %03d to %03d, cost %d (%d): ",
533 id, dest_id, cost0, reach_p->params.max_cost));
534
535 if (trans->assertions && CHECK_ASSERTIONS(trans->assertions))
536 {
537 DPRINT(("assertion failed\n"));
538 continue;
539 }
540
541 /* Compute tag values after this transition. */
542 for (i = 0; i < num_tags; i++)
543 tmp_tags[i] = reach_p->tags[i];
544 if (trans->tags)
545 for (i = 0; trans->tags[i] >= 0; i++)
546 if ((size_t)trans->tags[i] < num_tags)
547 tmp_tags[trans->tags[i]] = pos;
548
549 /* If another path has also reached this state, choose the one
550 with the smallest cost or best tags if costs are equal. */
551 if (reach_next[dest_id].pos == pos
552 && (cost0 > reach_next[dest_id].costs[0][TRE_M_COST]
553 || (cost0 == reach_next[dest_id].costs[0][TRE_M_COST]
554 && (!match_tags
555 || !tre_tag_order(num_tags,
556 tnfa->tag_directions,
557 tmp_tags,
558 reach_next[dest_id].tags)))))
559 {
560 DPRINT(("lose, cost0 %d, have %d\n",
561 cost0, reach_next[dest_id].costs[0][TRE_M_COST]));
562 continue;
563 }
564 DPRINT(("win\n"));
565
566 /* Set state, position, tags, parameters, depth, and costs. */
567 reach_next[dest_id].state = trans->state;
568 reach_next[dest_id].pos = pos;
569 for (i = 0; i < num_tags; i++)
570 reach_next[dest_id].tags[i] = tmp_tags[i];
571
572 reach_next[dest_id].params = reach_p->params;
573 if (trans->params)
574 tre_set_params(&reach_next[dest_id], trans->params,
575 default_params);
576
577 reach_next[dest_id].depth = reach_p->depth;
578 memcpy(&reach_next[dest_id].costs,
579 reach_p->costs,
580 sizeof(reach_p->costs[0][0])
581 * TRE_M_LAST * (depth + 1));
582 reach_next[dest_id].costs[depth][TRE_M_COST] = cost;
583 reach_next[dest_id].costs[depth][TRE_M_NUM_DEL]++;
584 reach_next[dest_id].costs[depth][TRE_M_NUM_ERR]++;
585 if (depth > 0)
586 {
587 reach_next[dest_id].costs[0][TRE_M_COST] = cost0;
588 reach_next[dest_id].costs[0][TRE_M_NUM_DEL]++;
589 reach_next[dest_id].costs[0][TRE_M_NUM_ERR]++;
590 }
591
592 if (trans->state == tnfa->final
593 && (match_eo < 0
594 || match_costs[TRE_M_COST] > cost0
595 || (match_costs[TRE_M_COST] == cost0
596 && (num_tags > 0
597 && tmp_tags[0] <= match_tags[0]))))
598 {
599 DPRINT((" setting new match at %d, cost %d\n",
600 pos, cost0));
601 match_eo = pos;
602 memcpy(match_costs, reach_next[dest_id].costs[0],
603 sizeof(match_costs[0]) * TRE_M_LAST);
604 for (i = 0; i < num_tags; i++)
605 match_tags[i] = tmp_tags[i];
606 }
607
608 /* Add to the end of the deque. */
609 *deque_end = &reach_next[dest_id];
610 deque_end++;
611 if (deque_end >= (ringbuffer + 512))
612 deque_end = ringbuffer;
613 assert(deque_end != deque_start);
614 }
615 deque_start++;
616 if (deque_start >= (ringbuffer + 512))
617 deque_start = ringbuffer;
618 }
619
620 }
621
622 #ifdef TRE_DEBUG
623 tre_print_reach(tnfa, reach_next, pos, num_tags);
624 #endif /* TRE_DEBUG */
625
626 /* Check for end of string. */
627 if (len < 0)
628 {
629 if (type == STR_USER)
630 {
631 if (str_user_end)
632 break;
633 }
634 else if (next_c == L'\0')
635 break;
636 }
637 else
638 {
639 if (pos >= len)
640 break;
641 }
642
643 prev_pos = pos;
644 GET_NEXT_WCHAR();
645
646 /* Swap `reach' and `reach_next'. */
647 {
648 tre_tnfa_approx_reach_t *tmp;
649 tmp = reach;
650 reach = reach_next;
651 reach_next = tmp;
652 }
653
654 /* Handle exact matches and substitutions. */
655 for (id = 0; id < tnfa->num_states; id++)
656 {
657 tre_tnfa_transition_t *trans;
658
659 if (reach[id].pos < prev_pos)
660 continue; /* Not reached. */
661 for (trans = reach[id].state; trans->state; trans++)
662 {
663 int dest_id;
664 int depth;
665 int cost, cost0, err;
666
667 if (trans->assertions
668 && (CHECK_ASSERTIONS(trans->assertions)
669 || CHECK_CHAR_CLASSES(trans, tnfa, eflags)))
670 {
671 DPRINT((" exact, from %d: assert failed\n", id));
672 continue;
673 }
674
675 depth = reach[id].depth;
676 dest_id = trans->state_id;
677
678 cost = reach[id].costs[depth][TRE_M_COST];
679 cost0 = reach[id].costs[0][TRE_M_COST];
680 err = 0;
681
682 if (trans->code_min > (tre_cint_t)prev_c
683 || trans->code_max < (tre_cint_t)prev_c)
684 {
685 /* Handle substitutes. The required character was not in
686 the string, so match it in place of whatever was supposed
687 to be there and increase costs accordingly. */
688 err = 1;
689
690 /* Compute and check cost at current depth. */
691 cost = reach[id].costs[depth][TRE_M_COST];
692 if (reach[id].params.cost_subst != TRE_PARAM_UNSET)
693 cost += reach[id].params.cost_subst;
694 if (cost > reach[id].params.max_cost)
695 continue; /* Cost too large. */
696
697 /* Check number of substitutes at current depth. */
698 if (reach[id].costs[depth][TRE_M_NUM_SUBST] + 1
699 > reach[id].params.max_subst)
700 continue; /* Too many substitutes. */
701
702 /* Check total number of errors at current depth. */
703 if (reach[id].costs[depth][TRE_M_NUM_ERR] + 1
704 > reach[id].params.max_err)
705 continue; /* Too many errors. */
706
707 /* Compute overall cost. */
708 cost0 = cost;
709 if (depth > 0)
710 {
711 cost0 = reach[id].costs[0][TRE_M_COST];
712 if (reach[id].params.cost_subst != TRE_PARAM_UNSET)
713 cost0 += reach[id].params.cost_subst;
714 else
715 cost0 += default_params.cost_subst;
716 }
717 DPRINT((" subst, from %03d to %03d, cost %d: ",
718 id, dest_id, cost0));
719 }
720 else
721 DPRINT((" exact, from %03d to %03d, cost %d: ",
722 id, dest_id, cost0));
723
724 /* Compute tag values after this transition. */
725 for (i = 0; i < num_tags; i++)
726 tmp_tags[i] = reach[id].tags[i];
727 if (trans->tags)
728 for (i = 0; trans->tags[i] >= 0; i++)
729 if ((size_t)trans->tags[i] < num_tags)
730 tmp_tags[trans->tags[i]] = pos;
731
732 /* If another path has also reached this state, choose the
733 one with the smallest cost or best tags if costs are equal. */
734 if (reach_next[dest_id].pos == pos
735 && (cost0 > reach_next[dest_id].costs[0][TRE_M_COST]
736 || (cost0 == reach_next[dest_id].costs[0][TRE_M_COST]
737 && !tre_tag_order(num_tags, tnfa->tag_directions,
738 tmp_tags,
739 reach_next[dest_id].tags))))
740 {
741 DPRINT(("lose\n"));
742 continue;
743 }
744 DPRINT(("win %d %d\n",
745 reach_next[dest_id].pos,
746 reach_next[dest_id].costs[0][TRE_M_COST]));
747
748 /* Set state, position, tags, and depth. */
749 reach_next[dest_id].state = trans->state;
750 reach_next[dest_id].pos = pos;
751 for (i = 0; i < num_tags; i++)
752 reach_next[dest_id].tags[i] = tmp_tags[i];
753 reach_next[dest_id].depth = reach[id].depth;
754
755 /* Set parameters. */
756 reach_next[dest_id].params = reach[id].params;
757 if (trans->params)
758 tre_set_params(&reach_next[dest_id], trans->params,
759 default_params);
760
761 /* Set the costs after this transition. */
762 memcpy(&reach_next[dest_id].costs,
763 reach[id].costs,
764 sizeof(reach[id].costs[0][0])
765 * TRE_M_LAST * (depth + 1));
766 reach_next[dest_id].costs[depth][TRE_M_COST] = cost;
767 reach_next[dest_id].costs[depth][TRE_M_NUM_SUBST] += err;
768 reach_next[dest_id].costs[depth][TRE_M_NUM_ERR] += err;
769 if (depth > 0)
770 {
771 reach_next[dest_id].costs[0][TRE_M_COST] = cost0;
772 reach_next[dest_id].costs[0][TRE_M_NUM_SUBST] += err;
773 reach_next[dest_id].costs[0][TRE_M_NUM_ERR] += err;
774 }
775
776 if (trans->state == tnfa->final
777 && (match_eo < 0
778 || cost0 < match_costs[TRE_M_COST]
779 || (cost0 == match_costs[TRE_M_COST]
780 && num_tags > 0 && tmp_tags[0] <= match_tags[0])))
781 {
782 DPRINT((" setting new match at %d, cost %d\n",
783 pos, cost0));
784 match_eo = pos;
785 for (i = 0; i < TRE_M_LAST; i++)
786 match_costs[i] = reach_next[dest_id].costs[0][i];
787 for (i = 0; i < num_tags; i++)
788 match_tags[i] = tmp_tags[i];
789 }
790 }
791 }
792 }
793
794 DPRINT(("match end offset = %d, match cost = %d\n", match_eo,
795 match_costs[TRE_M_COST]));
796
797 match->cost = match_costs[TRE_M_COST];
798 match->num_ins = match_costs[TRE_M_NUM_INS];
799 match->num_del = match_costs[TRE_M_NUM_DEL];
800 match->num_subst = match_costs[TRE_M_NUM_SUBST];
801 *match_end_ofs = match_eo;
802
803 ret = match_eo >= 0 ? REG_OK : REG_NOMATCH;
804 error_exit:
805 #ifndef TRE_USE_ALLOCA
806 if (buf)
807 xfree(buf);
808 #endif /* !TRE_USE_ALLOCA */
809 return ret;
810 }
811