xref: /netbsd-src/external/bsd/tre/dist/lib/tre-match-approx.c (revision 6a493d6bc668897c91594964a732d38505b70cbb)
1 /*
2   tre-match-approx.c - TRE approximate regex matching engine
3 
4   This software is released under a BSD-style license.
5   See the file LICENSE for details and copyright.
6 
7 */
8 #ifdef HAVE_CONFIG_H
9 #include <config.h>
10 #endif /* HAVE_CONFIG_H */
11 
12 #include "tre-alloca.h"
13 
14 #define __USE_STRING_INLINES
15 #undef __NO_INLINE__
16 
17 #include <assert.h>
18 #include <stdlib.h>
19 #include <string.h>
20 #include <limits.h>
21 #ifdef HAVE_WCHAR_H
22 #include <wchar.h>
23 #endif /* HAVE_WCHAR_H */
24 #ifdef HAVE_WCTYPE_H
25 #include <wctype.h>
26 #endif /* HAVE_WCTYPE_H */
27 #ifndef TRE_WCHAR
28 #include <ctype.h>
29 #endif /* !TRE_WCHAR */
30 #ifdef HAVE_MALLOC_H
31 #include <malloc.h>
32 #endif /* HAVE_MALLOC_H */
33 
34 #include "tre-internal.h"
35 #include "tre-match-utils.h"
36 #include "tre.h"
37 #include "xmalloc.h"
38 
39 #define TRE_M_COST	0
40 #define TRE_M_NUM_INS	1
41 #define TRE_M_NUM_DEL	2
42 #define TRE_M_NUM_SUBST 3
43 #define TRE_M_NUM_ERR	4
44 #define TRE_M_LAST	5
45 
46 #define TRE_M_MAX_DEPTH 3
47 
48 typedef struct {
49   /* State in the TNFA transition table. */
50   tre_tnfa_transition_t *state;
51   /* Position in input string. */
52   int pos;
53   /* Tag values. */
54   int *tags;
55   /* Matching parameters. */
56   regaparams_t params;
57   /* Nesting depth of parameters.  This is used as an index in
58      the `costs' array. */
59   int depth;
60   /* Costs and counter values for different parameter nesting depths. */
61   int costs[TRE_M_MAX_DEPTH + 1][TRE_M_LAST];
62 } tre_tnfa_approx_reach_t;
63 
64 
65 #ifdef TRE_DEBUG
66 /* Prints the `reach' array in a readable fashion with DPRINT. */
67 static void
68 tre_print_reach(const tre_tnfa_t *tnfa, tre_tnfa_approx_reach_t *reach,
69 		int pos, size_t num_tags)
70 {
71   int id;
72 
73   /* Print each state on one line. */
74   DPRINT(("  reach:\n"));
75   for (id = 0; id < tnfa->num_states; id++)
76     {
77       int i, j;
78       if (reach[id].pos < pos)
79 	continue;  /* Not reached. */
80       DPRINT(("	 %03d, costs ", id));
81       for (i = 0; i <= reach[id].depth; i++)
82 	{
83 	  DPRINT(("["));
84 	  for (j = 0; j < TRE_M_LAST; j++)
85 	    {
86 	      DPRINT(("%2d", reach[id].costs[i][j]));
87 	      if (j + 1 < TRE_M_LAST)
88 		DPRINT((","));
89 	    }
90 	  DPRINT(("]"));
91 	  if (i + 1 <= reach[id].depth)
92 	    DPRINT((", "));
93 	}
94       DPRINT(("\n	tags "));
95       for (i = 0; i < num_tags; i++)
96 	{
97 	  DPRINT(("%02d", reach[id].tags[i]));
98 	  if (i + 1 < num_tags)
99 	    DPRINT((","));
100 	}
101       DPRINT(("\n"));
102     }
103   DPRINT(("\n"));
104 }
105 #endif /* TRE_DEBUG */
106 
107 
108 /* Sets the matching parameters in `reach' to the ones defined in the `pa'
109    array.  If `pa' specifies default values, they are taken from
110    `default_params'. */
111 inline static void
112 tre_set_params(tre_tnfa_approx_reach_t *reach,
113 	       int *pa, regaparams_t default_params)
114 {
115   int value;
116 
117   /* If depth is increased reset costs and counters to zero for the
118      new levels. */
119   value = pa[TRE_PARAM_DEPTH];
120   assert(value <= TRE_M_MAX_DEPTH);
121   if (value > reach->depth)
122     {
123       int i, j;
124       for (i = reach->depth + 1; i <= value; i++)
125 	for (j = 0; j < TRE_M_LAST; j++)
126 	  reach->costs[i][j] = 0;
127     }
128   reach->depth = value;
129 
130   /* Set insert cost. */
131   value = pa[TRE_PARAM_COST_INS];
132   if (value == TRE_PARAM_DEFAULT)
133     reach->params.cost_ins = default_params.cost_ins;
134   else if (value != TRE_PARAM_UNSET)
135     reach->params.cost_ins = value;
136 
137   /* Set delete cost. */
138   value = pa[TRE_PARAM_COST_DEL];
139   if (value == TRE_PARAM_DEFAULT)
140     reach->params.cost_del = default_params.cost_del;
141   else if (value != TRE_PARAM_UNSET)
142     reach->params.cost_del = value;
143 
144   /* Set substitute cost. */
145   value = pa[TRE_PARAM_COST_SUBST];
146   if (value == TRE_PARAM_DEFAULT)
147     reach->params.cost_subst = default_params.cost_subst;
148   else
149     reach->params.cost_subst = value;
150 
151   /* Set maximum cost. */
152   value = pa[TRE_PARAM_COST_MAX];
153   if (value == TRE_PARAM_DEFAULT)
154     reach->params.max_cost = default_params.max_cost;
155   else if (value != TRE_PARAM_UNSET)
156     reach->params.max_cost = value;
157 
158   /* Set maximum inserts. */
159   value = pa[TRE_PARAM_MAX_INS];
160   if (value == TRE_PARAM_DEFAULT)
161     reach->params.max_ins = default_params.max_ins;
162   else if (value != TRE_PARAM_UNSET)
163     reach->params.max_ins = value;
164 
165   /* Set maximum deletes. */
166   value = pa[TRE_PARAM_MAX_DEL];
167   if (value == TRE_PARAM_DEFAULT)
168     reach->params.max_del = default_params.max_del;
169   else if (value != TRE_PARAM_UNSET)
170     reach->params.max_del = value;
171 
172   /* Set maximum substitutes. */
173   value = pa[TRE_PARAM_MAX_SUBST];
174   if (value == TRE_PARAM_DEFAULT)
175     reach->params.max_subst = default_params.max_subst;
176   else if (value != TRE_PARAM_UNSET)
177     reach->params.max_subst = value;
178 
179   /* Set maximum number of errors. */
180   value = pa[TRE_PARAM_MAX_ERR];
181   if (value == TRE_PARAM_DEFAULT)
182     reach->params.max_err = default_params.max_err;
183   else if (value != TRE_PARAM_UNSET)
184     reach->params.max_err = value;
185 }
186 
187 reg_errcode_t
188 tre_tnfa_run_approx(const tre_tnfa_t *tnfa, const void *string, int len,
189 		    tre_str_type_t type, int *match_tags,
190 		    regamatch_t *match, regaparams_t default_params,
191 		    int eflags, int *match_end_ofs)
192 {
193   /* State variables required by GET_NEXT_WCHAR. */
194   tre_char_t prev_c = 0, next_c = 0;
195   const char *str_byte = string;
196   int pos = -1;
197   unsigned int pos_add_next = 1;
198 #ifdef TRE_WCHAR
199   const wchar_t *str_wide = string;
200 #ifdef TRE_MBSTATE
201   mbstate_t mbstate;
202 #endif /* !TRE_WCHAR */
203 #endif /* TRE_WCHAR */
204   int reg_notbol = eflags & REG_NOTBOL;
205   int reg_noteol = eflags & REG_NOTEOL;
206   int reg_newline = tnfa->cflags & REG_NEWLINE;
207   size_t str_user_end = 0;
208 
209   int prev_pos;
210 
211   /* Number of tags. */
212   size_t num_tags;
213   /* The reach tables. */
214   tre_tnfa_approx_reach_t *reach, *reach_next;
215   /* Tag array for temporary use. */
216   int *tmp_tags;
217 
218   /* End offset of best match so far, or -1 if no match found yet. */
219   int match_eo = -1;
220   /* Costs of the match. */
221   int match_costs[TRE_M_LAST];
222 
223   /* Space for temporary data required for matching. */
224   unsigned char *buf;
225 
226   size_t i, id;
227 
228   if (!match_tags)
229     num_tags = 0;
230   else
231     num_tags = tnfa->num_tags;
232 
233 #ifdef TRE_MBSTATE
234   memset(&mbstate, '\0', sizeof(mbstate));
235 #endif /* TRE_MBSTATE */
236 
237   DPRINT(("tre_tnfa_run_approx, input type %d, len %d, eflags %d, "
238 	  "match_tags %p\n",
239 	  type, len, eflags,
240 	  match_tags));
241   DPRINT(("max cost %d, ins %d, del %d, subst %d\n",
242 	  default_params.max_cost,
243 	  default_params.cost_ins,
244 	  default_params.cost_del,
245 	  default_params.cost_subst));
246 
247   /* Allocate memory for temporary data required for matching.	This needs to
248      be done for every matching operation to be thread safe.  This allocates
249      everything in a single large block from the stack frame using alloca()
250      or with malloc() if alloca is unavailable. */
251   {
252     unsigned char *buf_cursor;
253     /* Space needed for one array of tags. */
254     size_t tag_bytes = sizeof(*tmp_tags) * num_tags;
255     /* Space needed for one reach table. */
256     size_t reach_bytes = sizeof(*reach_next) * tnfa->num_states;
257     /* Total space needed. */
258     size_t total_bytes = reach_bytes * 2 + (tnfa->num_states * 2 + 1 ) * tag_bytes;
259     /* Add some extra to make sure we can align the pointers.  The multiplier
260        used here must be equal to the number of ALIGN calls below. */
261     total_bytes += (sizeof(long) - 1) * 3;
262 
263     /* Allocate the memory. */
264 #ifdef TRE_USE_ALLOCA
265     buf = alloca(total_bytes);
266 #else /* !TRE_USE_ALLOCA */
267     buf = xmalloc((unsigned)total_bytes);
268 #endif /* !TRE_USE_ALLOCA */
269     if (!buf)
270       return REG_ESPACE;
271     memset(buf, 0, (size_t)total_bytes);
272 
273     /* Allocate `tmp_tags' from `buf'. */
274     tmp_tags = (void *)buf;
275     buf_cursor = buf + tag_bytes;
276     buf_cursor += ALIGN(buf_cursor, long);
277 
278     /* Allocate `reach' from `buf'. */
279     reach = (void *)buf_cursor;
280     buf_cursor += reach_bytes;
281     buf_cursor += ALIGN(buf_cursor, long);
282 
283     /* Allocate `reach_next' from `buf'. */
284     reach_next = (void *)buf_cursor;
285     buf_cursor += reach_bytes;
286     buf_cursor += ALIGN(buf_cursor, long);
287 
288     /* Allocate tag arrays for `reach' and `reach_next' from `buf'. */
289     for (i = 0; i < tnfa->num_states; i++)
290       {
291 	reach[i].tags = (void *)buf_cursor;
292 	buf_cursor += tag_bytes;
293 	reach_next[i].tags = (void *)buf_cursor;
294 	buf_cursor += tag_bytes;
295       }
296     assert(buf_cursor <= buf + total_bytes);
297   }
298 
299   for (i = 0; i < TRE_M_LAST; i++)
300     match_costs[i] = INT_MAX;
301 
302   /* Mark the reach arrays empty. */
303   for (i = 0; i < tnfa->num_states; i++)
304     reach[i].pos = reach_next[i].pos = -2;
305 
306   prev_pos = pos;
307   GET_NEXT_WCHAR();
308   pos = 0;
309 
310   while (/*CONSTCOND*/1)
311     {
312       DPRINT(("%03d:%2lc/%05d\n", pos, (tre_cint_t)next_c, (int)next_c));
313 
314       /* Add initial states to `reach_next' if an exact match has not yet
315 	 been found. */
316       if (match_costs[TRE_M_COST] > 0)
317 	{
318 	  tre_tnfa_transition_t *trans;
319 	  DPRINT(("  init"));
320 	  for (trans = tnfa->initial; trans->state; trans++)
321 	    {
322 	      int stateid = trans->state_id;
323 
324 	      /* If this state is not currently in `reach_next', add it
325 		 there. */
326 	      if (reach_next[stateid].pos < pos)
327 		{
328 		  if (trans->assertions && CHECK_ASSERTIONS(trans->assertions))
329 		    {
330 		      /* Assertions failed, don't add this state. */
331 		      DPRINT((" !%d (assert)", stateid));
332 		      continue;
333 		    }
334 		  DPRINT((" %d", stateid));
335 		  reach_next[stateid].state = trans->state;
336 		  reach_next[stateid].pos = pos;
337 
338 		  /* Compute tag values after this transition. */
339 		  for (i = 0; i < num_tags; i++)
340 		    reach_next[stateid].tags[i] = -1;
341 
342 		  if (trans->tags)
343 		    for (i = 0; trans->tags[i] >= 0; i++)
344 		      if ((size_t)trans->tags[i] < num_tags)
345 			reach_next[stateid].tags[trans->tags[i]] = pos;
346 
347 		  /* Set the parameters, depth, and costs. */
348 		  reach_next[stateid].params = default_params;
349 		  reach_next[stateid].depth = 0;
350 		  for (i = 0; i < TRE_M_LAST; i++)
351 		    reach_next[stateid].costs[0][i] = 0;
352 		  if (trans->params)
353 		    tre_set_params(&reach_next[stateid], trans->params,
354 				   default_params);
355 
356 		  /* If this is the final state, mark the exact match. */
357 		  if (trans->state == tnfa->final)
358 		    {
359 		      match_eo = pos;
360 		      for (i = 0; i < num_tags; i++)
361 			match_tags[i] = reach_next[stateid].tags[i];
362 		      for (i = 0; i < TRE_M_LAST; i++)
363 			match_costs[i] = 0;
364 		    }
365 		}
366 	    }
367 	    DPRINT(("\n"));
368 	}
369 
370 
371       /* Handle inserts.  This is done by pretending there's an epsilon
372 	 transition from each state in `reach' back to the same state.
373 	 We don't need to worry about the final state here; this will never
374 	 give a better match than what we already have. */
375       for (id = 0; id < tnfa->num_states; id++)
376 	{
377 	  int depth;
378 	  int cost, cost0;
379 
380 	  if (reach[id].pos != prev_pos)
381 	    {
382 	      DPRINT(("	 insert: %d not reached\n", id));
383 	      continue;	 /* Not reached. */
384 	    }
385 
386 	  depth = reach[id].depth;
387 
388 	  /* Compute and check cost at current depth. */
389 	  cost = reach[id].costs[depth][TRE_M_COST];
390 	  if (reach[id].params.cost_ins != TRE_PARAM_UNSET)
391 	    cost += reach[id].params.cost_ins;
392 	  if (cost > reach[id].params.max_cost)
393 	    continue;  /* Cost too large. */
394 
395 	  /* Check number of inserts at current depth. */
396 	  if (reach[id].costs[depth][TRE_M_NUM_INS] + 1
397 	      > reach[id].params.max_ins)
398 	    continue;  /* Too many inserts. */
399 
400 	  /* Check total number of errors at current depth. */
401 	  if (reach[id].costs[depth][TRE_M_NUM_ERR] + 1
402 	      > reach[id].params.max_err)
403 	    continue;  /* Too many errors. */
404 
405 	  /* Compute overall cost. */
406 	  cost0 = cost;
407 	  if (depth > 0)
408 	    {
409 	      cost0 = reach[id].costs[0][TRE_M_COST];
410 	      if (reach[id].params.cost_ins != TRE_PARAM_UNSET)
411 		cost0 += reach[id].params.cost_ins;
412 	      else
413 		cost0 += default_params.cost_ins;
414 	    }
415 
416 	  DPRINT(("  insert: from %d to %d, cost %d: ", id, id,
417 		  reach[id].costs[depth][TRE_M_COST]));
418 	  if (reach_next[id].pos == pos
419 	      && (cost0 >= reach_next[id].costs[0][TRE_M_COST]))
420 	    {
421 	      DPRINT(("lose\n"));
422 	      continue;
423 	    }
424 	  DPRINT(("win\n"));
425 
426 	  /* Copy state, position, tags, parameters, and depth. */
427 	  reach_next[id].state = reach[id].state;
428 	  reach_next[id].pos = pos;
429 	  for (i = 0; i < num_tags; i++)
430 	    reach_next[id].tags[i] = reach[id].tags[i];
431 	  reach_next[id].params = reach[id].params;
432 	  reach_next[id].depth = reach[id].depth;
433 
434 	  /* Set the costs after this transition. */
435 	  memcpy(reach_next[id].costs, reach[id].costs,
436 		 sizeof(reach_next[id].costs[0][0])
437 		 * TRE_M_LAST * (depth + 1));
438 	  reach_next[id].costs[depth][TRE_M_COST] = cost;
439 	  reach_next[id].costs[depth][TRE_M_NUM_INS]++;
440 	  reach_next[id].costs[depth][TRE_M_NUM_ERR]++;
441 	  if (depth > 0)
442 	    {
443 	      reach_next[id].costs[0][TRE_M_COST] = cost0;
444 	      reach_next[id].costs[0][TRE_M_NUM_INS]++;
445 	      reach_next[id].costs[0][TRE_M_NUM_ERR]++;
446 	    }
447 
448 	}
449 
450 
451       /* Handle deletes.  This is done by traversing through the whole TNFA
452 	 pretending that all transitions are epsilon transitions, until
453 	 no more states can be reached with better costs. */
454       {
455 	/* XXX - dynamic ringbuffer size */
456 	tre_tnfa_approx_reach_t *ringbuffer[512];
457 	tre_tnfa_approx_reach_t **deque_start, **deque_end;
458 
459 	deque_start = deque_end = ringbuffer;
460 
461 	/* Add all states in `reach_next' to the deque. */
462 	for (id = 0; id < tnfa->num_states; id++)
463 	  {
464 	    if (reach_next[id].pos != pos)
465 	      continue;
466 	    *deque_end = &reach_next[id];
467 	    deque_end++;
468 	    assert(deque_end != deque_start);
469 	  }
470 
471 	/* Repeat until the deque is empty. */
472 	while (deque_end != deque_start)
473 	  {
474 	    tre_tnfa_approx_reach_t *reach_p;
475 	    int depth;
476 	    int cost, cost0;
477 	    tre_tnfa_transition_t *trans;
478 
479 	    /* Pop the first item off the deque. */
480 	    reach_p = *deque_start;
481 	    id = reach_p - reach_next;
482 	    depth = reach_p->depth;
483 
484 	    /* Compute cost at current depth. */
485 	    cost = reach_p->costs[depth][TRE_M_COST];
486 	    if (reach_p->params.cost_del != TRE_PARAM_UNSET)
487 	      cost += reach_p->params.cost_del;
488 
489 	    /* Check cost, number of deletes, and total number of errors
490 	       at current depth. */
491 	    if (cost > reach_p->params.max_cost
492 		|| (reach_p->costs[depth][TRE_M_NUM_DEL] + 1
493 		    > reach_p->params.max_del)
494 		|| (reach_p->costs[depth][TRE_M_NUM_ERR] + 1
495 		    > reach_p->params.max_err))
496 	      {
497 		/* Too many errors or cost too large. */
498 		DPRINT(("  delete: from %03d: cost too large\n", id));
499 		deque_start++;
500 		if (deque_start >= (ringbuffer + 512))
501 		  deque_start = ringbuffer;
502 		continue;
503 	      }
504 
505 	    /* Compute overall cost. */
506 	    cost0 = cost;
507 	    if (depth > 0)
508 	      {
509 		cost0 = reach_p->costs[0][TRE_M_COST];
510 		if (reach_p->params.cost_del != TRE_PARAM_UNSET)
511 		  cost0 += reach_p->params.cost_del;
512 		else
513 		  cost0 += default_params.cost_del;
514 	      }
515 
516 	    for (trans = reach_p->state; trans->state; trans++)
517 	      {
518 		int dest_id = trans->state_id;
519 		DPRINT(("  delete: from %03d to %03d, cost %d (%d): ",
520 			id, dest_id, cost0, reach_p->params.max_cost));
521 
522 		if (trans->assertions && CHECK_ASSERTIONS(trans->assertions))
523 		  {
524 		    DPRINT(("assertion failed\n"));
525 		    continue;
526 		  }
527 
528 		/* Compute tag values after this transition. */
529 		for (i = 0; i < num_tags; i++)
530 		  tmp_tags[i] = reach_p->tags[i];
531 		if (trans->tags)
532 		  for (i = 0; trans->tags[i] >= 0; i++)
533 		    if ((size_t)trans->tags[i] < num_tags)
534 		      tmp_tags[trans->tags[i]] = pos;
535 
536 		/* If another path has also reached this state, choose the one
537 		   with the smallest cost or best tags if costs are equal. */
538 		if (reach_next[dest_id].pos == pos
539 		    && (cost0 > reach_next[dest_id].costs[0][TRE_M_COST]
540 			|| (cost0 == reach_next[dest_id].costs[0][TRE_M_COST]
541 			    && (!match_tags
542 				|| !tre_tag_order(num_tags,
543 						  tnfa->tag_directions,
544 						  tmp_tags,
545 						  reach_next[dest_id].tags)))))
546 		  {
547 		    DPRINT(("lose, cost0 %d, have %d\n",
548 			    cost0, reach_next[dest_id].costs[0][TRE_M_COST]));
549 		    continue;
550 		  }
551 		DPRINT(("win\n"));
552 
553 		/* Set state, position, tags, parameters, depth, and costs. */
554 		reach_next[dest_id].state = trans->state;
555 		reach_next[dest_id].pos = pos;
556 		for (i = 0; i < num_tags; i++)
557 		  reach_next[dest_id].tags[i] = tmp_tags[i];
558 
559 		reach_next[dest_id].params = reach_p->params;
560 		if (trans->params)
561 		  tre_set_params(&reach_next[dest_id], trans->params,
562 				 default_params);
563 
564 		reach_next[dest_id].depth = reach_p->depth;
565 		memcpy(&reach_next[dest_id].costs,
566 		       reach_p->costs,
567 		       sizeof(reach_p->costs[0][0])
568 		       * TRE_M_LAST * (depth + 1));
569 		reach_next[dest_id].costs[depth][TRE_M_COST] = cost;
570 		reach_next[dest_id].costs[depth][TRE_M_NUM_DEL]++;
571 		reach_next[dest_id].costs[depth][TRE_M_NUM_ERR]++;
572 		if (depth > 0)
573 		  {
574 		    reach_next[dest_id].costs[0][TRE_M_COST] = cost0;
575 		    reach_next[dest_id].costs[0][TRE_M_NUM_DEL]++;
576 		    reach_next[dest_id].costs[0][TRE_M_NUM_ERR]++;
577 		  }
578 
579 		if (trans->state == tnfa->final
580 		    && (match_eo < 0
581 			|| match_costs[TRE_M_COST] > cost0
582 			|| (match_costs[TRE_M_COST] == cost0
583 			    && (num_tags > 0
584 				&& tmp_tags[0] <= match_tags[0]))))
585 		  {
586 		    DPRINT(("	 setting new match at %d, cost %d\n",
587 			    pos, cost0));
588 		    match_eo = pos;
589 		    memcpy(match_costs, reach_next[dest_id].costs[0],
590 			   sizeof(match_costs[0]) * TRE_M_LAST);
591 		    for (i = 0; i < num_tags; i++)
592 		      match_tags[i] = tmp_tags[i];
593 		  }
594 
595 		/* Add to the end of the deque. */
596 		*deque_end = &reach_next[dest_id];
597 		deque_end++;
598 		if (deque_end >= (ringbuffer + 512))
599 		  deque_end = ringbuffer;
600 		assert(deque_end != deque_start);
601 	      }
602 	    deque_start++;
603 	    if (deque_start >= (ringbuffer + 512))
604 	      deque_start = ringbuffer;
605 	  }
606 
607       }
608 
609 #ifdef TRE_DEBUG
610       tre_print_reach(tnfa, reach_next, pos, num_tags);
611 #endif /* TRE_DEBUG */
612 
613       /* Check for end of string. */
614       if (len < 0)
615 	{
616 	  if (type == STR_USER)
617 	    {
618 	      if (str_user_end)
619 		break;
620 	    }
621 	  else if (next_c == L'\0')
622 	    break;
623 	}
624       else
625 	{
626 	  if (pos >= len)
627 	    break;
628 	}
629 
630       prev_pos = pos;
631       GET_NEXT_WCHAR();
632 
633       /* Swap `reach' and `reach_next'. */
634       {
635 	tre_tnfa_approx_reach_t *tmp;
636 	tmp = reach;
637 	reach = reach_next;
638 	reach_next = tmp;
639       }
640 
641       /* Handle exact matches and substitutions. */
642       for (id = 0; id < tnfa->num_states; id++)
643 	{
644 	  tre_tnfa_transition_t *trans;
645 
646 	  if (reach[id].pos < prev_pos)
647 	    continue;  /* Not reached. */
648 	  for (trans = reach[id].state; trans->state; trans++)
649 	    {
650 	      int dest_id;
651 	      int depth;
652 	      int cost, cost0, err;
653 
654 	      if (trans->assertions
655 		  && (CHECK_ASSERTIONS(trans->assertions)
656 		      || CHECK_CHAR_CLASSES(trans, tnfa, eflags)))
657 		{
658 		  DPRINT(("  exact,  from %d: assert failed\n", id));
659 		  continue;
660 		}
661 
662 	      depth = reach[id].depth;
663 	      dest_id = trans->state_id;
664 
665 	      cost = reach[id].costs[depth][TRE_M_COST];
666 	      cost0 = reach[id].costs[0][TRE_M_COST];
667 	      err = 0;
668 
669 	      if (trans->code_min > (tre_cint_t)prev_c
670 		  || trans->code_max < (tre_cint_t)prev_c)
671 		{
672 		  /* Handle substitutes.  The required character was not in
673 		     the string, so match it in place of whatever was supposed
674 		     to be there and increase costs accordingly. */
675 		  err = 1;
676 
677 		  /* Compute and check cost at current depth. */
678 		  cost = reach[id].costs[depth][TRE_M_COST];
679 		  if (reach[id].params.cost_subst != TRE_PARAM_UNSET)
680 		    cost += reach[id].params.cost_subst;
681 		  if (cost > reach[id].params.max_cost)
682 		    continue; /* Cost too large. */
683 
684 		  /* Check number of substitutes at current depth. */
685 		  if (reach[id].costs[depth][TRE_M_NUM_SUBST] + 1
686 		      > reach[id].params.max_subst)
687 		    continue; /* Too many substitutes. */
688 
689 		  /* Check total number of errors at current depth. */
690 		  if (reach[id].costs[depth][TRE_M_NUM_ERR] + 1
691 		      > reach[id].params.max_err)
692 		    continue; /* Too many errors. */
693 
694 		  /* Compute overall cost. */
695 		  cost0 = cost;
696 		  if (depth > 0)
697 		    {
698 		      cost0 = reach[id].costs[0][TRE_M_COST];
699 		      if (reach[id].params.cost_subst != TRE_PARAM_UNSET)
700 			cost0 += reach[id].params.cost_subst;
701 		      else
702 			cost0 += default_params.cost_subst;
703 		    }
704 		  DPRINT(("  subst,  from %03d to %03d, cost %d: ",
705 			  id, dest_id, cost0));
706 		}
707 	      else
708 		DPRINT(("  exact,  from %03d to %03d, cost %d: ",
709 			id, dest_id, cost0));
710 
711 	      /* Compute tag values after this transition. */
712 	      for (i = 0; i < num_tags; i++)
713 		tmp_tags[i] = reach[id].tags[i];
714 	      if (trans->tags)
715 		for (i = 0; trans->tags[i] >= 0; i++)
716 		  if ((size_t)trans->tags[i] < num_tags)
717 		    tmp_tags[trans->tags[i]] = pos;
718 
719 	      /* If another path has also reached this state, choose the
720 		 one with the smallest cost or best tags if costs are equal. */
721 	      if (reach_next[dest_id].pos == pos
722 		  && (cost0 > reach_next[dest_id].costs[0][TRE_M_COST]
723 		      || (cost0 == reach_next[dest_id].costs[0][TRE_M_COST]
724 			  && !tre_tag_order(num_tags, tnfa->tag_directions,
725 					    tmp_tags,
726 					    reach_next[dest_id].tags))))
727 		{
728 		  DPRINT(("lose\n"));
729 		  continue;
730 		}
731 	      DPRINT(("win %d %d\n",
732 		      reach_next[dest_id].pos,
733 		      reach_next[dest_id].costs[0][TRE_M_COST]));
734 
735 	      /* Set state, position, tags, and depth. */
736 	      reach_next[dest_id].state = trans->state;
737 	      reach_next[dest_id].pos = pos;
738 	      for (i = 0; i < num_tags; i++)
739 		reach_next[dest_id].tags[i] = tmp_tags[i];
740 	      reach_next[dest_id].depth = reach[id].depth;
741 
742 	      /* Set parameters. */
743 	      reach_next[dest_id].params = reach[id].params;
744 	      if (trans->params)
745 		tre_set_params(&reach_next[dest_id], trans->params,
746 			       default_params);
747 
748 	      /* Set the costs after this transition. */
749 		memcpy(&reach_next[dest_id].costs,
750 		       reach[id].costs,
751 		       sizeof(reach[id].costs[0][0])
752 		       * TRE_M_LAST * (depth + 1));
753 	      reach_next[dest_id].costs[depth][TRE_M_COST] = cost;
754 	      reach_next[dest_id].costs[depth][TRE_M_NUM_SUBST] += err;
755 	      reach_next[dest_id].costs[depth][TRE_M_NUM_ERR] += err;
756 	      if (depth > 0)
757 		{
758 		  reach_next[dest_id].costs[0][TRE_M_COST] = cost0;
759 		  reach_next[dest_id].costs[0][TRE_M_NUM_SUBST] += err;
760 		  reach_next[dest_id].costs[0][TRE_M_NUM_ERR] += err;
761 		}
762 
763 	      if (trans->state == tnfa->final
764 		  && (match_eo < 0
765 		      || cost0 < match_costs[TRE_M_COST]
766 		      || (cost0 == match_costs[TRE_M_COST]
767 			  && num_tags > 0 && tmp_tags[0] <= match_tags[0])))
768 		{
769 		  DPRINT(("    setting new match at %d, cost %d\n",
770 			  pos, cost0));
771 		  match_eo = pos;
772 		  for (i = 0; i < TRE_M_LAST; i++)
773 		    match_costs[i] = reach_next[dest_id].costs[0][i];
774 		  for (i = 0; i < num_tags; i++)
775 		    match_tags[i] = tmp_tags[i];
776 		}
777 	    }
778 	}
779     }
780 
781   DPRINT(("match end offset = %d, match cost = %d\n", match_eo,
782 	  match_costs[TRE_M_COST]));
783 
784 #ifndef TRE_USE_ALLOCA
785   if (buf)
786     xfree(buf);
787 #endif /* !TRE_USE_ALLOCA */
788 
789   match->cost = match_costs[TRE_M_COST];
790   match->num_ins = match_costs[TRE_M_NUM_INS];
791   match->num_del = match_costs[TRE_M_NUM_DEL];
792   match->num_subst = match_costs[TRE_M_NUM_SUBST];
793   *match_end_ofs = match_eo;
794 
795   return match_eo >= 0 ? REG_OK : REG_NOMATCH;
796 }
797