xref: /netbsd-src/external/bsd/tre/dist/lib/tre-match-approx.c (revision 7cb13d6054cb3cdc330e3f0f9b7035db7b54013b)
1 /*
2   tre-match-approx.c - TRE approximate regex matching engine
3 
4   This software is released under a BSD-style license.
5   See the file LICENSE for details and copyright.
6 
7 */
8 #ifdef HAVE_CONFIG_H
9 #include <config.h>
10 #endif /* HAVE_CONFIG_H */
11 
12 #include "tre-alloca.h"
13 
14 #define __USE_STRING_INLINES
15 #undef __NO_INLINE__
16 
17 #include <assert.h>
18 #include <stdlib.h>
19 #include <string.h>
20 #include <limits.h>
21 #ifdef HAVE_WCHAR_H
22 #include <wchar.h>
23 #endif /* HAVE_WCHAR_H */
24 #ifdef HAVE_WCTYPE_H
25 #include <wctype.h>
26 #endif /* HAVE_WCTYPE_H */
27 #ifndef TRE_WCHAR
28 #include <ctype.h>
29 #endif /* !TRE_WCHAR */
30 #ifdef HAVE_MALLOC_H
31 #include <malloc.h>
32 #endif /* HAVE_MALLOC_H */
33 #include <stdint.h>
34 
35 #include "tre-internal.h"
36 #include "tre-match-utils.h"
37 #include "tre.h"
38 #include "xmalloc.h"
39 
40 #define TRE_M_COST	0
41 #define TRE_M_NUM_INS	1
42 #define TRE_M_NUM_DEL	2
43 #define TRE_M_NUM_SUBST 3
44 #define TRE_M_NUM_ERR	4
45 #define TRE_M_LAST	5
46 
47 #define TRE_M_MAX_DEPTH 3
48 
49 typedef struct {
50   /* State in the TNFA transition table. */
51   tre_tnfa_transition_t *state;
52   /* Position in input string. */
53   int pos;
54   /* Tag values. */
55   int *tags;
56   /* Matching parameters. */
57   regaparams_t params;
58   /* Nesting depth of parameters.  This is used as an index in
59      the `costs' array. */
60   int depth;
61   /* Costs and counter values for different parameter nesting depths. */
62   int costs[TRE_M_MAX_DEPTH + 1][TRE_M_LAST];
63 } tre_tnfa_approx_reach_t;
64 
65 
66 #ifdef TRE_DEBUG
67 /* Prints the `reach' array in a readable fashion with DPRINT. */
68 static void
tre_print_reach(const tre_tnfa_t * tnfa,tre_tnfa_approx_reach_t * reach,int pos,size_t num_tags)69 tre_print_reach(const tre_tnfa_t *tnfa, tre_tnfa_approx_reach_t *reach,
70 		int pos, size_t num_tags)
71 {
72   int id;
73 
74   /* Print each state on one line. */
75   DPRINT(("  reach:\n"));
76   for (id = 0; id < tnfa->num_states; id++)
77     {
78       int i, j;
79       if (reach[id].pos < pos)
80 	continue;  /* Not reached. */
81       DPRINT(("	 %03d, costs ", id));
82       for (i = 0; i <= reach[id].depth; i++)
83 	{
84 	  DPRINT(("["));
85 	  for (j = 0; j < TRE_M_LAST; j++)
86 	    {
87 	      DPRINT(("%2d", reach[id].costs[i][j]));
88 	      if (j + 1 < TRE_M_LAST)
89 		DPRINT((","));
90 	    }
91 	  DPRINT(("]"));
92 	  if (i + 1 <= reach[id].depth)
93 	    DPRINT((", "));
94 	}
95       DPRINT(("\n	tags "));
96       for (i = 0; i < num_tags; i++)
97 	{
98 	  DPRINT(("%02d", reach[id].tags[i]));
99 	  if (i + 1 < num_tags)
100 	    DPRINT((","));
101 	}
102       DPRINT(("\n"));
103     }
104   DPRINT(("\n"));
105 }
106 #endif /* TRE_DEBUG */
107 
108 
109 /* Sets the matching parameters in `reach' to the ones defined in the `pa'
110    array.  If `pa' specifies default values, they are taken from
111    `default_params'. */
112 inline static void
tre_set_params(tre_tnfa_approx_reach_t * reach,int * pa,regaparams_t default_params)113 tre_set_params(tre_tnfa_approx_reach_t *reach,
114 	       int *pa, regaparams_t default_params)
115 {
116   int value;
117 
118   /* If depth is increased reset costs and counters to zero for the
119      new levels. */
120   value = pa[TRE_PARAM_DEPTH];
121   assert(value <= TRE_M_MAX_DEPTH);
122   if (value > reach->depth)
123     {
124       int i, j;
125       for (i = reach->depth + 1; i <= value; i++)
126 	for (j = 0; j < TRE_M_LAST; j++)
127 	  reach->costs[i][j] = 0;
128     }
129   reach->depth = value;
130 
131   /* Set insert cost. */
132   value = pa[TRE_PARAM_COST_INS];
133   if (value == TRE_PARAM_DEFAULT)
134     reach->params.cost_ins = default_params.cost_ins;
135   else if (value != TRE_PARAM_UNSET)
136     reach->params.cost_ins = value;
137 
138   /* Set delete cost. */
139   value = pa[TRE_PARAM_COST_DEL];
140   if (value == TRE_PARAM_DEFAULT)
141     reach->params.cost_del = default_params.cost_del;
142   else if (value != TRE_PARAM_UNSET)
143     reach->params.cost_del = value;
144 
145   /* Set substitute cost. */
146   value = pa[TRE_PARAM_COST_SUBST];
147   if (value == TRE_PARAM_DEFAULT)
148     reach->params.cost_subst = default_params.cost_subst;
149   else
150     reach->params.cost_subst = value;
151 
152   /* Set maximum cost. */
153   value = pa[TRE_PARAM_COST_MAX];
154   if (value == TRE_PARAM_DEFAULT)
155     reach->params.max_cost = default_params.max_cost;
156   else if (value != TRE_PARAM_UNSET)
157     reach->params.max_cost = value;
158 
159   /* Set maximum inserts. */
160   value = pa[TRE_PARAM_MAX_INS];
161   if (value == TRE_PARAM_DEFAULT)
162     reach->params.max_ins = default_params.max_ins;
163   else if (value != TRE_PARAM_UNSET)
164     reach->params.max_ins = value;
165 
166   /* Set maximum deletes. */
167   value = pa[TRE_PARAM_MAX_DEL];
168   if (value == TRE_PARAM_DEFAULT)
169     reach->params.max_del = default_params.max_del;
170   else if (value != TRE_PARAM_UNSET)
171     reach->params.max_del = value;
172 
173   /* Set maximum substitutes. */
174   value = pa[TRE_PARAM_MAX_SUBST];
175   if (value == TRE_PARAM_DEFAULT)
176     reach->params.max_subst = default_params.max_subst;
177   else if (value != TRE_PARAM_UNSET)
178     reach->params.max_subst = value;
179 
180   /* Set maximum number of errors. */
181   value = pa[TRE_PARAM_MAX_ERR];
182   if (value == TRE_PARAM_DEFAULT)
183     reach->params.max_err = default_params.max_err;
184   else if (value != TRE_PARAM_UNSET)
185     reach->params.max_err = value;
186 }
187 
188 reg_errcode_t
tre_tnfa_run_approx(const tre_tnfa_t * tnfa,const void * string,int len,tre_str_type_t type,int * match_tags,regamatch_t * match,regaparams_t default_params,int eflags,int * match_end_ofs)189 tre_tnfa_run_approx(const tre_tnfa_t *tnfa, const void *string, int len,
190 		    tre_str_type_t type, int *match_tags,
191 		    regamatch_t *match, regaparams_t default_params,
192 		    int eflags, int *match_end_ofs)
193 {
194   /* State variables required by GET_NEXT_WCHAR. */
195   tre_char_t prev_c = 0, next_c = 0;
196   const char *str_byte = string;
197   int pos = -1;
198   unsigned int pos_add_next = 1;
199 #ifdef TRE_WCHAR
200   const wchar_t *str_wide = string;
201 #ifdef TRE_MBSTATE
202   mbstate_t mbstate;
203 #endif /* !TRE_WCHAR */
204 #endif /* TRE_WCHAR */
205   int reg_notbol = eflags & REG_NOTBOL;
206   int reg_noteol = eflags & REG_NOTEOL;
207   int reg_newline = tnfa->cflags & REG_NEWLINE;
208   size_t str_user_end = 0;
209 
210   int prev_pos;
211 
212   /* Number of tags. */
213   size_t num_tags;
214   /* The reach tables. */
215   tre_tnfa_approx_reach_t *reach, *reach_next;
216   /* Tag array for temporary use. */
217   int *tmp_tags;
218 
219   /* End offset of best match so far, or -1 if no match found yet. */
220   int match_eo = -1;
221   /* Costs of the match. */
222   int match_costs[TRE_M_LAST];
223 
224   /* Space for temporary data required for matching. */
225   unsigned char *buf;
226 
227   size_t i, id;
228 
229   reg_errcode_t ret;
230 
231   if (!match_tags)
232     num_tags = 0;
233   else
234     num_tags = tnfa->num_tags;
235 
236 #ifdef TRE_MBSTATE
237   memset(&mbstate, '\0', sizeof(mbstate));
238 #endif /* TRE_MBSTATE */
239 
240   DPRINT(("tre_tnfa_run_approx, input type %d, len %d, eflags %d, "
241 	  "match_tags %p\n",
242 	  type, len, eflags,
243 	  match_tags));
244   DPRINT(("max cost %d, ins %d, del %d, subst %d\n",
245 	  default_params.max_cost,
246 	  default_params.cost_ins,
247 	  default_params.cost_del,
248 	  default_params.cost_subst));
249 
250   /* Allocate memory for temporary data required for matching.	This needs to
251      be done for every matching operation to be thread safe.  This allocates
252      everything in a single large block from the stack frame using alloca()
253      or with malloc() if alloca is unavailable. */
254   {
255     unsigned char *buf_cursor;
256 
257     /* Ensure that tag_bytes*num_states cannot overflow, and that it don't
258      * contribute more than 1/8 of SIZE_MAX to total_bytes. */
259     if (num_tags > SIZE_MAX/(8 * sizeof(*tmp_tags) * tnfa->num_states))
260       return REG_ESPACE;
261 
262     /* Likewise check reach_bytes. */
263     if (tnfa->num_states > SIZE_MAX/(8 * sizeof(*reach_next)))
264       return REG_ESPACE;
265 
266     /* Space needed for one array of tags. */
267     size_t tag_bytes = sizeof(*tmp_tags) * num_tags;
268     /* Space needed for one reach table. */
269     size_t reach_bytes = sizeof(*reach_next) * tnfa->num_states;
270     /* Total space needed. */
271     size_t total_bytes = reach_bytes * 2 + (tnfa->num_states * 2 + 1 ) * tag_bytes;
272     /* Add some extra to make sure we can align the pointers.  The multiplier
273        used here must be equal to the number of ALIGN calls below. */
274     total_bytes += (sizeof(long) - 1) * 3;
275 
276     /* Allocate the memory. */
277 #ifdef TRE_USE_ALLOCA
278     buf = alloca(total_bytes);
279 #else /* !TRE_USE_ALLOCA */
280     buf = xmalloc((unsigned)total_bytes);
281 #endif /* !TRE_USE_ALLOCA */
282     if (!buf)
283       return REG_ESPACE;
284     memset(buf, 0, (size_t)total_bytes);
285 
286     /* Allocate `tmp_tags' from `buf'. */
287     tmp_tags = (void *)buf;
288     buf_cursor = buf + tag_bytes;
289     buf_cursor += ALIGN(buf_cursor, long);
290 
291     /* Allocate `reach' from `buf'. */
292     reach = (void *)buf_cursor;
293     buf_cursor += reach_bytes;
294     buf_cursor += ALIGN(buf_cursor, long);
295 
296     /* Allocate `reach_next' from `buf'. */
297     reach_next = (void *)buf_cursor;
298     buf_cursor += reach_bytes;
299     buf_cursor += ALIGN(buf_cursor, long);
300 
301     /* Allocate tag arrays for `reach' and `reach_next' from `buf'. */
302     for (i = 0; i < tnfa->num_states; i++)
303       {
304 	reach[i].tags = (void *)buf_cursor;
305 	buf_cursor += tag_bytes;
306 	reach_next[i].tags = (void *)buf_cursor;
307 	buf_cursor += tag_bytes;
308       }
309     assert(buf_cursor <= buf + total_bytes);
310   }
311 
312   for (i = 0; i < TRE_M_LAST; i++)
313     match_costs[i] = INT_MAX;
314 
315   /* Mark the reach arrays empty. */
316   for (i = 0; i < tnfa->num_states; i++)
317     reach[i].pos = reach_next[i].pos = -2;
318 
319   prev_pos = pos;
320   GET_NEXT_WCHAR();
321   pos = 0;
322 
323   while (/*CONSTCOND*/(void)1,1)
324     {
325       DPRINT(("%03d:%2lc/%05d\n", pos, (tre_cint_t)next_c, (int)next_c));
326 
327       /* Add initial states to `reach_next' if an exact match has not yet
328 	 been found. */
329       if (match_costs[TRE_M_COST] > 0)
330 	{
331 	  tre_tnfa_transition_t *trans;
332 	  DPRINT(("  init"));
333 	  for (trans = tnfa->initial; trans->state; trans++)
334 	    {
335 	      int stateid = trans->state_id;
336 
337 	      /* If this state is not currently in `reach_next', add it
338 		 there. */
339 	      if (reach_next[stateid].pos < pos)
340 		{
341 		  if (trans->assertions && CHECK_ASSERTIONS(trans->assertions))
342 		    {
343 		      /* Assertions failed, don't add this state. */
344 		      DPRINT((" !%d (assert)", stateid));
345 		      continue;
346 		    }
347 		  DPRINT((" %d", stateid));
348 		  reach_next[stateid].state = trans->state;
349 		  reach_next[stateid].pos = pos;
350 
351 		  /* Compute tag values after this transition. */
352 		  for (i = 0; i < num_tags; i++)
353 		    reach_next[stateid].tags[i] = -1;
354 
355 		  if (trans->tags)
356 		    for (i = 0; trans->tags[i] >= 0; i++)
357 		      if ((size_t)trans->tags[i] < num_tags)
358 			reach_next[stateid].tags[trans->tags[i]] = pos;
359 
360 		  /* Set the parameters, depth, and costs. */
361 		  reach_next[stateid].params = default_params;
362 		  reach_next[stateid].depth = 0;
363 		  for (i = 0; i < TRE_M_LAST; i++)
364 		    reach_next[stateid].costs[0][i] = 0;
365 		  if (trans->params)
366 		    tre_set_params(&reach_next[stateid], trans->params,
367 				   default_params);
368 
369 		  /* If this is the final state, mark the exact match. */
370 		  if (trans->state == tnfa->final)
371 		    {
372 		      match_eo = pos;
373 		      for (i = 0; i < num_tags; i++)
374 			match_tags[i] = reach_next[stateid].tags[i];
375 		      for (i = 0; i < TRE_M_LAST; i++)
376 			match_costs[i] = 0;
377 		    }
378 		}
379 	    }
380 	    DPRINT(("\n"));
381 	}
382 
383 
384       /* Handle inserts.  This is done by pretending there's an epsilon
385 	 transition from each state in `reach' back to the same state.
386 	 We don't need to worry about the final state here; this will never
387 	 give a better match than what we already have. */
388       for (id = 0; id < tnfa->num_states; id++)
389 	{
390 	  int depth;
391 	  int cost, cost0;
392 
393 	  if (reach[id].pos != prev_pos)
394 	    {
395 	      DPRINT(("	 insert: %d not reached\n", id));
396 	      continue;	 /* Not reached. */
397 	    }
398 
399 	  depth = reach[id].depth;
400 
401 	  /* Compute and check cost at current depth. */
402 	  cost = reach[id].costs[depth][TRE_M_COST];
403 	  if (reach[id].params.cost_ins != TRE_PARAM_UNSET)
404 	    cost += reach[id].params.cost_ins;
405 	  if (cost > reach[id].params.max_cost)
406 	    continue;  /* Cost too large. */
407 
408 	  /* Check number of inserts at current depth. */
409 	  if (reach[id].costs[depth][TRE_M_NUM_INS] + 1
410 	      > reach[id].params.max_ins)
411 	    continue;  /* Too many inserts. */
412 
413 	  /* Check total number of errors at current depth. */
414 	  if (reach[id].costs[depth][TRE_M_NUM_ERR] + 1
415 	      > reach[id].params.max_err)
416 	    continue;  /* Too many errors. */
417 
418 	  /* Compute overall cost. */
419 	  cost0 = cost;
420 	  if (depth > 0)
421 	    {
422 	      cost0 = reach[id].costs[0][TRE_M_COST];
423 	      if (reach[id].params.cost_ins != TRE_PARAM_UNSET)
424 		cost0 += reach[id].params.cost_ins;
425 	      else
426 		cost0 += default_params.cost_ins;
427 	    }
428 
429 	  DPRINT(("  insert: from %d to %d, cost %d: ", id, id,
430 		  reach[id].costs[depth][TRE_M_COST]));
431 	  if (reach_next[id].pos == pos
432 	      && (cost0 >= reach_next[id].costs[0][TRE_M_COST]))
433 	    {
434 	      DPRINT(("lose\n"));
435 	      continue;
436 	    }
437 	  DPRINT(("win\n"));
438 
439 	  /* Copy state, position, tags, parameters, and depth. */
440 	  reach_next[id].state = reach[id].state;
441 	  reach_next[id].pos = pos;
442 	  for (i = 0; i < num_tags; i++)
443 	    reach_next[id].tags[i] = reach[id].tags[i];
444 	  reach_next[id].params = reach[id].params;
445 	  reach_next[id].depth = reach[id].depth;
446 
447 	  /* Set the costs after this transition. */
448 	  memcpy(reach_next[id].costs, reach[id].costs,
449 		 sizeof(reach_next[id].costs[0][0])
450 		 * TRE_M_LAST * (depth + 1));
451 	  reach_next[id].costs[depth][TRE_M_COST] = cost;
452 	  reach_next[id].costs[depth][TRE_M_NUM_INS]++;
453 	  reach_next[id].costs[depth][TRE_M_NUM_ERR]++;
454 	  if (depth > 0)
455 	    {
456 	      reach_next[id].costs[0][TRE_M_COST] = cost0;
457 	      reach_next[id].costs[0][TRE_M_NUM_INS]++;
458 	      reach_next[id].costs[0][TRE_M_NUM_ERR]++;
459 	    }
460 
461 	}
462 
463 
464       /* Handle deletes.  This is done by traversing through the whole TNFA
465 	 pretending that all transitions are epsilon transitions, until
466 	 no more states can be reached with better costs. */
467       {
468 	/* XXX - dynamic ringbuffer size */
469 	tre_tnfa_approx_reach_t *ringbuffer[512];
470 	tre_tnfa_approx_reach_t **deque_start, **deque_end;
471 
472 	deque_start = deque_end = ringbuffer;
473 
474 	/* Add all states in `reach_next' to the deque. */
475 	for (id = 0; id < tnfa->num_states; id++)
476 	  {
477 	    if (reach_next[id].pos != pos)
478 	      continue;
479 	    *deque_end = &reach_next[id];
480 	    deque_end++;
481 	    assert(deque_end != deque_start);
482 	  }
483 
484 	/* Repeat until the deque is empty. */
485 	while (deque_end != deque_start)
486 	  {
487 	    tre_tnfa_approx_reach_t *reach_p;
488 	    int depth;
489 	    int cost, cost0;
490 	    tre_tnfa_transition_t *trans;
491 
492 	    /* Pop the first item off the deque. */
493 	    reach_p = *deque_start;
494 	    id = reach_p - reach_next;
495 	    depth = reach_p->depth;
496 
497 	    /* Compute cost at current depth. */
498 	    cost = reach_p->costs[depth][TRE_M_COST];
499 	    if (reach_p->params.cost_del != TRE_PARAM_UNSET)
500 	      cost += reach_p->params.cost_del;
501 
502 	    /* Check cost, number of deletes, and total number of errors
503 	       at current depth. */
504 	    if (cost > reach_p->params.max_cost
505 		|| (reach_p->costs[depth][TRE_M_NUM_DEL] + 1
506 		    > reach_p->params.max_del)
507 		|| (reach_p->costs[depth][TRE_M_NUM_ERR] + 1
508 		    > reach_p->params.max_err))
509 	      {
510 		/* Too many errors or cost too large. */
511 		DPRINT(("  delete: from %03d: cost too large\n", id));
512 		deque_start++;
513 		if (deque_start >= (ringbuffer + 512))
514 		  deque_start = ringbuffer;
515 		continue;
516 	      }
517 
518 	    /* Compute overall cost. */
519 	    cost0 = cost;
520 	    if (depth > 0)
521 	      {
522 		cost0 = reach_p->costs[0][TRE_M_COST];
523 		if (reach_p->params.cost_del != TRE_PARAM_UNSET)
524 		  cost0 += reach_p->params.cost_del;
525 		else
526 		  cost0 += default_params.cost_del;
527 	      }
528 
529 	    for (trans = reach_p->state; trans->state; trans++)
530 	      {
531 		int dest_id = trans->state_id;
532 		DPRINT(("  delete: from %03d to %03d, cost %d (%d): ",
533 			id, dest_id, cost0, reach_p->params.max_cost));
534 
535 		if (trans->assertions && CHECK_ASSERTIONS(trans->assertions))
536 		  {
537 		    DPRINT(("assertion failed\n"));
538 		    continue;
539 		  }
540 
541 		/* Compute tag values after this transition. */
542 		for (i = 0; i < num_tags; i++)
543 		  tmp_tags[i] = reach_p->tags[i];
544 		if (trans->tags)
545 		  for (i = 0; trans->tags[i] >= 0; i++)
546 		    if ((size_t)trans->tags[i] < num_tags)
547 		      tmp_tags[trans->tags[i]] = pos;
548 
549 		/* If another path has also reached this state, choose the one
550 		   with the smallest cost or best tags if costs are equal. */
551 		if (reach_next[dest_id].pos == pos
552 		    && (cost0 > reach_next[dest_id].costs[0][TRE_M_COST]
553 			|| (cost0 == reach_next[dest_id].costs[0][TRE_M_COST]
554 			    && (!match_tags
555 				|| !tre_tag_order(num_tags,
556 						  tnfa->tag_directions,
557 						  tmp_tags,
558 						  reach_next[dest_id].tags)))))
559 		  {
560 		    DPRINT(("lose, cost0 %d, have %d\n",
561 			    cost0, reach_next[dest_id].costs[0][TRE_M_COST]));
562 		    continue;
563 		  }
564 		DPRINT(("win\n"));
565 
566 		/* Set state, position, tags, parameters, depth, and costs. */
567 		reach_next[dest_id].state = trans->state;
568 		reach_next[dest_id].pos = pos;
569 		for (i = 0; i < num_tags; i++)
570 		  reach_next[dest_id].tags[i] = tmp_tags[i];
571 
572 		reach_next[dest_id].params = reach_p->params;
573 		if (trans->params)
574 		  tre_set_params(&reach_next[dest_id], trans->params,
575 				 default_params);
576 
577 		reach_next[dest_id].depth = reach_p->depth;
578 		memcpy(&reach_next[dest_id].costs,
579 		       reach_p->costs,
580 		       sizeof(reach_p->costs[0][0])
581 		       * TRE_M_LAST * (depth + 1));
582 		reach_next[dest_id].costs[depth][TRE_M_COST] = cost;
583 		reach_next[dest_id].costs[depth][TRE_M_NUM_DEL]++;
584 		reach_next[dest_id].costs[depth][TRE_M_NUM_ERR]++;
585 		if (depth > 0)
586 		  {
587 		    reach_next[dest_id].costs[0][TRE_M_COST] = cost0;
588 		    reach_next[dest_id].costs[0][TRE_M_NUM_DEL]++;
589 		    reach_next[dest_id].costs[0][TRE_M_NUM_ERR]++;
590 		  }
591 
592 		if (trans->state == tnfa->final
593 		    && (match_eo < 0
594 			|| match_costs[TRE_M_COST] > cost0
595 			|| (match_costs[TRE_M_COST] == cost0
596 			    && (num_tags > 0
597 				&& tmp_tags[0] <= match_tags[0]))))
598 		  {
599 		    DPRINT(("	 setting new match at %d, cost %d\n",
600 			    pos, cost0));
601 		    match_eo = pos;
602 		    memcpy(match_costs, reach_next[dest_id].costs[0],
603 			   sizeof(match_costs[0]) * TRE_M_LAST);
604 		    for (i = 0; i < num_tags; i++)
605 		      match_tags[i] = tmp_tags[i];
606 		  }
607 
608 		/* Add to the end of the deque. */
609 		*deque_end = &reach_next[dest_id];
610 		deque_end++;
611 		if (deque_end >= (ringbuffer + 512))
612 		  deque_end = ringbuffer;
613 		assert(deque_end != deque_start);
614 	      }
615 	    deque_start++;
616 	    if (deque_start >= (ringbuffer + 512))
617 	      deque_start = ringbuffer;
618 	  }
619 
620       }
621 
622 #ifdef TRE_DEBUG
623       tre_print_reach(tnfa, reach_next, pos, num_tags);
624 #endif /* TRE_DEBUG */
625 
626       /* Check for end of string. */
627       if (len < 0)
628 	{
629 	  if (type == STR_USER)
630 	    {
631 	      if (str_user_end)
632 		break;
633 	    }
634 	  else if (next_c == L'\0')
635 	    break;
636 	}
637       else
638 	{
639 	  if (pos >= len)
640 	    break;
641 	}
642 
643       prev_pos = pos;
644       GET_NEXT_WCHAR();
645 
646       /* Swap `reach' and `reach_next'. */
647       {
648 	tre_tnfa_approx_reach_t *tmp;
649 	tmp = reach;
650 	reach = reach_next;
651 	reach_next = tmp;
652       }
653 
654       /* Handle exact matches and substitutions. */
655       for (id = 0; id < tnfa->num_states; id++)
656 	{
657 	  tre_tnfa_transition_t *trans;
658 
659 	  if (reach[id].pos < prev_pos)
660 	    continue;  /* Not reached. */
661 	  for (trans = reach[id].state; trans->state; trans++)
662 	    {
663 	      int dest_id;
664 	      int depth;
665 	      int cost, cost0, err;
666 
667 	      if (trans->assertions
668 		  && (CHECK_ASSERTIONS(trans->assertions)
669 		      || CHECK_CHAR_CLASSES(trans, tnfa, eflags)))
670 		{
671 		  DPRINT(("  exact,  from %d: assert failed\n", id));
672 		  continue;
673 		}
674 
675 	      depth = reach[id].depth;
676 	      dest_id = trans->state_id;
677 
678 	      cost = reach[id].costs[depth][TRE_M_COST];
679 	      cost0 = reach[id].costs[0][TRE_M_COST];
680 	      err = 0;
681 
682 	      if (trans->code_min > (tre_cint_t)prev_c
683 		  || trans->code_max < (tre_cint_t)prev_c)
684 		{
685 		  /* Handle substitutes.  The required character was not in
686 		     the string, so match it in place of whatever was supposed
687 		     to be there and increase costs accordingly. */
688 		  err = 1;
689 
690 		  /* Compute and check cost at current depth. */
691 		  cost = reach[id].costs[depth][TRE_M_COST];
692 		  if (reach[id].params.cost_subst != TRE_PARAM_UNSET)
693 		    cost += reach[id].params.cost_subst;
694 		  if (cost > reach[id].params.max_cost)
695 		    continue; /* Cost too large. */
696 
697 		  /* Check number of substitutes at current depth. */
698 		  if (reach[id].costs[depth][TRE_M_NUM_SUBST] + 1
699 		      > reach[id].params.max_subst)
700 		    continue; /* Too many substitutes. */
701 
702 		  /* Check total number of errors at current depth. */
703 		  if (reach[id].costs[depth][TRE_M_NUM_ERR] + 1
704 		      > reach[id].params.max_err)
705 		    continue; /* Too many errors. */
706 
707 		  /* Compute overall cost. */
708 		  cost0 = cost;
709 		  if (depth > 0)
710 		    {
711 		      cost0 = reach[id].costs[0][TRE_M_COST];
712 		      if (reach[id].params.cost_subst != TRE_PARAM_UNSET)
713 			cost0 += reach[id].params.cost_subst;
714 		      else
715 			cost0 += default_params.cost_subst;
716 		    }
717 		  DPRINT(("  subst,  from %03d to %03d, cost %d: ",
718 			  id, dest_id, cost0));
719 		}
720 	      else
721 		DPRINT(("  exact,  from %03d to %03d, cost %d: ",
722 			id, dest_id, cost0));
723 
724 	      /* Compute tag values after this transition. */
725 	      for (i = 0; i < num_tags; i++)
726 		tmp_tags[i] = reach[id].tags[i];
727 	      if (trans->tags)
728 		for (i = 0; trans->tags[i] >= 0; i++)
729 		  if ((size_t)trans->tags[i] < num_tags)
730 		    tmp_tags[trans->tags[i]] = pos;
731 
732 	      /* If another path has also reached this state, choose the
733 		 one with the smallest cost or best tags if costs are equal. */
734 	      if (reach_next[dest_id].pos == pos
735 		  && (cost0 > reach_next[dest_id].costs[0][TRE_M_COST]
736 		      || (cost0 == reach_next[dest_id].costs[0][TRE_M_COST]
737 			  && !tre_tag_order(num_tags, tnfa->tag_directions,
738 					    tmp_tags,
739 					    reach_next[dest_id].tags))))
740 		{
741 		  DPRINT(("lose\n"));
742 		  continue;
743 		}
744 	      DPRINT(("win %d %d\n",
745 		      reach_next[dest_id].pos,
746 		      reach_next[dest_id].costs[0][TRE_M_COST]));
747 
748 	      /* Set state, position, tags, and depth. */
749 	      reach_next[dest_id].state = trans->state;
750 	      reach_next[dest_id].pos = pos;
751 	      for (i = 0; i < num_tags; i++)
752 		reach_next[dest_id].tags[i] = tmp_tags[i];
753 	      reach_next[dest_id].depth = reach[id].depth;
754 
755 	      /* Set parameters. */
756 	      reach_next[dest_id].params = reach[id].params;
757 	      if (trans->params)
758 		tre_set_params(&reach_next[dest_id], trans->params,
759 			       default_params);
760 
761 	      /* Set the costs after this transition. */
762 	      memcpy(&reach_next[dest_id].costs,
763 		     reach[id].costs,
764 		     sizeof(reach[id].costs[0][0])
765 		     * TRE_M_LAST * (depth + 1));
766 	      reach_next[dest_id].costs[depth][TRE_M_COST] = cost;
767 	      reach_next[dest_id].costs[depth][TRE_M_NUM_SUBST] += err;
768 	      reach_next[dest_id].costs[depth][TRE_M_NUM_ERR] += err;
769 	      if (depth > 0)
770 		{
771 		  reach_next[dest_id].costs[0][TRE_M_COST] = cost0;
772 		  reach_next[dest_id].costs[0][TRE_M_NUM_SUBST] += err;
773 		  reach_next[dest_id].costs[0][TRE_M_NUM_ERR] += err;
774 		}
775 
776 	      if (trans->state == tnfa->final
777 		  && (match_eo < 0
778 		      || cost0 < match_costs[TRE_M_COST]
779 		      || (cost0 == match_costs[TRE_M_COST]
780 			  && num_tags > 0 && tmp_tags[0] <= match_tags[0])))
781 		{
782 		  DPRINT(("    setting new match at %d, cost %d\n",
783 			  pos, cost0));
784 		  match_eo = pos;
785 		  for (i = 0; i < TRE_M_LAST; i++)
786 		    match_costs[i] = reach_next[dest_id].costs[0][i];
787 		  for (i = 0; i < num_tags; i++)
788 		    match_tags[i] = tmp_tags[i];
789 		}
790 	    }
791 	}
792     }
793 
794   DPRINT(("match end offset = %d, match cost = %d\n", match_eo,
795 	  match_costs[TRE_M_COST]));
796 
797   match->cost = match_costs[TRE_M_COST];
798   match->num_ins = match_costs[TRE_M_NUM_INS];
799   match->num_del = match_costs[TRE_M_NUM_DEL];
800   match->num_subst = match_costs[TRE_M_NUM_SUBST];
801   *match_end_ofs = match_eo;
802 
803   ret = match_eo >= 0 ? REG_OK : REG_NOMATCH;
804 error_exit:
805 #ifndef TRE_USE_ALLOCA
806   if (buf)
807     xfree(buf);
808 #endif /* !TRE_USE_ALLOCA */
809   return ret;
810 }
811