xref: /netbsd-src/external/bsd/tre/dist/lib/tre-match-parallel.c (revision b3178392f5caad8a10adb61c96d226e9c21c03ef)
163d4abf0Sagc /*
263d4abf0Sagc   tre-match-parallel.c - TRE parallel regex matching engine
363d4abf0Sagc 
463d4abf0Sagc   This software is released under a BSD-style license.
563d4abf0Sagc   See the file LICENSE for details and copyright.
663d4abf0Sagc 
763d4abf0Sagc */
863d4abf0Sagc 
963d4abf0Sagc /*
1063d4abf0Sagc   This algorithm searches for matches basically by reading characters
1163d4abf0Sagc   in the searched string one by one, starting at the beginning.	 All
1263d4abf0Sagc   matching paths in the TNFA are traversed in parallel.	 When two or
1363d4abf0Sagc   more paths reach the same state, exactly one is chosen according to
1463d4abf0Sagc   tag ordering rules; if returning submatches is not required it does
1563d4abf0Sagc   not matter which path is chosen.
1663d4abf0Sagc 
1763d4abf0Sagc   The worst case time required for finding the leftmost and longest
1863d4abf0Sagc   match, or determining that there is no match, is always linearly
1963d4abf0Sagc   dependent on the length of the text being searched.
2063d4abf0Sagc 
2163d4abf0Sagc   This algorithm cannot handle TNFAs with back referencing nodes.
2263d4abf0Sagc   See `tre-match-backtrack.c'.
2363d4abf0Sagc */
2463d4abf0Sagc 
2563d4abf0Sagc 
2663d4abf0Sagc #ifdef HAVE_CONFIG_H
2763d4abf0Sagc #include <config.h>
2863d4abf0Sagc #endif /* HAVE_CONFIG_H */
2963d4abf0Sagc 
30f2a3d147Schristos #include "tre-alloca.h"
3163d4abf0Sagc 
3263d4abf0Sagc #include <assert.h>
3363d4abf0Sagc #include <stdlib.h>
3463d4abf0Sagc #include <string.h>
3563d4abf0Sagc #ifdef HAVE_WCHAR_H
3663d4abf0Sagc #include <wchar.h>
3763d4abf0Sagc #endif /* HAVE_WCHAR_H */
3863d4abf0Sagc #ifdef HAVE_WCTYPE_H
3963d4abf0Sagc #include <wctype.h>
4063d4abf0Sagc #endif /* HAVE_WCTYPE_H */
4163d4abf0Sagc #ifndef TRE_WCHAR
4263d4abf0Sagc #include <ctype.h>
4363d4abf0Sagc #endif /* !TRE_WCHAR */
4463d4abf0Sagc #ifdef HAVE_MALLOC_H
4563d4abf0Sagc #include <malloc.h>
4663d4abf0Sagc #endif /* HAVE_MALLOC_H */
47*b3178392Srin #include <stdint.h>
4863d4abf0Sagc 
4963d4abf0Sagc #include "tre-internal.h"
5063d4abf0Sagc #include "tre-match-utils.h"
5163d4abf0Sagc #include "tre.h"
5263d4abf0Sagc #include "xmalloc.h"
5363d4abf0Sagc 
5463d4abf0Sagc 
5563d4abf0Sagc 
5663d4abf0Sagc typedef struct {
5763d4abf0Sagc   tre_tnfa_transition_t *state;
5863d4abf0Sagc   int *tags;
5963d4abf0Sagc } tre_tnfa_reach_t;
6063d4abf0Sagc 
6163d4abf0Sagc typedef struct {
6263d4abf0Sagc   int pos;
6363d4abf0Sagc   int **tags;
6463d4abf0Sagc } tre_reach_pos_t;
6563d4abf0Sagc 
6663d4abf0Sagc 
6763d4abf0Sagc #ifdef TRE_DEBUG
6863d4abf0Sagc static void
tre_print_reach(const tre_tnfa_t * tnfa,tre_tnfa_reach_t * reach,int num_tags)6963d4abf0Sagc tre_print_reach(const tre_tnfa_t *tnfa, tre_tnfa_reach_t *reach, int num_tags)
7063d4abf0Sagc {
7163d4abf0Sagc   int i;
7263d4abf0Sagc 
7363d4abf0Sagc   while (reach->state != NULL)
7463d4abf0Sagc     {
7563d4abf0Sagc       DPRINT((" %p", (void *)reach->state));
7663d4abf0Sagc       if (num_tags > 0)
7763d4abf0Sagc 	{
7863d4abf0Sagc 	  DPRINT(("/"));
7963d4abf0Sagc 	  for (i = 0; i < num_tags; i++)
8063d4abf0Sagc 	    {
8163d4abf0Sagc 	      DPRINT(("%d:%d", i, reach->tags[i]));
8263d4abf0Sagc 	      if (i < (num_tags-1))
8363d4abf0Sagc 		DPRINT((","));
8463d4abf0Sagc 	    }
8563d4abf0Sagc 	}
8663d4abf0Sagc       reach++;
8763d4abf0Sagc     }
8863d4abf0Sagc   DPRINT(("\n"));
8963d4abf0Sagc 
9063d4abf0Sagc }
9163d4abf0Sagc #endif /* TRE_DEBUG */
9263d4abf0Sagc 
9363d4abf0Sagc reg_errcode_t
tre_tnfa_run_parallel(const tre_tnfa_t * tnfa,const void * string,int len,tre_str_type_t type,int * match_tags,int eflags,int * match_end_ofs)9463d4abf0Sagc tre_tnfa_run_parallel(const tre_tnfa_t *tnfa, const void *string, int len,
9563d4abf0Sagc 		      tre_str_type_t type, int *match_tags, int eflags,
9663d4abf0Sagc 		      int *match_end_ofs)
9763d4abf0Sagc {
9863d4abf0Sagc   /* State variables required by GET_NEXT_WCHAR. */
9963d4abf0Sagc   tre_char_t prev_c = 0, next_c = 0;
10063d4abf0Sagc   const char *str_byte = string;
10163d4abf0Sagc   int pos = -1;
10263d4abf0Sagc   unsigned int pos_add_next = 1;
10363d4abf0Sagc #ifdef TRE_WCHAR
10463d4abf0Sagc   const wchar_t *str_wide = string;
10563d4abf0Sagc #ifdef TRE_MBSTATE
10663d4abf0Sagc   mbstate_t mbstate;
10763d4abf0Sagc #endif /* TRE_MBSTATE */
10863d4abf0Sagc #endif /* TRE_WCHAR */
10963d4abf0Sagc   int reg_notbol = eflags & REG_NOTBOL;
11063d4abf0Sagc   int reg_noteol = eflags & REG_NOTEOL;
11163d4abf0Sagc   int reg_newline = tnfa->cflags & REG_NEWLINE;
112f2a3d147Schristos   size_t str_user_end = 0;
11363d4abf0Sagc 
11463d4abf0Sagc   char *buf;
11563d4abf0Sagc   tre_tnfa_transition_t *trans_i;
11663d4abf0Sagc   tre_tnfa_reach_t *reach, *reach_next, *reach_i, *reach_next_i;
11763d4abf0Sagc   tre_reach_pos_t *reach_pos;
11863d4abf0Sagc   int *tag_i;
119f2a3d147Schristos   size_t num_tags, i;
12063d4abf0Sagc 
12163d4abf0Sagc   int match_eo = -1;	   /* end offset of match (-1 if no match found yet) */
12263d4abf0Sagc   int new_match = 0;
12363d4abf0Sagc   int *tmp_tags = NULL;
12463d4abf0Sagc   int *tmp_iptr;
12527d137aeSrin   reg_errcode_t ret;
12663d4abf0Sagc 
12763d4abf0Sagc #ifdef TRE_MBSTATE
12863d4abf0Sagc   memset(&mbstate, '\0', sizeof(mbstate));
12963d4abf0Sagc #endif /* TRE_MBSTATE */
13063d4abf0Sagc 
13163d4abf0Sagc   DPRINT(("tre_tnfa_run_parallel, input type %d\n", type));
13263d4abf0Sagc 
13363d4abf0Sagc   if (!match_tags)
13463d4abf0Sagc     num_tags = 0;
13563d4abf0Sagc   else
13663d4abf0Sagc     num_tags = tnfa->num_tags;
13763d4abf0Sagc 
13863d4abf0Sagc   /* Allocate memory for temporary data required for matching.	This needs to
13963d4abf0Sagc      be done for every matching operation to be thread safe.  This allocates
14063d4abf0Sagc      everything in a single large block from the stack frame using alloca()
14163d4abf0Sagc      or with malloc() if alloca is unavailable. */
14263d4abf0Sagc   {
143f2a3d147Schristos     size_t tbytes, rbytes, pbytes, xbytes, total_bytes;
14463d4abf0Sagc     char *tmp_buf;
14598796d9cSrin 
14698796d9cSrin     /* Ensure that tbytes and xbytes*num_states cannot overflow, and that
14798796d9cSrin      * they don't contribute more than 1/8 of SIZE_MAX to total_bytes. */
14898796d9cSrin     if (num_tags > SIZE_MAX/(8 * sizeof(int) * tnfa->num_states))
14998796d9cSrin       return REG_ESPACE;
15098796d9cSrin 
15198796d9cSrin     /* Likewise check rbytes. */
15298796d9cSrin     if (tnfa->num_states+1 > SIZE_MAX/(8 * sizeof(*reach_next)))
15398796d9cSrin       return REG_ESPACE;
15498796d9cSrin 
15598796d9cSrin     /* Likewise check pbytes. */
15698796d9cSrin     if (tnfa->num_states > SIZE_MAX/(8 * sizeof(*reach_pos)))
15798796d9cSrin       return REG_ESPACE;
15898796d9cSrin 
15963d4abf0Sagc     /* Compute the length of the block we need. */
16063d4abf0Sagc     tbytes = sizeof(*tmp_tags) * num_tags;
16163d4abf0Sagc     rbytes = sizeof(*reach_next) * (tnfa->num_states + 1);
16263d4abf0Sagc     pbytes = sizeof(*reach_pos) * tnfa->num_states;
16363d4abf0Sagc     xbytes = sizeof(int) * num_tags;
16463d4abf0Sagc     total_bytes =
16563d4abf0Sagc       (sizeof(long) - 1) * 4 /* for alignment paddings */
16663d4abf0Sagc       + (rbytes + xbytes * tnfa->num_states) * 2 + tbytes + pbytes;
16763d4abf0Sagc 
16863d4abf0Sagc     /* Allocate the memory. */
16963d4abf0Sagc #ifdef TRE_USE_ALLOCA
17063d4abf0Sagc     buf = alloca(total_bytes);
17163d4abf0Sagc #else /* !TRE_USE_ALLOCA */
17263d4abf0Sagc     buf = xmalloc((unsigned)total_bytes);
17363d4abf0Sagc #endif /* !TRE_USE_ALLOCA */
17463d4abf0Sagc     if (buf == NULL)
17563d4abf0Sagc       return REG_ESPACE;
17663d4abf0Sagc     memset(buf, 0, (size_t)total_bytes);
17763d4abf0Sagc 
17863d4abf0Sagc     /* Get the various pointers within tmp_buf (properly aligned). */
17963d4abf0Sagc     tmp_tags = (void *)buf;
18063d4abf0Sagc     tmp_buf = buf + tbytes;
18163d4abf0Sagc     tmp_buf += ALIGN(tmp_buf, long);
18263d4abf0Sagc     reach_next = (void *)tmp_buf;
18363d4abf0Sagc     tmp_buf += rbytes;
18463d4abf0Sagc     tmp_buf += ALIGN(tmp_buf, long);
18563d4abf0Sagc     reach = (void *)tmp_buf;
18663d4abf0Sagc     tmp_buf += rbytes;
18763d4abf0Sagc     tmp_buf += ALIGN(tmp_buf, long);
18863d4abf0Sagc     reach_pos = (void *)tmp_buf;
18963d4abf0Sagc     tmp_buf += pbytes;
19063d4abf0Sagc     tmp_buf += ALIGN(tmp_buf, long);
19163d4abf0Sagc     for (i = 0; i < tnfa->num_states; i++)
19263d4abf0Sagc       {
19363d4abf0Sagc 	reach[i].tags = (void *)tmp_buf;
19463d4abf0Sagc 	tmp_buf += xbytes;
19563d4abf0Sagc 	reach_next[i].tags = (void *)tmp_buf;
19663d4abf0Sagc 	tmp_buf += xbytes;
19763d4abf0Sagc       }
19863d4abf0Sagc   }
19963d4abf0Sagc 
20063d4abf0Sagc   for (i = 0; i < tnfa->num_states; i++)
20163d4abf0Sagc     reach_pos[i].pos = -1;
20263d4abf0Sagc 
20363d4abf0Sagc   /* If only one character can start a match, find it first. */
20463d4abf0Sagc   if (tnfa->first_char >= 0 && type == STR_BYTE && str_byte)
20563d4abf0Sagc     {
20663d4abf0Sagc       const char *orig_str = str_byte;
20763d4abf0Sagc       int first = tnfa->first_char;
20863d4abf0Sagc 
20963d4abf0Sagc       if (len >= 0)
21063d4abf0Sagc 	str_byte = memchr(orig_str, first, (size_t)len);
21163d4abf0Sagc       else
21263d4abf0Sagc 	str_byte = strchr(orig_str, first);
21363d4abf0Sagc       if (str_byte == NULL)
21463d4abf0Sagc 	{
21563d4abf0Sagc #ifndef TRE_USE_ALLOCA
21663d4abf0Sagc 	  if (buf)
21763d4abf0Sagc 	    xfree(buf);
21863d4abf0Sagc #endif /* !TRE_USE_ALLOCA */
21963d4abf0Sagc 	  return REG_NOMATCH;
22063d4abf0Sagc 	}
22163d4abf0Sagc       DPRINT(("skipped %lu chars\n", (unsigned long)(str_byte - orig_str)));
22263d4abf0Sagc       if (str_byte >= orig_str + 1)
22363d4abf0Sagc 	prev_c = (unsigned char)*(str_byte - 1);
22463d4abf0Sagc       next_c = (unsigned char)*str_byte;
225f2a3d147Schristos       pos = (int)(str_byte - orig_str);
22663d4abf0Sagc       if (len < 0 || pos < len)
22763d4abf0Sagc 	str_byte++;
22863d4abf0Sagc     }
22963d4abf0Sagc   else
23063d4abf0Sagc     {
23163d4abf0Sagc       GET_NEXT_WCHAR();
23263d4abf0Sagc       pos = 0;
23363d4abf0Sagc     }
23463d4abf0Sagc 
23563d4abf0Sagc #if 0
23663d4abf0Sagc   /* Skip over characters that cannot possibly be the first character
23763d4abf0Sagc      of a match. */
23863d4abf0Sagc   if (tnfa->firstpos_chars != NULL)
23963d4abf0Sagc     {
24063d4abf0Sagc       char *chars = tnfa->firstpos_chars;
24163d4abf0Sagc 
24263d4abf0Sagc       if (len < 0)
24363d4abf0Sagc 	{
24463d4abf0Sagc 	  const char *orig_str = str_byte;
24563d4abf0Sagc 	  /* XXX - use strpbrk() and wcspbrk() because they might be
24663d4abf0Sagc 	     optimized for the target architecture.  Try also strcspn()
24763d4abf0Sagc 	     and wcscspn() and compare the speeds. */
24863d4abf0Sagc 	  while (next_c != L'\0' && !chars[next_c])
24963d4abf0Sagc 	    {
25063d4abf0Sagc 	      next_c = *str_byte++;
25163d4abf0Sagc 	    }
25263d4abf0Sagc 	  prev_c = *(str_byte - 2);
25363d4abf0Sagc 	  pos += str_byte - orig_str;
25463d4abf0Sagc 	  DPRINT(("skipped %d chars\n", str_byte - orig_str));
25563d4abf0Sagc 	}
25663d4abf0Sagc       else
25763d4abf0Sagc 	{
25863d4abf0Sagc 	  while (pos <= len && !chars[next_c])
25963d4abf0Sagc 	    {
26063d4abf0Sagc 	      prev_c = next_c;
26163d4abf0Sagc 	      next_c = (unsigned char)(*str_byte++);
26263d4abf0Sagc 	      pos++;
26363d4abf0Sagc 	    }
26463d4abf0Sagc 	}
26563d4abf0Sagc     }
26663d4abf0Sagc #endif
26763d4abf0Sagc 
26863d4abf0Sagc   DPRINT(("length: %d\n", len));
26963d4abf0Sagc   DPRINT(("pos:chr/code | states and tags\n"));
27063d4abf0Sagc   DPRINT(("-------------+------------------------------------------------\n"));
27163d4abf0Sagc 
27263d4abf0Sagc   reach_next_i = reach_next;
27313498f30Srin   while (/*CONSTCOND*/(void)1,1)
27463d4abf0Sagc     {
27563d4abf0Sagc       /* If no match found yet, add the initial states to `reach_next'. */
27663d4abf0Sagc       if (match_eo < 0)
27763d4abf0Sagc 	{
27863d4abf0Sagc 	  DPRINT((" init >"));
27963d4abf0Sagc 	  trans_i = tnfa->initial;
28063d4abf0Sagc 	  while (trans_i->state != NULL)
28163d4abf0Sagc 	    {
28263d4abf0Sagc 	      if (reach_pos[trans_i->state_id].pos < pos)
28363d4abf0Sagc 		{
28463d4abf0Sagc 		  if (trans_i->assertions
28563d4abf0Sagc 		      && CHECK_ASSERTIONS(trans_i->assertions))
28663d4abf0Sagc 		    {
28763d4abf0Sagc 		      DPRINT(("assertion failed\n"));
28863d4abf0Sagc 		      trans_i++;
28963d4abf0Sagc 		      continue;
29063d4abf0Sagc 		    }
29163d4abf0Sagc 
29263d4abf0Sagc 		  DPRINT((" %p", (void *)trans_i->state));
29363d4abf0Sagc 		  reach_next_i->state = trans_i->state;
29463d4abf0Sagc 		  for (i = 0; i < num_tags; i++)
29563d4abf0Sagc 		    reach_next_i->tags[i] = -1;
29663d4abf0Sagc 		  tag_i = trans_i->tags;
29763d4abf0Sagc 		  if (tag_i)
29863d4abf0Sagc 		    while (*tag_i >= 0)
29963d4abf0Sagc 		      {
300f2a3d147Schristos 			if ((size_t)*tag_i < num_tags)
30163d4abf0Sagc 			  reach_next_i->tags[*tag_i] = pos;
30263d4abf0Sagc 			tag_i++;
30363d4abf0Sagc 		      }
30463d4abf0Sagc 		  if (reach_next_i->state == tnfa->final)
30563d4abf0Sagc 		    {
30663d4abf0Sagc 		      DPRINT(("	 found empty match\n"));
30763d4abf0Sagc 		      match_eo = pos;
30863d4abf0Sagc 		      new_match = 1;
30963d4abf0Sagc 		      for (i = 0; i < num_tags; i++)
31063d4abf0Sagc 			match_tags[i] = reach_next_i->tags[i];
31163d4abf0Sagc 		    }
31263d4abf0Sagc 		  reach_pos[trans_i->state_id].pos = pos;
31363d4abf0Sagc 		  reach_pos[trans_i->state_id].tags = &reach_next_i->tags;
31463d4abf0Sagc 		  reach_next_i++;
31563d4abf0Sagc 		}
31663d4abf0Sagc 	      trans_i++;
31763d4abf0Sagc 	    }
31863d4abf0Sagc 	  DPRINT(("\n"));
31963d4abf0Sagc 	  reach_next_i->state = NULL;
32063d4abf0Sagc 	}
32163d4abf0Sagc       else
32263d4abf0Sagc 	{
32363d4abf0Sagc 	  if (num_tags == 0 || reach_next_i == reach_next)
32413498f30Srin 	    /* We have found a match. */
32563d4abf0Sagc 	    break;
32663d4abf0Sagc 	}
32763d4abf0Sagc 
32863d4abf0Sagc       /* Check for end of string. */
32963d4abf0Sagc       if (len < 0)
33063d4abf0Sagc 	{
33163d4abf0Sagc 	  if (type == STR_USER)
33263d4abf0Sagc 	    {
33363d4abf0Sagc 	      if (str_user_end)
33463d4abf0Sagc 		break;
33563d4abf0Sagc 	    }
33663d4abf0Sagc 	  else if (next_c == L'\0')
33763d4abf0Sagc 	    break;
33863d4abf0Sagc 	}
33963d4abf0Sagc       else
34063d4abf0Sagc 	{
34163d4abf0Sagc 	  if (pos >= len)
34263d4abf0Sagc 	    break;
34363d4abf0Sagc 	}
34463d4abf0Sagc 
34563d4abf0Sagc       GET_NEXT_WCHAR();
34663d4abf0Sagc 
34763d4abf0Sagc #ifdef TRE_DEBUG
34863d4abf0Sagc       DPRINT(("%3d:%2lc/%05d |", pos - 1, (tre_cint_t)prev_c, (int)prev_c));
34963d4abf0Sagc       tre_print_reach(tnfa, reach_next, num_tags);
35063d4abf0Sagc       DPRINT(("%3d:%2lc/%05d |", pos, (tre_cint_t)next_c, (int)next_c));
35163d4abf0Sagc       tre_print_reach(tnfa, reach_next, num_tags);
35263d4abf0Sagc #endif /* TRE_DEBUG */
35363d4abf0Sagc 
35463d4abf0Sagc       /* Swap `reach' and `reach_next'. */
35563d4abf0Sagc       reach_i = reach;
35663d4abf0Sagc       reach = reach_next;
35763d4abf0Sagc       reach_next = reach_i;
35863d4abf0Sagc 
35963d4abf0Sagc       /* For each state in `reach', weed out states that don't fulfill the
36063d4abf0Sagc 	 minimal matching conditions. */
36163d4abf0Sagc       if (tnfa->num_minimals && new_match)
36263d4abf0Sagc 	{
36363d4abf0Sagc 	  new_match = 0;
36463d4abf0Sagc 	  reach_next_i = reach_next;
36563d4abf0Sagc 	  for (reach_i = reach; reach_i->state; reach_i++)
36663d4abf0Sagc 	    {
36763d4abf0Sagc 	      int skip = 0;
36863d4abf0Sagc 	      for (i = 0; tnfa->minimal_tags[i] >= 0; i += 2)
36963d4abf0Sagc 		{
37063d4abf0Sagc 		  int end = tnfa->minimal_tags[i];
37163d4abf0Sagc 		  int start = tnfa->minimal_tags[i + 1];
37263d4abf0Sagc 		  DPRINT(("  Minimal start %d, end %d\n", start, end));
373f2a3d147Schristos 		  if ((size_t)end >= num_tags)
37463d4abf0Sagc 		    {
37563d4abf0Sagc 		      DPRINT(("	 Throwing %p out.\n", reach_i->state));
37663d4abf0Sagc 		      skip = 1;
37763d4abf0Sagc 		      break;
37863d4abf0Sagc 		    }
37963d4abf0Sagc 		  else if (reach_i->tags[start] == match_tags[start]
38063d4abf0Sagc 			   && reach_i->tags[end] < match_tags[end])
38163d4abf0Sagc 		    {
38263d4abf0Sagc 		      DPRINT(("	 Throwing %p out because t%d < %d\n",
38363d4abf0Sagc 			      reach_i->state, end, match_tags[end]));
38463d4abf0Sagc 		      skip = 1;
38563d4abf0Sagc 		      break;
38663d4abf0Sagc 		    }
38763d4abf0Sagc 		}
38863d4abf0Sagc 	      if (!skip)
38963d4abf0Sagc 		{
39063d4abf0Sagc 		  reach_next_i->state = reach_i->state;
39163d4abf0Sagc 		  tmp_iptr = reach_next_i->tags;
39263d4abf0Sagc 		  reach_next_i->tags = reach_i->tags;
39363d4abf0Sagc 		  reach_i->tags = tmp_iptr;
39463d4abf0Sagc 		  reach_next_i++;
39563d4abf0Sagc 		}
39663d4abf0Sagc 	    }
39763d4abf0Sagc 	  reach_next_i->state = NULL;
39863d4abf0Sagc 
39963d4abf0Sagc 	  /* Swap `reach' and `reach_next'. */
40063d4abf0Sagc 	  reach_i = reach;
40163d4abf0Sagc 	  reach = reach_next;
40263d4abf0Sagc 	  reach_next = reach_i;
40363d4abf0Sagc 	}
40463d4abf0Sagc 
40563d4abf0Sagc       /* For each state in `reach' see if there is a transition leaving with
40663d4abf0Sagc 	 the current input symbol to a state not yet in `reach_next', and
40763d4abf0Sagc 	 add the destination states to `reach_next'. */
40863d4abf0Sagc       reach_next_i = reach_next;
40963d4abf0Sagc       for (reach_i = reach; reach_i->state; reach_i++)
41063d4abf0Sagc 	{
41163d4abf0Sagc 	  for (trans_i = reach_i->state; trans_i->state; trans_i++)
41263d4abf0Sagc 	    {
41363d4abf0Sagc 	      /* Does this transition match the input symbol? */
41463d4abf0Sagc 	      if (trans_i->code_min <= (tre_cint_t)prev_c &&
41563d4abf0Sagc 		  trans_i->code_max >= (tre_cint_t)prev_c)
41663d4abf0Sagc 		{
41763d4abf0Sagc 		  if (trans_i->assertions
41863d4abf0Sagc 		      && (CHECK_ASSERTIONS(trans_i->assertions)
41963d4abf0Sagc 			  || CHECK_CHAR_CLASSES(trans_i, tnfa, eflags)))
42063d4abf0Sagc 		    {
42163d4abf0Sagc 		      DPRINT(("assertion failed\n"));
42263d4abf0Sagc 		      continue;
42363d4abf0Sagc 		    }
42463d4abf0Sagc 
42563d4abf0Sagc 		  /* Compute the tags after this transition. */
42663d4abf0Sagc 		  for (i = 0; i < num_tags; i++)
42763d4abf0Sagc 		    tmp_tags[i] = reach_i->tags[i];
42863d4abf0Sagc 		  tag_i = trans_i->tags;
42963d4abf0Sagc 		  if (tag_i != NULL)
43063d4abf0Sagc 		    while (*tag_i >= 0)
43163d4abf0Sagc 		      {
432f2a3d147Schristos 			if ((size_t)*tag_i < num_tags)
43363d4abf0Sagc 			  tmp_tags[*tag_i] = pos;
43463d4abf0Sagc 			tag_i++;
43563d4abf0Sagc 		      }
43663d4abf0Sagc 
43763d4abf0Sagc 		  if (reach_pos[trans_i->state_id].pos < pos)
43863d4abf0Sagc 		    {
43963d4abf0Sagc 		      /* Found an unvisited node. */
44063d4abf0Sagc 		      reach_next_i->state = trans_i->state;
44163d4abf0Sagc 		      tmp_iptr = reach_next_i->tags;
44263d4abf0Sagc 		      reach_next_i->tags = tmp_tags;
44363d4abf0Sagc 		      tmp_tags = tmp_iptr;
44463d4abf0Sagc 		      reach_pos[trans_i->state_id].pos = pos;
44563d4abf0Sagc 		      reach_pos[trans_i->state_id].tags = &reach_next_i->tags;
44663d4abf0Sagc 
44763d4abf0Sagc 		      if (reach_next_i->state == tnfa->final
44863d4abf0Sagc 			  && (match_eo == -1
44963d4abf0Sagc 			      || (num_tags > 0
45063d4abf0Sagc 				  && reach_next_i->tags[0] <= match_tags[0])))
45163d4abf0Sagc 			{
45263d4abf0Sagc 			  DPRINT(("  found match %p\n", trans_i->state));
45363d4abf0Sagc 			  match_eo = pos;
45463d4abf0Sagc 			  new_match = 1;
45563d4abf0Sagc 			  for (i = 0; i < num_tags; i++)
45663d4abf0Sagc 			    match_tags[i] = reach_next_i->tags[i];
45763d4abf0Sagc 			}
45863d4abf0Sagc 		      reach_next_i++;
45963d4abf0Sagc 
46063d4abf0Sagc 		    }
46163d4abf0Sagc 		  else
46263d4abf0Sagc 		    {
46363d4abf0Sagc 		      assert(reach_pos[trans_i->state_id].pos == pos);
46463d4abf0Sagc 		      /* Another path has also reached this state.  We choose
46563d4abf0Sagc 			 the winner by examining the tag values for both
46663d4abf0Sagc 			 paths. */
46763d4abf0Sagc 		      if (tre_tag_order(num_tags, tnfa->tag_directions,
46863d4abf0Sagc 					tmp_tags,
46963d4abf0Sagc 					*reach_pos[trans_i->state_id].tags))
47063d4abf0Sagc 			{
47163d4abf0Sagc 			  /* The new path wins. */
47263d4abf0Sagc 			  tmp_iptr = *reach_pos[trans_i->state_id].tags;
47363d4abf0Sagc 			  *reach_pos[trans_i->state_id].tags = tmp_tags;
47463d4abf0Sagc 			  if (trans_i->state == tnfa->final)
47563d4abf0Sagc 			    {
47663d4abf0Sagc 			      DPRINT(("	 found better match\n"));
47763d4abf0Sagc 			      match_eo = pos;
47863d4abf0Sagc 			      new_match = 1;
47963d4abf0Sagc 			      for (i = 0; i < num_tags; i++)
48063d4abf0Sagc 				match_tags[i] = tmp_tags[i];
48163d4abf0Sagc 			    }
48263d4abf0Sagc 			  tmp_tags = tmp_iptr;
48363d4abf0Sagc 			}
48463d4abf0Sagc 		    }
48563d4abf0Sagc 		}
48663d4abf0Sagc 	    }
48763d4abf0Sagc 	}
48863d4abf0Sagc       reach_next_i->state = NULL;
48963d4abf0Sagc     }
49063d4abf0Sagc 
49163d4abf0Sagc   DPRINT(("match end offset = %d\n", match_eo));
49263d4abf0Sagc 
49327d137aeSrin   *match_end_ofs = match_eo;
49427d137aeSrin   ret = match_eo >= 0 ? REG_OK : REG_NOMATCH;
49527d137aeSrin error_exit:
49663d4abf0Sagc #ifndef TRE_USE_ALLOCA
49763d4abf0Sagc   if (buf)
49863d4abf0Sagc     xfree(buf);
49963d4abf0Sagc #endif /* !TRE_USE_ALLOCA */
50027d137aeSrin   return ret;
50163d4abf0Sagc }
50263d4abf0Sagc 
50363d4abf0Sagc /* EOF */
504