xref: /dflybsd-src/contrib/tre/lib/tre-match-parallel.c (revision d5f8dde1e2bae450329b3ee8c25ef109b5713f07)
15f2eab64SJohn Marino /*
25f2eab64SJohn Marino   tre-match-parallel.c - TRE parallel regex matching engine
35f2eab64SJohn Marino 
45f2eab64SJohn Marino   This software is released under a BSD-style license.
55f2eab64SJohn Marino   See the file LICENSE for details and copyright.
65f2eab64SJohn Marino 
75f2eab64SJohn Marino */
85f2eab64SJohn Marino 
95f2eab64SJohn Marino /*
105f2eab64SJohn Marino   This algorithm searches for matches basically by reading characters
115f2eab64SJohn Marino   in the searched string one by one, starting at the beginning.	 All
125f2eab64SJohn Marino   matching paths in the TNFA are traversed in parallel.	 When two or
135f2eab64SJohn Marino   more paths reach the same state, exactly one is chosen according to
145f2eab64SJohn Marino   tag ordering rules; if returning submatches is not required it does
155f2eab64SJohn Marino   not matter which path is chosen.
165f2eab64SJohn Marino 
175f2eab64SJohn Marino   The worst case time required for finding the leftmost and longest
185f2eab64SJohn Marino   match, or determining that there is no match, is always linearly
195f2eab64SJohn Marino   dependent on the length of the text being searched.
205f2eab64SJohn Marino 
215f2eab64SJohn Marino   This algorithm cannot handle TNFAs with back referencing nodes.
225f2eab64SJohn Marino   See `tre-match-backtrack.c'.
235f2eab64SJohn Marino */
245f2eab64SJohn Marino 
255f2eab64SJohn Marino 
265f2eab64SJohn Marino #ifdef HAVE_CONFIG_H
275f2eab64SJohn Marino #include <config.h>
285f2eab64SJohn Marino #endif /* HAVE_CONFIG_H */
295f2eab64SJohn Marino 
30*d5f8dde1SJohn Marino /* Unset TRE_USE_ALLOCA to avoid using the stack to hold all the state
31*d5f8dde1SJohn Marino    info while running */
32*d5f8dde1SJohn Marino #undef TRE_USE_ALLOCA
33*d5f8dde1SJohn Marino 
345f2eab64SJohn Marino #ifdef TRE_USE_ALLOCA
355f2eab64SJohn Marino /* AIX requires this to be the first thing in the file.	 */
365f2eab64SJohn Marino #ifndef __GNUC__
375f2eab64SJohn Marino # if HAVE_ALLOCA_H
385f2eab64SJohn Marino #  include <alloca.h>
395f2eab64SJohn Marino # else
405f2eab64SJohn Marino #  ifdef _AIX
415f2eab64SJohn Marino  #pragma alloca
425f2eab64SJohn Marino #  else
435f2eab64SJohn Marino #   ifndef alloca /* predefined by HP cc +Olibcalls */
445f2eab64SJohn Marino char *alloca ();
455f2eab64SJohn Marino #   endif
465f2eab64SJohn Marino #  endif
475f2eab64SJohn Marino # endif
485f2eab64SJohn Marino #endif
495f2eab64SJohn Marino #endif /* TRE_USE_ALLOCA */
505f2eab64SJohn Marino 
515f2eab64SJohn Marino #include <assert.h>
525f2eab64SJohn Marino #include <stdlib.h>
535f2eab64SJohn Marino #include <string.h>
545f2eab64SJohn Marino #ifdef HAVE_WCHAR_H
555f2eab64SJohn Marino #include <wchar.h>
565f2eab64SJohn Marino #endif /* HAVE_WCHAR_H */
575f2eab64SJohn Marino #ifdef HAVE_WCTYPE_H
585f2eab64SJohn Marino #include <wctype.h>
595f2eab64SJohn Marino #endif /* HAVE_WCTYPE_H */
605f2eab64SJohn Marino #ifndef TRE_WCHAR
615f2eab64SJohn Marino #include <ctype.h>
625f2eab64SJohn Marino #endif /* !TRE_WCHAR */
635f2eab64SJohn Marino #ifdef HAVE_MALLOC_H
645f2eab64SJohn Marino #include <malloc.h>
655f2eab64SJohn Marino #endif /* HAVE_MALLOC_H */
665f2eab64SJohn Marino 
675f2eab64SJohn Marino #include "tre-internal.h"
685f2eab64SJohn Marino #include "tre-match-utils.h"
695f2eab64SJohn Marino #include "tre.h"
705f2eab64SJohn Marino #include "xmalloc.h"
715f2eab64SJohn Marino 
725f2eab64SJohn Marino 
735f2eab64SJohn Marino 
745f2eab64SJohn Marino typedef struct {
755f2eab64SJohn Marino   tre_tnfa_transition_t *state;
76*d5f8dde1SJohn Marino   tre_tag_t *tags;
775f2eab64SJohn Marino } tre_tnfa_reach_t;
785f2eab64SJohn Marino 
795f2eab64SJohn Marino typedef struct {
805f2eab64SJohn Marino   int pos;
81*d5f8dde1SJohn Marino   tre_tag_t **tags;
825f2eab64SJohn Marino } tre_reach_pos_t;
835f2eab64SJohn Marino 
845f2eab64SJohn Marino 
855f2eab64SJohn Marino #ifdef TRE_DEBUG
865f2eab64SJohn Marino static void
tre_print_reach1(tre_tnfa_transition_t * state,tre_tag_t * tags,int num_tags)87*d5f8dde1SJohn Marino tre_print_reach1(tre_tnfa_transition_t *state, tre_tag_t *tags, int num_tags)
885f2eab64SJohn Marino {
89*d5f8dde1SJohn Marino   DPRINT((" %p", (void *)state));
905f2eab64SJohn Marino   if (num_tags > 0)
915f2eab64SJohn Marino     {
925f2eab64SJohn Marino       DPRINT(("/"));
93*d5f8dde1SJohn Marino       tre_print_tags(tags, num_tags);
94*d5f8dde1SJohn Marino     }
95*d5f8dde1SJohn Marino }
96*d5f8dde1SJohn Marino 
97*d5f8dde1SJohn Marino static void
tre_print_reach(const tre_tnfa_t * tnfa,tre_tnfa_reach_t * reach,int num_tags)98*d5f8dde1SJohn Marino tre_print_reach(const tre_tnfa_t *tnfa, tre_tnfa_reach_t *reach, int num_tags)
995f2eab64SJohn Marino {
100*d5f8dde1SJohn Marino   while (reach->state != NULL)
101*d5f8dde1SJohn Marino     {
102*d5f8dde1SJohn Marino       tre_print_reach1(reach->state, reach->tags, num_tags);
1035f2eab64SJohn Marino       reach++;
1045f2eab64SJohn Marino     }
1055f2eab64SJohn Marino   DPRINT(("\n"));
1065f2eab64SJohn Marino 
1075f2eab64SJohn Marino }
1085f2eab64SJohn Marino #endif /* TRE_DEBUG */
1095f2eab64SJohn Marino 
1105f2eab64SJohn Marino reg_errcode_t
tre_tnfa_run_parallel(const tre_tnfa_t * tnfa,const void * string,int len,tre_str_type_t type,tre_tag_t * match_tags,int eflags,int * match_end_ofs)1115f2eab64SJohn Marino tre_tnfa_run_parallel(const tre_tnfa_t *tnfa, const void *string, int len,
112*d5f8dde1SJohn Marino 		      tre_str_type_t type, tre_tag_t *match_tags, int eflags,
1135f2eab64SJohn Marino 		      int *match_end_ofs)
1145f2eab64SJohn Marino {
1155f2eab64SJohn Marino   /* State variables required by GET_NEXT_WCHAR. */
1165f2eab64SJohn Marino   tre_char_t prev_c = 0, next_c = 0;
1175f2eab64SJohn Marino   const char *str_byte = string;
1185f2eab64SJohn Marino   int pos = -1;
1195f2eab64SJohn Marino   unsigned int pos_add_next = 1;
1205f2eab64SJohn Marino #ifdef TRE_WCHAR
1215f2eab64SJohn Marino   const wchar_t *str_wide = string;
1225f2eab64SJohn Marino #ifdef TRE_MBSTATE
1235f2eab64SJohn Marino   mbstate_t mbstate;
1245f2eab64SJohn Marino #endif /* TRE_MBSTATE */
1255f2eab64SJohn Marino #endif /* TRE_WCHAR */
1265f2eab64SJohn Marino   int reg_notbol = eflags & REG_NOTBOL;
1275f2eab64SJohn Marino   int reg_noteol = eflags & REG_NOTEOL;
1285f2eab64SJohn Marino   int reg_newline = tnfa->cflags & REG_NEWLINE;
1295f2eab64SJohn Marino 
1305f2eab64SJohn Marino   char *buf;
1315f2eab64SJohn Marino   tre_tnfa_transition_t *trans_i;
1325f2eab64SJohn Marino   tre_tnfa_reach_t *reach, *reach_next, *reach_i, *reach_next_i;
1335f2eab64SJohn Marino   tre_reach_pos_t *reach_pos;
1345f2eab64SJohn Marino   int *tag_i;
1355f2eab64SJohn Marino   int num_tags, i;
1365f2eab64SJohn Marino 
1375f2eab64SJohn Marino   int match_eo = -1;	   /* end offset of match (-1 if no match found yet) */
138*d5f8dde1SJohn Marino #ifdef TRE_DEBUG
139*d5f8dde1SJohn Marino   int once;
140*d5f8dde1SJohn Marino #endif /* TRE_DEBUG */
141*d5f8dde1SJohn Marino   tre_tag_t *tmp_tags = NULL;
142*d5f8dde1SJohn Marino   tre_tag_t *tmp_iptr;
143*d5f8dde1SJohn Marino   int tbytes;
144*d5f8dde1SJohn Marino   int touch = 1;
1455f2eab64SJohn Marino 
1465f2eab64SJohn Marino #ifdef TRE_MBSTATE
1475f2eab64SJohn Marino   memset(&mbstate, '\0', sizeof(mbstate));
1485f2eab64SJohn Marino #endif /* TRE_MBSTATE */
1495f2eab64SJohn Marino 
1505f2eab64SJohn Marino   DPRINT(("tre_tnfa_run_parallel, input type %d\n", type));
1515f2eab64SJohn Marino 
1525f2eab64SJohn Marino   if (!match_tags)
1535f2eab64SJohn Marino     num_tags = 0;
1545f2eab64SJohn Marino   else
1555f2eab64SJohn Marino     num_tags = tnfa->num_tags;
1565f2eab64SJohn Marino 
1575f2eab64SJohn Marino   /* Allocate memory for temporary data required for matching.	This needs to
1585f2eab64SJohn Marino      be done for every matching operation to be thread safe.  This allocates
1595f2eab64SJohn Marino      everything in a single large block from the stack frame using alloca()
1605f2eab64SJohn Marino      or with malloc() if alloca is unavailable. */
1615f2eab64SJohn Marino   {
162*d5f8dde1SJohn Marino     int rbytes, pbytes, total_bytes;
1635f2eab64SJohn Marino     char *tmp_buf;
1645f2eab64SJohn Marino     /* Compute the length of the block we need. */
1655f2eab64SJohn Marino     tbytes = sizeof(*tmp_tags) * num_tags;
1665f2eab64SJohn Marino     rbytes = sizeof(*reach_next) * (tnfa->num_states + 1);
1675f2eab64SJohn Marino     pbytes = sizeof(*reach_pos) * tnfa->num_states;
1685f2eab64SJohn Marino     total_bytes =
1695f2eab64SJohn Marino       (sizeof(long) - 1) * 4 /* for alignment paddings */
170*d5f8dde1SJohn Marino       + (rbytes + tbytes * tnfa->num_states) * 2 + tbytes + pbytes;
1715f2eab64SJohn Marino 
172*d5f8dde1SJohn Marino     DPRINT(("tre_tnfa_run_parallel, allocate %d bytes\n", total_bytes));
1735f2eab64SJohn Marino     /* Allocate the memory. */
1745f2eab64SJohn Marino #ifdef TRE_USE_ALLOCA
1755f2eab64SJohn Marino     buf = alloca(total_bytes);
1765f2eab64SJohn Marino #else /* !TRE_USE_ALLOCA */
1775f2eab64SJohn Marino     buf = xmalloc((unsigned)total_bytes);
1785f2eab64SJohn Marino #endif /* !TRE_USE_ALLOCA */
1795f2eab64SJohn Marino     if (buf == NULL)
1805f2eab64SJohn Marino       return REG_ESPACE;
1815f2eab64SJohn Marino     memset(buf, 0, (size_t)total_bytes);
1825f2eab64SJohn Marino 
1835f2eab64SJohn Marino     /* Get the various pointers within tmp_buf (properly aligned). */
1845f2eab64SJohn Marino     tmp_tags = (void *)buf;
1855f2eab64SJohn Marino     tmp_buf = buf + tbytes;
1865f2eab64SJohn Marino     tmp_buf += ALIGN(tmp_buf, long);
1875f2eab64SJohn Marino     reach_next = (void *)tmp_buf;
1885f2eab64SJohn Marino     tmp_buf += rbytes;
1895f2eab64SJohn Marino     tmp_buf += ALIGN(tmp_buf, long);
1905f2eab64SJohn Marino     reach = (void *)tmp_buf;
1915f2eab64SJohn Marino     tmp_buf += rbytes;
1925f2eab64SJohn Marino     tmp_buf += ALIGN(tmp_buf, long);
1935f2eab64SJohn Marino     reach_pos = (void *)tmp_buf;
1945f2eab64SJohn Marino     tmp_buf += pbytes;
1955f2eab64SJohn Marino     tmp_buf += ALIGN(tmp_buf, long);
1965f2eab64SJohn Marino     for (i = 0; i < tnfa->num_states; i++)
1975f2eab64SJohn Marino       {
1985f2eab64SJohn Marino 	reach[i].tags = (void *)tmp_buf;
199*d5f8dde1SJohn Marino 	tmp_buf += tbytes;
2005f2eab64SJohn Marino 	reach_next[i].tags = (void *)tmp_buf;
201*d5f8dde1SJohn Marino 	tmp_buf += tbytes;
2025f2eab64SJohn Marino       }
2035f2eab64SJohn Marino   }
2045f2eab64SJohn Marino 
2055f2eab64SJohn Marino   for (i = 0; i < tnfa->num_states; i++)
2065f2eab64SJohn Marino     reach_pos[i].pos = -1;
2075f2eab64SJohn Marino 
2085f2eab64SJohn Marino   /* If only one character can start a match, find it first. */
209*d5f8dde1SJohn Marino   if (tnfa->first_char >= 0 && str_byte)
2105f2eab64SJohn Marino     {
2115f2eab64SJohn Marino       const char *orig_str = str_byte;
2125f2eab64SJohn Marino       int first = tnfa->first_char;
213*d5f8dde1SJohn Marino       int found_high_bit = 0;
2145f2eab64SJohn Marino 
215*d5f8dde1SJohn Marino 
216*d5f8dde1SJohn Marino       if (type == STR_BYTE)
217*d5f8dde1SJohn Marino 	{
2185f2eab64SJohn Marino 	  if (len >= 0)
2195f2eab64SJohn Marino 	    str_byte = memchr(orig_str, first, (size_t)len);
2205f2eab64SJohn Marino 	  else
2215f2eab64SJohn Marino 	    str_byte = strchr(orig_str, first);
222*d5f8dde1SJohn Marino 	}
223*d5f8dde1SJohn Marino       else if (type == STR_MBS)
224*d5f8dde1SJohn Marino 	{
225*d5f8dde1SJohn Marino 	  /*
226*d5f8dde1SJohn Marino 	   * If the match character is ASCII, try to match the character
227*d5f8dde1SJohn Marino 	   * directly, but if a high bit character is found, we stop there.
228*d5f8dde1SJohn Marino 	   */
229*d5f8dde1SJohn Marino 	  if (first < 0x80)
230*d5f8dde1SJohn Marino 	    {
231*d5f8dde1SJohn Marino 	      if (len >= 0)
232*d5f8dde1SJohn Marino 		{
233*d5f8dde1SJohn Marino 		  int i;
234*d5f8dde1SJohn Marino 		  for (i = 0; ; str_byte++, i++)
235*d5f8dde1SJohn Marino 		    {
236*d5f8dde1SJohn Marino 		      if (i >= len)
237*d5f8dde1SJohn Marino 			{
238*d5f8dde1SJohn Marino 			  str_byte = NULL;
239*d5f8dde1SJohn Marino 			  break;
240*d5f8dde1SJohn Marino 			}
241*d5f8dde1SJohn Marino 		      if (*str_byte == first)
242*d5f8dde1SJohn Marino 			break;
243*d5f8dde1SJohn Marino 		      if (*str_byte & 0x80)
244*d5f8dde1SJohn Marino 			{
245*d5f8dde1SJohn Marino 			  found_high_bit = 1;
246*d5f8dde1SJohn Marino 			  break;
247*d5f8dde1SJohn Marino 			}
248*d5f8dde1SJohn Marino 		    }
249*d5f8dde1SJohn Marino 		}
250*d5f8dde1SJohn Marino 	      else
251*d5f8dde1SJohn Marino 		{
252*d5f8dde1SJohn Marino 		  for (; ; str_byte++)
253*d5f8dde1SJohn Marino 		    {
254*d5f8dde1SJohn Marino 		      if (!*str_byte)
255*d5f8dde1SJohn Marino 			{
256*d5f8dde1SJohn Marino 			  str_byte = NULL;
257*d5f8dde1SJohn Marino 			  break;
258*d5f8dde1SJohn Marino 			}
259*d5f8dde1SJohn Marino 		      if (*str_byte == first)
260*d5f8dde1SJohn Marino 			break;
261*d5f8dde1SJohn Marino 		      if (*str_byte & 0x80)
262*d5f8dde1SJohn Marino 			{
263*d5f8dde1SJohn Marino 			  found_high_bit = 1;
264*d5f8dde1SJohn Marino 			  break;
265*d5f8dde1SJohn Marino 			}
266*d5f8dde1SJohn Marino 		    }
267*d5f8dde1SJohn Marino 		}
268*d5f8dde1SJohn Marino 	    }
269*d5f8dde1SJohn Marino 	  else
270*d5f8dde1SJohn Marino 	    {
271*d5f8dde1SJohn Marino 	      if (len >= 0)
272*d5f8dde1SJohn Marino 		{
273*d5f8dde1SJohn Marino 		  int i;
274*d5f8dde1SJohn Marino 		  for (i = 0; ; str_byte++, i++)
275*d5f8dde1SJohn Marino 		    {
276*d5f8dde1SJohn Marino 		      if (i >= len)
277*d5f8dde1SJohn Marino 			{
278*d5f8dde1SJohn Marino 			  str_byte = NULL;
279*d5f8dde1SJohn Marino 			  break;
280*d5f8dde1SJohn Marino 			}
281*d5f8dde1SJohn Marino 		      if (*str_byte & 0x80)
282*d5f8dde1SJohn Marino 			{
283*d5f8dde1SJohn Marino 			  found_high_bit = 1;
284*d5f8dde1SJohn Marino 			  break;
285*d5f8dde1SJohn Marino 			}
286*d5f8dde1SJohn Marino 		    }
287*d5f8dde1SJohn Marino 		}
288*d5f8dde1SJohn Marino 	      else
289*d5f8dde1SJohn Marino 		{
290*d5f8dde1SJohn Marino 		  for (; ; str_byte++)
291*d5f8dde1SJohn Marino 		    {
292*d5f8dde1SJohn Marino 		      if (!*str_byte)
293*d5f8dde1SJohn Marino 			{
294*d5f8dde1SJohn Marino 			  str_byte = NULL;
295*d5f8dde1SJohn Marino 			  break;
296*d5f8dde1SJohn Marino 			}
297*d5f8dde1SJohn Marino 		      if (*str_byte & 0x80)
298*d5f8dde1SJohn Marino 			{
299*d5f8dde1SJohn Marino 			  found_high_bit = 1;
300*d5f8dde1SJohn Marino 			  break;
301*d5f8dde1SJohn Marino 			}
302*d5f8dde1SJohn Marino 		    }
303*d5f8dde1SJohn Marino 		}
304*d5f8dde1SJohn Marino 	    }
305*d5f8dde1SJohn Marino 	}
3065f2eab64SJohn Marino       if (str_byte == NULL)
3075f2eab64SJohn Marino 	{
3085f2eab64SJohn Marino #ifndef TRE_USE_ALLOCA
3095f2eab64SJohn Marino 	  if (buf)
3105f2eab64SJohn Marino 	    xfree(buf);
3115f2eab64SJohn Marino #endif /* !TRE_USE_ALLOCA */
3125f2eab64SJohn Marino 	  return REG_NOMATCH;
3135f2eab64SJohn Marino 	}
3145f2eab64SJohn Marino       DPRINT(("skipped %lu chars\n", (unsigned long)(str_byte - orig_str)));
315*d5f8dde1SJohn Marino       if (!found_high_bit)
316*d5f8dde1SJohn Marino 	{
3175f2eab64SJohn Marino 	  if (str_byte >= orig_str + 1)
3185f2eab64SJohn Marino 	    prev_c = (unsigned char)*(str_byte - 1);
3195f2eab64SJohn Marino 	  next_c = (unsigned char)*str_byte;
3205f2eab64SJohn Marino 	  pos = str_byte - orig_str;
3215f2eab64SJohn Marino 	  if (len < 0 || pos < len)
3225f2eab64SJohn Marino 	    str_byte++;
3235f2eab64SJohn Marino 	}
3245f2eab64SJohn Marino       else
3255f2eab64SJohn Marino 	{
326*d5f8dde1SJohn Marino 	  if (str_byte == orig_str)
327*d5f8dde1SJohn Marino 	    goto no_first_optimization;
328*d5f8dde1SJohn Marino 	  /*
329*d5f8dde1SJohn Marino 	   * Back up one character, fix up the position, then call
330*d5f8dde1SJohn Marino 	   * GET_NEXT_WCHAR() to process the multibyte character.
331*d5f8dde1SJohn Marino 	   */
332*d5f8dde1SJohn Marino 	  /* no need to set prev_c, since GET_NEXT_WCHAR will overwrite */
333*d5f8dde1SJohn Marino 	  next_c = (unsigned char)*(str_byte - 1);
334*d5f8dde1SJohn Marino 	  pos = (str_byte - 1) - orig_str;
3355f2eab64SJohn Marino 	  GET_NEXT_WCHAR();
3365f2eab64SJohn Marino 	}
3375f2eab64SJohn Marino     }
3385f2eab64SJohn Marino   else
3395f2eab64SJohn Marino     {
340*d5f8dde1SJohn Marino no_first_optimization:
341*d5f8dde1SJohn Marino       GET_NEXT_WCHAR();
342*d5f8dde1SJohn Marino       pos = 0;
3435f2eab64SJohn Marino     }
3445f2eab64SJohn Marino 
3455f2eab64SJohn Marino   DPRINT(("length: %d\n", len));
3465f2eab64SJohn Marino   DPRINT(("pos:chr/code | states and tags\n"));
3475f2eab64SJohn Marino   DPRINT(("-------------+------------------------------------------------\n"));
3485f2eab64SJohn Marino 
3495f2eab64SJohn Marino   reach_next_i = reach_next;
3505f2eab64SJohn Marino   while (/*CONSTCOND*/1)
3515f2eab64SJohn Marino     {
3525f2eab64SJohn Marino       /* If no match found yet, add the initial states to `reach_next'. */
3535f2eab64SJohn Marino       if (match_eo < 0)
3545f2eab64SJohn Marino 	{
3555f2eab64SJohn Marino 	  DPRINT((" init >"));
3565f2eab64SJohn Marino 	  trans_i = tnfa->initial;
3575f2eab64SJohn Marino 	  while (trans_i->state != NULL)
3585f2eab64SJohn Marino 	    {
3595f2eab64SJohn Marino 	      if (reach_pos[trans_i->state_id].pos < pos)
3605f2eab64SJohn Marino 		{
3615f2eab64SJohn Marino 		  if (trans_i->assertions
3625f2eab64SJohn Marino 		      && CHECK_ASSERTIONS(trans_i->assertions))
3635f2eab64SJohn Marino 		    {
3645f2eab64SJohn Marino 		      DPRINT(("assertion failed\n"));
3655f2eab64SJohn Marino 		      trans_i++;
3665f2eab64SJohn Marino 		      continue;
3675f2eab64SJohn Marino 		    }
3685f2eab64SJohn Marino 
3695f2eab64SJohn Marino 		  DPRINT((" %p", (void *)trans_i->state));
3705f2eab64SJohn Marino 		  reach_next_i->state = trans_i->state;
371*d5f8dde1SJohn Marino 		  memset(reach_next_i->tags, 0, tbytes);
3725f2eab64SJohn Marino 		  tag_i = trans_i->tags;
3735f2eab64SJohn Marino 		  if (tag_i)
374*d5f8dde1SJohn Marino 		    {
3755f2eab64SJohn Marino 		      while (*tag_i >= 0)
3765f2eab64SJohn Marino 			{
3775f2eab64SJohn Marino 			  if (*tag_i < num_tags)
378*d5f8dde1SJohn Marino 			    tre_tag_set(reach_next_i->tags, *tag_i, pos, touch);
3795f2eab64SJohn Marino 			  tag_i++;
3805f2eab64SJohn Marino 			}
381*d5f8dde1SJohn Marino 			touch++;
382*d5f8dde1SJohn Marino 		    }
3835f2eab64SJohn Marino 		  if (reach_next_i->state == tnfa->final)
3845f2eab64SJohn Marino 		    {
3855f2eab64SJohn Marino 		      DPRINT(("	 found empty match\n"));
3865f2eab64SJohn Marino 		      match_eo = pos;
387*d5f8dde1SJohn Marino 		      memcpy(match_tags, reach_next_i->tags, tbytes);
3885f2eab64SJohn Marino 		    }
3895f2eab64SJohn Marino 		  reach_pos[trans_i->state_id].pos = pos;
3905f2eab64SJohn Marino 		  reach_pos[trans_i->state_id].tags = &reach_next_i->tags;
3915f2eab64SJohn Marino 		  reach_next_i++;
3925f2eab64SJohn Marino 		}
3935f2eab64SJohn Marino 	      trans_i++;
3945f2eab64SJohn Marino 	    }
3955f2eab64SJohn Marino 	  DPRINT(("\n"));
3965f2eab64SJohn Marino 	  reach_next_i->state = NULL;
3975f2eab64SJohn Marino 	}
3985f2eab64SJohn Marino       else
3995f2eab64SJohn Marino 	{
4005f2eab64SJohn Marino 	  if (num_tags == 0 || reach_next_i == reach_next)
4015f2eab64SJohn Marino 	    /*�We have found a match. */
4025f2eab64SJohn Marino 	    break;
4035f2eab64SJohn Marino 	}
4045f2eab64SJohn Marino 
4055f2eab64SJohn Marino       /* Check for end of string. */
4065f2eab64SJohn Marino       if (len < 0)
4075f2eab64SJohn Marino 	{
408*d5f8dde1SJohn Marino 	  if (next_c == L'\0')
4095f2eab64SJohn Marino 	    break;
4105f2eab64SJohn Marino 	}
4115f2eab64SJohn Marino       else
4125f2eab64SJohn Marino 	{
4135f2eab64SJohn Marino 	  if (pos >= len)
4145f2eab64SJohn Marino 	    break;
4155f2eab64SJohn Marino 	}
4165f2eab64SJohn Marino 
4175f2eab64SJohn Marino       GET_NEXT_WCHAR();
4185f2eab64SJohn Marino 
4195f2eab64SJohn Marino #ifdef TRE_DEBUG
4205f2eab64SJohn Marino       DPRINT(("%3d:%2lc/%05d |", pos - 1, (tre_cint_t)prev_c, (int)prev_c));
4215f2eab64SJohn Marino       tre_print_reach(tnfa, reach_next, num_tags);
4225f2eab64SJohn Marino #endif /* TRE_DEBUG */
4235f2eab64SJohn Marino 
4245f2eab64SJohn Marino       /* Swap `reach' and `reach_next'. */
4255f2eab64SJohn Marino       reach_i = reach;
4265f2eab64SJohn Marino       reach = reach_next;
4275f2eab64SJohn Marino       reach_next = reach_i;
4285f2eab64SJohn Marino 
429*d5f8dde1SJohn Marino #ifdef TRE_DEBUG
430*d5f8dde1SJohn Marino       once = 0;
431*d5f8dde1SJohn Marino #endif /* TRE_DEBUG */
4325f2eab64SJohn Marino 
4335f2eab64SJohn Marino       /* For each state in `reach' see if there is a transition leaving with
4345f2eab64SJohn Marino 	 the current input symbol to a state not yet in `reach_next', and
4355f2eab64SJohn Marino 	 add the destination states to `reach_next'. */
4365f2eab64SJohn Marino       reach_next_i = reach_next;
4375f2eab64SJohn Marino       for (reach_i = reach; reach_i->state; reach_i++)
4385f2eab64SJohn Marino 	{
4395f2eab64SJohn Marino 	  for (trans_i = reach_i->state; trans_i->state; trans_i++)
4405f2eab64SJohn Marino 	    {
4415f2eab64SJohn Marino 	      /* Does this transition match the input symbol? */
4425f2eab64SJohn Marino 	      if (trans_i->code_min <= (tre_cint_t)prev_c &&
4435f2eab64SJohn Marino 		  trans_i->code_max >= (tre_cint_t)prev_c)
4445f2eab64SJohn Marino 		{
4455f2eab64SJohn Marino 		  if (trans_i->assertions
4465f2eab64SJohn Marino 		      && (CHECK_ASSERTIONS(trans_i->assertions)
4475f2eab64SJohn Marino 			  || CHECK_CHAR_CLASSES(trans_i, tnfa, eflags)))
4485f2eab64SJohn Marino 		    {
4495f2eab64SJohn Marino 		      DPRINT(("assertion failed\n"));
4505f2eab64SJohn Marino 		      continue;
4515f2eab64SJohn Marino 		    }
4525f2eab64SJohn Marino 
4535f2eab64SJohn Marino 		  /* Compute the tags after this transition. */
454*d5f8dde1SJohn Marino 		  memcpy(tmp_tags, reach_i->tags, tbytes);
4555f2eab64SJohn Marino 		  tag_i = trans_i->tags;
4565f2eab64SJohn Marino 		  if (tag_i != NULL)
457*d5f8dde1SJohn Marino 		    {
4585f2eab64SJohn Marino 		      while (*tag_i >= 0)
4595f2eab64SJohn Marino 			{
4605f2eab64SJohn Marino 			  if (*tag_i < num_tags)
461*d5f8dde1SJohn Marino 			    tre_tag_set(tmp_tags, *tag_i, pos, touch);
4625f2eab64SJohn Marino 			  tag_i++;
4635f2eab64SJohn Marino 			}
464*d5f8dde1SJohn Marino 			touch++;
465*d5f8dde1SJohn Marino 		    }
466*d5f8dde1SJohn Marino 
467*d5f8dde1SJohn Marino 		  /* For each new transition, weed out those that don't
468*d5f8dde1SJohn Marino 		     fulfill the minimal matching conditions. */
469*d5f8dde1SJohn Marino 		  if (tnfa->num_minimals && match_eo >= 0)
470*d5f8dde1SJohn Marino 		    {
471*d5f8dde1SJohn Marino 		      int skip = 0;
472*d5f8dde1SJohn Marino #ifdef TRE_DEBUG
473*d5f8dde1SJohn Marino 		      if (!once)
474*d5f8dde1SJohn Marino 			{
475*d5f8dde1SJohn Marino 			  DPRINT(("Checking minimal conditions: match_eo=%d "
476*d5f8dde1SJohn Marino 				  "match_tags=", match_eo));
477*d5f8dde1SJohn Marino 			  tre_print_tags(match_tags, num_tags);
478*d5f8dde1SJohn Marino 			  DPRINT(("\n"));
479*d5f8dde1SJohn Marino 			  once++;
480*d5f8dde1SJohn Marino 			}
481*d5f8dde1SJohn Marino #endif /* TRE_DEBUG */
482*d5f8dde1SJohn Marino 		      for (i = 0; tnfa->minimal_tags[i] >= 0; i += 2)
483*d5f8dde1SJohn Marino 			{
484*d5f8dde1SJohn Marino 			   int end = tnfa->minimal_tags[i];
485*d5f8dde1SJohn Marino 			   int start = tnfa->minimal_tags[i + 1];
486*d5f8dde1SJohn Marino 			   DPRINT(("  Minimal start %d, end %d\n", start, end));
487*d5f8dde1SJohn Marino 			   if (tre_minimal_tag_order(start, end, match_tags,
488*d5f8dde1SJohn Marino 						     tmp_tags) > 0)
489*d5f8dde1SJohn Marino 			     {
490*d5f8dde1SJohn Marino 				skip = 1;
491*d5f8dde1SJohn Marino 				break;
492*d5f8dde1SJohn Marino 			     }
493*d5f8dde1SJohn Marino 			}
494*d5f8dde1SJohn Marino 		      if (skip)
495*d5f8dde1SJohn Marino 			{
496*d5f8dde1SJohn Marino #ifdef TRE_DEBUG
497*d5f8dde1SJohn Marino 			   DPRINT(("	 Throwing out"));
498*d5f8dde1SJohn Marino 			   tre_print_reach1(reach_i->state, tmp_tags,
499*d5f8dde1SJohn Marino 					     num_tags);
500*d5f8dde1SJohn Marino 			   DPRINT(("\n"));
501*d5f8dde1SJohn Marino #endif /* TRE_DEBUG */
502*d5f8dde1SJohn Marino 			   continue;
503*d5f8dde1SJohn Marino 			}
504*d5f8dde1SJohn Marino 		    }
5055f2eab64SJohn Marino 
5065f2eab64SJohn Marino 		  if (reach_pos[trans_i->state_id].pos < pos)
5075f2eab64SJohn Marino 		    {
5085f2eab64SJohn Marino 		      /* Found an unvisited node. */
5095f2eab64SJohn Marino 		      reach_next_i->state = trans_i->state;
5105f2eab64SJohn Marino 		      tmp_iptr = reach_next_i->tags;
5115f2eab64SJohn Marino 		      reach_next_i->tags = tmp_tags;
5125f2eab64SJohn Marino 		      tmp_tags = tmp_iptr;
5135f2eab64SJohn Marino 		      reach_pos[trans_i->state_id].pos = pos;
5145f2eab64SJohn Marino 		      reach_pos[trans_i->state_id].tags = &reach_next_i->tags;
5155f2eab64SJohn Marino 
5165f2eab64SJohn Marino 		      if (reach_next_i->state == tnfa->final
5175f2eab64SJohn Marino 			  && (match_eo == -1
5185f2eab64SJohn Marino 			      || (num_tags > 0
519*d5f8dde1SJohn Marino 				  && tre_tag_get(reach_next_i->tags, 0) <=
520*d5f8dde1SJohn Marino 				  tre_tag_get(match_tags, 0))))
5215f2eab64SJohn Marino 			{
522*d5f8dde1SJohn Marino #ifdef TRE_DEBUG
523*d5f8dde1SJohn Marino 			  DPRINT(("  found match"));
524*d5f8dde1SJohn Marino 			  tre_print_reach1(trans_i->state, reach_next_i->tags, num_tags);
525*d5f8dde1SJohn Marino 			  DPRINT(("\n"));
526*d5f8dde1SJohn Marino #endif /* TRE_DEBUG */
5275f2eab64SJohn Marino 			  match_eo = pos;
528*d5f8dde1SJohn Marino 			  memcpy(match_tags, reach_next_i->tags, tbytes);
5295f2eab64SJohn Marino 			}
5305f2eab64SJohn Marino 		      reach_next_i++;
5315f2eab64SJohn Marino 
5325f2eab64SJohn Marino 		    }
5335f2eab64SJohn Marino 		  else
5345f2eab64SJohn Marino 		    {
5355f2eab64SJohn Marino 		      assert(reach_pos[trans_i->state_id].pos == pos);
5365f2eab64SJohn Marino 		      /* Another path has also reached this state.  We choose
5375f2eab64SJohn Marino 			 the winner by examining the tag values for both
5385f2eab64SJohn Marino 			 paths. */
5395f2eab64SJohn Marino 		      if (tre_tag_order(num_tags, tnfa->tag_directions,
5405f2eab64SJohn Marino 					tmp_tags,
5415f2eab64SJohn Marino 					*reach_pos[trans_i->state_id].tags))
5425f2eab64SJohn Marino 			{
5435f2eab64SJohn Marino 			  /* The new path wins. */
5445f2eab64SJohn Marino 			  tmp_iptr = *reach_pos[trans_i->state_id].tags;
5455f2eab64SJohn Marino 			  *reach_pos[trans_i->state_id].tags = tmp_tags;
5465f2eab64SJohn Marino 			  if (trans_i->state == tnfa->final)
5475f2eab64SJohn Marino 			    {
548*d5f8dde1SJohn Marino #ifdef TRE_DEBUG
549*d5f8dde1SJohn Marino 			      DPRINT(("	 found better match"));
550*d5f8dde1SJohn Marino 			      tre_print_reach1(trans_i->state, tmp_tags, num_tags);
551*d5f8dde1SJohn Marino 			      DPRINT(("\n"));
552*d5f8dde1SJohn Marino #endif /* TRE_DEBUG */
5535f2eab64SJohn Marino 			      match_eo = pos;
554*d5f8dde1SJohn Marino 			      memcpy(match_tags, tmp_tags, tbytes);
5555f2eab64SJohn Marino 			    }
5565f2eab64SJohn Marino 			  tmp_tags = tmp_iptr;
5575f2eab64SJohn Marino 			}
5585f2eab64SJohn Marino 		    }
5595f2eab64SJohn Marino 		}
5605f2eab64SJohn Marino 	    }
5615f2eab64SJohn Marino 	}
5625f2eab64SJohn Marino       reach_next_i->state = NULL;
5635f2eab64SJohn Marino     }
5645f2eab64SJohn Marino 
5655f2eab64SJohn Marino   DPRINT(("match end offset = %d\n", match_eo));
5665f2eab64SJohn Marino 
567*d5f8dde1SJohn Marino   *match_end_ofs = match_eo;
568*d5f8dde1SJohn Marino #ifdef TRE_DEBUG
569*d5f8dde1SJohn Marino   if (match_tags)
570*d5f8dde1SJohn Marino     {
571*d5f8dde1SJohn Marino       DPRINT(("Winning tags="));
572*d5f8dde1SJohn Marino       tre_print_tags_all(match_tags, num_tags);
573*d5f8dde1SJohn Marino       DPRINT((" touch=%d\n", touch));
574*d5f8dde1SJohn Marino     }
575*d5f8dde1SJohn Marino #endif /* TRE_DEBUG */
576*d5f8dde1SJohn Marino 
5775f2eab64SJohn Marino #ifndef TRE_USE_ALLOCA
5785f2eab64SJohn Marino   if (buf)
5795f2eab64SJohn Marino     xfree(buf);
5805f2eab64SJohn Marino #endif /* !TRE_USE_ALLOCA */
5815f2eab64SJohn Marino 
5825f2eab64SJohn Marino   return match_eo >= 0 ? REG_OK : REG_NOMATCH;
5835f2eab64SJohn Marino }
5845f2eab64SJohn Marino 
5855f2eab64SJohn Marino /* EOF */
586