15f2eab64SJohn Marino /*
25f2eab64SJohn Marino tre-match-parallel.c - TRE parallel regex matching engine
35f2eab64SJohn Marino
45f2eab64SJohn Marino This software is released under a BSD-style license.
55f2eab64SJohn Marino See the file LICENSE for details and copyright.
65f2eab64SJohn Marino
75f2eab64SJohn Marino */
85f2eab64SJohn Marino
95f2eab64SJohn Marino /*
105f2eab64SJohn Marino This algorithm searches for matches basically by reading characters
115f2eab64SJohn Marino in the searched string one by one, starting at the beginning. All
125f2eab64SJohn Marino matching paths in the TNFA are traversed in parallel. When two or
135f2eab64SJohn Marino more paths reach the same state, exactly one is chosen according to
145f2eab64SJohn Marino tag ordering rules; if returning submatches is not required it does
155f2eab64SJohn Marino not matter which path is chosen.
165f2eab64SJohn Marino
175f2eab64SJohn Marino The worst case time required for finding the leftmost and longest
185f2eab64SJohn Marino match, or determining that there is no match, is always linearly
195f2eab64SJohn Marino dependent on the length of the text being searched.
205f2eab64SJohn Marino
215f2eab64SJohn Marino This algorithm cannot handle TNFAs with back referencing nodes.
225f2eab64SJohn Marino See `tre-match-backtrack.c'.
235f2eab64SJohn Marino */
245f2eab64SJohn Marino
255f2eab64SJohn Marino
265f2eab64SJohn Marino #ifdef HAVE_CONFIG_H
275f2eab64SJohn Marino #include <config.h>
285f2eab64SJohn Marino #endif /* HAVE_CONFIG_H */
295f2eab64SJohn Marino
30*d5f8dde1SJohn Marino /* Unset TRE_USE_ALLOCA to avoid using the stack to hold all the state
31*d5f8dde1SJohn Marino info while running */
32*d5f8dde1SJohn Marino #undef TRE_USE_ALLOCA
33*d5f8dde1SJohn Marino
345f2eab64SJohn Marino #ifdef TRE_USE_ALLOCA
355f2eab64SJohn Marino /* AIX requires this to be the first thing in the file. */
365f2eab64SJohn Marino #ifndef __GNUC__
375f2eab64SJohn Marino # if HAVE_ALLOCA_H
385f2eab64SJohn Marino # include <alloca.h>
395f2eab64SJohn Marino # else
405f2eab64SJohn Marino # ifdef _AIX
415f2eab64SJohn Marino #pragma alloca
425f2eab64SJohn Marino # else
435f2eab64SJohn Marino # ifndef alloca /* predefined by HP cc +Olibcalls */
445f2eab64SJohn Marino char *alloca ();
455f2eab64SJohn Marino # endif
465f2eab64SJohn Marino # endif
475f2eab64SJohn Marino # endif
485f2eab64SJohn Marino #endif
495f2eab64SJohn Marino #endif /* TRE_USE_ALLOCA */
505f2eab64SJohn Marino
515f2eab64SJohn Marino #include <assert.h>
525f2eab64SJohn Marino #include <stdlib.h>
535f2eab64SJohn Marino #include <string.h>
545f2eab64SJohn Marino #ifdef HAVE_WCHAR_H
555f2eab64SJohn Marino #include <wchar.h>
565f2eab64SJohn Marino #endif /* HAVE_WCHAR_H */
575f2eab64SJohn Marino #ifdef HAVE_WCTYPE_H
585f2eab64SJohn Marino #include <wctype.h>
595f2eab64SJohn Marino #endif /* HAVE_WCTYPE_H */
605f2eab64SJohn Marino #ifndef TRE_WCHAR
615f2eab64SJohn Marino #include <ctype.h>
625f2eab64SJohn Marino #endif /* !TRE_WCHAR */
635f2eab64SJohn Marino #ifdef HAVE_MALLOC_H
645f2eab64SJohn Marino #include <malloc.h>
655f2eab64SJohn Marino #endif /* HAVE_MALLOC_H */
665f2eab64SJohn Marino
675f2eab64SJohn Marino #include "tre-internal.h"
685f2eab64SJohn Marino #include "tre-match-utils.h"
695f2eab64SJohn Marino #include "tre.h"
705f2eab64SJohn Marino #include "xmalloc.h"
715f2eab64SJohn Marino
725f2eab64SJohn Marino
735f2eab64SJohn Marino
745f2eab64SJohn Marino typedef struct {
755f2eab64SJohn Marino tre_tnfa_transition_t *state;
76*d5f8dde1SJohn Marino tre_tag_t *tags;
775f2eab64SJohn Marino } tre_tnfa_reach_t;
785f2eab64SJohn Marino
795f2eab64SJohn Marino typedef struct {
805f2eab64SJohn Marino int pos;
81*d5f8dde1SJohn Marino tre_tag_t **tags;
825f2eab64SJohn Marino } tre_reach_pos_t;
835f2eab64SJohn Marino
845f2eab64SJohn Marino
855f2eab64SJohn Marino #ifdef TRE_DEBUG
865f2eab64SJohn Marino static void
tre_print_reach1(tre_tnfa_transition_t * state,tre_tag_t * tags,int num_tags)87*d5f8dde1SJohn Marino tre_print_reach1(tre_tnfa_transition_t *state, tre_tag_t *tags, int num_tags)
885f2eab64SJohn Marino {
89*d5f8dde1SJohn Marino DPRINT((" %p", (void *)state));
905f2eab64SJohn Marino if (num_tags > 0)
915f2eab64SJohn Marino {
925f2eab64SJohn Marino DPRINT(("/"));
93*d5f8dde1SJohn Marino tre_print_tags(tags, num_tags);
94*d5f8dde1SJohn Marino }
95*d5f8dde1SJohn Marino }
96*d5f8dde1SJohn Marino
97*d5f8dde1SJohn Marino static void
tre_print_reach(const tre_tnfa_t * tnfa,tre_tnfa_reach_t * reach,int num_tags)98*d5f8dde1SJohn Marino tre_print_reach(const tre_tnfa_t *tnfa, tre_tnfa_reach_t *reach, int num_tags)
995f2eab64SJohn Marino {
100*d5f8dde1SJohn Marino while (reach->state != NULL)
101*d5f8dde1SJohn Marino {
102*d5f8dde1SJohn Marino tre_print_reach1(reach->state, reach->tags, num_tags);
1035f2eab64SJohn Marino reach++;
1045f2eab64SJohn Marino }
1055f2eab64SJohn Marino DPRINT(("\n"));
1065f2eab64SJohn Marino
1075f2eab64SJohn Marino }
1085f2eab64SJohn Marino #endif /* TRE_DEBUG */
1095f2eab64SJohn Marino
1105f2eab64SJohn Marino reg_errcode_t
tre_tnfa_run_parallel(const tre_tnfa_t * tnfa,const void * string,int len,tre_str_type_t type,tre_tag_t * match_tags,int eflags,int * match_end_ofs)1115f2eab64SJohn Marino tre_tnfa_run_parallel(const tre_tnfa_t *tnfa, const void *string, int len,
112*d5f8dde1SJohn Marino tre_str_type_t type, tre_tag_t *match_tags, int eflags,
1135f2eab64SJohn Marino int *match_end_ofs)
1145f2eab64SJohn Marino {
1155f2eab64SJohn Marino /* State variables required by GET_NEXT_WCHAR. */
1165f2eab64SJohn Marino tre_char_t prev_c = 0, next_c = 0;
1175f2eab64SJohn Marino const char *str_byte = string;
1185f2eab64SJohn Marino int pos = -1;
1195f2eab64SJohn Marino unsigned int pos_add_next = 1;
1205f2eab64SJohn Marino #ifdef TRE_WCHAR
1215f2eab64SJohn Marino const wchar_t *str_wide = string;
1225f2eab64SJohn Marino #ifdef TRE_MBSTATE
1235f2eab64SJohn Marino mbstate_t mbstate;
1245f2eab64SJohn Marino #endif /* TRE_MBSTATE */
1255f2eab64SJohn Marino #endif /* TRE_WCHAR */
1265f2eab64SJohn Marino int reg_notbol = eflags & REG_NOTBOL;
1275f2eab64SJohn Marino int reg_noteol = eflags & REG_NOTEOL;
1285f2eab64SJohn Marino int reg_newline = tnfa->cflags & REG_NEWLINE;
1295f2eab64SJohn Marino
1305f2eab64SJohn Marino char *buf;
1315f2eab64SJohn Marino tre_tnfa_transition_t *trans_i;
1325f2eab64SJohn Marino tre_tnfa_reach_t *reach, *reach_next, *reach_i, *reach_next_i;
1335f2eab64SJohn Marino tre_reach_pos_t *reach_pos;
1345f2eab64SJohn Marino int *tag_i;
1355f2eab64SJohn Marino int num_tags, i;
1365f2eab64SJohn Marino
1375f2eab64SJohn Marino int match_eo = -1; /* end offset of match (-1 if no match found yet) */
138*d5f8dde1SJohn Marino #ifdef TRE_DEBUG
139*d5f8dde1SJohn Marino int once;
140*d5f8dde1SJohn Marino #endif /* TRE_DEBUG */
141*d5f8dde1SJohn Marino tre_tag_t *tmp_tags = NULL;
142*d5f8dde1SJohn Marino tre_tag_t *tmp_iptr;
143*d5f8dde1SJohn Marino int tbytes;
144*d5f8dde1SJohn Marino int touch = 1;
1455f2eab64SJohn Marino
1465f2eab64SJohn Marino #ifdef TRE_MBSTATE
1475f2eab64SJohn Marino memset(&mbstate, '\0', sizeof(mbstate));
1485f2eab64SJohn Marino #endif /* TRE_MBSTATE */
1495f2eab64SJohn Marino
1505f2eab64SJohn Marino DPRINT(("tre_tnfa_run_parallel, input type %d\n", type));
1515f2eab64SJohn Marino
1525f2eab64SJohn Marino if (!match_tags)
1535f2eab64SJohn Marino num_tags = 0;
1545f2eab64SJohn Marino else
1555f2eab64SJohn Marino num_tags = tnfa->num_tags;
1565f2eab64SJohn Marino
1575f2eab64SJohn Marino /* Allocate memory for temporary data required for matching. This needs to
1585f2eab64SJohn Marino be done for every matching operation to be thread safe. This allocates
1595f2eab64SJohn Marino everything in a single large block from the stack frame using alloca()
1605f2eab64SJohn Marino or with malloc() if alloca is unavailable. */
1615f2eab64SJohn Marino {
162*d5f8dde1SJohn Marino int rbytes, pbytes, total_bytes;
1635f2eab64SJohn Marino char *tmp_buf;
1645f2eab64SJohn Marino /* Compute the length of the block we need. */
1655f2eab64SJohn Marino tbytes = sizeof(*tmp_tags) * num_tags;
1665f2eab64SJohn Marino rbytes = sizeof(*reach_next) * (tnfa->num_states + 1);
1675f2eab64SJohn Marino pbytes = sizeof(*reach_pos) * tnfa->num_states;
1685f2eab64SJohn Marino total_bytes =
1695f2eab64SJohn Marino (sizeof(long) - 1) * 4 /* for alignment paddings */
170*d5f8dde1SJohn Marino + (rbytes + tbytes * tnfa->num_states) * 2 + tbytes + pbytes;
1715f2eab64SJohn Marino
172*d5f8dde1SJohn Marino DPRINT(("tre_tnfa_run_parallel, allocate %d bytes\n", total_bytes));
1735f2eab64SJohn Marino /* Allocate the memory. */
1745f2eab64SJohn Marino #ifdef TRE_USE_ALLOCA
1755f2eab64SJohn Marino buf = alloca(total_bytes);
1765f2eab64SJohn Marino #else /* !TRE_USE_ALLOCA */
1775f2eab64SJohn Marino buf = xmalloc((unsigned)total_bytes);
1785f2eab64SJohn Marino #endif /* !TRE_USE_ALLOCA */
1795f2eab64SJohn Marino if (buf == NULL)
1805f2eab64SJohn Marino return REG_ESPACE;
1815f2eab64SJohn Marino memset(buf, 0, (size_t)total_bytes);
1825f2eab64SJohn Marino
1835f2eab64SJohn Marino /* Get the various pointers within tmp_buf (properly aligned). */
1845f2eab64SJohn Marino tmp_tags = (void *)buf;
1855f2eab64SJohn Marino tmp_buf = buf + tbytes;
1865f2eab64SJohn Marino tmp_buf += ALIGN(tmp_buf, long);
1875f2eab64SJohn Marino reach_next = (void *)tmp_buf;
1885f2eab64SJohn Marino tmp_buf += rbytes;
1895f2eab64SJohn Marino tmp_buf += ALIGN(tmp_buf, long);
1905f2eab64SJohn Marino reach = (void *)tmp_buf;
1915f2eab64SJohn Marino tmp_buf += rbytes;
1925f2eab64SJohn Marino tmp_buf += ALIGN(tmp_buf, long);
1935f2eab64SJohn Marino reach_pos = (void *)tmp_buf;
1945f2eab64SJohn Marino tmp_buf += pbytes;
1955f2eab64SJohn Marino tmp_buf += ALIGN(tmp_buf, long);
1965f2eab64SJohn Marino for (i = 0; i < tnfa->num_states; i++)
1975f2eab64SJohn Marino {
1985f2eab64SJohn Marino reach[i].tags = (void *)tmp_buf;
199*d5f8dde1SJohn Marino tmp_buf += tbytes;
2005f2eab64SJohn Marino reach_next[i].tags = (void *)tmp_buf;
201*d5f8dde1SJohn Marino tmp_buf += tbytes;
2025f2eab64SJohn Marino }
2035f2eab64SJohn Marino }
2045f2eab64SJohn Marino
2055f2eab64SJohn Marino for (i = 0; i < tnfa->num_states; i++)
2065f2eab64SJohn Marino reach_pos[i].pos = -1;
2075f2eab64SJohn Marino
2085f2eab64SJohn Marino /* If only one character can start a match, find it first. */
209*d5f8dde1SJohn Marino if (tnfa->first_char >= 0 && str_byte)
2105f2eab64SJohn Marino {
2115f2eab64SJohn Marino const char *orig_str = str_byte;
2125f2eab64SJohn Marino int first = tnfa->first_char;
213*d5f8dde1SJohn Marino int found_high_bit = 0;
2145f2eab64SJohn Marino
215*d5f8dde1SJohn Marino
216*d5f8dde1SJohn Marino if (type == STR_BYTE)
217*d5f8dde1SJohn Marino {
2185f2eab64SJohn Marino if (len >= 0)
2195f2eab64SJohn Marino str_byte = memchr(orig_str, first, (size_t)len);
2205f2eab64SJohn Marino else
2215f2eab64SJohn Marino str_byte = strchr(orig_str, first);
222*d5f8dde1SJohn Marino }
223*d5f8dde1SJohn Marino else if (type == STR_MBS)
224*d5f8dde1SJohn Marino {
225*d5f8dde1SJohn Marino /*
226*d5f8dde1SJohn Marino * If the match character is ASCII, try to match the character
227*d5f8dde1SJohn Marino * directly, but if a high bit character is found, we stop there.
228*d5f8dde1SJohn Marino */
229*d5f8dde1SJohn Marino if (first < 0x80)
230*d5f8dde1SJohn Marino {
231*d5f8dde1SJohn Marino if (len >= 0)
232*d5f8dde1SJohn Marino {
233*d5f8dde1SJohn Marino int i;
234*d5f8dde1SJohn Marino for (i = 0; ; str_byte++, i++)
235*d5f8dde1SJohn Marino {
236*d5f8dde1SJohn Marino if (i >= len)
237*d5f8dde1SJohn Marino {
238*d5f8dde1SJohn Marino str_byte = NULL;
239*d5f8dde1SJohn Marino break;
240*d5f8dde1SJohn Marino }
241*d5f8dde1SJohn Marino if (*str_byte == first)
242*d5f8dde1SJohn Marino break;
243*d5f8dde1SJohn Marino if (*str_byte & 0x80)
244*d5f8dde1SJohn Marino {
245*d5f8dde1SJohn Marino found_high_bit = 1;
246*d5f8dde1SJohn Marino break;
247*d5f8dde1SJohn Marino }
248*d5f8dde1SJohn Marino }
249*d5f8dde1SJohn Marino }
250*d5f8dde1SJohn Marino else
251*d5f8dde1SJohn Marino {
252*d5f8dde1SJohn Marino for (; ; str_byte++)
253*d5f8dde1SJohn Marino {
254*d5f8dde1SJohn Marino if (!*str_byte)
255*d5f8dde1SJohn Marino {
256*d5f8dde1SJohn Marino str_byte = NULL;
257*d5f8dde1SJohn Marino break;
258*d5f8dde1SJohn Marino }
259*d5f8dde1SJohn Marino if (*str_byte == first)
260*d5f8dde1SJohn Marino break;
261*d5f8dde1SJohn Marino if (*str_byte & 0x80)
262*d5f8dde1SJohn Marino {
263*d5f8dde1SJohn Marino found_high_bit = 1;
264*d5f8dde1SJohn Marino break;
265*d5f8dde1SJohn Marino }
266*d5f8dde1SJohn Marino }
267*d5f8dde1SJohn Marino }
268*d5f8dde1SJohn Marino }
269*d5f8dde1SJohn Marino else
270*d5f8dde1SJohn Marino {
271*d5f8dde1SJohn Marino if (len >= 0)
272*d5f8dde1SJohn Marino {
273*d5f8dde1SJohn Marino int i;
274*d5f8dde1SJohn Marino for (i = 0; ; str_byte++, i++)
275*d5f8dde1SJohn Marino {
276*d5f8dde1SJohn Marino if (i >= len)
277*d5f8dde1SJohn Marino {
278*d5f8dde1SJohn Marino str_byte = NULL;
279*d5f8dde1SJohn Marino break;
280*d5f8dde1SJohn Marino }
281*d5f8dde1SJohn Marino if (*str_byte & 0x80)
282*d5f8dde1SJohn Marino {
283*d5f8dde1SJohn Marino found_high_bit = 1;
284*d5f8dde1SJohn Marino break;
285*d5f8dde1SJohn Marino }
286*d5f8dde1SJohn Marino }
287*d5f8dde1SJohn Marino }
288*d5f8dde1SJohn Marino else
289*d5f8dde1SJohn Marino {
290*d5f8dde1SJohn Marino for (; ; str_byte++)
291*d5f8dde1SJohn Marino {
292*d5f8dde1SJohn Marino if (!*str_byte)
293*d5f8dde1SJohn Marino {
294*d5f8dde1SJohn Marino str_byte = NULL;
295*d5f8dde1SJohn Marino break;
296*d5f8dde1SJohn Marino }
297*d5f8dde1SJohn Marino if (*str_byte & 0x80)
298*d5f8dde1SJohn Marino {
299*d5f8dde1SJohn Marino found_high_bit = 1;
300*d5f8dde1SJohn Marino break;
301*d5f8dde1SJohn Marino }
302*d5f8dde1SJohn Marino }
303*d5f8dde1SJohn Marino }
304*d5f8dde1SJohn Marino }
305*d5f8dde1SJohn Marino }
3065f2eab64SJohn Marino if (str_byte == NULL)
3075f2eab64SJohn Marino {
3085f2eab64SJohn Marino #ifndef TRE_USE_ALLOCA
3095f2eab64SJohn Marino if (buf)
3105f2eab64SJohn Marino xfree(buf);
3115f2eab64SJohn Marino #endif /* !TRE_USE_ALLOCA */
3125f2eab64SJohn Marino return REG_NOMATCH;
3135f2eab64SJohn Marino }
3145f2eab64SJohn Marino DPRINT(("skipped %lu chars\n", (unsigned long)(str_byte - orig_str)));
315*d5f8dde1SJohn Marino if (!found_high_bit)
316*d5f8dde1SJohn Marino {
3175f2eab64SJohn Marino if (str_byte >= orig_str + 1)
3185f2eab64SJohn Marino prev_c = (unsigned char)*(str_byte - 1);
3195f2eab64SJohn Marino next_c = (unsigned char)*str_byte;
3205f2eab64SJohn Marino pos = str_byte - orig_str;
3215f2eab64SJohn Marino if (len < 0 || pos < len)
3225f2eab64SJohn Marino str_byte++;
3235f2eab64SJohn Marino }
3245f2eab64SJohn Marino else
3255f2eab64SJohn Marino {
326*d5f8dde1SJohn Marino if (str_byte == orig_str)
327*d5f8dde1SJohn Marino goto no_first_optimization;
328*d5f8dde1SJohn Marino /*
329*d5f8dde1SJohn Marino * Back up one character, fix up the position, then call
330*d5f8dde1SJohn Marino * GET_NEXT_WCHAR() to process the multibyte character.
331*d5f8dde1SJohn Marino */
332*d5f8dde1SJohn Marino /* no need to set prev_c, since GET_NEXT_WCHAR will overwrite */
333*d5f8dde1SJohn Marino next_c = (unsigned char)*(str_byte - 1);
334*d5f8dde1SJohn Marino pos = (str_byte - 1) - orig_str;
3355f2eab64SJohn Marino GET_NEXT_WCHAR();
3365f2eab64SJohn Marino }
3375f2eab64SJohn Marino }
3385f2eab64SJohn Marino else
3395f2eab64SJohn Marino {
340*d5f8dde1SJohn Marino no_first_optimization:
341*d5f8dde1SJohn Marino GET_NEXT_WCHAR();
342*d5f8dde1SJohn Marino pos = 0;
3435f2eab64SJohn Marino }
3445f2eab64SJohn Marino
3455f2eab64SJohn Marino DPRINT(("length: %d\n", len));
3465f2eab64SJohn Marino DPRINT(("pos:chr/code | states and tags\n"));
3475f2eab64SJohn Marino DPRINT(("-------------+------------------------------------------------\n"));
3485f2eab64SJohn Marino
3495f2eab64SJohn Marino reach_next_i = reach_next;
3505f2eab64SJohn Marino while (/*CONSTCOND*/1)
3515f2eab64SJohn Marino {
3525f2eab64SJohn Marino /* If no match found yet, add the initial states to `reach_next'. */
3535f2eab64SJohn Marino if (match_eo < 0)
3545f2eab64SJohn Marino {
3555f2eab64SJohn Marino DPRINT((" init >"));
3565f2eab64SJohn Marino trans_i = tnfa->initial;
3575f2eab64SJohn Marino while (trans_i->state != NULL)
3585f2eab64SJohn Marino {
3595f2eab64SJohn Marino if (reach_pos[trans_i->state_id].pos < pos)
3605f2eab64SJohn Marino {
3615f2eab64SJohn Marino if (trans_i->assertions
3625f2eab64SJohn Marino && CHECK_ASSERTIONS(trans_i->assertions))
3635f2eab64SJohn Marino {
3645f2eab64SJohn Marino DPRINT(("assertion failed\n"));
3655f2eab64SJohn Marino trans_i++;
3665f2eab64SJohn Marino continue;
3675f2eab64SJohn Marino }
3685f2eab64SJohn Marino
3695f2eab64SJohn Marino DPRINT((" %p", (void *)trans_i->state));
3705f2eab64SJohn Marino reach_next_i->state = trans_i->state;
371*d5f8dde1SJohn Marino memset(reach_next_i->tags, 0, tbytes);
3725f2eab64SJohn Marino tag_i = trans_i->tags;
3735f2eab64SJohn Marino if (tag_i)
374*d5f8dde1SJohn Marino {
3755f2eab64SJohn Marino while (*tag_i >= 0)
3765f2eab64SJohn Marino {
3775f2eab64SJohn Marino if (*tag_i < num_tags)
378*d5f8dde1SJohn Marino tre_tag_set(reach_next_i->tags, *tag_i, pos, touch);
3795f2eab64SJohn Marino tag_i++;
3805f2eab64SJohn Marino }
381*d5f8dde1SJohn Marino touch++;
382*d5f8dde1SJohn Marino }
3835f2eab64SJohn Marino if (reach_next_i->state == tnfa->final)
3845f2eab64SJohn Marino {
3855f2eab64SJohn Marino DPRINT((" found empty match\n"));
3865f2eab64SJohn Marino match_eo = pos;
387*d5f8dde1SJohn Marino memcpy(match_tags, reach_next_i->tags, tbytes);
3885f2eab64SJohn Marino }
3895f2eab64SJohn Marino reach_pos[trans_i->state_id].pos = pos;
3905f2eab64SJohn Marino reach_pos[trans_i->state_id].tags = &reach_next_i->tags;
3915f2eab64SJohn Marino reach_next_i++;
3925f2eab64SJohn Marino }
3935f2eab64SJohn Marino trans_i++;
3945f2eab64SJohn Marino }
3955f2eab64SJohn Marino DPRINT(("\n"));
3965f2eab64SJohn Marino reach_next_i->state = NULL;
3975f2eab64SJohn Marino }
3985f2eab64SJohn Marino else
3995f2eab64SJohn Marino {
4005f2eab64SJohn Marino if (num_tags == 0 || reach_next_i == reach_next)
4015f2eab64SJohn Marino /*�We have found a match. */
4025f2eab64SJohn Marino break;
4035f2eab64SJohn Marino }
4045f2eab64SJohn Marino
4055f2eab64SJohn Marino /* Check for end of string. */
4065f2eab64SJohn Marino if (len < 0)
4075f2eab64SJohn Marino {
408*d5f8dde1SJohn Marino if (next_c == L'\0')
4095f2eab64SJohn Marino break;
4105f2eab64SJohn Marino }
4115f2eab64SJohn Marino else
4125f2eab64SJohn Marino {
4135f2eab64SJohn Marino if (pos >= len)
4145f2eab64SJohn Marino break;
4155f2eab64SJohn Marino }
4165f2eab64SJohn Marino
4175f2eab64SJohn Marino GET_NEXT_WCHAR();
4185f2eab64SJohn Marino
4195f2eab64SJohn Marino #ifdef TRE_DEBUG
4205f2eab64SJohn Marino DPRINT(("%3d:%2lc/%05d |", pos - 1, (tre_cint_t)prev_c, (int)prev_c));
4215f2eab64SJohn Marino tre_print_reach(tnfa, reach_next, num_tags);
4225f2eab64SJohn Marino #endif /* TRE_DEBUG */
4235f2eab64SJohn Marino
4245f2eab64SJohn Marino /* Swap `reach' and `reach_next'. */
4255f2eab64SJohn Marino reach_i = reach;
4265f2eab64SJohn Marino reach = reach_next;
4275f2eab64SJohn Marino reach_next = reach_i;
4285f2eab64SJohn Marino
429*d5f8dde1SJohn Marino #ifdef TRE_DEBUG
430*d5f8dde1SJohn Marino once = 0;
431*d5f8dde1SJohn Marino #endif /* TRE_DEBUG */
4325f2eab64SJohn Marino
4335f2eab64SJohn Marino /* For each state in `reach' see if there is a transition leaving with
4345f2eab64SJohn Marino the current input symbol to a state not yet in `reach_next', and
4355f2eab64SJohn Marino add the destination states to `reach_next'. */
4365f2eab64SJohn Marino reach_next_i = reach_next;
4375f2eab64SJohn Marino for (reach_i = reach; reach_i->state; reach_i++)
4385f2eab64SJohn Marino {
4395f2eab64SJohn Marino for (trans_i = reach_i->state; trans_i->state; trans_i++)
4405f2eab64SJohn Marino {
4415f2eab64SJohn Marino /* Does this transition match the input symbol? */
4425f2eab64SJohn Marino if (trans_i->code_min <= (tre_cint_t)prev_c &&
4435f2eab64SJohn Marino trans_i->code_max >= (tre_cint_t)prev_c)
4445f2eab64SJohn Marino {
4455f2eab64SJohn Marino if (trans_i->assertions
4465f2eab64SJohn Marino && (CHECK_ASSERTIONS(trans_i->assertions)
4475f2eab64SJohn Marino || CHECK_CHAR_CLASSES(trans_i, tnfa, eflags)))
4485f2eab64SJohn Marino {
4495f2eab64SJohn Marino DPRINT(("assertion failed\n"));
4505f2eab64SJohn Marino continue;
4515f2eab64SJohn Marino }
4525f2eab64SJohn Marino
4535f2eab64SJohn Marino /* Compute the tags after this transition. */
454*d5f8dde1SJohn Marino memcpy(tmp_tags, reach_i->tags, tbytes);
4555f2eab64SJohn Marino tag_i = trans_i->tags;
4565f2eab64SJohn Marino if (tag_i != NULL)
457*d5f8dde1SJohn Marino {
4585f2eab64SJohn Marino while (*tag_i >= 0)
4595f2eab64SJohn Marino {
4605f2eab64SJohn Marino if (*tag_i < num_tags)
461*d5f8dde1SJohn Marino tre_tag_set(tmp_tags, *tag_i, pos, touch);
4625f2eab64SJohn Marino tag_i++;
4635f2eab64SJohn Marino }
464*d5f8dde1SJohn Marino touch++;
465*d5f8dde1SJohn Marino }
466*d5f8dde1SJohn Marino
467*d5f8dde1SJohn Marino /* For each new transition, weed out those that don't
468*d5f8dde1SJohn Marino fulfill the minimal matching conditions. */
469*d5f8dde1SJohn Marino if (tnfa->num_minimals && match_eo >= 0)
470*d5f8dde1SJohn Marino {
471*d5f8dde1SJohn Marino int skip = 0;
472*d5f8dde1SJohn Marino #ifdef TRE_DEBUG
473*d5f8dde1SJohn Marino if (!once)
474*d5f8dde1SJohn Marino {
475*d5f8dde1SJohn Marino DPRINT(("Checking minimal conditions: match_eo=%d "
476*d5f8dde1SJohn Marino "match_tags=", match_eo));
477*d5f8dde1SJohn Marino tre_print_tags(match_tags, num_tags);
478*d5f8dde1SJohn Marino DPRINT(("\n"));
479*d5f8dde1SJohn Marino once++;
480*d5f8dde1SJohn Marino }
481*d5f8dde1SJohn Marino #endif /* TRE_DEBUG */
482*d5f8dde1SJohn Marino for (i = 0; tnfa->minimal_tags[i] >= 0; i += 2)
483*d5f8dde1SJohn Marino {
484*d5f8dde1SJohn Marino int end = tnfa->minimal_tags[i];
485*d5f8dde1SJohn Marino int start = tnfa->minimal_tags[i + 1];
486*d5f8dde1SJohn Marino DPRINT((" Minimal start %d, end %d\n", start, end));
487*d5f8dde1SJohn Marino if (tre_minimal_tag_order(start, end, match_tags,
488*d5f8dde1SJohn Marino tmp_tags) > 0)
489*d5f8dde1SJohn Marino {
490*d5f8dde1SJohn Marino skip = 1;
491*d5f8dde1SJohn Marino break;
492*d5f8dde1SJohn Marino }
493*d5f8dde1SJohn Marino }
494*d5f8dde1SJohn Marino if (skip)
495*d5f8dde1SJohn Marino {
496*d5f8dde1SJohn Marino #ifdef TRE_DEBUG
497*d5f8dde1SJohn Marino DPRINT((" Throwing out"));
498*d5f8dde1SJohn Marino tre_print_reach1(reach_i->state, tmp_tags,
499*d5f8dde1SJohn Marino num_tags);
500*d5f8dde1SJohn Marino DPRINT(("\n"));
501*d5f8dde1SJohn Marino #endif /* TRE_DEBUG */
502*d5f8dde1SJohn Marino continue;
503*d5f8dde1SJohn Marino }
504*d5f8dde1SJohn Marino }
5055f2eab64SJohn Marino
5065f2eab64SJohn Marino if (reach_pos[trans_i->state_id].pos < pos)
5075f2eab64SJohn Marino {
5085f2eab64SJohn Marino /* Found an unvisited node. */
5095f2eab64SJohn Marino reach_next_i->state = trans_i->state;
5105f2eab64SJohn Marino tmp_iptr = reach_next_i->tags;
5115f2eab64SJohn Marino reach_next_i->tags = tmp_tags;
5125f2eab64SJohn Marino tmp_tags = tmp_iptr;
5135f2eab64SJohn Marino reach_pos[trans_i->state_id].pos = pos;
5145f2eab64SJohn Marino reach_pos[trans_i->state_id].tags = &reach_next_i->tags;
5155f2eab64SJohn Marino
5165f2eab64SJohn Marino if (reach_next_i->state == tnfa->final
5175f2eab64SJohn Marino && (match_eo == -1
5185f2eab64SJohn Marino || (num_tags > 0
519*d5f8dde1SJohn Marino && tre_tag_get(reach_next_i->tags, 0) <=
520*d5f8dde1SJohn Marino tre_tag_get(match_tags, 0))))
5215f2eab64SJohn Marino {
522*d5f8dde1SJohn Marino #ifdef TRE_DEBUG
523*d5f8dde1SJohn Marino DPRINT((" found match"));
524*d5f8dde1SJohn Marino tre_print_reach1(trans_i->state, reach_next_i->tags, num_tags);
525*d5f8dde1SJohn Marino DPRINT(("\n"));
526*d5f8dde1SJohn Marino #endif /* TRE_DEBUG */
5275f2eab64SJohn Marino match_eo = pos;
528*d5f8dde1SJohn Marino memcpy(match_tags, reach_next_i->tags, tbytes);
5295f2eab64SJohn Marino }
5305f2eab64SJohn Marino reach_next_i++;
5315f2eab64SJohn Marino
5325f2eab64SJohn Marino }
5335f2eab64SJohn Marino else
5345f2eab64SJohn Marino {
5355f2eab64SJohn Marino assert(reach_pos[trans_i->state_id].pos == pos);
5365f2eab64SJohn Marino /* Another path has also reached this state. We choose
5375f2eab64SJohn Marino the winner by examining the tag values for both
5385f2eab64SJohn Marino paths. */
5395f2eab64SJohn Marino if (tre_tag_order(num_tags, tnfa->tag_directions,
5405f2eab64SJohn Marino tmp_tags,
5415f2eab64SJohn Marino *reach_pos[trans_i->state_id].tags))
5425f2eab64SJohn Marino {
5435f2eab64SJohn Marino /* The new path wins. */
5445f2eab64SJohn Marino tmp_iptr = *reach_pos[trans_i->state_id].tags;
5455f2eab64SJohn Marino *reach_pos[trans_i->state_id].tags = tmp_tags;
5465f2eab64SJohn Marino if (trans_i->state == tnfa->final)
5475f2eab64SJohn Marino {
548*d5f8dde1SJohn Marino #ifdef TRE_DEBUG
549*d5f8dde1SJohn Marino DPRINT((" found better match"));
550*d5f8dde1SJohn Marino tre_print_reach1(trans_i->state, tmp_tags, num_tags);
551*d5f8dde1SJohn Marino DPRINT(("\n"));
552*d5f8dde1SJohn Marino #endif /* TRE_DEBUG */
5535f2eab64SJohn Marino match_eo = pos;
554*d5f8dde1SJohn Marino memcpy(match_tags, tmp_tags, tbytes);
5555f2eab64SJohn Marino }
5565f2eab64SJohn Marino tmp_tags = tmp_iptr;
5575f2eab64SJohn Marino }
5585f2eab64SJohn Marino }
5595f2eab64SJohn Marino }
5605f2eab64SJohn Marino }
5615f2eab64SJohn Marino }
5625f2eab64SJohn Marino reach_next_i->state = NULL;
5635f2eab64SJohn Marino }
5645f2eab64SJohn Marino
5655f2eab64SJohn Marino DPRINT(("match end offset = %d\n", match_eo));
5665f2eab64SJohn Marino
567*d5f8dde1SJohn Marino *match_end_ofs = match_eo;
568*d5f8dde1SJohn Marino #ifdef TRE_DEBUG
569*d5f8dde1SJohn Marino if (match_tags)
570*d5f8dde1SJohn Marino {
571*d5f8dde1SJohn Marino DPRINT(("Winning tags="));
572*d5f8dde1SJohn Marino tre_print_tags_all(match_tags, num_tags);
573*d5f8dde1SJohn Marino DPRINT((" touch=%d\n", touch));
574*d5f8dde1SJohn Marino }
575*d5f8dde1SJohn Marino #endif /* TRE_DEBUG */
576*d5f8dde1SJohn Marino
5775f2eab64SJohn Marino #ifndef TRE_USE_ALLOCA
5785f2eab64SJohn Marino if (buf)
5795f2eab64SJohn Marino xfree(buf);
5805f2eab64SJohn Marino #endif /* !TRE_USE_ALLOCA */
5815f2eab64SJohn Marino
5825f2eab64SJohn Marino return match_eo >= 0 ? REG_OK : REG_NOMATCH;
5835f2eab64SJohn Marino }
5845f2eab64SJohn Marino
5855f2eab64SJohn Marino /* EOF */
586