15f2eab64SJohn Marino /*
25f2eab64SJohn Marino tre_regexec.c - TRE POSIX compatible matching functions (and more).
35f2eab64SJohn Marino
45f2eab64SJohn Marino This software is released under a BSD-style license.
55f2eab64SJohn Marino See the file LICENSE for details and copyright.
65f2eab64SJohn Marino
75f2eab64SJohn Marino */
85f2eab64SJohn Marino
95f2eab64SJohn Marino #ifdef HAVE_CONFIG_H
105f2eab64SJohn Marino #include <config.h>
115f2eab64SJohn Marino #endif /* HAVE_CONFIG_H */
125f2eab64SJohn Marino
135f2eab64SJohn Marino #ifdef TRE_USE_ALLOCA
145f2eab64SJohn Marino /* AIX requires this to be the first thing in the file. */
155f2eab64SJohn Marino #ifndef __GNUC__
165f2eab64SJohn Marino # if HAVE_ALLOCA_H
175f2eab64SJohn Marino # include <alloca.h>
185f2eab64SJohn Marino # else
195f2eab64SJohn Marino # ifdef _AIX
205f2eab64SJohn Marino #pragma alloca
215f2eab64SJohn Marino # else
225f2eab64SJohn Marino # ifndef alloca /* predefined by HP cc +Olibcalls */
235f2eab64SJohn Marino char *alloca ();
245f2eab64SJohn Marino # endif
255f2eab64SJohn Marino # endif
265f2eab64SJohn Marino # endif
275f2eab64SJohn Marino #endif
285f2eab64SJohn Marino #endif /* TRE_USE_ALLOCA */
295f2eab64SJohn Marino
305f2eab64SJohn Marino #include <assert.h>
315f2eab64SJohn Marino #include <stdlib.h>
325f2eab64SJohn Marino #include <string.h>
335f2eab64SJohn Marino #ifdef HAVE_WCHAR_H
345f2eab64SJohn Marino #include <wchar.h>
355f2eab64SJohn Marino #endif /* HAVE_WCHAR_H */
365f2eab64SJohn Marino #ifdef HAVE_WCTYPE_H
375f2eab64SJohn Marino #include <wctype.h>
385f2eab64SJohn Marino #endif /* HAVE_WCTYPE_H */
395f2eab64SJohn Marino #ifndef TRE_WCHAR
405f2eab64SJohn Marino #include <ctype.h>
415f2eab64SJohn Marino #endif /* !TRE_WCHAR */
425f2eab64SJohn Marino #ifdef HAVE_MALLOC_H
435f2eab64SJohn Marino #include <malloc.h>
445f2eab64SJohn Marino #endif /* HAVE_MALLOC_H */
455f2eab64SJohn Marino #include <limits.h>
465f2eab64SJohn Marino
475f2eab64SJohn Marino #include "tre-internal.h"
48d5f8dde1SJohn Marino #include "tre-match-utils.h"
495f2eab64SJohn Marino #include "tre.h"
505f2eab64SJohn Marino #include "xmalloc.h"
515f2eab64SJohn Marino
525f2eab64SJohn Marino
53d5f8dde1SJohn Marino /* For each tre_last_matched_t in the lm array, find the last matched branch by
54d5f8dde1SJohn Marino comparing the touch value of the cmp_tag's. For all other branches, reset
55d5f8dde1SJohn Marino the corresponding tags. If reset_all is non-zero, reset all tags in all
56d5f8dde1SJohn Marino branches. Recurse into the nested last matched structures, clearing tags as
57d5f8dde1SJohn Marino apprpriate. */
58d5f8dde1SJohn Marino static void
tre_reset_last_matched_branches(tre_tag_t * tags,const tre_last_matched_t * lm,int n,int start_tag,int reset_all)59d5f8dde1SJohn Marino tre_reset_last_matched_branches(tre_tag_t *tags, const tre_last_matched_t *lm,
60d5f8dde1SJohn Marino int n, int start_tag, int reset_all)
61d5f8dde1SJohn Marino {
62d5f8dde1SJohn Marino int max, i, reset;
63d5f8dde1SJohn Marino tre_last_matched_branch_t *b;
64d5f8dde1SJohn Marino
65d5f8dde1SJohn Marino DPRINT(("tre_reset_last_matched_branches: n=%d start_tag=%d reset_all=%d\n",
66d5f8dde1SJohn Marino n, start_tag, reset_all));
67d5f8dde1SJohn Marino for (; n-- > 0; lm++)
68d5f8dde1SJohn Marino {
69d5f8dde1SJohn Marino if (lm->n_branches == 1)
70d5f8dde1SJohn Marino {
71d5f8dde1SJohn Marino b = lm->branches;
72d5f8dde1SJohn Marino if (start_tag > 0)
73d5f8dde1SJohn Marino {
74d5f8dde1SJohn Marino DPRINT((" b->cmp_tag=%d %d <? %d\n", b->cmp_tag,
75d5f8dde1SJohn Marino tre_tag_touch_get(tags, b->cmp_tag),
76d5f8dde1SJohn Marino tre_tag_touch_get(tags, start_tag)));
77d5f8dde1SJohn Marino reset = (reset_all || tre_tag_touch_get(tags, b->cmp_tag) <
78d5f8dde1SJohn Marino tre_tag_touch_get(tags, start_tag));
79d5f8dde1SJohn Marino }
80d5f8dde1SJohn Marino else
81d5f8dde1SJohn Marino reset = 0;
82d5f8dde1SJohn Marino
83d5f8dde1SJohn Marino if (reset)
84d5f8dde1SJohn Marino {
85d5f8dde1SJohn Marino int *t;
86d5f8dde1SJohn Marino
87d5f8dde1SJohn Marino for (i = b->n_tags, t = b->tags; i > 0; i--, t++)
88d5f8dde1SJohn Marino {
89d5f8dde1SJohn Marino DPRINT((" Resetting t%d\n", *t));
90d5f8dde1SJohn Marino tre_tag_reset(tags, *t);
91d5f8dde1SJohn Marino }
92d5f8dde1SJohn Marino }
93d5f8dde1SJohn Marino if (b->n_last_matched > 0)
94d5f8dde1SJohn Marino tre_reset_last_matched_branches(tags, b->last_matched,
95d5f8dde1SJohn Marino b->n_last_matched,
96d5f8dde1SJohn Marino lm->start_tag, reset);
97d5f8dde1SJohn Marino }
98d5f8dde1SJohn Marino else
99d5f8dde1SJohn Marino {
100d5f8dde1SJohn Marino if (!reset_all)
101d5f8dde1SJohn Marino {
102d5f8dde1SJohn Marino #ifdef TRE_DEBUG
103d5f8dde1SJohn Marino int last;
104d5f8dde1SJohn Marino #endif /* TRE_DEBUG */
105d5f8dde1SJohn Marino max = 0;
106d5f8dde1SJohn Marino for (i = lm->n_branches, b = lm->branches; i > 0; i--, b++)
107d5f8dde1SJohn Marino {
108d5f8dde1SJohn Marino int t = b->cmp_tag;
109d5f8dde1SJohn Marino int touch = tre_tag_touch_get(tags, t);
110d5f8dde1SJohn Marino if (touch > max)
111d5f8dde1SJohn Marino {
112d5f8dde1SJohn Marino max = touch;
113d5f8dde1SJohn Marino #ifdef TRE_DEBUG
114d5f8dde1SJohn Marino last = t;
115d5f8dde1SJohn Marino #endif /* TRE_DEBUG */
116d5f8dde1SJohn Marino }
117d5f8dde1SJohn Marino }
118d5f8dde1SJohn Marino DPRINT((" Last touched end tag t%d=%d\n", last, max));
119d5f8dde1SJohn Marino }
120d5f8dde1SJohn Marino
121d5f8dde1SJohn Marino for (i = lm->n_branches, b = lm->branches; i > 0; i--, b++)
122d5f8dde1SJohn Marino {
123d5f8dde1SJohn Marino reset = (reset_all || tre_tag_touch_get(tags, b->cmp_tag) < max);
124d5f8dde1SJohn Marino if (reset)
125d5f8dde1SJohn Marino {
126d5f8dde1SJohn Marino int j;
127d5f8dde1SJohn Marino int *t;
128d5f8dde1SJohn Marino
129d5f8dde1SJohn Marino for (j = b->n_tags, t = b->tags; j > 0; j--, t++)
130d5f8dde1SJohn Marino {
131d5f8dde1SJohn Marino DPRINT((" Resetting t%d\n", *t));
132d5f8dde1SJohn Marino tre_tag_reset(tags, *t);
133d5f8dde1SJohn Marino }
134d5f8dde1SJohn Marino }
135d5f8dde1SJohn Marino if (b->n_last_matched > 0)
136d5f8dde1SJohn Marino tre_reset_last_matched_branches(tags, b->last_matched,
137d5f8dde1SJohn Marino b->n_last_matched,
138d5f8dde1SJohn Marino lm->start_tag, reset);
139d5f8dde1SJohn Marino }
140d5f8dde1SJohn Marino }
141d5f8dde1SJohn Marino }
142d5f8dde1SJohn Marino }
143d5f8dde1SJohn Marino
144d5f8dde1SJohn Marino
1455f2eab64SJohn Marino /* Fills the POSIX.2 regmatch_t array according to the TNFA tag and match
1465f2eab64SJohn Marino endpoint values. */
147d5f8dde1SJohn Marino reg_errcode_t
tre_fill_pmatch(size_t nmatch,regmatch_t pmatch[],int cflags,const tre_tnfa_t * tnfa,const tre_tag_t * intags,int match_eo)1485f2eab64SJohn Marino tre_fill_pmatch(size_t nmatch, regmatch_t pmatch[], int cflags,
149d5f8dde1SJohn Marino const tre_tnfa_t *tnfa, const tre_tag_t *intags, int match_eo)
1505f2eab64SJohn Marino {
151d5f8dde1SJohn Marino unsigned int i;
152d5f8dde1SJohn Marino
153d5f8dde1SJohn Marino if (cflags & REG_NOSUB) return REG_OK;
1545f2eab64SJohn Marino
1555f2eab64SJohn Marino i = 0;
156d5f8dde1SJohn Marino if (match_eo >= 0 && intags)
1575f2eab64SJohn Marino {
158d5f8dde1SJohn Marino const tre_tag_t *tags = intags;
159d5f8dde1SJohn Marino tre_submatch_data_t *submatch_data;
160d5f8dde1SJohn Marino
161d5f8dde1SJohn Marino if (tnfa->last_matched_branch &&
162d5f8dde1SJohn Marino tnfa->last_matched_branch->n_last_matched > 0)
163d5f8dde1SJohn Marino {
164d5f8dde1SJohn Marino tre_tag_t *t;
165d5f8dde1SJohn Marino #ifdef TRE_USE_ALLOCA
166d5f8dde1SJohn Marino t = alloca(sizeof(*t) * tnfa->num_tags);
167d5f8dde1SJohn Marino #else /* !TRE_USE_ALLOCA */
168d5f8dde1SJohn Marino t = xmalloc(sizeof(*t) * tnfa->num_tags);
169d5f8dde1SJohn Marino #endif /* !TRE_USE_ALLOCA */
170d5f8dde1SJohn Marino if (!t) return REG_ESPACE;
171d5f8dde1SJohn Marino memcpy(t, intags, tnfa->num_tags * sizeof(tre_tag_t));
172d5f8dde1SJohn Marino tre_reset_last_matched_branches(t,
173d5f8dde1SJohn Marino tnfa->last_matched_branch->last_matched,
174d5f8dde1SJohn Marino tnfa->last_matched_branch->n_last_matched,
175d5f8dde1SJohn Marino 0, 0);
176d5f8dde1SJohn Marino tags = t;
177d5f8dde1SJohn Marino }
1785f2eab64SJohn Marino /* Construct submatch offsets from the tags. */
1795f2eab64SJohn Marino DPRINT(("end tag = t%d = %d\n", tnfa->end_tag, match_eo));
1805f2eab64SJohn Marino submatch_data = tnfa->submatch_data;
1815f2eab64SJohn Marino while (i < tnfa->num_submatches && i < nmatch)
1825f2eab64SJohn Marino {
1835f2eab64SJohn Marino if (submatch_data[i].so_tag == tnfa->end_tag)
1845f2eab64SJohn Marino pmatch[i].rm_so = match_eo;
1855f2eab64SJohn Marino else
186d5f8dde1SJohn Marino pmatch[i].rm_so = tre_tag_get(tags, submatch_data[i].so_tag);
1875f2eab64SJohn Marino
1885f2eab64SJohn Marino if (submatch_data[i].eo_tag == tnfa->end_tag)
1895f2eab64SJohn Marino pmatch[i].rm_eo = match_eo;
1905f2eab64SJohn Marino else
191d5f8dde1SJohn Marino pmatch[i].rm_eo = tre_tag_get(tags, submatch_data[i].eo_tag);
1925f2eab64SJohn Marino
1935f2eab64SJohn Marino /* If either of the endpoints were not used, this submatch
1945f2eab64SJohn Marino was not part of the match. */
1955f2eab64SJohn Marino if (pmatch[i].rm_so == -1 || pmatch[i].rm_eo == -1)
1965f2eab64SJohn Marino pmatch[i].rm_so = pmatch[i].rm_eo = -1;
1975f2eab64SJohn Marino
198d5f8dde1SJohn Marino DPRINT(("pmatch[%d] = {t%d = %qd, t%d = %qd}\n", i,
1995f2eab64SJohn Marino submatch_data[i].so_tag, pmatch[i].rm_so,
2005f2eab64SJohn Marino submatch_data[i].eo_tag, pmatch[i].rm_eo));
2015f2eab64SJohn Marino i++;
2025f2eab64SJohn Marino }
203d5f8dde1SJohn Marino #ifndef TRE_USE_ALLOCA
204b4603dcfSzrj if (tags != intags) xfree(__DECONST(tre_tag_t *,tags));
205d5f8dde1SJohn Marino #endif /* !TRE_USE_ALLOCA */
2065f2eab64SJohn Marino }
2075f2eab64SJohn Marino
2085f2eab64SJohn Marino while (i < nmatch)
2095f2eab64SJohn Marino {
2105f2eab64SJohn Marino pmatch[i].rm_so = -1;
2115f2eab64SJohn Marino pmatch[i].rm_eo = -1;
2125f2eab64SJohn Marino i++;
2135f2eab64SJohn Marino }
214d5f8dde1SJohn Marino
215d5f8dde1SJohn Marino return REG_OK;
2165f2eab64SJohn Marino }
2175f2eab64SJohn Marino
2185f2eab64SJohn Marino
2195f2eab64SJohn Marino /*
2205f2eab64SJohn Marino Wrapper functions for POSIX compatible regexp matching.
2215f2eab64SJohn Marino */
2225f2eab64SJohn Marino
2235f2eab64SJohn Marino int
tre_have_backrefs(const regex_t * preg)2245f2eab64SJohn Marino tre_have_backrefs(const regex_t *preg)
2255f2eab64SJohn Marino {
2265f2eab64SJohn Marino tre_tnfa_t *tnfa = (void *)preg->TRE_REGEX_T_FIELD;
2275f2eab64SJohn Marino return tnfa->have_backrefs;
2285f2eab64SJohn Marino }
2295f2eab64SJohn Marino
230d5f8dde1SJohn Marino #ifdef TRE_APPROX
2315f2eab64SJohn Marino int
tre_have_approx(const regex_t * preg)2325f2eab64SJohn Marino tre_have_approx(const regex_t *preg)
2335f2eab64SJohn Marino {
2345f2eab64SJohn Marino tre_tnfa_t *tnfa = (void *)preg->TRE_REGEX_T_FIELD;
2355f2eab64SJohn Marino return tnfa->have_approx;
2365f2eab64SJohn Marino }
237d5f8dde1SJohn Marino #endif /* TRE_APPROX */
2385f2eab64SJohn Marino
2395f2eab64SJohn Marino static int
tre_match(const tre_tnfa_t * tnfa,const void * string,size_t len,tre_str_type_t type,size_t nmatch,regmatch_t pmatch[],int eflags)2405f2eab64SJohn Marino tre_match(const tre_tnfa_t *tnfa, const void *string, size_t len,
2415f2eab64SJohn Marino tre_str_type_t type, size_t nmatch, regmatch_t pmatch[],
2425f2eab64SJohn Marino int eflags)
2435f2eab64SJohn Marino {
2445f2eab64SJohn Marino reg_errcode_t status;
245d5f8dde1SJohn Marino tre_tag_t *tags = NULL;
246d5f8dde1SJohn Marino int eo;
247d5f8dde1SJohn Marino size_t offset = 0, count = 0;
2485f2eab64SJohn Marino if (tnfa->num_tags > 0 && nmatch > 0)
2495f2eab64SJohn Marino {
2505f2eab64SJohn Marino #ifdef TRE_USE_ALLOCA
2515f2eab64SJohn Marino tags = alloca(sizeof(*tags) * tnfa->num_tags);
2525f2eab64SJohn Marino #else /* !TRE_USE_ALLOCA */
2535f2eab64SJohn Marino tags = xmalloc(sizeof(*tags) * tnfa->num_tags);
2545f2eab64SJohn Marino #endif /* !TRE_USE_ALLOCA */
2555f2eab64SJohn Marino if (tags == NULL)
2565f2eab64SJohn Marino return REG_ESPACE;
2575f2eab64SJohn Marino }
2585f2eab64SJohn Marino
259d5f8dde1SJohn Marino if (
260d5f8dde1SJohn Marino (eflags & REG_STARTEND) && pmatch)
261d5f8dde1SJohn Marino {
262d5f8dde1SJohn Marino if (pmatch->rm_so < 0)
263d5f8dde1SJohn Marino return REG_INVARG;
264d5f8dde1SJohn Marino if (len == (size_t)-1)
265d5f8dde1SJohn Marino {
266d5f8dde1SJohn Marino if (pmatch->rm_eo < 0 || pmatch->rm_so > pmatch->rm_eo)
267d5f8dde1SJohn Marino return REG_INVARG;
268d5f8dde1SJohn Marino len = pmatch->rm_eo - pmatch->rm_so;
269d5f8dde1SJohn Marino }
270d5f8dde1SJohn Marino count = offset = pmatch->rm_so;
271d5f8dde1SJohn Marino if (type == STR_WIDE) offset *= sizeof(wchar_t);
272d5f8dde1SJohn Marino }
273d5f8dde1SJohn Marino
2745f2eab64SJohn Marino /* Dispatch to the appropriate matcher. */
2755f2eab64SJohn Marino if (tnfa->have_backrefs || eflags & REG_BACKTRACKING_MATCHER)
2765f2eab64SJohn Marino {
2775f2eab64SJohn Marino /* The regex has back references, use the backtracking matcher. */
278*6f872551SSascha Wildner status = tre_tnfa_run_backtrack(tnfa, (const char *)string + offset, (int)len, type,
2795f2eab64SJohn Marino tags, eflags, &eo);
2805f2eab64SJohn Marino }
2815f2eab64SJohn Marino #ifdef TRE_APPROX
2825f2eab64SJohn Marino else if (tnfa->have_approx || eflags & REG_APPROX_MATCHER)
2835f2eab64SJohn Marino {
2845f2eab64SJohn Marino /* The regex uses approximate matching, use the approximate matcher. */
2855f2eab64SJohn Marino regamatch_t match;
2865f2eab64SJohn Marino regaparams_t params;
2875f2eab64SJohn Marino tre_regaparams_default(¶ms);
2885f2eab64SJohn Marino params.max_err = 0;
2895f2eab64SJohn Marino params.max_cost = 0;
290d5f8dde1SJohn Marino status = tre_tnfa_run_approx(tnfa, string + offset, (int)len, type, tags,
2915f2eab64SJohn Marino &match, params, eflags, &eo);
2925f2eab64SJohn Marino }
2935f2eab64SJohn Marino #endif /* TRE_APPROX */
2945f2eab64SJohn Marino else
2955f2eab64SJohn Marino {
2965f2eab64SJohn Marino /* Exact matching, no back references, use the parallel matcher. */
297*6f872551SSascha Wildner status = tre_tnfa_run_parallel(tnfa, (const char *)string + offset, (int)len, type,
2985f2eab64SJohn Marino tags, eflags, &eo);
2995f2eab64SJohn Marino }
3005f2eab64SJohn Marino
3015f2eab64SJohn Marino if (status == REG_OK)
302d5f8dde1SJohn Marino {
3035f2eab64SJohn Marino /* A match was found, so fill the submatch registers. */
304d5f8dde1SJohn Marino status = tre_fill_pmatch(nmatch, pmatch, tnfa->cflags, tnfa, tags, eo);
305d5f8dde1SJohn Marino /* If doing REG_STARTEND, adjust the pmatch array (we can't build
306d5f8dde1SJohn Marino this into tre_fill_pmatch, because tre_tnfa_run_backtrack calls
307d5f8dde1SJohn Marino tre_fill_pmatch itself). */
308d5f8dde1SJohn Marino if (status == REG_OK && !(tnfa->cflags & REG_NOSUB) &&
309d5f8dde1SJohn Marino (eflags & REG_STARTEND) && pmatch && nmatch > 0)
310d5f8dde1SJohn Marino {
311d5f8dde1SJohn Marino size_t i;
312d5f8dde1SJohn Marino regmatch_t *p;
313d5f8dde1SJohn Marino for (i = nmatch, p = pmatch; i > 0; p++, i--)
314d5f8dde1SJohn Marino {
315d5f8dde1SJohn Marino if (p->rm_so >= 0) p->rm_so += count;
316d5f8dde1SJohn Marino if (p->rm_eo >= 0) p->rm_eo += count;
317d5f8dde1SJohn Marino }
318d5f8dde1SJohn Marino }
319d5f8dde1SJohn Marino }
3205f2eab64SJohn Marino #ifndef TRE_USE_ALLOCA
3215f2eab64SJohn Marino if (tags)
3225f2eab64SJohn Marino xfree(tags);
3235f2eab64SJohn Marino #endif /* !TRE_USE_ALLOCA */
3245f2eab64SJohn Marino return status;
3255f2eab64SJohn Marino }
3265f2eab64SJohn Marino
3275f2eab64SJohn Marino int
tre_regnexec(const regex_t * preg,const char * str,size_t len,size_t nmatch,regmatch_t pmatch[],int eflags)3285f2eab64SJohn Marino tre_regnexec(const regex_t *preg, const char *str, size_t len,
3295f2eab64SJohn Marino size_t nmatch, regmatch_t pmatch[], int eflags)
3305f2eab64SJohn Marino {
3315f2eab64SJohn Marino tre_tnfa_t *tnfa = (void *)preg->TRE_REGEX_T_FIELD;
332d5f8dde1SJohn Marino tre_str_type_t type = (TRE_MB_CUR_MAX_L(tnfa->loc) == 1) ? STR_BYTE : STR_MBS;
333d5f8dde1SJohn Marino
334d5f8dde1SJohn Marino #ifdef TRE_USE_SYSTEM_REGEX_H
335d5f8dde1SJohn Marino if (preg->re_magic != RE_MAGIC) return REG_BADPAT;
336d5f8dde1SJohn Marino #endif /* TRE_USE_SYSTEM_REGEX_H */
3375f2eab64SJohn Marino
3385f2eab64SJohn Marino return tre_match(tnfa, str, len, type, nmatch, pmatch, eflags);
3395f2eab64SJohn Marino }
3405f2eab64SJohn Marino
3415f2eab64SJohn Marino int
tre_regexec(const regex_t * __restrict preg,const char * __restrict str,size_t nmatch,regmatch_t pmatch[__restrict_arr],int eflags)342d33005aaSSascha Wildner tre_regexec(const regex_t * __restrict preg, const char * __restrict str,
343d33005aaSSascha Wildner size_t nmatch, regmatch_t pmatch[__restrict_arr], int eflags)
3445f2eab64SJohn Marino {
345d5f8dde1SJohn Marino return tre_regnexec(preg, str, (size_t)-1, nmatch, pmatch, eflags);
3465f2eab64SJohn Marino }
3475f2eab64SJohn Marino
3485f2eab64SJohn Marino
3495f2eab64SJohn Marino #ifdef TRE_WCHAR
3505f2eab64SJohn Marino
3515f2eab64SJohn Marino int
tre_regwnexec(const regex_t * preg,const wchar_t * str,size_t len,size_t nmatch,regmatch_t pmatch[],int eflags)3525f2eab64SJohn Marino tre_regwnexec(const regex_t *preg, const wchar_t *str, size_t len,
3535f2eab64SJohn Marino size_t nmatch, regmatch_t pmatch[], int eflags)
3545f2eab64SJohn Marino {
3555f2eab64SJohn Marino tre_tnfa_t *tnfa = (void *)preg->TRE_REGEX_T_FIELD;
356d5f8dde1SJohn Marino
357d5f8dde1SJohn Marino #ifdef TRE_USE_SYSTEM_REGEX_H
358d5f8dde1SJohn Marino if (preg->re_magic != RE_MAGIC) return REG_BADPAT;
359d5f8dde1SJohn Marino #endif /* TRE_USE_SYSTEM_REGEX_H */
360d5f8dde1SJohn Marino
3615f2eab64SJohn Marino return tre_match(tnfa, str, len, STR_WIDE, nmatch, pmatch, eflags);
3625f2eab64SJohn Marino }
3635f2eab64SJohn Marino
3645f2eab64SJohn Marino int
tre_regwexec(const regex_t * preg,const wchar_t * str,size_t nmatch,regmatch_t pmatch[],int eflags)3655f2eab64SJohn Marino tre_regwexec(const regex_t *preg, const wchar_t *str,
3665f2eab64SJohn Marino size_t nmatch, regmatch_t pmatch[], int eflags)
3675f2eab64SJohn Marino {
368d5f8dde1SJohn Marino return tre_regwnexec(preg, str, (size_t)-1, nmatch, pmatch, eflags);
3695f2eab64SJohn Marino }
3705f2eab64SJohn Marino
3715f2eab64SJohn Marino #endif /* TRE_WCHAR */
3725f2eab64SJohn Marino
3735f2eab64SJohn Marino #ifdef TRE_APPROX
3745f2eab64SJohn Marino
3755f2eab64SJohn Marino /*
3765f2eab64SJohn Marino Wrapper functions for approximate regexp matching.
3775f2eab64SJohn Marino */
3785f2eab64SJohn Marino
3795f2eab64SJohn Marino static int
tre_match_approx(const tre_tnfa_t * tnfa,const void * string,size_t len,tre_str_type_t type,regamatch_t * match,regaparams_t params,int eflags)3805f2eab64SJohn Marino tre_match_approx(const tre_tnfa_t *tnfa, const void *string, size_t len,
3815f2eab64SJohn Marino tre_str_type_t type, regamatch_t *match, regaparams_t params,
3825f2eab64SJohn Marino int eflags)
3835f2eab64SJohn Marino {
3845f2eab64SJohn Marino reg_errcode_t status;
385d5f8dde1SJohn Marino tre_tag_t *tags = NULL;
386d5f8dde1SJohn Marino int eo;
387d5f8dde1SJohn Marino size_t offset = 0, count = 0;
3885f2eab64SJohn Marino
3895f2eab64SJohn Marino /* If the regexp does not use approximate matching features, the
3905f2eab64SJohn Marino maximum cost is zero, and the approximate matcher isn't forced,
3915f2eab64SJohn Marino use the exact matcher instead. */
3925f2eab64SJohn Marino if (params.max_cost == 0 && !tnfa->have_approx
3935f2eab64SJohn Marino && !(eflags & REG_APPROX_MATCHER))
3945f2eab64SJohn Marino return tre_match(tnfa, string, len, type, match->nmatch, match->pmatch,
3955f2eab64SJohn Marino eflags);
3965f2eab64SJohn Marino
3975f2eab64SJohn Marino /* Back references are not supported by the approximate matcher. */
3985f2eab64SJohn Marino if (tnfa->have_backrefs)
3995f2eab64SJohn Marino return REG_BADPAT;
4005f2eab64SJohn Marino
4015f2eab64SJohn Marino if (tnfa->num_tags > 0 && match->nmatch > 0)
4025f2eab64SJohn Marino {
4035f2eab64SJohn Marino #if TRE_USE_ALLOCA
4045f2eab64SJohn Marino tags = alloca(sizeof(*tags) * tnfa->num_tags);
4055f2eab64SJohn Marino #else /* !TRE_USE_ALLOCA */
4065f2eab64SJohn Marino tags = xmalloc(sizeof(*tags) * tnfa->num_tags);
4075f2eab64SJohn Marino #endif /* !TRE_USE_ALLOCA */
4085f2eab64SJohn Marino if (tags == NULL)
4095f2eab64SJohn Marino return REG_ESPACE;
4105f2eab64SJohn Marino }
411d5f8dde1SJohn Marino
412d5f8dde1SJohn Marino if (
413d5f8dde1SJohn Marino (eflags & REG_STARTEND) && match->pmatch)
414d5f8dde1SJohn Marino {
415d5f8dde1SJohn Marino if (match->pmatch->rm_so < 0)
416d5f8dde1SJohn Marino return REG_INVARG;
417d5f8dde1SJohn Marino if (len == (size_t)-1)
418d5f8dde1SJohn Marino {
419d5f8dde1SJohn Marino if (match->pmatch->rm_eo < 0 || match->pmatch->rm_so >
420d5f8dde1SJohn Marino match->pmatch->rm_eo)
421d5f8dde1SJohn Marino return REG_INVARG;
422d5f8dde1SJohn Marino len = match->pmatch->rm_eo - match->pmatch->rm_so;
423d5f8dde1SJohn Marino }
424d5f8dde1SJohn Marino count = offset = match->pmatch->rm_so;
425d5f8dde1SJohn Marino if (type == STR_WIDE) offset *= sizeof(wchar_t);
426d5f8dde1SJohn Marino }
427d5f8dde1SJohn Marino
4285f2eab64SJohn Marino status = tre_tnfa_run_approx(tnfa, string, (int)len, type, tags,
4295f2eab64SJohn Marino match, params, eflags, &eo);
4305f2eab64SJohn Marino if (status == REG_OK)
431d5f8dde1SJohn Marino {
432d5f8dde1SJohn Marino status = tre_fill_pmatch(match->nmatch, match->pmatch, tnfa->cflags,
433d5f8dde1SJohn Marino tnfa, tags, eo);
434d5f8dde1SJohn Marino /* If doing REG_STARTEND, adjust the pmatch array (we can't build
435d5f8dde1SJohn Marino this into tre_fill_pmatch, because tre_tnfa_run_backtrack call
436d5f8dde1SJohn Marino tre_fill_pmatch itself). */
437d5f8dde1SJohn Marino if (status == REG_OK && !(tnfa->cflags & REG_NOSUB) &&
438d5f8dde1SJohn Marino (eflags & REG_STARTEND) && match->pmatch && match->nmatch > 0)
439d5f8dde1SJohn Marino {
440d5f8dde1SJohn Marino size_t i;
441d5f8dde1SJohn Marino regmatch_t *p;
442d5f8dde1SJohn Marino for (i = match->nmatch, p = match->pmatch; i > 0; p++, i--)
443d5f8dde1SJohn Marino {
444d5f8dde1SJohn Marino if (p->rm_so >= 0) p->rm_so += count;
445d5f8dde1SJohn Marino if (p->rm_eo >= 0) p->rm_eo += count;
446d5f8dde1SJohn Marino }
447d5f8dde1SJohn Marino }
448d5f8dde1SJohn Marino }
4495f2eab64SJohn Marino #ifndef TRE_USE_ALLOCA
4505f2eab64SJohn Marino if (tags)
4515f2eab64SJohn Marino xfree(tags);
4525f2eab64SJohn Marino #endif /* !TRE_USE_ALLOCA */
4535f2eab64SJohn Marino return status;
4545f2eab64SJohn Marino }
4555f2eab64SJohn Marino
4565f2eab64SJohn Marino int
tre_reganexec(const regex_t * preg,const char * str,size_t len,regamatch_t * match,regaparams_t params,int eflags)4575f2eab64SJohn Marino tre_reganexec(const regex_t *preg, const char *str, size_t len,
4585f2eab64SJohn Marino regamatch_t *match, regaparams_t params, int eflags)
4595f2eab64SJohn Marino {
4605f2eab64SJohn Marino tre_tnfa_t *tnfa = (void *)preg->TRE_REGEX_T_FIELD;
461d5f8dde1SJohn Marino tre_str_type_t type = (TRE_MB_CUR_MAX_L(tnfa->loc) == 1) ? STR_BYTE : STR_MBS;
462d5f8dde1SJohn Marino
463d5f8dde1SJohn Marino #ifdef TRE_USE_SYSTEM_REGEX_H
464d5f8dde1SJohn Marino if (preg->re_magic != RE_MAGIC) return REG_BADPAT;
465d5f8dde1SJohn Marino #endif /* TRE_USE_SYSTEM_REGEX_H */
4665f2eab64SJohn Marino
4675f2eab64SJohn Marino return tre_match_approx(tnfa, str, len, type, match, params, eflags);
4685f2eab64SJohn Marino }
4695f2eab64SJohn Marino
4705f2eab64SJohn Marino int
tre_regaexec(const regex_t * preg,const char * str,regamatch_t * match,regaparams_t params,int eflags)4715f2eab64SJohn Marino tre_regaexec(const regex_t *preg, const char *str,
4725f2eab64SJohn Marino regamatch_t *match, regaparams_t params, int eflags)
4735f2eab64SJohn Marino {
474d5f8dde1SJohn Marino return tre_reganexec(preg, str, (size_t)-1, match, params, eflags);
4755f2eab64SJohn Marino }
4765f2eab64SJohn Marino
4775f2eab64SJohn Marino #ifdef TRE_WCHAR
4785f2eab64SJohn Marino
4795f2eab64SJohn Marino int
tre_regawnexec(const regex_t * preg,const wchar_t * str,size_t len,regamatch_t * match,regaparams_t params,int eflags)4805f2eab64SJohn Marino tre_regawnexec(const regex_t *preg, const wchar_t *str, size_t len,
4815f2eab64SJohn Marino regamatch_t *match, regaparams_t params, int eflags)
4825f2eab64SJohn Marino {
4835f2eab64SJohn Marino tre_tnfa_t *tnfa = (void *)preg->TRE_REGEX_T_FIELD;
484d5f8dde1SJohn Marino
485d5f8dde1SJohn Marino #ifdef TRE_USE_SYSTEM_REGEX_H
486d5f8dde1SJohn Marino if (preg->re_magic != RE_MAGIC) return REG_BADPAT;
487d5f8dde1SJohn Marino #endif /* TRE_USE_SYSTEM_REGEX_H */
488d5f8dde1SJohn Marino
4895f2eab64SJohn Marino return tre_match_approx(tnfa, str, len, STR_WIDE,
4905f2eab64SJohn Marino match, params, eflags);
4915f2eab64SJohn Marino }
4925f2eab64SJohn Marino
4935f2eab64SJohn Marino int
tre_regawexec(const regex_t * preg,const wchar_t * str,regamatch_t * match,regaparams_t params,int eflags)4945f2eab64SJohn Marino tre_regawexec(const regex_t *preg, const wchar_t *str,
4955f2eab64SJohn Marino regamatch_t *match, regaparams_t params, int eflags)
4965f2eab64SJohn Marino {
497d5f8dde1SJohn Marino return tre_regawnexec(preg, str, (size_t)-1, match, params, eflags);
4985f2eab64SJohn Marino }
4995f2eab64SJohn Marino
5005f2eab64SJohn Marino #endif /* TRE_WCHAR */
5015f2eab64SJohn Marino
5025f2eab64SJohn Marino void
tre_regaparams_default(regaparams_t * params)5035f2eab64SJohn Marino tre_regaparams_default(regaparams_t *params)
5045f2eab64SJohn Marino {
5055f2eab64SJohn Marino memset(params, 0, sizeof(*params));
5065f2eab64SJohn Marino params->cost_ins = 1;
5075f2eab64SJohn Marino params->cost_del = 1;
5085f2eab64SJohn Marino params->cost_subst = 1;
5095f2eab64SJohn Marino params->max_cost = INT_MAX;
5105f2eab64SJohn Marino params->max_ins = INT_MAX;
5115f2eab64SJohn Marino params->max_del = INT_MAX;
5125f2eab64SJohn Marino params->max_subst = INT_MAX;
5135f2eab64SJohn Marino params->max_err = INT_MAX;
5145f2eab64SJohn Marino }
5155f2eab64SJohn Marino
5165f2eab64SJohn Marino #endif /* TRE_APPROX */
5175f2eab64SJohn Marino
5185f2eab64SJohn Marino /* EOF */
519