163d4abf0Sagc /*
263d4abf0Sagc tre-parse.c - Regexp parser
363d4abf0Sagc
463d4abf0Sagc This software is released under a BSD-style license.
563d4abf0Sagc See the file LICENSE for details and copyright.
663d4abf0Sagc
763d4abf0Sagc */
863d4abf0Sagc
963d4abf0Sagc /*
1063d4abf0Sagc This parser is just a simple recursive descent parser for POSIX.2
1163d4abf0Sagc regexps. The parser supports both the obsolete default syntax and
1263d4abf0Sagc the "extended" syntax, and some nonstandard extensions.
1363d4abf0Sagc */
1463d4abf0Sagc
1563d4abf0Sagc
1663d4abf0Sagc #ifdef HAVE_CONFIG_H
1763d4abf0Sagc #include <config.h>
1863d4abf0Sagc #endif /* HAVE_CONFIG_H */
1963d4abf0Sagc #include <string.h>
2063d4abf0Sagc #include <assert.h>
2163d4abf0Sagc #include <limits.h>
2263d4abf0Sagc
2363d4abf0Sagc #include "xmalloc.h"
2463d4abf0Sagc #include "tre-mem.h"
2563d4abf0Sagc #include "tre-ast.h"
2663d4abf0Sagc #include "tre-stack.h"
2763d4abf0Sagc #include "tre-parse.h"
2863d4abf0Sagc
2963d4abf0Sagc
3063d4abf0Sagc /* Characters with special meanings in regexp syntax. */
3163d4abf0Sagc #define CHAR_PIPE L'|'
3263d4abf0Sagc #define CHAR_LPAREN L'('
3363d4abf0Sagc #define CHAR_RPAREN L')'
3463d4abf0Sagc #define CHAR_LBRACE L'{'
3563d4abf0Sagc #define CHAR_RBRACE L'}'
3663d4abf0Sagc #define CHAR_LBRACKET L'['
3763d4abf0Sagc #define CHAR_RBRACKET L']'
3863d4abf0Sagc #define CHAR_MINUS L'-'
3963d4abf0Sagc #define CHAR_STAR L'*'
4063d4abf0Sagc #define CHAR_QUESTIONMARK L'?'
4163d4abf0Sagc #define CHAR_PLUS L'+'
4263d4abf0Sagc #define CHAR_PERIOD L'.'
4363d4abf0Sagc #define CHAR_COLON L':'
4463d4abf0Sagc #define CHAR_EQUAL L'='
4563d4abf0Sagc #define CHAR_COMMA L','
4663d4abf0Sagc #define CHAR_CARET L'^'
4763d4abf0Sagc #define CHAR_DOLLAR L'$'
4863d4abf0Sagc #define CHAR_BACKSLASH L'\\'
4963d4abf0Sagc #define CHAR_HASH L'#'
5063d4abf0Sagc #define CHAR_TILDE L'~'
5163d4abf0Sagc
5263d4abf0Sagc
5363d4abf0Sagc /* Some macros for expanding \w, \s, etc. */
5463d4abf0Sagc static const struct tre_macro_struct {
5563d4abf0Sagc const char c;
5663d4abf0Sagc const char *expansion;
5763d4abf0Sagc } tre_macros[] =
5863d4abf0Sagc { {'t', "\t"}, {'n', "\n"}, {'r', "\r"},
5963d4abf0Sagc {'f', "\f"}, {'a', "\a"}, {'e', "\033"},
6063d4abf0Sagc {'w', "[[:alnum:]_]"}, {'W', "[^[:alnum:]_]"}, {'s', "[[:space:]]"},
6163d4abf0Sagc {'S', "[^[:space:]]"}, {'d', "[[:digit:]]"}, {'D', "[^[:digit:]]"},
6263d4abf0Sagc { 0, NULL }
6363d4abf0Sagc };
6463d4abf0Sagc
6563d4abf0Sagc
6663d4abf0Sagc /* Expands a macro delimited by `regex' and `regex_end' to `buf', which
6763d4abf0Sagc must have at least `len' items. Sets buf[0] to zero if the there
6863d4abf0Sagc is no match in `tre_macros'. */
6963d4abf0Sagc static void
tre_expand_macro(const tre_char_t * regex,const tre_char_t * regex_end,tre_char_t * buf,size_t buf_len)7063d4abf0Sagc tre_expand_macro(const tre_char_t *regex, const tre_char_t *regex_end,
7163d4abf0Sagc tre_char_t *buf, size_t buf_len)
7263d4abf0Sagc {
7363d4abf0Sagc int i;
7463d4abf0Sagc
7563d4abf0Sagc buf[0] = 0;
7663d4abf0Sagc if (regex >= regex_end)
7763d4abf0Sagc return;
7863d4abf0Sagc
7963d4abf0Sagc for (i = 0; tre_macros[i].expansion; i++)
8063d4abf0Sagc {
8163d4abf0Sagc if (tre_macros[i].c == *regex)
8263d4abf0Sagc {
8363d4abf0Sagc unsigned int j;
8463d4abf0Sagc DPRINT(("Expanding macro '%c' => '%s'\n",
8563d4abf0Sagc tre_macros[i].c, tre_macros[i].expansion));
8663d4abf0Sagc for (j = 0; tre_macros[i].expansion[j] && j < buf_len; j++)
8763d4abf0Sagc buf[j] = tre_macros[i].expansion[j];
8863d4abf0Sagc buf[j] = 0;
8963d4abf0Sagc break;
9063d4abf0Sagc }
9163d4abf0Sagc }
9263d4abf0Sagc }
9363d4abf0Sagc
9463d4abf0Sagc static reg_errcode_t
tre_new_item(tre_mem_t mem,int min,int max,int * i,int * max_i,tre_ast_node_t *** items)9563d4abf0Sagc tre_new_item(tre_mem_t mem, int min, int max, int *i, int *max_i,
9663d4abf0Sagc tre_ast_node_t ***items)
9763d4abf0Sagc {
9863d4abf0Sagc reg_errcode_t status;
9963d4abf0Sagc tre_ast_node_t **array = *items;
10063d4abf0Sagc /* Allocate more space if necessary. */
10163d4abf0Sagc if (*i >= *max_i)
10263d4abf0Sagc {
10363d4abf0Sagc tre_ast_node_t **new_items;
10463d4abf0Sagc DPRINT(("out of array space, i = %d\n", *i));
10563d4abf0Sagc /* If the array is already 1024 items large, give up -- there's
10663d4abf0Sagc probably an error in the regexp (e.g. not a '\0' terminated
10763d4abf0Sagc string and missing ']') */
10863d4abf0Sagc if (*max_i > 1024)
10963d4abf0Sagc return REG_ESPACE;
11063d4abf0Sagc *max_i *= 2;
111*f6cec12cSrin new_items = xrealloc(array, sizeof(*array) * *max_i);
11263d4abf0Sagc if (new_items == NULL)
11363d4abf0Sagc return REG_ESPACE;
11463d4abf0Sagc *items = array = new_items;
11563d4abf0Sagc }
11663d4abf0Sagc array[*i] = tre_ast_new_literal(mem, min, max, -1);
11763d4abf0Sagc status = array[*i] == NULL ? REG_ESPACE : REG_OK;
11863d4abf0Sagc (*i)++;
11963d4abf0Sagc return status;
12063d4abf0Sagc }
12163d4abf0Sagc
12263d4abf0Sagc
12363d4abf0Sagc /* Expands a character class to character ranges. */
12463d4abf0Sagc static reg_errcode_t
tre_expand_ctype(tre_mem_t mem,tre_ctype_t class,tre_ast_node_t *** items,int * i,int * max_i,int cflags)12563d4abf0Sagc tre_expand_ctype(tre_mem_t mem, tre_ctype_t class, tre_ast_node_t ***items,
12663d4abf0Sagc int *i, int *max_i, int cflags)
12763d4abf0Sagc {
12863d4abf0Sagc reg_errcode_t status = REG_OK;
12963d4abf0Sagc tre_cint_t c;
13063d4abf0Sagc int j, min = -1, max = 0;
13163d4abf0Sagc assert(TRE_MB_CUR_MAX == 1);
13263d4abf0Sagc
13363d4abf0Sagc DPRINT((" expanding class to character ranges\n"));
13463d4abf0Sagc for (j = 0; (j < 256) && (status == REG_OK); j++)
13563d4abf0Sagc {
13613498f30Srin c = (tre_cint_t) j;
13763d4abf0Sagc if (tre_isctype(c, class)
13863d4abf0Sagc || ((cflags & REG_ICASE)
13963d4abf0Sagc && (tre_isctype(tre_tolower(c), class)
14063d4abf0Sagc || tre_isctype(tre_toupper(c), class))))
14163d4abf0Sagc {
14263d4abf0Sagc if (min < 0)
14363d4abf0Sagc min = c;
14463d4abf0Sagc max = c;
14563d4abf0Sagc }
14663d4abf0Sagc else if (min >= 0)
14763d4abf0Sagc {
14863d4abf0Sagc DPRINT((" range %c (%d) to %c (%d)\n", min, min, max, max));
14963d4abf0Sagc status = tre_new_item(mem, min, max, i, max_i, items);
15063d4abf0Sagc min = -1;
15163d4abf0Sagc }
15263d4abf0Sagc }
15363d4abf0Sagc if (min >= 0 && status == REG_OK)
15463d4abf0Sagc status = tre_new_item(mem, min, max, i, max_i, items);
15563d4abf0Sagc return status;
15663d4abf0Sagc }
15763d4abf0Sagc
15863d4abf0Sagc
15963d4abf0Sagc static int
tre_compare_items(const void * a,const void * b)16063d4abf0Sagc tre_compare_items(const void *a, const void *b)
16163d4abf0Sagc {
16263d4abf0Sagc const tre_ast_node_t *node_a = *(tre_ast_node_t * const *)a;
16363d4abf0Sagc const tre_ast_node_t *node_b = *(tre_ast_node_t * const *)b;
16463d4abf0Sagc tre_literal_t *l_a = node_a->obj, *l_b = node_b->obj;
16563d4abf0Sagc int a_min = l_a->code_min, b_min = l_b->code_min;
16663d4abf0Sagc
16763d4abf0Sagc if (a_min < b_min)
16863d4abf0Sagc return -1;
16963d4abf0Sagc else if (a_min > b_min)
17063d4abf0Sagc return 1;
17163d4abf0Sagc else
17263d4abf0Sagc return 0;
17363d4abf0Sagc }
17463d4abf0Sagc
17563d4abf0Sagc #ifndef TRE_USE_SYSTEM_WCTYPE
17663d4abf0Sagc
17782e82fcaSahoka int tre_isalnum_func(tre_cint_t);
17882e82fcaSahoka int tre_isalpha_func(tre_cint_t);
17982e82fcaSahoka int tre_isascii_func(tre_cint_t);
18082e82fcaSahoka int tre_isblank_func(tre_cint_t);
18182e82fcaSahoka int tre_iscntrl_func(tre_cint_t);
18282e82fcaSahoka int tre_isdigit_func(tre_cint_t);
18382e82fcaSahoka int tre_isgraph_func(tre_cint_t);
18482e82fcaSahoka int tre_islower_func(tre_cint_t);
18582e82fcaSahoka int tre_isprint_func(tre_cint_t);
18682e82fcaSahoka int tre_ispunct_func(tre_cint_t);
18782e82fcaSahoka int tre_isspace_func(tre_cint_t);
18882e82fcaSahoka int tre_isupper_func(tre_cint_t);
18982e82fcaSahoka int tre_isxdigit_func(tre_cint_t);
19082e82fcaSahoka
19163d4abf0Sagc /* isalnum() and the rest may be macros, so wrap them to functions. */
tre_isalnum_func(tre_cint_t c)19263d4abf0Sagc int tre_isalnum_func(tre_cint_t c) { return tre_isalnum(c); }
tre_isalpha_func(tre_cint_t c)19363d4abf0Sagc int tre_isalpha_func(tre_cint_t c) { return tre_isalpha(c); }
19463d4abf0Sagc
19563d4abf0Sagc #ifdef tre_isascii
tre_isascii_func(tre_cint_t c)19663d4abf0Sagc int tre_isascii_func(tre_cint_t c) { return tre_isascii(c); }
19763d4abf0Sagc #else /* !tre_isascii */
tre_isascii_func(tre_cint_t c)19863d4abf0Sagc int tre_isascii_func(tre_cint_t c) { return !(c >> 7); }
19963d4abf0Sagc #endif /* !tre_isascii */
20063d4abf0Sagc
20163d4abf0Sagc #ifdef tre_isblank
tre_isblank_func(tre_cint_t c)20263d4abf0Sagc int tre_isblank_func(tre_cint_t c) { return tre_isblank(c); }
20363d4abf0Sagc #else /* !tre_isblank */
tre_isblank_func(tre_cint_t c)20463d4abf0Sagc int tre_isblank_func(tre_cint_t c) { return ((c == ' ') || (c == '\t')); }
20563d4abf0Sagc #endif /* !tre_isblank */
20663d4abf0Sagc
tre_iscntrl_func(tre_cint_t c)20763d4abf0Sagc int tre_iscntrl_func(tre_cint_t c) { return tre_iscntrl(c); }
tre_isdigit_func(tre_cint_t c)20863d4abf0Sagc int tre_isdigit_func(tre_cint_t c) { return tre_isdigit(c); }
tre_isgraph_func(tre_cint_t c)20963d4abf0Sagc int tre_isgraph_func(tre_cint_t c) { return tre_isgraph(c); }
tre_islower_func(tre_cint_t c)21063d4abf0Sagc int tre_islower_func(tre_cint_t c) { return tre_islower(c); }
tre_isprint_func(tre_cint_t c)21163d4abf0Sagc int tre_isprint_func(tre_cint_t c) { return tre_isprint(c); }
tre_ispunct_func(tre_cint_t c)21263d4abf0Sagc int tre_ispunct_func(tre_cint_t c) { return tre_ispunct(c); }
tre_isspace_func(tre_cint_t c)21363d4abf0Sagc int tre_isspace_func(tre_cint_t c) { return tre_isspace(c); }
tre_isupper_func(tre_cint_t c)21463d4abf0Sagc int tre_isupper_func(tre_cint_t c) { return tre_isupper(c); }
tre_isxdigit_func(tre_cint_t c)21563d4abf0Sagc int tre_isxdigit_func(tre_cint_t c) { return tre_isxdigit(c); }
21663d4abf0Sagc
21763d4abf0Sagc struct {
21882e82fcaSahoka const char *name;
21963d4abf0Sagc int (*func)(tre_cint_t);
22063d4abf0Sagc } tre_ctype_map[] = {
22163d4abf0Sagc { "alnum", &tre_isalnum_func },
22263d4abf0Sagc { "alpha", &tre_isalpha_func },
22363d4abf0Sagc #ifdef tre_isascii
22463d4abf0Sagc { "ascii", &tre_isascii_func },
22563d4abf0Sagc #endif /* tre_isascii */
22663d4abf0Sagc #ifdef tre_isblank
22763d4abf0Sagc { "blank", &tre_isblank_func },
22863d4abf0Sagc #endif /* tre_isblank */
22963d4abf0Sagc { "cntrl", &tre_iscntrl_func },
23063d4abf0Sagc { "digit", &tre_isdigit_func },
23163d4abf0Sagc { "graph", &tre_isgraph_func },
23263d4abf0Sagc { "lower", &tre_islower_func },
23363d4abf0Sagc { "print", &tre_isprint_func },
23463d4abf0Sagc { "punct", &tre_ispunct_func },
23563d4abf0Sagc { "space", &tre_isspace_func },
23663d4abf0Sagc { "upper", &tre_isupper_func },
23763d4abf0Sagc { "xdigit", &tre_isxdigit_func },
23863d4abf0Sagc { NULL, NULL}
23963d4abf0Sagc };
24063d4abf0Sagc
tre_ctype(const char * name)24163d4abf0Sagc tre_ctype_t tre_ctype(const char *name)
24263d4abf0Sagc {
24363d4abf0Sagc int i;
24463d4abf0Sagc for (i = 0; tre_ctype_map[i].name != NULL; i++)
24563d4abf0Sagc {
24663d4abf0Sagc if (strcmp(name, tre_ctype_map[i].name) == 0)
24763d4abf0Sagc return tre_ctype_map[i].func;
24863d4abf0Sagc }
24963d4abf0Sagc return (tre_ctype_t)0;
25063d4abf0Sagc }
25163d4abf0Sagc #endif /* !TRE_USE_SYSTEM_WCTYPE */
25263d4abf0Sagc
25363d4abf0Sagc /* Maximum number of character classes that can occur in a negated bracket
25463d4abf0Sagc expression. */
25563d4abf0Sagc #define MAX_NEG_CLASSES 64
25663d4abf0Sagc
25763d4abf0Sagc /* Maximum length of character class names. */
25863d4abf0Sagc #define MAX_CLASS_NAME
25963d4abf0Sagc
26063d4abf0Sagc #define REST(re) (int)(ctx->re_end - (re)), (re)
26163d4abf0Sagc
26263d4abf0Sagc static reg_errcode_t
tre_parse_bracket_items(tre_parse_ctx_t * ctx,int negate,tre_ctype_t neg_classes[],int * num_neg_classes,tre_ast_node_t *** items,int * num_items,int * items_size)26363d4abf0Sagc tre_parse_bracket_items(tre_parse_ctx_t *ctx, int negate,
26463d4abf0Sagc tre_ctype_t neg_classes[], int *num_neg_classes,
26563d4abf0Sagc tre_ast_node_t ***items, int *num_items,
26663d4abf0Sagc int *items_size)
26763d4abf0Sagc {
26863d4abf0Sagc const tre_char_t *re = ctx->re;
26963d4abf0Sagc reg_errcode_t status = REG_OK;
27063d4abf0Sagc tre_ctype_t class = (tre_ctype_t)0;
27163d4abf0Sagc int i = *num_items;
27263d4abf0Sagc int max_i = *items_size;
27363d4abf0Sagc int skip;
27463d4abf0Sagc
27563d4abf0Sagc /* Build an array of the items in the bracket expression. */
27663d4abf0Sagc while (status == REG_OK)
27763d4abf0Sagc {
27863d4abf0Sagc skip = 0;
27963d4abf0Sagc if (re == ctx->re_end)
28063d4abf0Sagc {
28163d4abf0Sagc status = REG_EBRACK;
28263d4abf0Sagc }
28363d4abf0Sagc else if (*re == CHAR_RBRACKET && re > ctx->re)
28463d4abf0Sagc {
28563d4abf0Sagc DPRINT(("tre_parse_bracket: done: '%.*" STRF "'\n", REST(re)));
28663d4abf0Sagc re++;
28763d4abf0Sagc break;
28863d4abf0Sagc }
28963d4abf0Sagc else
29063d4abf0Sagc {
29163d4abf0Sagc tre_cint_t min = 0, max = 0;
29263d4abf0Sagc
29363d4abf0Sagc class = (tre_ctype_t)0;
29463d4abf0Sagc if (re + 2 < ctx->re_end
29563d4abf0Sagc && *(re + 1) == CHAR_MINUS && *(re + 2) != CHAR_RBRACKET)
29663d4abf0Sagc {
29763d4abf0Sagc DPRINT(("tre_parse_bracket: range: '%.*" STRF "'\n", REST(re)));
29863d4abf0Sagc min = *re;
29963d4abf0Sagc max = *(re + 2);
30063d4abf0Sagc re += 3;
30163d4abf0Sagc /* XXX - Should use collation order instead of encoding values
30263d4abf0Sagc in character ranges. */
30363d4abf0Sagc if (min > max)
30463d4abf0Sagc status = REG_ERANGE;
30563d4abf0Sagc }
30663d4abf0Sagc else if (re + 1 < ctx->re_end
30763d4abf0Sagc && *re == CHAR_LBRACKET && *(re + 1) == CHAR_PERIOD)
30863d4abf0Sagc status = REG_ECOLLATE;
30963d4abf0Sagc else if (re + 1 < ctx->re_end
31063d4abf0Sagc && *re == CHAR_LBRACKET && *(re + 1) == CHAR_EQUAL)
31163d4abf0Sagc status = REG_ECOLLATE;
31263d4abf0Sagc else if (re + 1 < ctx->re_end
31363d4abf0Sagc && *re == CHAR_LBRACKET && *(re + 1) == CHAR_COLON)
31463d4abf0Sagc {
31563d4abf0Sagc char tmp_str[64];
31663d4abf0Sagc const tre_char_t *endptr = re + 2;
317f2a3d147Schristos size_t len;
31863d4abf0Sagc DPRINT(("tre_parse_bracket: class: '%.*" STRF "'\n", REST(re)));
31963d4abf0Sagc while (endptr < ctx->re_end && *endptr != CHAR_COLON)
32063d4abf0Sagc endptr++;
32163d4abf0Sagc if (endptr != ctx->re_end)
32263d4abf0Sagc {
32363d4abf0Sagc len = MIN(endptr - re - 2, 63);
32463d4abf0Sagc #ifdef TRE_WCHAR
32563d4abf0Sagc {
32663d4abf0Sagc tre_char_t tmp_wcs[64];
32763d4abf0Sagc wcsncpy(tmp_wcs, re + 2, (size_t)len);
32863d4abf0Sagc tmp_wcs[len] = L'\0';
32963d4abf0Sagc #if defined HAVE_WCSRTOMBS
33063d4abf0Sagc {
33163d4abf0Sagc mbstate_t state;
33263d4abf0Sagc const tre_char_t *src = tmp_wcs;
33363d4abf0Sagc memset(&state, '\0', sizeof(state));
33463d4abf0Sagc len = wcsrtombs(tmp_str, &src, sizeof(tmp_str), &state);
33563d4abf0Sagc }
33663d4abf0Sagc #elif defined HAVE_WCSTOMBS
33763d4abf0Sagc len = wcstombs(tmp_str, tmp_wcs, 63);
33863d4abf0Sagc #endif /* defined HAVE_WCSTOMBS */
33963d4abf0Sagc }
34063d4abf0Sagc #else /* !TRE_WCHAR */
34182e82fcaSahoka /* LINTED */strncpy(tmp_str, (const char*)re + 2, len);
34263d4abf0Sagc #endif /* !TRE_WCHAR */
34363d4abf0Sagc tmp_str[len] = '\0';
34463d4abf0Sagc DPRINT((" class name: %s\n", tmp_str));
34563d4abf0Sagc class = tre_ctype(tmp_str);
34663d4abf0Sagc if (!class)
34763d4abf0Sagc status = REG_ECTYPE;
34863d4abf0Sagc /* Optimize character classes for 8 bit character sets. */
34913498f30Srin if (status == REG_OK && ctx->cur_max == 1)
35063d4abf0Sagc {
35163d4abf0Sagc status = tre_expand_ctype(ctx->mem, class, items,
35263d4abf0Sagc &i, &max_i, ctx->cflags);
35363d4abf0Sagc class = (tre_ctype_t)0;
35463d4abf0Sagc skip = 1;
35563d4abf0Sagc }
35663d4abf0Sagc re = endptr + 2;
35763d4abf0Sagc }
35863d4abf0Sagc else
35963d4abf0Sagc status = REG_ECTYPE;
36063d4abf0Sagc min = 0;
36163d4abf0Sagc max = TRE_CHAR_MAX;
36263d4abf0Sagc }
36363d4abf0Sagc else
36463d4abf0Sagc {
36563d4abf0Sagc DPRINT(("tre_parse_bracket: char: '%.*" STRF "'\n", REST(re)));
36663d4abf0Sagc if (*re == CHAR_MINUS && *(re + 1) != CHAR_RBRACKET
36763d4abf0Sagc && ctx->re != re)
36863d4abf0Sagc /* Two ranges are not allowed to share and endpoint. */
36963d4abf0Sagc status = REG_ERANGE;
37063d4abf0Sagc min = max = *re++;
37163d4abf0Sagc }
37263d4abf0Sagc
37363d4abf0Sagc if (status != REG_OK)
37463d4abf0Sagc break;
37563d4abf0Sagc
37663d4abf0Sagc if (class && negate)
37763d4abf0Sagc if (*num_neg_classes >= MAX_NEG_CLASSES)
37863d4abf0Sagc status = REG_ESPACE;
37963d4abf0Sagc else
38063d4abf0Sagc neg_classes[(*num_neg_classes)++] = class;
38163d4abf0Sagc else if (!skip)
38263d4abf0Sagc {
38363d4abf0Sagc status = tre_new_item(ctx->mem, min, max, &i, &max_i, items);
38463d4abf0Sagc if (status != REG_OK)
38563d4abf0Sagc break;
38663d4abf0Sagc ((tre_literal_t*)((*items)[i-1])->obj)->u.class = class;
38763d4abf0Sagc }
38863d4abf0Sagc
38963d4abf0Sagc /* Add opposite-case counterpoints if REG_ICASE is present.
39063d4abf0Sagc This is broken if there are more than two "same" characters. */
39163d4abf0Sagc if (ctx->cflags & REG_ICASE && !class && status == REG_OK && !skip)
39263d4abf0Sagc {
39363d4abf0Sagc tre_cint_t cmin, ccurr;
39463d4abf0Sagc
39563d4abf0Sagc DPRINT(("adding opposite-case counterpoints\n"));
39663d4abf0Sagc while (min <= max)
39763d4abf0Sagc {
39863d4abf0Sagc if (tre_islower(min))
39963d4abf0Sagc {
40063d4abf0Sagc cmin = ccurr = tre_toupper(min++);
40163d4abf0Sagc while (tre_islower(min) && tre_toupper(min) == ccurr + 1
40263d4abf0Sagc && min <= max)
40363d4abf0Sagc ccurr = tre_toupper(min++);
40463d4abf0Sagc status = tre_new_item(ctx->mem, cmin, ccurr,
40563d4abf0Sagc &i, &max_i, items);
40663d4abf0Sagc }
40763d4abf0Sagc else if (tre_isupper(min))
40863d4abf0Sagc {
40963d4abf0Sagc cmin = ccurr = tre_tolower(min++);
41063d4abf0Sagc while (tre_isupper(min) && tre_tolower(min) == ccurr + 1
41163d4abf0Sagc && min <= max)
41263d4abf0Sagc ccurr = tre_tolower(min++);
41363d4abf0Sagc status = tre_new_item(ctx->mem, cmin, ccurr,
41463d4abf0Sagc &i, &max_i, items);
41563d4abf0Sagc }
41663d4abf0Sagc else min++;
41763d4abf0Sagc if (status != REG_OK)
41863d4abf0Sagc break;
41963d4abf0Sagc }
42063d4abf0Sagc if (status != REG_OK)
42163d4abf0Sagc break;
42263d4abf0Sagc }
42363d4abf0Sagc }
42463d4abf0Sagc }
42563d4abf0Sagc *num_items = i;
42663d4abf0Sagc *items_size = max_i;
42763d4abf0Sagc ctx->re = re;
42863d4abf0Sagc return status;
42963d4abf0Sagc }
43063d4abf0Sagc
43163d4abf0Sagc static reg_errcode_t
tre_parse_bracket(tre_parse_ctx_t * ctx,tre_ast_node_t ** result)43263d4abf0Sagc tre_parse_bracket(tre_parse_ctx_t *ctx, tre_ast_node_t **result)
43363d4abf0Sagc {
43463d4abf0Sagc tre_ast_node_t *node = NULL;
43563d4abf0Sagc int negate = 0;
43663d4abf0Sagc reg_errcode_t status = REG_OK;
43763d4abf0Sagc tre_ast_node_t **items, *u, *n;
43863d4abf0Sagc int i = 0, j, max_i = 32, curr_max, curr_min;
43963d4abf0Sagc tre_ctype_t neg_classes[MAX_NEG_CLASSES];
44063d4abf0Sagc int num_neg_classes = 0;
44163d4abf0Sagc
44263d4abf0Sagc /* Start off with an array of `max_i' elements. */
44363d4abf0Sagc items = xmalloc(sizeof(*items) * max_i);
44463d4abf0Sagc if (items == NULL)
44563d4abf0Sagc return REG_ESPACE;
44663d4abf0Sagc
44763d4abf0Sagc if (*ctx->re == CHAR_CARET)
44863d4abf0Sagc {
44963d4abf0Sagc DPRINT(("tre_parse_bracket: negate: '%.*" STRF "'\n", REST(ctx->re)));
45063d4abf0Sagc negate = 1;
45163d4abf0Sagc ctx->re++;
45263d4abf0Sagc }
45363d4abf0Sagc
45463d4abf0Sagc status = tre_parse_bracket_items(ctx, negate, neg_classes, &num_neg_classes,
45563d4abf0Sagc &items, &i, &max_i);
45663d4abf0Sagc
45763d4abf0Sagc if (status != REG_OK)
45863d4abf0Sagc goto parse_bracket_done;
45963d4abf0Sagc
46063d4abf0Sagc /* Sort the array if we need to negate it. */
46163d4abf0Sagc if (negate)
46263d4abf0Sagc qsort(items, (unsigned)i, sizeof(*items), tre_compare_items);
46363d4abf0Sagc
46463d4abf0Sagc curr_max = curr_min = 0;
46563d4abf0Sagc /* Build a union of the items in the array, negated if necessary. */
46663d4abf0Sagc for (j = 0; j < i && status == REG_OK; j++)
46763d4abf0Sagc {
46863d4abf0Sagc int min, max;
46963d4abf0Sagc tre_literal_t *l = items[j]->obj;
47063d4abf0Sagc min = l->code_min;
47163d4abf0Sagc max = l->code_max;
47263d4abf0Sagc
47363d4abf0Sagc DPRINT(("item: %d - %d, class %ld, curr_max = %d\n",
47463d4abf0Sagc (int)l->code_min, (int)l->code_max, (long)l->u.class, curr_max));
47563d4abf0Sagc
47663d4abf0Sagc if (negate)
47763d4abf0Sagc {
47863d4abf0Sagc if (min < curr_max)
47963d4abf0Sagc {
48063d4abf0Sagc /* Overlap. */
48163d4abf0Sagc curr_max = MAX(max + 1, curr_max);
48263d4abf0Sagc DPRINT(("overlap, curr_max = %d\n", curr_max));
48363d4abf0Sagc l = NULL;
48463d4abf0Sagc }
48563d4abf0Sagc else
48663d4abf0Sagc {
48763d4abf0Sagc /* No overlap. */
48863d4abf0Sagc curr_max = min - 1;
48963d4abf0Sagc if (curr_max >= curr_min)
49063d4abf0Sagc {
49163d4abf0Sagc DPRINT(("no overlap\n"));
49263d4abf0Sagc l->code_min = curr_min;
49363d4abf0Sagc l->code_max = curr_max;
49463d4abf0Sagc }
49563d4abf0Sagc else
49663d4abf0Sagc {
49763d4abf0Sagc DPRINT(("no overlap, zero room\n"));
49863d4abf0Sagc l = NULL;
49963d4abf0Sagc }
50063d4abf0Sagc curr_min = curr_max = max + 1;
50163d4abf0Sagc }
50263d4abf0Sagc }
50363d4abf0Sagc
50463d4abf0Sagc if (l != NULL)
50563d4abf0Sagc {
50663d4abf0Sagc int k;
50763d4abf0Sagc DPRINT(("creating %d - %d\n", (int)l->code_min, (int)l->code_max));
50863d4abf0Sagc l->position = ctx->position;
50963d4abf0Sagc if (num_neg_classes > 0)
51063d4abf0Sagc {
51163d4abf0Sagc l->neg_classes = tre_mem_alloc(ctx->mem,
512*f6cec12cSrin (sizeof(*l->neg_classes)
51363d4abf0Sagc * (num_neg_classes + 1)));
51463d4abf0Sagc if (l->neg_classes == NULL)
51563d4abf0Sagc {
51663d4abf0Sagc status = REG_ESPACE;
51763d4abf0Sagc break;
51863d4abf0Sagc }
51963d4abf0Sagc for (k = 0; k < num_neg_classes; k++)
52063d4abf0Sagc l->neg_classes[k] = neg_classes[k];
52163d4abf0Sagc l->neg_classes[k] = (tre_ctype_t)0;
52263d4abf0Sagc }
52363d4abf0Sagc else
52463d4abf0Sagc l->neg_classes = NULL;
52563d4abf0Sagc if (node == NULL)
52663d4abf0Sagc node = items[j];
52763d4abf0Sagc else
52863d4abf0Sagc {
52963d4abf0Sagc u = tre_ast_new_union(ctx->mem, node, items[j]);
53063d4abf0Sagc if (u == NULL)
53163d4abf0Sagc status = REG_ESPACE;
53263d4abf0Sagc node = u;
53363d4abf0Sagc }
53463d4abf0Sagc }
53563d4abf0Sagc }
53663d4abf0Sagc
53763d4abf0Sagc if (status != REG_OK)
53863d4abf0Sagc goto parse_bracket_done;
53963d4abf0Sagc
54063d4abf0Sagc if (negate)
54163d4abf0Sagc {
54263d4abf0Sagc int k;
54363d4abf0Sagc DPRINT(("final: creating %d - %d\n", curr_min, (int)TRE_CHAR_MAX));
54463d4abf0Sagc n = tre_ast_new_literal(ctx->mem, curr_min, TRE_CHAR_MAX, ctx->position);
54563d4abf0Sagc if (n == NULL)
54663d4abf0Sagc status = REG_ESPACE;
54763d4abf0Sagc else
54863d4abf0Sagc {
54963d4abf0Sagc tre_literal_t *l = n->obj;
55063d4abf0Sagc if (num_neg_classes > 0)
55163d4abf0Sagc {
55263d4abf0Sagc l->neg_classes = tre_mem_alloc(ctx->mem,
553*f6cec12cSrin (sizeof(*l->neg_classes)
55463d4abf0Sagc * (num_neg_classes + 1)));
55563d4abf0Sagc if (l->neg_classes == NULL)
55663d4abf0Sagc {
55763d4abf0Sagc status = REG_ESPACE;
55863d4abf0Sagc goto parse_bracket_done;
55963d4abf0Sagc }
56063d4abf0Sagc for (k = 0; k < num_neg_classes; k++)
56163d4abf0Sagc l->neg_classes[k] = neg_classes[k];
56263d4abf0Sagc l->neg_classes[k] = (tre_ctype_t)0;
56363d4abf0Sagc }
56463d4abf0Sagc else
56563d4abf0Sagc l->neg_classes = NULL;
56663d4abf0Sagc if (node == NULL)
56763d4abf0Sagc node = n;
56863d4abf0Sagc else
56963d4abf0Sagc {
57063d4abf0Sagc u = tre_ast_new_union(ctx->mem, node, n);
57163d4abf0Sagc if (u == NULL)
57263d4abf0Sagc status = REG_ESPACE;
57363d4abf0Sagc node = u;
57463d4abf0Sagc }
57563d4abf0Sagc }
57663d4abf0Sagc }
57763d4abf0Sagc
57863d4abf0Sagc if (status != REG_OK)
57963d4abf0Sagc goto parse_bracket_done;
58063d4abf0Sagc
58163d4abf0Sagc #ifdef TRE_DEBUG
58263d4abf0Sagc tre_ast_print(node);
58363d4abf0Sagc #endif /* TRE_DEBUG */
58463d4abf0Sagc
58563d4abf0Sagc parse_bracket_done:
58663d4abf0Sagc xfree(items);
58763d4abf0Sagc ctx->position++;
58863d4abf0Sagc *result = node;
58963d4abf0Sagc return status;
59063d4abf0Sagc }
59163d4abf0Sagc
59263d4abf0Sagc
59363d4abf0Sagc /* Parses a positive decimal integer. Returns -1 if the string does not
59463d4abf0Sagc contain a valid number. */
59563d4abf0Sagc static int
tre_parse_int(const tre_char_t ** regex,const tre_char_t * regex_end)59663d4abf0Sagc tre_parse_int(const tre_char_t **regex, const tre_char_t *regex_end)
59763d4abf0Sagc {
59863d4abf0Sagc int num = -1;
59963d4abf0Sagc const tre_char_t *r = *regex;
60063d4abf0Sagc while (r < regex_end && *r >= L'0' && *r <= L'9')
60163d4abf0Sagc {
60263d4abf0Sagc if (num < 0)
60363d4abf0Sagc num = 0;
60463d4abf0Sagc num = num * 10 + *r - L'0';
60563d4abf0Sagc r++;
60663d4abf0Sagc }
60763d4abf0Sagc *regex = r;
60863d4abf0Sagc return num;
60963d4abf0Sagc }
61063d4abf0Sagc
61163d4abf0Sagc
61263d4abf0Sagc static reg_errcode_t
tre_parse_bound(tre_parse_ctx_t * ctx,tre_ast_node_t ** result)61363d4abf0Sagc tre_parse_bound(tre_parse_ctx_t *ctx, tre_ast_node_t **result)
61463d4abf0Sagc {
61563d4abf0Sagc int min, max, i;
61663d4abf0Sagc int cost_ins, cost_del, cost_subst, cost_max;
61763d4abf0Sagc int limit_ins, limit_del, limit_subst, limit_err;
61863d4abf0Sagc const tre_char_t *r = ctx->re;
61963d4abf0Sagc const tre_char_t *start;
62063d4abf0Sagc int minimal = (ctx->cflags & REG_UNGREEDY) ? 1 : 0;
62163d4abf0Sagc int approx = 0;
62263d4abf0Sagc int costs_set = 0;
62363d4abf0Sagc int counts_set = 0;
62463d4abf0Sagc
62563d4abf0Sagc cost_ins = cost_del = cost_subst = cost_max = TRE_PARAM_UNSET;
62663d4abf0Sagc limit_ins = limit_del = limit_subst = limit_err = TRE_PARAM_UNSET;
62763d4abf0Sagc
62863d4abf0Sagc /* Parse number (minimum repetition count). */
62963d4abf0Sagc min = -1;
63063d4abf0Sagc if (r < ctx->re_end && *r >= L'0' && *r <= L'9') {
63163d4abf0Sagc DPRINT(("tre_parse: min count: '%.*" STRF "'\n", REST(r)));
63263d4abf0Sagc min = tre_parse_int(&r, ctx->re_end);
63363d4abf0Sagc }
63463d4abf0Sagc
63563d4abf0Sagc /* Parse comma and second number (maximum repetition count). */
63663d4abf0Sagc max = min;
63763d4abf0Sagc if (r < ctx->re_end && *r == CHAR_COMMA)
63863d4abf0Sagc {
63963d4abf0Sagc r++;
64063d4abf0Sagc DPRINT(("tre_parse: max count: '%.*" STRF "'\n", REST(r)));
64163d4abf0Sagc max = tre_parse_int(&r, ctx->re_end);
64263d4abf0Sagc }
64363d4abf0Sagc
64463d4abf0Sagc /* Check that the repeat counts are sane. */
64563d4abf0Sagc if ((max >= 0 && min > max) || max > RE_DUP_MAX)
64663d4abf0Sagc return REG_BADBR;
64763d4abf0Sagc
64863d4abf0Sagc
64963d4abf0Sagc /*
65063d4abf0Sagc '{'
65163d4abf0Sagc optionally followed immediately by a number == minimum repcount
65263d4abf0Sagc optionally followed by , then a number == maximum repcount
65363d4abf0Sagc + then a number == maximum insertion count
65463d4abf0Sagc - then a number == maximum deletion count
65563d4abf0Sagc # then a number == maximum substitution count
65663d4abf0Sagc ~ then a number == maximum number of errors
65763d4abf0Sagc Any of +, -, # or ~ without followed by a number means that
65863d4abf0Sagc the maximum count/number of errors is infinite.
65963d4abf0Sagc
66063d4abf0Sagc An equation of the form
66163d4abf0Sagc Xi + Yd + Zs < C
66263d4abf0Sagc can be specified to set costs and the cost limit to a value
66363d4abf0Sagc different from the default value:
66463d4abf0Sagc - X is the cost of an insertion
66563d4abf0Sagc - Y is the cost of a deletion
66663d4abf0Sagc - Z is the cost of a substitution
66763d4abf0Sagc - C is the maximum cost
66863d4abf0Sagc
66963d4abf0Sagc If no count limit or cost is set for an operation, the operation
67063d4abf0Sagc is not allowed at all.
67163d4abf0Sagc */
67263d4abf0Sagc
67363d4abf0Sagc
67463d4abf0Sagc do {
67563d4abf0Sagc int done;
67663d4abf0Sagc start = r;
67763d4abf0Sagc
67863d4abf0Sagc /* Parse count limit settings */
67963d4abf0Sagc done = 0;
68063d4abf0Sagc if (!counts_set)
68163d4abf0Sagc while (r + 1 < ctx->re_end && !done)
68263d4abf0Sagc {
68363d4abf0Sagc switch (*r)
68463d4abf0Sagc {
68563d4abf0Sagc case CHAR_PLUS: /* Insert limit */
68663d4abf0Sagc DPRINT(("tre_parse: ins limit: '%.*" STRF "'\n", REST(r)));
68763d4abf0Sagc r++;
68863d4abf0Sagc limit_ins = tre_parse_int(&r, ctx->re_end);
68963d4abf0Sagc if (limit_ins < 0)
69063d4abf0Sagc limit_ins = INT_MAX;
69163d4abf0Sagc counts_set = 1;
69263d4abf0Sagc break;
69363d4abf0Sagc case CHAR_MINUS: /* Delete limit */
69463d4abf0Sagc DPRINT(("tre_parse: del limit: '%.*" STRF "'\n", REST(r)));
69563d4abf0Sagc r++;
69663d4abf0Sagc limit_del = tre_parse_int(&r, ctx->re_end);
69763d4abf0Sagc if (limit_del < 0)
69863d4abf0Sagc limit_del = INT_MAX;
69963d4abf0Sagc counts_set = 1;
70063d4abf0Sagc break;
70163d4abf0Sagc case CHAR_HASH: /* Substitute limit */
70263d4abf0Sagc DPRINT(("tre_parse: subst limit: '%.*" STRF "'\n", REST(r)));
70363d4abf0Sagc r++;
70463d4abf0Sagc limit_subst = tre_parse_int(&r, ctx->re_end);
70563d4abf0Sagc if (limit_subst < 0)
70663d4abf0Sagc limit_subst = INT_MAX;
70763d4abf0Sagc counts_set = 1;
70863d4abf0Sagc break;
70963d4abf0Sagc case CHAR_TILDE: /* Maximum number of changes */
71063d4abf0Sagc DPRINT(("tre_parse: count limit: '%.*" STRF "'\n", REST(r)));
71163d4abf0Sagc r++;
71263d4abf0Sagc limit_err = tre_parse_int(&r, ctx->re_end);
71363d4abf0Sagc if (limit_err < 0)
71463d4abf0Sagc limit_err = INT_MAX;
71563d4abf0Sagc approx = 1;
71663d4abf0Sagc break;
71763d4abf0Sagc case CHAR_COMMA:
71863d4abf0Sagc r++;
71963d4abf0Sagc break;
72063d4abf0Sagc case L' ':
72163d4abf0Sagc r++;
72263d4abf0Sagc break;
72363d4abf0Sagc case L'}':
72463d4abf0Sagc done = 1;
72563d4abf0Sagc break;
72663d4abf0Sagc default:
72763d4abf0Sagc done = 1;
72863d4abf0Sagc break;
72963d4abf0Sagc }
73063d4abf0Sagc }
73163d4abf0Sagc
73263d4abf0Sagc /* Parse cost restriction equation. */
73363d4abf0Sagc done = 0;
73463d4abf0Sagc if (!costs_set)
73563d4abf0Sagc while (r + 1 < ctx->re_end && !done)
73663d4abf0Sagc {
73763d4abf0Sagc switch (*r)
73863d4abf0Sagc {
73963d4abf0Sagc case CHAR_PLUS:
74063d4abf0Sagc case L' ':
74163d4abf0Sagc r++;
74263d4abf0Sagc break;
74363d4abf0Sagc case L'<':
74463d4abf0Sagc DPRINT(("tre_parse: max cost: '%.*" STRF "'\n", REST(r)));
74563d4abf0Sagc r++;
74663d4abf0Sagc while (*r == L' ')
74763d4abf0Sagc r++;
74863d4abf0Sagc cost_max = tre_parse_int(&r, ctx->re_end);
74963d4abf0Sagc if (cost_max < 0)
75063d4abf0Sagc cost_max = INT_MAX;
75163d4abf0Sagc else
75263d4abf0Sagc cost_max--;
75363d4abf0Sagc approx = 1;
75463d4abf0Sagc break;
75563d4abf0Sagc case CHAR_COMMA:
75663d4abf0Sagc r++;
75763d4abf0Sagc done = 1;
75863d4abf0Sagc break;
75963d4abf0Sagc default:
76063d4abf0Sagc if (*r >= L'0' && *r <= L'9')
76163d4abf0Sagc {
76263d4abf0Sagc #ifdef TRE_DEBUG
76363d4abf0Sagc const tre_char_t *sr = r;
76463d4abf0Sagc #endif /* TRE_DEBUG */
76563d4abf0Sagc int cost = tre_parse_int(&r, ctx->re_end);
76663d4abf0Sagc /* XXX - make sure r is not past end. */
76763d4abf0Sagc switch (*r)
76863d4abf0Sagc {
76963d4abf0Sagc case L'i': /* Insert cost */
77063d4abf0Sagc DPRINT(("tre_parse: ins cost: '%.*" STRF "'\n",
77163d4abf0Sagc REST(sr)));
77263d4abf0Sagc r++;
77363d4abf0Sagc cost_ins = cost;
77463d4abf0Sagc costs_set = 1;
77563d4abf0Sagc break;
77663d4abf0Sagc case L'd': /* Delete cost */
77763d4abf0Sagc DPRINT(("tre_parse: del cost: '%.*" STRF "'\n",
77863d4abf0Sagc REST(sr)));
77963d4abf0Sagc r++;
78063d4abf0Sagc cost_del = cost;
78163d4abf0Sagc costs_set = 1;
78263d4abf0Sagc break;
78363d4abf0Sagc case L's': /* Substitute cost */
78463d4abf0Sagc DPRINT(("tre_parse: subst cost: '%.*" STRF "'\n",
78563d4abf0Sagc REST(sr)));
78663d4abf0Sagc r++;
78763d4abf0Sagc cost_subst = cost;
78863d4abf0Sagc costs_set = 1;
78963d4abf0Sagc break;
79063d4abf0Sagc default:
79163d4abf0Sagc return REG_BADBR;
79263d4abf0Sagc }
79363d4abf0Sagc }
79463d4abf0Sagc else
79563d4abf0Sagc {
79663d4abf0Sagc done = 1;
79763d4abf0Sagc break;
79863d4abf0Sagc }
79963d4abf0Sagc }
80063d4abf0Sagc }
80163d4abf0Sagc } while (start != r);
80263d4abf0Sagc
80363d4abf0Sagc /* Missing }. */
80463d4abf0Sagc if (r >= ctx->re_end)
80563d4abf0Sagc return REG_EBRACE;
80663d4abf0Sagc
80763d4abf0Sagc /* Empty contents of {}. */
80863d4abf0Sagc if (r == ctx->re)
80963d4abf0Sagc return REG_BADBR;
81063d4abf0Sagc
81163d4abf0Sagc /* Parse the ending '}' or '\}'.*/
81263d4abf0Sagc if (ctx->cflags & REG_EXTENDED)
81363d4abf0Sagc {
81463d4abf0Sagc if (r >= ctx->re_end || *r != CHAR_RBRACE)
81563d4abf0Sagc return REG_BADBR;
81663d4abf0Sagc r++;
81763d4abf0Sagc }
81863d4abf0Sagc else
81963d4abf0Sagc {
82063d4abf0Sagc if (r + 1 >= ctx->re_end
82163d4abf0Sagc || *r != CHAR_BACKSLASH
82263d4abf0Sagc || *(r + 1) != CHAR_RBRACE)
82363d4abf0Sagc return REG_BADBR;
82463d4abf0Sagc r += 2;
82563d4abf0Sagc }
82663d4abf0Sagc
82763d4abf0Sagc
82863d4abf0Sagc /* Parse trailing '?' marking minimal repetition. */
82963d4abf0Sagc if (r < ctx->re_end)
83063d4abf0Sagc {
83163d4abf0Sagc if (*r == CHAR_QUESTIONMARK)
83263d4abf0Sagc {
83363d4abf0Sagc minimal = !(ctx->cflags & REG_UNGREEDY);
83463d4abf0Sagc r++;
83563d4abf0Sagc }
83663d4abf0Sagc else if (*r == CHAR_STAR || *r == CHAR_PLUS)
83763d4abf0Sagc {
83863d4abf0Sagc /* These are reserved for future extensions. */
83963d4abf0Sagc return REG_BADRPT;
84063d4abf0Sagc }
84163d4abf0Sagc }
84263d4abf0Sagc
84363d4abf0Sagc /* Create the AST node(s). */
84463d4abf0Sagc if (min == 0 && max == 0)
84563d4abf0Sagc {
84663d4abf0Sagc *result = tre_ast_new_literal(ctx->mem, EMPTY, -1, -1);
84763d4abf0Sagc if (*result == NULL)
84863d4abf0Sagc return REG_ESPACE;
84963d4abf0Sagc }
85063d4abf0Sagc else
85163d4abf0Sagc {
85263d4abf0Sagc if (min < 0 && max < 0)
85363d4abf0Sagc /* Only approximate parameters set, no repetitions. */
85463d4abf0Sagc min = max = 1;
85563d4abf0Sagc
85663d4abf0Sagc *result = tre_ast_new_iter(ctx->mem, *result, min, max, minimal);
85763d4abf0Sagc if (!*result)
85863d4abf0Sagc return REG_ESPACE;
85963d4abf0Sagc
86063d4abf0Sagc /* If approximate matching parameters are set, add them to the
86163d4abf0Sagc iteration node. */
86263d4abf0Sagc if (approx || costs_set || counts_set)
86363d4abf0Sagc {
86463d4abf0Sagc int *params;
86563d4abf0Sagc tre_iteration_t *iter = (*result)->obj;
86663d4abf0Sagc
86763d4abf0Sagc if (costs_set || counts_set)
86863d4abf0Sagc {
86963d4abf0Sagc if (limit_ins == TRE_PARAM_UNSET)
87063d4abf0Sagc {
87163d4abf0Sagc if (cost_ins == TRE_PARAM_UNSET)
87263d4abf0Sagc limit_ins = 0;
87363d4abf0Sagc else
87463d4abf0Sagc limit_ins = INT_MAX;
87563d4abf0Sagc }
87663d4abf0Sagc
87763d4abf0Sagc if (limit_del == TRE_PARAM_UNSET)
87863d4abf0Sagc {
87963d4abf0Sagc if (cost_del == TRE_PARAM_UNSET)
88063d4abf0Sagc limit_del = 0;
88163d4abf0Sagc else
88263d4abf0Sagc limit_del = INT_MAX;
88363d4abf0Sagc }
88463d4abf0Sagc
88563d4abf0Sagc if (limit_subst == TRE_PARAM_UNSET)
88663d4abf0Sagc {
88763d4abf0Sagc if (cost_subst == TRE_PARAM_UNSET)
88863d4abf0Sagc limit_subst = 0;
88963d4abf0Sagc else
89063d4abf0Sagc limit_subst = INT_MAX;
89163d4abf0Sagc }
89263d4abf0Sagc }
89363d4abf0Sagc
89463d4abf0Sagc if (cost_max == TRE_PARAM_UNSET)
89563d4abf0Sagc cost_max = INT_MAX;
89663d4abf0Sagc if (limit_err == TRE_PARAM_UNSET)
89763d4abf0Sagc limit_err = INT_MAX;
89863d4abf0Sagc
89963d4abf0Sagc ctx->have_approx = 1;
90063d4abf0Sagc params = tre_mem_alloc(ctx->mem, sizeof(*params) * TRE_PARAM_LAST);
90163d4abf0Sagc if (!params)
90263d4abf0Sagc return REG_ESPACE;
90363d4abf0Sagc for (i = 0; i < TRE_PARAM_LAST; i++)
90463d4abf0Sagc params[i] = TRE_PARAM_UNSET;
90563d4abf0Sagc params[TRE_PARAM_COST_INS] = cost_ins;
90663d4abf0Sagc params[TRE_PARAM_COST_DEL] = cost_del;
90763d4abf0Sagc params[TRE_PARAM_COST_SUBST] = cost_subst;
90863d4abf0Sagc params[TRE_PARAM_COST_MAX] = cost_max;
90963d4abf0Sagc params[TRE_PARAM_MAX_INS] = limit_ins;
91063d4abf0Sagc params[TRE_PARAM_MAX_DEL] = limit_del;
91163d4abf0Sagc params[TRE_PARAM_MAX_SUBST] = limit_subst;
91263d4abf0Sagc params[TRE_PARAM_MAX_ERR] = limit_err;
91363d4abf0Sagc iter->params = params;
91463d4abf0Sagc }
91563d4abf0Sagc }
91663d4abf0Sagc
91763d4abf0Sagc DPRINT(("tre_parse_bound: min %d, max %d, costs [%d,%d,%d, total %d], "
91863d4abf0Sagc "limits [%d,%d,%d, total %d]\n",
91963d4abf0Sagc min, max, cost_ins, cost_del, cost_subst, cost_max,
92063d4abf0Sagc limit_ins, limit_del, limit_subst, limit_err));
92163d4abf0Sagc
92263d4abf0Sagc
92363d4abf0Sagc ctx->re = r;
92463d4abf0Sagc return REG_OK;
92563d4abf0Sagc }
92663d4abf0Sagc
92763d4abf0Sagc typedef enum {
92863d4abf0Sagc PARSE_RE = 0,
92963d4abf0Sagc PARSE_ATOM,
93063d4abf0Sagc PARSE_MARK_FOR_SUBMATCH,
93163d4abf0Sagc PARSE_BRANCH,
93263d4abf0Sagc PARSE_PIECE,
93363d4abf0Sagc PARSE_CATENATION,
93463d4abf0Sagc PARSE_POST_CATENATION,
93563d4abf0Sagc PARSE_UNION,
93663d4abf0Sagc PARSE_POST_UNION,
93763d4abf0Sagc PARSE_POSTFIX,
93863d4abf0Sagc PARSE_RESTORE_CFLAGS
93963d4abf0Sagc } tre_parse_re_stack_symbol_t;
94063d4abf0Sagc
94163d4abf0Sagc
94263d4abf0Sagc reg_errcode_t
tre_parse(tre_parse_ctx_t * ctx)94363d4abf0Sagc tre_parse(tre_parse_ctx_t *ctx)
94463d4abf0Sagc {
94563d4abf0Sagc tre_ast_node_t *result = NULL;
94663d4abf0Sagc tre_parse_re_stack_symbol_t symbol;
94763d4abf0Sagc reg_errcode_t status = REG_OK;
94863d4abf0Sagc tre_stack_t *stack = ctx->stack;
94963d4abf0Sagc int bottom = tre_stack_num_objects(stack);
95063d4abf0Sagc int depth = 0;
95163d4abf0Sagc int temporary_cflags = 0;
95263d4abf0Sagc
95363d4abf0Sagc DPRINT(("tre_parse: parsing '%.*" STRF "', len = %d\n",
95463d4abf0Sagc ctx->len, ctx->re, ctx->len));
95563d4abf0Sagc
95663d4abf0Sagc if (!ctx->nofirstsub)
95763d4abf0Sagc {
958f2a3d147Schristos STACK_PUSH(stack, long, ctx->submatch_id);
959f2a3d147Schristos STACK_PUSH(stack, long, PARSE_MARK_FOR_SUBMATCH);
96063d4abf0Sagc ctx->submatch_id++;
96163d4abf0Sagc }
962f2a3d147Schristos STACK_PUSH(stack, long, PARSE_RE);
96363d4abf0Sagc ctx->re_start = ctx->re;
96463d4abf0Sagc ctx->re_end = ctx->re + ctx->len;
96563d4abf0Sagc
96663d4abf0Sagc
96763d4abf0Sagc /* The following is basically just a recursive descent parser. I use
96863d4abf0Sagc an explicit stack instead of recursive functions mostly because of
96963d4abf0Sagc two reasons: compatibility with systems which have an overflowable
97063d4abf0Sagc call stack, and efficiency (both in lines of code and speed). */
97163d4abf0Sagc while (tre_stack_num_objects(stack) > bottom && status == REG_OK)
97263d4abf0Sagc {
97363d4abf0Sagc if (status != REG_OK)
97463d4abf0Sagc break;
97563d4abf0Sagc symbol = tre_stack_pop_int(stack);
97663d4abf0Sagc switch (symbol)
97763d4abf0Sagc {
97863d4abf0Sagc case PARSE_RE:
97963d4abf0Sagc /* Parse a full regexp. A regexp is one or more branches,
98063d4abf0Sagc separated by the union operator `|'. */
98163d4abf0Sagc #ifdef REG_LITERAL
98263d4abf0Sagc if (!(ctx->cflags & REG_LITERAL)
98363d4abf0Sagc && ctx->cflags & REG_EXTENDED)
98463d4abf0Sagc #endif /* REG_LITERAL */
985f2a3d147Schristos STACK_PUSHX(stack, long, PARSE_UNION);
986f2a3d147Schristos STACK_PUSHX(stack, long, PARSE_BRANCH);
98763d4abf0Sagc break;
98863d4abf0Sagc
98963d4abf0Sagc case PARSE_BRANCH:
99063d4abf0Sagc /* Parse a branch. A branch is one or more pieces, concatenated.
99163d4abf0Sagc A piece is an atom possibly followed by a postfix operator. */
992f2a3d147Schristos STACK_PUSHX(stack, long, PARSE_CATENATION);
993f2a3d147Schristos STACK_PUSHX(stack, long, PARSE_PIECE);
99463d4abf0Sagc break;
99563d4abf0Sagc
99663d4abf0Sagc case PARSE_PIECE:
99763d4abf0Sagc /* Parse a piece. A piece is an atom possibly followed by one
99863d4abf0Sagc or more postfix operators. */
99963d4abf0Sagc #ifdef REG_LITERAL
100063d4abf0Sagc if (!(ctx->cflags & REG_LITERAL))
100163d4abf0Sagc #endif /* REG_LITERAL */
1002f2a3d147Schristos STACK_PUSHX(stack, long, PARSE_POSTFIX);
1003f2a3d147Schristos STACK_PUSHX(stack, long, PARSE_ATOM);
100463d4abf0Sagc break;
100563d4abf0Sagc
100663d4abf0Sagc case PARSE_CATENATION:
100763d4abf0Sagc /* If the expression has not ended, parse another piece. */
100863d4abf0Sagc {
100963d4abf0Sagc tre_char_t c;
101063d4abf0Sagc if (ctx->re >= ctx->re_end)
101163d4abf0Sagc break;
101263d4abf0Sagc c = *ctx->re;
101363d4abf0Sagc #ifdef REG_LITERAL
101463d4abf0Sagc if (!(ctx->cflags & REG_LITERAL))
101563d4abf0Sagc {
101663d4abf0Sagc #endif /* REG_LITERAL */
101763d4abf0Sagc if (ctx->cflags & REG_EXTENDED && c == CHAR_PIPE)
101863d4abf0Sagc break;
101963d4abf0Sagc if ((ctx->cflags & REG_EXTENDED
102063d4abf0Sagc && c == CHAR_RPAREN && depth > 0)
102163d4abf0Sagc || (!(ctx->cflags & REG_EXTENDED)
102263d4abf0Sagc && (c == CHAR_BACKSLASH
102363d4abf0Sagc && *(ctx->re + 1) == CHAR_RPAREN)))
102463d4abf0Sagc {
102563d4abf0Sagc if (!(ctx->cflags & REG_EXTENDED) && depth == 0)
102663d4abf0Sagc status = REG_EPAREN;
102763d4abf0Sagc DPRINT(("tre_parse: group end: '%.*" STRF "'\n",
102863d4abf0Sagc REST(ctx->re)));
102963d4abf0Sagc depth--;
103063d4abf0Sagc if (!(ctx->cflags & REG_EXTENDED))
103163d4abf0Sagc ctx->re += 2;
103263d4abf0Sagc break;
103363d4abf0Sagc }
103463d4abf0Sagc #ifdef REG_LITERAL
103563d4abf0Sagc }
103663d4abf0Sagc #endif /* REG_LITERAL */
103763d4abf0Sagc
103863d4abf0Sagc #ifdef REG_RIGHT_ASSOC
103963d4abf0Sagc if (ctx->cflags & REG_RIGHT_ASSOC)
104063d4abf0Sagc {
104163d4abf0Sagc /* Right associative concatenation. */
104263d4abf0Sagc STACK_PUSHX(stack, voidptr, result);
1043f2a3d147Schristos STACK_PUSHX(stack, long, PARSE_POST_CATENATION);
1044f2a3d147Schristos STACK_PUSHX(stack, long, PARSE_CATENATION);
1045f2a3d147Schristos STACK_PUSHX(stack, long, PARSE_PIECE);
104663d4abf0Sagc }
104763d4abf0Sagc else
104863d4abf0Sagc #endif /* REG_RIGHT_ASSOC */
104963d4abf0Sagc {
105063d4abf0Sagc /* Default case, left associative concatenation. */
1051f2a3d147Schristos STACK_PUSHX(stack, long, PARSE_CATENATION);
105263d4abf0Sagc STACK_PUSHX(stack, voidptr, result);
1053f2a3d147Schristos STACK_PUSHX(stack, long, PARSE_POST_CATENATION);
1054f2a3d147Schristos STACK_PUSHX(stack, long, PARSE_PIECE);
105563d4abf0Sagc }
105663d4abf0Sagc break;
105763d4abf0Sagc }
105863d4abf0Sagc
105963d4abf0Sagc case PARSE_POST_CATENATION:
106063d4abf0Sagc {
106163d4abf0Sagc tre_ast_node_t *tree = tre_stack_pop_voidptr(stack);
106263d4abf0Sagc tre_ast_node_t *tmp_node;
106363d4abf0Sagc tmp_node = tre_ast_new_catenation(ctx->mem, tree, result);
106463d4abf0Sagc if (!tmp_node)
106563d4abf0Sagc return REG_ESPACE;
106663d4abf0Sagc result = tmp_node;
106763d4abf0Sagc break;
106863d4abf0Sagc }
106963d4abf0Sagc
107063d4abf0Sagc case PARSE_UNION:
107163d4abf0Sagc if (ctx->re >= ctx->re_end)
107263d4abf0Sagc break;
107363d4abf0Sagc #ifdef REG_LITERAL
107463d4abf0Sagc if (ctx->cflags & REG_LITERAL)
107563d4abf0Sagc break;
107663d4abf0Sagc #endif /* REG_LITERAL */
107763d4abf0Sagc switch (*ctx->re)
107863d4abf0Sagc {
107963d4abf0Sagc case CHAR_PIPE:
108063d4abf0Sagc DPRINT(("tre_parse: union: '%.*" STRF "'\n",
108163d4abf0Sagc REST(ctx->re)));
1082f2a3d147Schristos STACK_PUSHX(stack, long, PARSE_UNION);
108363d4abf0Sagc STACK_PUSHX(stack, voidptr, result);
1084f2a3d147Schristos STACK_PUSHX(stack, long, PARSE_POST_UNION);
1085f2a3d147Schristos STACK_PUSHX(stack, long, PARSE_BRANCH);
108663d4abf0Sagc ctx->re++;
108763d4abf0Sagc break;
108863d4abf0Sagc
108963d4abf0Sagc case CHAR_RPAREN:
109063d4abf0Sagc ctx->re++;
109163d4abf0Sagc break;
109263d4abf0Sagc
109363d4abf0Sagc default:
109463d4abf0Sagc break;
109563d4abf0Sagc }
109663d4abf0Sagc break;
109763d4abf0Sagc
109863d4abf0Sagc case PARSE_POST_UNION:
109963d4abf0Sagc {
110063d4abf0Sagc tre_ast_node_t *tmp_node;
110163d4abf0Sagc tre_ast_node_t *tree = tre_stack_pop_voidptr(stack);
110263d4abf0Sagc tmp_node = tre_ast_new_union(ctx->mem, tree, result);
110363d4abf0Sagc if (!tmp_node)
110463d4abf0Sagc return REG_ESPACE;
110563d4abf0Sagc result = tmp_node;
110663d4abf0Sagc break;
110763d4abf0Sagc }
110863d4abf0Sagc
110963d4abf0Sagc case PARSE_POSTFIX:
111063d4abf0Sagc /* Parse postfix operators. */
111163d4abf0Sagc if (ctx->re >= ctx->re_end)
111263d4abf0Sagc break;
111363d4abf0Sagc #ifdef REG_LITERAL
111463d4abf0Sagc if (ctx->cflags & REG_LITERAL)
111563d4abf0Sagc break;
111663d4abf0Sagc #endif /* REG_LITERAL */
111763d4abf0Sagc switch (*ctx->re)
111863d4abf0Sagc {
111963d4abf0Sagc case CHAR_PLUS:
112063d4abf0Sagc case CHAR_QUESTIONMARK:
112163d4abf0Sagc if (!(ctx->cflags & REG_EXTENDED))
112263d4abf0Sagc break;
112363d4abf0Sagc /*FALLTHROUGH*/
112463d4abf0Sagc case CHAR_STAR:
112563d4abf0Sagc {
112663d4abf0Sagc tre_ast_node_t *tmp_node;
112763d4abf0Sagc int minimal = (ctx->cflags & REG_UNGREEDY) ? 1 : 0;
112863d4abf0Sagc int rep_min = 0;
112963d4abf0Sagc int rep_max = -1;
113063d4abf0Sagc #ifdef TRE_DEBUG
113163d4abf0Sagc const tre_char_t *tmp_re;
113263d4abf0Sagc #endif
113363d4abf0Sagc
113463d4abf0Sagc if (*ctx->re == CHAR_PLUS)
113563d4abf0Sagc rep_min = 1;
113663d4abf0Sagc if (*ctx->re == CHAR_QUESTIONMARK)
113763d4abf0Sagc rep_max = 1;
113863d4abf0Sagc #ifdef TRE_DEBUG
113963d4abf0Sagc tmp_re = ctx->re;
114063d4abf0Sagc #endif
114163d4abf0Sagc
114263d4abf0Sagc if (ctx->re + 1 < ctx->re_end)
114363d4abf0Sagc {
114463d4abf0Sagc if (*(ctx->re + 1) == CHAR_QUESTIONMARK)
114563d4abf0Sagc {
114663d4abf0Sagc minimal = !(ctx->cflags & REG_UNGREEDY);
114763d4abf0Sagc ctx->re++;
114863d4abf0Sagc }
114963d4abf0Sagc else if (*(ctx->re + 1) == CHAR_STAR
115063d4abf0Sagc || *(ctx->re + 1) == CHAR_PLUS)
115163d4abf0Sagc {
115263d4abf0Sagc /* These are reserved for future extensions. */
115363d4abf0Sagc return REG_BADRPT;
115463d4abf0Sagc }
115563d4abf0Sagc }
115663d4abf0Sagc
115763d4abf0Sagc DPRINT(("tre_parse: %s star: '%.*" STRF "'\n",
115863d4abf0Sagc minimal ? " minimal" : "greedy", REST(tmp_re)));
115963d4abf0Sagc ctx->re++;
116063d4abf0Sagc tmp_node = tre_ast_new_iter(ctx->mem, result, rep_min, rep_max,
116163d4abf0Sagc minimal);
116263d4abf0Sagc if (tmp_node == NULL)
116363d4abf0Sagc return REG_ESPACE;
116463d4abf0Sagc result = tmp_node;
1165f2a3d147Schristos STACK_PUSHX(stack, long, PARSE_POSTFIX);
116663d4abf0Sagc }
116763d4abf0Sagc break;
116863d4abf0Sagc
116963d4abf0Sagc case CHAR_BACKSLASH:
117063d4abf0Sagc /* "\{" is special without REG_EXTENDED */
117163d4abf0Sagc if (!(ctx->cflags & REG_EXTENDED)
117263d4abf0Sagc && ctx->re + 1 < ctx->re_end
117363d4abf0Sagc && *(ctx->re + 1) == CHAR_LBRACE)
117463d4abf0Sagc {
117563d4abf0Sagc ctx->re++;
117663d4abf0Sagc goto parse_brace;
117763d4abf0Sagc }
117863d4abf0Sagc else
117963d4abf0Sagc break;
118063d4abf0Sagc
118163d4abf0Sagc case CHAR_LBRACE:
118263d4abf0Sagc /* "{" is literal without REG_EXTENDED */
118363d4abf0Sagc if (!(ctx->cflags & REG_EXTENDED))
118463d4abf0Sagc break;
118563d4abf0Sagc
118663d4abf0Sagc parse_brace:
118763d4abf0Sagc DPRINT(("tre_parse: bound: '%.*" STRF "'\n",
118863d4abf0Sagc REST(ctx->re)));
118963d4abf0Sagc ctx->re++;
119063d4abf0Sagc
119163d4abf0Sagc status = tre_parse_bound(ctx, &result);
119263d4abf0Sagc if (status != REG_OK)
119363d4abf0Sagc return status;
1194f2a3d147Schristos STACK_PUSHX(stack, long, PARSE_POSTFIX);
119563d4abf0Sagc break;
119663d4abf0Sagc }
119763d4abf0Sagc break;
119863d4abf0Sagc
119963d4abf0Sagc case PARSE_ATOM:
120063d4abf0Sagc /* Parse an atom. An atom is a regular expression enclosed in `()',
120163d4abf0Sagc an empty set of `()', a bracket expression, `.', `^', `$',
120263d4abf0Sagc a `\' followed by a character, or a single character. */
120363d4abf0Sagc
120463d4abf0Sagc /* End of regexp? (empty string). */
120563d4abf0Sagc if (ctx->re >= ctx->re_end)
120663d4abf0Sagc goto parse_literal;
120763d4abf0Sagc
120863d4abf0Sagc #ifdef REG_LITERAL
120963d4abf0Sagc if (ctx->cflags & REG_LITERAL)
121063d4abf0Sagc goto parse_literal;
121163d4abf0Sagc #endif /* REG_LITERAL */
121263d4abf0Sagc
121363d4abf0Sagc switch (*ctx->re)
121463d4abf0Sagc {
121563d4abf0Sagc case CHAR_LPAREN: /* parenthesized subexpression */
121663d4abf0Sagc
121763d4abf0Sagc /* Handle "(?...)" extensions. They work in a way similar
121863d4abf0Sagc to Perls corresponding extensions. */
121963d4abf0Sagc if (ctx->cflags & REG_EXTENDED
122063d4abf0Sagc && *(ctx->re + 1) == CHAR_QUESTIONMARK)
122163d4abf0Sagc {
122263d4abf0Sagc int new_cflags = ctx->cflags;
122363d4abf0Sagc int bit = 1;
122463d4abf0Sagc DPRINT(("tre_parse: extension: '%.*" STRF "\n",
122563d4abf0Sagc REST(ctx->re)));
122663d4abf0Sagc ctx->re += 2;
122713498f30Srin while (/*CONSTCOND*/(void)1,1)
122863d4abf0Sagc {
122963d4abf0Sagc if (*ctx->re == L'i')
123063d4abf0Sagc {
123163d4abf0Sagc DPRINT(("tre_parse: icase: '%.*" STRF "\n",
123263d4abf0Sagc REST(ctx->re)));
123363d4abf0Sagc if (bit)
123463d4abf0Sagc new_cflags |= REG_ICASE;
123563d4abf0Sagc else
123663d4abf0Sagc new_cflags &= ~REG_ICASE;
123763d4abf0Sagc ctx->re++;
123863d4abf0Sagc }
123963d4abf0Sagc else if (*ctx->re == L'n')
124063d4abf0Sagc {
124163d4abf0Sagc DPRINT(("tre_parse: newline: '%.*" STRF "\n",
124263d4abf0Sagc REST(ctx->re)));
124363d4abf0Sagc if (bit)
124463d4abf0Sagc new_cflags |= REG_NEWLINE;
124563d4abf0Sagc else
124663d4abf0Sagc new_cflags &= ~REG_NEWLINE;
124763d4abf0Sagc ctx->re++;
124863d4abf0Sagc }
124963d4abf0Sagc #ifdef REG_RIGHT_ASSOC
125063d4abf0Sagc else if (*ctx->re == L'r')
125163d4abf0Sagc {
125263d4abf0Sagc DPRINT(("tre_parse: right assoc: '%.*" STRF "\n",
125363d4abf0Sagc REST(ctx->re)));
125463d4abf0Sagc if (bit)
125563d4abf0Sagc new_cflags |= REG_RIGHT_ASSOC;
125663d4abf0Sagc else
125763d4abf0Sagc new_cflags &= ~REG_RIGHT_ASSOC;
125863d4abf0Sagc ctx->re++;
125963d4abf0Sagc }
126063d4abf0Sagc #endif /* REG_RIGHT_ASSOC */
126163d4abf0Sagc #ifdef REG_UNGREEDY
126263d4abf0Sagc else if (*ctx->re == L'U')
126363d4abf0Sagc {
126463d4abf0Sagc DPRINT(("tre_parse: ungreedy: '%.*" STRF "\n",
126563d4abf0Sagc REST(ctx->re)));
126663d4abf0Sagc if (bit)
126763d4abf0Sagc new_cflags |= REG_UNGREEDY;
126863d4abf0Sagc else
126963d4abf0Sagc new_cflags &= ~REG_UNGREEDY;
127063d4abf0Sagc ctx->re++;
127163d4abf0Sagc }
127263d4abf0Sagc #endif /* REG_UNGREEDY */
127363d4abf0Sagc else if (*ctx->re == CHAR_MINUS)
127463d4abf0Sagc {
127563d4abf0Sagc DPRINT(("tre_parse: turn off: '%.*" STRF "\n",
127663d4abf0Sagc REST(ctx->re)));
127763d4abf0Sagc ctx->re++;
127863d4abf0Sagc bit = 0;
127963d4abf0Sagc }
128063d4abf0Sagc else if (*ctx->re == CHAR_COLON)
128163d4abf0Sagc {
128263d4abf0Sagc DPRINT(("tre_parse: no group: '%.*" STRF "\n",
128363d4abf0Sagc REST(ctx->re)));
128463d4abf0Sagc ctx->re++;
128563d4abf0Sagc depth++;
128663d4abf0Sagc break;
128763d4abf0Sagc }
128863d4abf0Sagc else if (*ctx->re == CHAR_HASH)
128963d4abf0Sagc {
129063d4abf0Sagc DPRINT(("tre_parse: comment: '%.*" STRF "\n",
129163d4abf0Sagc REST(ctx->re)));
129263d4abf0Sagc /* A comment can contain any character except a
129363d4abf0Sagc right parenthesis */
129463d4abf0Sagc while (*ctx->re != CHAR_RPAREN
129563d4abf0Sagc && ctx->re < ctx->re_end)
129663d4abf0Sagc ctx->re++;
129763d4abf0Sagc if (*ctx->re == CHAR_RPAREN && ctx->re < ctx->re_end)
129863d4abf0Sagc {
129963d4abf0Sagc ctx->re++;
130063d4abf0Sagc break;
130163d4abf0Sagc }
130263d4abf0Sagc else
130363d4abf0Sagc return REG_BADPAT;
130463d4abf0Sagc }
130563d4abf0Sagc else if (*ctx->re == CHAR_RPAREN)
130663d4abf0Sagc {
130763d4abf0Sagc ctx->re++;
130863d4abf0Sagc break;
130963d4abf0Sagc }
131063d4abf0Sagc else
131163d4abf0Sagc return REG_BADPAT;
131263d4abf0Sagc }
131363d4abf0Sagc
131463d4abf0Sagc /* Turn on the cflags changes for the rest of the
131563d4abf0Sagc enclosing group. */
1316f2a3d147Schristos STACK_PUSHX(stack, long, ctx->cflags);
1317f2a3d147Schristos STACK_PUSHX(stack, long, PARSE_RESTORE_CFLAGS);
1318f2a3d147Schristos STACK_PUSHX(stack, long, PARSE_RE);
131963d4abf0Sagc ctx->cflags = new_cflags;
132063d4abf0Sagc break;
132163d4abf0Sagc }
132263d4abf0Sagc
132363d4abf0Sagc if (ctx->cflags & REG_EXTENDED
132463d4abf0Sagc || (ctx->re > ctx->re_start
132563d4abf0Sagc && *(ctx->re - 1) == CHAR_BACKSLASH))
132663d4abf0Sagc {
132763d4abf0Sagc depth++;
132863d4abf0Sagc if (ctx->re + 2 < ctx->re_end
132963d4abf0Sagc && *(ctx->re + 1) == CHAR_QUESTIONMARK
133063d4abf0Sagc && *(ctx->re + 2) == CHAR_COLON)
133163d4abf0Sagc {
133263d4abf0Sagc DPRINT(("tre_parse: group begin: '%.*" STRF
133363d4abf0Sagc "', no submatch\n", REST(ctx->re)));
133463d4abf0Sagc /* Don't mark for submatching. */
133563d4abf0Sagc ctx->re += 3;
1336f2a3d147Schristos STACK_PUSHX(stack, long, PARSE_RE);
133763d4abf0Sagc }
133863d4abf0Sagc else
133963d4abf0Sagc {
134063d4abf0Sagc DPRINT(("tre_parse: group begin: '%.*" STRF
134163d4abf0Sagc "', submatch %d\n", REST(ctx->re),
134263d4abf0Sagc ctx->submatch_id));
134363d4abf0Sagc ctx->re++;
134463d4abf0Sagc /* First parse a whole RE, then mark the resulting tree
134563d4abf0Sagc for submatching. */
1346f2a3d147Schristos STACK_PUSHX(stack, long, ctx->submatch_id);
1347f2a3d147Schristos STACK_PUSHX(stack, long, PARSE_MARK_FOR_SUBMATCH);
1348f2a3d147Schristos STACK_PUSHX(stack, long, PARSE_RE);
134963d4abf0Sagc ctx->submatch_id++;
135063d4abf0Sagc }
135163d4abf0Sagc }
135263d4abf0Sagc else
135363d4abf0Sagc goto parse_literal;
135463d4abf0Sagc break;
135563d4abf0Sagc
135663d4abf0Sagc case CHAR_RPAREN: /* end of current subexpression */
135763d4abf0Sagc if ((ctx->cflags & REG_EXTENDED && depth > 0)
135863d4abf0Sagc || (ctx->re > ctx->re_start
135963d4abf0Sagc && *(ctx->re - 1) == CHAR_BACKSLASH))
136063d4abf0Sagc {
136163d4abf0Sagc DPRINT(("tre_parse: empty: '%.*" STRF "'\n",
136263d4abf0Sagc REST(ctx->re)));
136363d4abf0Sagc /* We were expecting an atom, but instead the current
136463d4abf0Sagc subexpression was closed. POSIX leaves the meaning of
136563d4abf0Sagc this to be implementation-defined. We interpret this as
136663d4abf0Sagc an empty expression (which matches an empty string). */
136763d4abf0Sagc result = tre_ast_new_literal(ctx->mem, EMPTY, -1, -1);
136863d4abf0Sagc if (result == NULL)
136963d4abf0Sagc return REG_ESPACE;
137063d4abf0Sagc if (!(ctx->cflags & REG_EXTENDED))
137163d4abf0Sagc ctx->re--;
137263d4abf0Sagc }
137363d4abf0Sagc else
137463d4abf0Sagc goto parse_literal;
137563d4abf0Sagc break;
137663d4abf0Sagc
137763d4abf0Sagc case CHAR_LBRACKET: /* bracket expression */
137863d4abf0Sagc DPRINT(("tre_parse: bracket: '%.*" STRF "'\n",
137963d4abf0Sagc REST(ctx->re)));
138063d4abf0Sagc ctx->re++;
138163d4abf0Sagc status = tre_parse_bracket(ctx, &result);
138263d4abf0Sagc if (status != REG_OK)
138363d4abf0Sagc return status;
138463d4abf0Sagc break;
138563d4abf0Sagc
138663d4abf0Sagc case CHAR_BACKSLASH:
138763d4abf0Sagc /* If this is "\(" or "\)" chew off the backslash and
138863d4abf0Sagc try again. */
138963d4abf0Sagc if (!(ctx->cflags & REG_EXTENDED)
139063d4abf0Sagc && ctx->re + 1 < ctx->re_end
139163d4abf0Sagc && (*(ctx->re + 1) == CHAR_LPAREN
139263d4abf0Sagc || *(ctx->re + 1) == CHAR_RPAREN))
139363d4abf0Sagc {
139463d4abf0Sagc ctx->re++;
1395f2a3d147Schristos STACK_PUSHX(stack, long, PARSE_ATOM);
139663d4abf0Sagc break;
139763d4abf0Sagc }
139863d4abf0Sagc
139963d4abf0Sagc /* If a macro is used, parse the expanded macro recursively. */
140063d4abf0Sagc {
140163d4abf0Sagc tre_char_t buf[64];
140263d4abf0Sagc tre_expand_macro(ctx->re + 1, ctx->re_end,
140363d4abf0Sagc buf, elementsof(buf));
140463d4abf0Sagc if (buf[0] != 0)
140563d4abf0Sagc {
140663d4abf0Sagc tre_parse_ctx_t subctx;
140763d4abf0Sagc memcpy(&subctx, ctx, sizeof(subctx));
140863d4abf0Sagc subctx.re = buf;
140963d4abf0Sagc subctx.len = tre_strlen(buf);
141063d4abf0Sagc subctx.nofirstsub = 1;
141163d4abf0Sagc status = tre_parse(&subctx);
141263d4abf0Sagc if (status != REG_OK)
141363d4abf0Sagc return status;
141463d4abf0Sagc ctx->re += 2;
141563d4abf0Sagc ctx->position = subctx.position;
141663d4abf0Sagc result = subctx.result;
141763d4abf0Sagc break;
141863d4abf0Sagc }
141963d4abf0Sagc }
142063d4abf0Sagc
142163d4abf0Sagc if (ctx->re + 1 >= ctx->re_end)
142263d4abf0Sagc /* Trailing backslash. */
142363d4abf0Sagc return REG_EESCAPE;
142463d4abf0Sagc
142563d4abf0Sagc #ifdef REG_LITERAL
142663d4abf0Sagc if (*(ctx->re + 1) == L'Q')
142763d4abf0Sagc {
142863d4abf0Sagc DPRINT(("tre_parse: tmp literal: '%.*" STRF "'\n",
142963d4abf0Sagc REST(ctx->re)));
143063d4abf0Sagc ctx->cflags |= REG_LITERAL;
143163d4abf0Sagc temporary_cflags |= REG_LITERAL;
143263d4abf0Sagc ctx->re += 2;
1433f2a3d147Schristos STACK_PUSHX(stack, long, PARSE_ATOM);
143463d4abf0Sagc break;
143563d4abf0Sagc }
143663d4abf0Sagc #endif /* REG_LITERAL */
143763d4abf0Sagc
143863d4abf0Sagc DPRINT(("tre_parse: bleep: '%.*" STRF "'\n", REST(ctx->re)));
143963d4abf0Sagc ctx->re++;
144063d4abf0Sagc switch (*ctx->re)
144163d4abf0Sagc {
144263d4abf0Sagc case L'b':
144363d4abf0Sagc result = tre_ast_new_literal(ctx->mem, ASSERTION,
144463d4abf0Sagc ASSERT_AT_WB, -1);
144563d4abf0Sagc ctx->re++;
144663d4abf0Sagc break;
144763d4abf0Sagc case L'B':
144863d4abf0Sagc result = tre_ast_new_literal(ctx->mem, ASSERTION,
144963d4abf0Sagc ASSERT_AT_WB_NEG, -1);
145063d4abf0Sagc ctx->re++;
145163d4abf0Sagc break;
145263d4abf0Sagc case L'<':
145363d4abf0Sagc result = tre_ast_new_literal(ctx->mem, ASSERTION,
145463d4abf0Sagc ASSERT_AT_BOW, -1);
145563d4abf0Sagc ctx->re++;
145663d4abf0Sagc break;
145763d4abf0Sagc case L'>':
145863d4abf0Sagc result = tre_ast_new_literal(ctx->mem, ASSERTION,
145963d4abf0Sagc ASSERT_AT_EOW, -1);
146063d4abf0Sagc ctx->re++;
146163d4abf0Sagc break;
146263d4abf0Sagc case L'x':
146363d4abf0Sagc ctx->re++;
146463d4abf0Sagc if (ctx->re[0] != CHAR_LBRACE && ctx->re < ctx->re_end)
146563d4abf0Sagc {
146663d4abf0Sagc /* 8 bit hex char. */
146763d4abf0Sagc char tmp[3] = {0, 0, 0};
146863d4abf0Sagc long val;
146963d4abf0Sagc DPRINT(("tre_parse: 8 bit hex: '%.*" STRF "'\n",
147063d4abf0Sagc REST(ctx->re - 2)));
147163d4abf0Sagc
147263d4abf0Sagc if (tre_isxdigit(ctx->re[0]) && ctx->re < ctx->re_end)
147363d4abf0Sagc {
147463d4abf0Sagc tmp[0] = (char)ctx->re[0];
147563d4abf0Sagc ctx->re++;
147663d4abf0Sagc }
147763d4abf0Sagc if (tre_isxdigit(ctx->re[0]) && ctx->re < ctx->re_end)
147863d4abf0Sagc {
147963d4abf0Sagc tmp[1] = (char)ctx->re[0];
148063d4abf0Sagc ctx->re++;
148163d4abf0Sagc }
148263d4abf0Sagc val = strtol(tmp, NULL, 16);
148363d4abf0Sagc result = tre_ast_new_literal(ctx->mem, (int)val,
148463d4abf0Sagc (int)val, ctx->position);
148563d4abf0Sagc ctx->position++;
148663d4abf0Sagc break;
148763d4abf0Sagc }
148863d4abf0Sagc else if (ctx->re < ctx->re_end)
148963d4abf0Sagc {
149063d4abf0Sagc /* Wide char. */
149163d4abf0Sagc char tmp[32];
149263d4abf0Sagc long val;
149363d4abf0Sagc int i = 0;
149463d4abf0Sagc ctx->re++;
149563d4abf0Sagc while (ctx->re_end - ctx->re >= 0)
149663d4abf0Sagc {
149763d4abf0Sagc if (ctx->re[0] == CHAR_RBRACE)
149863d4abf0Sagc break;
149963d4abf0Sagc if (tre_isxdigit(ctx->re[0]))
150063d4abf0Sagc {
150163d4abf0Sagc tmp[i] = (char)ctx->re[0];
150263d4abf0Sagc i++;
150363d4abf0Sagc ctx->re++;
150463d4abf0Sagc continue;
150563d4abf0Sagc }
150663d4abf0Sagc return REG_EBRACE;
150763d4abf0Sagc }
150863d4abf0Sagc ctx->re++;
150963d4abf0Sagc tmp[i] = 0;
151063d4abf0Sagc val = strtol(tmp, NULL, 16);
151163d4abf0Sagc result = tre_ast_new_literal(ctx->mem, (int)val, (int)val,
151263d4abf0Sagc ctx->position);
151363d4abf0Sagc ctx->position++;
151463d4abf0Sagc break;
151563d4abf0Sagc }
151663d4abf0Sagc /*FALLTHROUGH*/
151763d4abf0Sagc
151863d4abf0Sagc default:
151963d4abf0Sagc if (tre_isdigit(*ctx->re))
152063d4abf0Sagc {
152163d4abf0Sagc /* Back reference. */
152263d4abf0Sagc int val = *ctx->re - L'0';
152363d4abf0Sagc DPRINT(("tre_parse: backref: '%.*" STRF "'\n",
152463d4abf0Sagc REST(ctx->re - 1)));
152563d4abf0Sagc result = tre_ast_new_literal(ctx->mem, BACKREF, val,
152663d4abf0Sagc ctx->position);
152763d4abf0Sagc if (result == NULL)
152863d4abf0Sagc return REG_ESPACE;
152963d4abf0Sagc ctx->position++;
153063d4abf0Sagc ctx->max_backref = MAX(val, ctx->max_backref);
153163d4abf0Sagc ctx->re++;
153263d4abf0Sagc }
153363d4abf0Sagc else
153463d4abf0Sagc {
153563d4abf0Sagc /* Escaped character. */
153663d4abf0Sagc DPRINT(("tre_parse: escaped: '%.*" STRF "'\n",
153763d4abf0Sagc REST(ctx->re - 1)));
153863d4abf0Sagc result = tre_ast_new_literal(ctx->mem, *ctx->re, *ctx->re,
153963d4abf0Sagc ctx->position);
154063d4abf0Sagc ctx->position++;
154163d4abf0Sagc ctx->re++;
154263d4abf0Sagc }
154363d4abf0Sagc break;
154463d4abf0Sagc }
154563d4abf0Sagc if (result == NULL)
154663d4abf0Sagc return REG_ESPACE;
154763d4abf0Sagc break;
154863d4abf0Sagc
154963d4abf0Sagc case CHAR_PERIOD: /* the any-symbol */
155063d4abf0Sagc DPRINT(("tre_parse: any: '%.*" STRF "'\n",
155163d4abf0Sagc REST(ctx->re)));
155263d4abf0Sagc if (ctx->cflags & REG_NEWLINE)
155363d4abf0Sagc {
155463d4abf0Sagc tre_ast_node_t *tmp1;
155563d4abf0Sagc tre_ast_node_t *tmp2;
155663d4abf0Sagc tmp1 = tre_ast_new_literal(ctx->mem, 0, L'\n' - 1,
155763d4abf0Sagc ctx->position);
155863d4abf0Sagc if (!tmp1)
155963d4abf0Sagc return REG_ESPACE;
156063d4abf0Sagc tmp2 = tre_ast_new_literal(ctx->mem, L'\n' + 1, TRE_CHAR_MAX,
156163d4abf0Sagc ctx->position + 1);
156263d4abf0Sagc if (!tmp2)
156363d4abf0Sagc return REG_ESPACE;
156463d4abf0Sagc result = tre_ast_new_union(ctx->mem, tmp1, tmp2);
156563d4abf0Sagc if (!result)
156663d4abf0Sagc return REG_ESPACE;
156763d4abf0Sagc ctx->position += 2;
156863d4abf0Sagc }
156963d4abf0Sagc else
157063d4abf0Sagc {
157163d4abf0Sagc result = tre_ast_new_literal(ctx->mem, 0, TRE_CHAR_MAX,
157263d4abf0Sagc ctx->position);
157363d4abf0Sagc if (!result)
157463d4abf0Sagc return REG_ESPACE;
157563d4abf0Sagc ctx->position++;
157663d4abf0Sagc }
157763d4abf0Sagc ctx->re++;
157863d4abf0Sagc break;
157963d4abf0Sagc
158063d4abf0Sagc case CHAR_CARET: /* beginning of line assertion */
158163d4abf0Sagc /* '^' has a special meaning everywhere in EREs, and in the
158263d4abf0Sagc beginning of the RE and after \( is BREs. */
158363d4abf0Sagc if (ctx->cflags & REG_EXTENDED
158463d4abf0Sagc || (ctx->re - 2 >= ctx->re_start
158563d4abf0Sagc && *(ctx->re - 2) == CHAR_BACKSLASH
158663d4abf0Sagc && *(ctx->re - 1) == CHAR_LPAREN)
158763d4abf0Sagc || ctx->re == ctx->re_start)
158863d4abf0Sagc {
158963d4abf0Sagc DPRINT(("tre_parse: BOL: '%.*" STRF "'\n",
159063d4abf0Sagc REST(ctx->re)));
159163d4abf0Sagc result = tre_ast_new_literal(ctx->mem, ASSERTION,
159263d4abf0Sagc ASSERT_AT_BOL, -1);
159363d4abf0Sagc if (result == NULL)
159463d4abf0Sagc return REG_ESPACE;
159563d4abf0Sagc ctx->re++;
159663d4abf0Sagc }
159763d4abf0Sagc else
159863d4abf0Sagc goto parse_literal;
159963d4abf0Sagc break;
160063d4abf0Sagc
160163d4abf0Sagc case CHAR_DOLLAR: /* end of line assertion. */
160263d4abf0Sagc /* '$' is special everywhere in EREs, and in the end of the
160363d4abf0Sagc string and before \) is BREs. */
160463d4abf0Sagc if (ctx->cflags & REG_EXTENDED
160563d4abf0Sagc || (ctx->re + 2 < ctx->re_end
160663d4abf0Sagc && *(ctx->re + 1) == CHAR_BACKSLASH
160763d4abf0Sagc && *(ctx->re + 2) == CHAR_RPAREN)
160863d4abf0Sagc || ctx->re + 1 == ctx->re_end)
160963d4abf0Sagc {
161063d4abf0Sagc DPRINT(("tre_parse: EOL: '%.*" STRF "'\n",
161163d4abf0Sagc REST(ctx->re)));
161263d4abf0Sagc result = tre_ast_new_literal(ctx->mem, ASSERTION,
161363d4abf0Sagc ASSERT_AT_EOL, -1);
161463d4abf0Sagc if (result == NULL)
161563d4abf0Sagc return REG_ESPACE;
161663d4abf0Sagc ctx->re++;
161763d4abf0Sagc }
161863d4abf0Sagc else
161963d4abf0Sagc goto parse_literal;
162063d4abf0Sagc break;
162163d4abf0Sagc
162263d4abf0Sagc default:
162363d4abf0Sagc parse_literal:
162463d4abf0Sagc
162563d4abf0Sagc if (temporary_cflags && ctx->re + 1 < ctx->re_end
162663d4abf0Sagc && *ctx->re == CHAR_BACKSLASH && *(ctx->re + 1) == L'E')
162763d4abf0Sagc {
162863d4abf0Sagc DPRINT(("tre_parse: end tmps: '%.*" STRF "'\n",
162963d4abf0Sagc REST(ctx->re)));
163063d4abf0Sagc ctx->cflags &= ~temporary_cflags;
163163d4abf0Sagc temporary_cflags = 0;
163263d4abf0Sagc ctx->re += 2;
1633f2a3d147Schristos STACK_PUSHX(stack, long, PARSE_PIECE);
163463d4abf0Sagc break;
163563d4abf0Sagc }
163663d4abf0Sagc
163763d4abf0Sagc
163863d4abf0Sagc /* We are expecting an atom. If the subexpression (or the whole
163963d4abf0Sagc regexp ends here, we interpret it as an empty expression
164063d4abf0Sagc (which matches an empty string). */
164163d4abf0Sagc if (
164263d4abf0Sagc #ifdef REG_LITERAL
164363d4abf0Sagc !(ctx->cflags & REG_LITERAL) &&
164463d4abf0Sagc #endif /* REG_LITERAL */
164563d4abf0Sagc (ctx->re >= ctx->re_end
164663d4abf0Sagc || *ctx->re == CHAR_STAR
164763d4abf0Sagc || (ctx->cflags & REG_EXTENDED
164863d4abf0Sagc && (*ctx->re == CHAR_PIPE
164963d4abf0Sagc || *ctx->re == CHAR_LBRACE
165063d4abf0Sagc || *ctx->re == CHAR_PLUS
165163d4abf0Sagc || *ctx->re == CHAR_QUESTIONMARK))
165263d4abf0Sagc /* Test for "\)" in BRE mode. */
165363d4abf0Sagc || (!(ctx->cflags & REG_EXTENDED)
165463d4abf0Sagc && ctx->re + 1 < ctx->re_end
165563d4abf0Sagc && *ctx->re == CHAR_BACKSLASH
165663d4abf0Sagc && *(ctx->re + 1) == CHAR_LBRACE)))
165763d4abf0Sagc {
165863d4abf0Sagc DPRINT(("tre_parse: empty: '%.*" STRF "'\n",
165963d4abf0Sagc REST(ctx->re)));
166063d4abf0Sagc result = tre_ast_new_literal(ctx->mem, EMPTY, -1, -1);
166163d4abf0Sagc if (!result)
166263d4abf0Sagc return REG_ESPACE;
166363d4abf0Sagc break;
166463d4abf0Sagc }
166563d4abf0Sagc
166663d4abf0Sagc DPRINT(("tre_parse: literal: '%.*" STRF "'\n",
166763d4abf0Sagc REST(ctx->re)));
166863d4abf0Sagc /* Note that we can't use an tre_isalpha() test here, since there
166963d4abf0Sagc may be characters which are alphabetic but neither upper or
167063d4abf0Sagc lower case. */
167163d4abf0Sagc if (ctx->cflags & REG_ICASE
167263d4abf0Sagc && (tre_isupper(*ctx->re) || tre_islower(*ctx->re)))
167363d4abf0Sagc {
167463d4abf0Sagc tre_ast_node_t *tmp1;
167563d4abf0Sagc tre_ast_node_t *tmp2;
167663d4abf0Sagc
167763d4abf0Sagc /* XXX - Can there be more than one opposite-case
167863d4abf0Sagc counterpoints for some character in some locale? Or
167963d4abf0Sagc more than two characters which all should be regarded
168063d4abf0Sagc the same character if case is ignored? If yes, there
168163d4abf0Sagc does not seem to be a portable way to detect it. I guess
168263d4abf0Sagc that at least for multi-character collating elements there
168363d4abf0Sagc could be several opposite-case counterpoints, but they
168463d4abf0Sagc cannot be supported portably anyway. */
168563d4abf0Sagc tmp1 = tre_ast_new_literal(ctx->mem, tre_toupper(*ctx->re),
168663d4abf0Sagc tre_toupper(*ctx->re),
168763d4abf0Sagc ctx->position);
168863d4abf0Sagc if (!tmp1)
168963d4abf0Sagc return REG_ESPACE;
169063d4abf0Sagc tmp2 = tre_ast_new_literal(ctx->mem, tre_tolower(*ctx->re),
169163d4abf0Sagc tre_tolower(*ctx->re),
169263d4abf0Sagc ctx->position);
169363d4abf0Sagc if (!tmp2)
169463d4abf0Sagc return REG_ESPACE;
169563d4abf0Sagc result = tre_ast_new_union(ctx->mem, tmp1, tmp2);
169663d4abf0Sagc if (!result)
169763d4abf0Sagc return REG_ESPACE;
169863d4abf0Sagc }
169963d4abf0Sagc else
170063d4abf0Sagc {
170163d4abf0Sagc result = tre_ast_new_literal(ctx->mem, *ctx->re, *ctx->re,
170263d4abf0Sagc ctx->position);
170363d4abf0Sagc if (!result)
170463d4abf0Sagc return REG_ESPACE;
170563d4abf0Sagc }
170663d4abf0Sagc ctx->position++;
170763d4abf0Sagc ctx->re++;
170863d4abf0Sagc break;
170963d4abf0Sagc }
171063d4abf0Sagc break;
171163d4abf0Sagc
171263d4abf0Sagc case PARSE_MARK_FOR_SUBMATCH:
171363d4abf0Sagc {
171463d4abf0Sagc int submatch_id = tre_stack_pop_int(stack);
171563d4abf0Sagc
171663d4abf0Sagc if (result->submatch_id >= 0)
171763d4abf0Sagc {
171863d4abf0Sagc tre_ast_node_t *n, *tmp_node;
171963d4abf0Sagc n = tre_ast_new_literal(ctx->mem, EMPTY, -1, -1);
172063d4abf0Sagc if (n == NULL)
172163d4abf0Sagc return REG_ESPACE;
172263d4abf0Sagc tmp_node = tre_ast_new_catenation(ctx->mem, n, result);
172363d4abf0Sagc if (tmp_node == NULL)
172463d4abf0Sagc return REG_ESPACE;
172563d4abf0Sagc tmp_node->num_submatches = result->num_submatches;
172663d4abf0Sagc result = tmp_node;
172763d4abf0Sagc }
172863d4abf0Sagc result->submatch_id = submatch_id;
172963d4abf0Sagc result->num_submatches++;
173063d4abf0Sagc break;
173163d4abf0Sagc }
173263d4abf0Sagc
173363d4abf0Sagc case PARSE_RESTORE_CFLAGS:
173463d4abf0Sagc ctx->cflags = tre_stack_pop_int(stack);
173563d4abf0Sagc break;
173663d4abf0Sagc
173763d4abf0Sagc default:
173863d4abf0Sagc assert(0);
173963d4abf0Sagc break;
174063d4abf0Sagc }
174163d4abf0Sagc }
174263d4abf0Sagc
174363d4abf0Sagc /* Check for missing closing parentheses. */
174463d4abf0Sagc if (depth > 0)
174563d4abf0Sagc return REG_EPAREN;
174663d4abf0Sagc
174763d4abf0Sagc if (status == REG_OK)
174863d4abf0Sagc ctx->result = result;
174963d4abf0Sagc
175063d4abf0Sagc return status;
175163d4abf0Sagc }
175263d4abf0Sagc
175363d4abf0Sagc /* EOF */
1754