xref: /netbsd-src/external/bsd/tre/dist/lib/tre-parse.c (revision f6cec12c6830c04a37196b5b91dd80d83b170e5e)
163d4abf0Sagc /*
263d4abf0Sagc   tre-parse.c - Regexp parser
363d4abf0Sagc 
463d4abf0Sagc   This software is released under a BSD-style license.
563d4abf0Sagc   See the file LICENSE for details and copyright.
663d4abf0Sagc 
763d4abf0Sagc */
863d4abf0Sagc 
963d4abf0Sagc /*
1063d4abf0Sagc   This parser is just a simple recursive descent parser for POSIX.2
1163d4abf0Sagc   regexps.  The parser supports both the obsolete default syntax and
1263d4abf0Sagc   the "extended" syntax, and some nonstandard extensions.
1363d4abf0Sagc */
1463d4abf0Sagc 
1563d4abf0Sagc 
1663d4abf0Sagc #ifdef HAVE_CONFIG_H
1763d4abf0Sagc #include <config.h>
1863d4abf0Sagc #endif /* HAVE_CONFIG_H */
1963d4abf0Sagc #include <string.h>
2063d4abf0Sagc #include <assert.h>
2163d4abf0Sagc #include <limits.h>
2263d4abf0Sagc 
2363d4abf0Sagc #include "xmalloc.h"
2463d4abf0Sagc #include "tre-mem.h"
2563d4abf0Sagc #include "tre-ast.h"
2663d4abf0Sagc #include "tre-stack.h"
2763d4abf0Sagc #include "tre-parse.h"
2863d4abf0Sagc 
2963d4abf0Sagc 
3063d4abf0Sagc /* Characters with special meanings in regexp syntax. */
3163d4abf0Sagc #define CHAR_PIPE	   L'|'
3263d4abf0Sagc #define CHAR_LPAREN	   L'('
3363d4abf0Sagc #define CHAR_RPAREN	   L')'
3463d4abf0Sagc #define CHAR_LBRACE	   L'{'
3563d4abf0Sagc #define CHAR_RBRACE	   L'}'
3663d4abf0Sagc #define CHAR_LBRACKET	   L'['
3763d4abf0Sagc #define CHAR_RBRACKET	   L']'
3863d4abf0Sagc #define CHAR_MINUS	   L'-'
3963d4abf0Sagc #define CHAR_STAR	   L'*'
4063d4abf0Sagc #define CHAR_QUESTIONMARK  L'?'
4163d4abf0Sagc #define CHAR_PLUS	   L'+'
4263d4abf0Sagc #define CHAR_PERIOD	   L'.'
4363d4abf0Sagc #define CHAR_COLON	   L':'
4463d4abf0Sagc #define CHAR_EQUAL	   L'='
4563d4abf0Sagc #define CHAR_COMMA	   L','
4663d4abf0Sagc #define CHAR_CARET	   L'^'
4763d4abf0Sagc #define CHAR_DOLLAR	   L'$'
4863d4abf0Sagc #define CHAR_BACKSLASH	   L'\\'
4963d4abf0Sagc #define CHAR_HASH	   L'#'
5063d4abf0Sagc #define CHAR_TILDE	   L'~'
5163d4abf0Sagc 
5263d4abf0Sagc 
5363d4abf0Sagc /* Some macros for expanding \w, \s, etc. */
5463d4abf0Sagc static const struct tre_macro_struct {
5563d4abf0Sagc   const char c;
5663d4abf0Sagc   const char *expansion;
5763d4abf0Sagc } tre_macros[] =
5863d4abf0Sagc   { {'t', "\t"},	   {'n', "\n"},		   {'r', "\r"},
5963d4abf0Sagc     {'f', "\f"},	   {'a', "\a"},		   {'e', "\033"},
6063d4abf0Sagc     {'w', "[[:alnum:]_]"}, {'W', "[^[:alnum:]_]"}, {'s', "[[:space:]]"},
6163d4abf0Sagc     {'S', "[^[:space:]]"}, {'d', "[[:digit:]]"},   {'D', "[^[:digit:]]"},
6263d4abf0Sagc     { 0, NULL }
6363d4abf0Sagc   };
6463d4abf0Sagc 
6563d4abf0Sagc 
6663d4abf0Sagc /* Expands a macro delimited by `regex' and `regex_end' to `buf', which
6763d4abf0Sagc    must have at least `len' items.  Sets buf[0] to zero if the there
6863d4abf0Sagc    is no match in `tre_macros'. */
6963d4abf0Sagc static void
tre_expand_macro(const tre_char_t * regex,const tre_char_t * regex_end,tre_char_t * buf,size_t buf_len)7063d4abf0Sagc tre_expand_macro(const tre_char_t *regex, const tre_char_t *regex_end,
7163d4abf0Sagc 		 tre_char_t *buf, size_t buf_len)
7263d4abf0Sagc {
7363d4abf0Sagc   int i;
7463d4abf0Sagc 
7563d4abf0Sagc   buf[0] = 0;
7663d4abf0Sagc   if (regex >= regex_end)
7763d4abf0Sagc     return;
7863d4abf0Sagc 
7963d4abf0Sagc   for (i = 0; tre_macros[i].expansion; i++)
8063d4abf0Sagc     {
8163d4abf0Sagc       if (tre_macros[i].c == *regex)
8263d4abf0Sagc 	{
8363d4abf0Sagc 	  unsigned int j;
8463d4abf0Sagc 	  DPRINT(("Expanding macro '%c' => '%s'\n",
8563d4abf0Sagc 		  tre_macros[i].c, tre_macros[i].expansion));
8663d4abf0Sagc 	  for (j = 0; tre_macros[i].expansion[j] && j < buf_len; j++)
8763d4abf0Sagc 	    buf[j] = tre_macros[i].expansion[j];
8863d4abf0Sagc 	  buf[j] = 0;
8963d4abf0Sagc 	  break;
9063d4abf0Sagc 	}
9163d4abf0Sagc     }
9263d4abf0Sagc }
9363d4abf0Sagc 
9463d4abf0Sagc static reg_errcode_t
tre_new_item(tre_mem_t mem,int min,int max,int * i,int * max_i,tre_ast_node_t *** items)9563d4abf0Sagc tre_new_item(tre_mem_t mem, int min, int max, int *i, int *max_i,
9663d4abf0Sagc 	 tre_ast_node_t ***items)
9763d4abf0Sagc {
9863d4abf0Sagc   reg_errcode_t status;
9963d4abf0Sagc   tre_ast_node_t **array = *items;
10063d4abf0Sagc   /* Allocate more space if necessary. */
10163d4abf0Sagc   if (*i >= *max_i)
10263d4abf0Sagc     {
10363d4abf0Sagc       tre_ast_node_t **new_items;
10463d4abf0Sagc       DPRINT(("out of array space, i = %d\n", *i));
10563d4abf0Sagc       /* If the array is already 1024 items large, give up -- there's
10663d4abf0Sagc 	 probably an error in the regexp (e.g. not a '\0' terminated
10763d4abf0Sagc 	 string and missing ']') */
10863d4abf0Sagc       if (*max_i > 1024)
10963d4abf0Sagc 	return REG_ESPACE;
11063d4abf0Sagc       *max_i *= 2;
111*f6cec12cSrin       new_items = xrealloc(array, sizeof(*array) * *max_i);
11263d4abf0Sagc       if (new_items == NULL)
11363d4abf0Sagc 	return REG_ESPACE;
11463d4abf0Sagc       *items = array = new_items;
11563d4abf0Sagc     }
11663d4abf0Sagc   array[*i] = tre_ast_new_literal(mem, min, max, -1);
11763d4abf0Sagc   status = array[*i] == NULL ? REG_ESPACE : REG_OK;
11863d4abf0Sagc   (*i)++;
11963d4abf0Sagc   return status;
12063d4abf0Sagc }
12163d4abf0Sagc 
12263d4abf0Sagc 
12363d4abf0Sagc /* Expands a character class to character ranges. */
12463d4abf0Sagc static reg_errcode_t
tre_expand_ctype(tre_mem_t mem,tre_ctype_t class,tre_ast_node_t *** items,int * i,int * max_i,int cflags)12563d4abf0Sagc tre_expand_ctype(tre_mem_t mem, tre_ctype_t class, tre_ast_node_t ***items,
12663d4abf0Sagc 		 int *i, int *max_i, int cflags)
12763d4abf0Sagc {
12863d4abf0Sagc   reg_errcode_t status = REG_OK;
12963d4abf0Sagc   tre_cint_t c;
13063d4abf0Sagc   int j, min = -1, max = 0;
13163d4abf0Sagc   assert(TRE_MB_CUR_MAX == 1);
13263d4abf0Sagc 
13363d4abf0Sagc   DPRINT(("  expanding class to character ranges\n"));
13463d4abf0Sagc   for (j = 0; (j < 256) && (status == REG_OK); j++)
13563d4abf0Sagc     {
13613498f30Srin       c = (tre_cint_t) j;
13763d4abf0Sagc       if (tre_isctype(c, class)
13863d4abf0Sagc 	  || ((cflags & REG_ICASE)
13963d4abf0Sagc 	      && (tre_isctype(tre_tolower(c), class)
14063d4abf0Sagc 		  || tre_isctype(tre_toupper(c), class))))
14163d4abf0Sagc {
14263d4abf0Sagc 	  if (min < 0)
14363d4abf0Sagc 	    min = c;
14463d4abf0Sagc 	  max = c;
14563d4abf0Sagc 	}
14663d4abf0Sagc       else if (min >= 0)
14763d4abf0Sagc 	{
14863d4abf0Sagc 	  DPRINT(("  range %c (%d) to %c (%d)\n", min, min, max, max));
14963d4abf0Sagc 	  status = tre_new_item(mem, min, max, i, max_i, items);
15063d4abf0Sagc 	  min = -1;
15163d4abf0Sagc 	}
15263d4abf0Sagc     }
15363d4abf0Sagc   if (min >= 0 && status == REG_OK)
15463d4abf0Sagc     status = tre_new_item(mem, min, max, i, max_i, items);
15563d4abf0Sagc   return status;
15663d4abf0Sagc }
15763d4abf0Sagc 
15863d4abf0Sagc 
15963d4abf0Sagc static int
tre_compare_items(const void * a,const void * b)16063d4abf0Sagc tre_compare_items(const void *a, const void *b)
16163d4abf0Sagc {
16263d4abf0Sagc   const tre_ast_node_t *node_a = *(tre_ast_node_t * const *)a;
16363d4abf0Sagc   const tre_ast_node_t *node_b = *(tre_ast_node_t * const *)b;
16463d4abf0Sagc   tre_literal_t *l_a = node_a->obj, *l_b = node_b->obj;
16563d4abf0Sagc   int a_min = l_a->code_min, b_min = l_b->code_min;
16663d4abf0Sagc 
16763d4abf0Sagc   if (a_min < b_min)
16863d4abf0Sagc     return -1;
16963d4abf0Sagc   else if (a_min > b_min)
17063d4abf0Sagc     return 1;
17163d4abf0Sagc   else
17263d4abf0Sagc     return 0;
17363d4abf0Sagc }
17463d4abf0Sagc 
17563d4abf0Sagc #ifndef TRE_USE_SYSTEM_WCTYPE
17663d4abf0Sagc 
17782e82fcaSahoka int tre_isalnum_func(tre_cint_t);
17882e82fcaSahoka int tre_isalpha_func(tre_cint_t);
17982e82fcaSahoka int tre_isascii_func(tre_cint_t);
18082e82fcaSahoka int tre_isblank_func(tre_cint_t);
18182e82fcaSahoka int tre_iscntrl_func(tre_cint_t);
18282e82fcaSahoka int tre_isdigit_func(tre_cint_t);
18382e82fcaSahoka int tre_isgraph_func(tre_cint_t);
18482e82fcaSahoka int tre_islower_func(tre_cint_t);
18582e82fcaSahoka int tre_isprint_func(tre_cint_t);
18682e82fcaSahoka int tre_ispunct_func(tre_cint_t);
18782e82fcaSahoka int tre_isspace_func(tre_cint_t);
18882e82fcaSahoka int tre_isupper_func(tre_cint_t);
18982e82fcaSahoka int tre_isxdigit_func(tre_cint_t);
19082e82fcaSahoka 
19163d4abf0Sagc /* isalnum() and the rest may be macros, so wrap them to functions. */
tre_isalnum_func(tre_cint_t c)19263d4abf0Sagc int tre_isalnum_func(tre_cint_t c) { return tre_isalnum(c); }
tre_isalpha_func(tre_cint_t c)19363d4abf0Sagc int tre_isalpha_func(tre_cint_t c) { return tre_isalpha(c); }
19463d4abf0Sagc 
19563d4abf0Sagc #ifdef tre_isascii
tre_isascii_func(tre_cint_t c)19663d4abf0Sagc int tre_isascii_func(tre_cint_t c) { return tre_isascii(c); }
19763d4abf0Sagc #else /* !tre_isascii */
tre_isascii_func(tre_cint_t c)19863d4abf0Sagc int tre_isascii_func(tre_cint_t c) { return !(c >> 7); }
19963d4abf0Sagc #endif /* !tre_isascii */
20063d4abf0Sagc 
20163d4abf0Sagc #ifdef tre_isblank
tre_isblank_func(tre_cint_t c)20263d4abf0Sagc int tre_isblank_func(tre_cint_t c) { return tre_isblank(c); }
20363d4abf0Sagc #else /* !tre_isblank */
tre_isblank_func(tre_cint_t c)20463d4abf0Sagc int tre_isblank_func(tre_cint_t c) { return ((c == ' ') || (c == '\t')); }
20563d4abf0Sagc #endif /* !tre_isblank */
20663d4abf0Sagc 
tre_iscntrl_func(tre_cint_t c)20763d4abf0Sagc int tre_iscntrl_func(tre_cint_t c) { return tre_iscntrl(c); }
tre_isdigit_func(tre_cint_t c)20863d4abf0Sagc int tre_isdigit_func(tre_cint_t c) { return tre_isdigit(c); }
tre_isgraph_func(tre_cint_t c)20963d4abf0Sagc int tre_isgraph_func(tre_cint_t c) { return tre_isgraph(c); }
tre_islower_func(tre_cint_t c)21063d4abf0Sagc int tre_islower_func(tre_cint_t c) { return tre_islower(c); }
tre_isprint_func(tre_cint_t c)21163d4abf0Sagc int tre_isprint_func(tre_cint_t c) { return tre_isprint(c); }
tre_ispunct_func(tre_cint_t c)21263d4abf0Sagc int tre_ispunct_func(tre_cint_t c) { return tre_ispunct(c); }
tre_isspace_func(tre_cint_t c)21363d4abf0Sagc int tre_isspace_func(tre_cint_t c) { return tre_isspace(c); }
tre_isupper_func(tre_cint_t c)21463d4abf0Sagc int tre_isupper_func(tre_cint_t c) { return tre_isupper(c); }
tre_isxdigit_func(tre_cint_t c)21563d4abf0Sagc int tre_isxdigit_func(tre_cint_t c) { return tre_isxdigit(c); }
21663d4abf0Sagc 
21763d4abf0Sagc struct {
21882e82fcaSahoka   const char *name;
21963d4abf0Sagc   int (*func)(tre_cint_t);
22063d4abf0Sagc } tre_ctype_map[] = {
22163d4abf0Sagc   { "alnum", &tre_isalnum_func },
22263d4abf0Sagc   { "alpha", &tre_isalpha_func },
22363d4abf0Sagc #ifdef tre_isascii
22463d4abf0Sagc   { "ascii", &tre_isascii_func },
22563d4abf0Sagc #endif /* tre_isascii */
22663d4abf0Sagc #ifdef tre_isblank
22763d4abf0Sagc   { "blank", &tre_isblank_func },
22863d4abf0Sagc #endif /* tre_isblank */
22963d4abf0Sagc   { "cntrl", &tre_iscntrl_func },
23063d4abf0Sagc   { "digit", &tre_isdigit_func },
23163d4abf0Sagc   { "graph", &tre_isgraph_func },
23263d4abf0Sagc   { "lower", &tre_islower_func },
23363d4abf0Sagc   { "print", &tre_isprint_func },
23463d4abf0Sagc   { "punct", &tre_ispunct_func },
23563d4abf0Sagc   { "space", &tre_isspace_func },
23663d4abf0Sagc   { "upper", &tre_isupper_func },
23763d4abf0Sagc   { "xdigit", &tre_isxdigit_func },
23863d4abf0Sagc   { NULL, NULL}
23963d4abf0Sagc };
24063d4abf0Sagc 
tre_ctype(const char * name)24163d4abf0Sagc tre_ctype_t tre_ctype(const char *name)
24263d4abf0Sagc {
24363d4abf0Sagc   int i;
24463d4abf0Sagc   for (i = 0; tre_ctype_map[i].name != NULL; i++)
24563d4abf0Sagc     {
24663d4abf0Sagc       if (strcmp(name, tre_ctype_map[i].name) == 0)
24763d4abf0Sagc 	return tre_ctype_map[i].func;
24863d4abf0Sagc     }
24963d4abf0Sagc   return (tre_ctype_t)0;
25063d4abf0Sagc }
25163d4abf0Sagc #endif /* !TRE_USE_SYSTEM_WCTYPE */
25263d4abf0Sagc 
25363d4abf0Sagc /* Maximum number of character classes that can occur in a negated bracket
25463d4abf0Sagc    expression.	*/
25563d4abf0Sagc #define MAX_NEG_CLASSES 64
25663d4abf0Sagc 
25763d4abf0Sagc /* Maximum length of character class names. */
25863d4abf0Sagc #define MAX_CLASS_NAME
25963d4abf0Sagc 
26063d4abf0Sagc #define REST(re) (int)(ctx->re_end - (re)), (re)
26163d4abf0Sagc 
26263d4abf0Sagc static reg_errcode_t
tre_parse_bracket_items(tre_parse_ctx_t * ctx,int negate,tre_ctype_t neg_classes[],int * num_neg_classes,tre_ast_node_t *** items,int * num_items,int * items_size)26363d4abf0Sagc tre_parse_bracket_items(tre_parse_ctx_t *ctx, int negate,
26463d4abf0Sagc 			tre_ctype_t neg_classes[], int *num_neg_classes,
26563d4abf0Sagc 			tre_ast_node_t ***items, int *num_items,
26663d4abf0Sagc 			int *items_size)
26763d4abf0Sagc {
26863d4abf0Sagc   const tre_char_t *re = ctx->re;
26963d4abf0Sagc   reg_errcode_t status = REG_OK;
27063d4abf0Sagc   tre_ctype_t class = (tre_ctype_t)0;
27163d4abf0Sagc   int i = *num_items;
27263d4abf0Sagc   int max_i = *items_size;
27363d4abf0Sagc   int skip;
27463d4abf0Sagc 
27563d4abf0Sagc   /* Build an array of the items in the bracket expression. */
27663d4abf0Sagc   while (status == REG_OK)
27763d4abf0Sagc     {
27863d4abf0Sagc       skip = 0;
27963d4abf0Sagc       if (re == ctx->re_end)
28063d4abf0Sagc 	{
28163d4abf0Sagc 	  status = REG_EBRACK;
28263d4abf0Sagc 	}
28363d4abf0Sagc       else if (*re == CHAR_RBRACKET && re > ctx->re)
28463d4abf0Sagc 	{
28563d4abf0Sagc 	  DPRINT(("tre_parse_bracket:	done: '%.*" STRF "'\n", REST(re)));
28663d4abf0Sagc 	  re++;
28763d4abf0Sagc 	  break;
28863d4abf0Sagc 	}
28963d4abf0Sagc       else
29063d4abf0Sagc 	{
29163d4abf0Sagc 	  tre_cint_t min = 0, max = 0;
29263d4abf0Sagc 
29363d4abf0Sagc 	  class = (tre_ctype_t)0;
29463d4abf0Sagc 	  if (re + 2 < ctx->re_end
29563d4abf0Sagc 	      && *(re + 1) == CHAR_MINUS && *(re + 2) != CHAR_RBRACKET)
29663d4abf0Sagc 	    {
29763d4abf0Sagc 	      DPRINT(("tre_parse_bracket:  range: '%.*" STRF "'\n", REST(re)));
29863d4abf0Sagc 	      min = *re;
29963d4abf0Sagc 	      max = *(re + 2);
30063d4abf0Sagc 	      re += 3;
30163d4abf0Sagc 	      /* XXX - Should use collation order instead of encoding values
30263d4abf0Sagc 		 in character ranges. */
30363d4abf0Sagc 	      if (min > max)
30463d4abf0Sagc 		status = REG_ERANGE;
30563d4abf0Sagc 	    }
30663d4abf0Sagc 	  else if (re + 1 < ctx->re_end
30763d4abf0Sagc 		   && *re == CHAR_LBRACKET && *(re + 1) == CHAR_PERIOD)
30863d4abf0Sagc 	    status = REG_ECOLLATE;
30963d4abf0Sagc 	  else if (re + 1 < ctx->re_end
31063d4abf0Sagc 		   && *re == CHAR_LBRACKET && *(re + 1) == CHAR_EQUAL)
31163d4abf0Sagc 	    status = REG_ECOLLATE;
31263d4abf0Sagc 	  else if (re + 1 < ctx->re_end
31363d4abf0Sagc 		   && *re == CHAR_LBRACKET && *(re + 1) == CHAR_COLON)
31463d4abf0Sagc 	    {
31563d4abf0Sagc 	      char tmp_str[64];
31663d4abf0Sagc 	      const tre_char_t *endptr = re + 2;
317f2a3d147Schristos 	      size_t len;
31863d4abf0Sagc 	      DPRINT(("tre_parse_bracket:  class: '%.*" STRF "'\n", REST(re)));
31963d4abf0Sagc 	      while (endptr < ctx->re_end && *endptr != CHAR_COLON)
32063d4abf0Sagc 		endptr++;
32163d4abf0Sagc 	      if (endptr != ctx->re_end)
32263d4abf0Sagc 		{
32363d4abf0Sagc 		  len = MIN(endptr - re - 2, 63);
32463d4abf0Sagc #ifdef TRE_WCHAR
32563d4abf0Sagc 		  {
32663d4abf0Sagc 		    tre_char_t tmp_wcs[64];
32763d4abf0Sagc 		    wcsncpy(tmp_wcs, re + 2, (size_t)len);
32863d4abf0Sagc 		    tmp_wcs[len] = L'\0';
32963d4abf0Sagc #if defined HAVE_WCSRTOMBS
33063d4abf0Sagc 		    {
33163d4abf0Sagc 		      mbstate_t state;
33263d4abf0Sagc 		      const tre_char_t *src = tmp_wcs;
33363d4abf0Sagc 		      memset(&state, '\0', sizeof(state));
33463d4abf0Sagc 		      len = wcsrtombs(tmp_str, &src, sizeof(tmp_str), &state);
33563d4abf0Sagc 		    }
33663d4abf0Sagc #elif defined HAVE_WCSTOMBS
33763d4abf0Sagc 		    len = wcstombs(tmp_str, tmp_wcs, 63);
33863d4abf0Sagc #endif /* defined HAVE_WCSTOMBS */
33963d4abf0Sagc 		  }
34063d4abf0Sagc #else /* !TRE_WCHAR */
34182e82fcaSahoka 		  /* LINTED */strncpy(tmp_str, (const char*)re + 2, len);
34263d4abf0Sagc #endif /* !TRE_WCHAR */
34363d4abf0Sagc 		  tmp_str[len] = '\0';
34463d4abf0Sagc 		  DPRINT(("  class name: %s\n", tmp_str));
34563d4abf0Sagc 		  class = tre_ctype(tmp_str);
34663d4abf0Sagc 		  if (!class)
34763d4abf0Sagc 		    status = REG_ECTYPE;
34863d4abf0Sagc 		  /* Optimize character classes for 8 bit character sets. */
34913498f30Srin 		  if (status == REG_OK && ctx->cur_max == 1)
35063d4abf0Sagc 		    {
35163d4abf0Sagc 		      status = tre_expand_ctype(ctx->mem, class, items,
35263d4abf0Sagc 						&i, &max_i, ctx->cflags);
35363d4abf0Sagc 		      class = (tre_ctype_t)0;
35463d4abf0Sagc 		      skip = 1;
35563d4abf0Sagc 		    }
35663d4abf0Sagc 		  re = endptr + 2;
35763d4abf0Sagc 		}
35863d4abf0Sagc 	      else
35963d4abf0Sagc 		status = REG_ECTYPE;
36063d4abf0Sagc 	      min = 0;
36163d4abf0Sagc 	      max = TRE_CHAR_MAX;
36263d4abf0Sagc 	    }
36363d4abf0Sagc 	  else
36463d4abf0Sagc 	    {
36563d4abf0Sagc 	      DPRINT(("tre_parse_bracket:   char: '%.*" STRF "'\n", REST(re)));
36663d4abf0Sagc 	      if (*re == CHAR_MINUS && *(re + 1) != CHAR_RBRACKET
36763d4abf0Sagc 		  && ctx->re != re)
36863d4abf0Sagc 		/* Two ranges are not allowed to share and endpoint. */
36963d4abf0Sagc 		status = REG_ERANGE;
37063d4abf0Sagc 	      min = max = *re++;
37163d4abf0Sagc 	    }
37263d4abf0Sagc 
37363d4abf0Sagc 	  if (status != REG_OK)
37463d4abf0Sagc 	    break;
37563d4abf0Sagc 
37663d4abf0Sagc 	  if (class && negate)
37763d4abf0Sagc 	    if (*num_neg_classes >= MAX_NEG_CLASSES)
37863d4abf0Sagc 	      status = REG_ESPACE;
37963d4abf0Sagc 	    else
38063d4abf0Sagc 	      neg_classes[(*num_neg_classes)++] = class;
38163d4abf0Sagc 	  else if (!skip)
38263d4abf0Sagc 	    {
38363d4abf0Sagc 	      status = tre_new_item(ctx->mem, min, max, &i, &max_i, items);
38463d4abf0Sagc 	      if (status != REG_OK)
38563d4abf0Sagc 		break;
38663d4abf0Sagc 	      ((tre_literal_t*)((*items)[i-1])->obj)->u.class = class;
38763d4abf0Sagc 	    }
38863d4abf0Sagc 
38963d4abf0Sagc 	  /* Add opposite-case counterpoints if REG_ICASE is present.
39063d4abf0Sagc 	     This is broken if there are more than two "same" characters. */
39163d4abf0Sagc 	  if (ctx->cflags & REG_ICASE && !class && status == REG_OK && !skip)
39263d4abf0Sagc 	    {
39363d4abf0Sagc 	      tre_cint_t cmin, ccurr;
39463d4abf0Sagc 
39563d4abf0Sagc 	      DPRINT(("adding opposite-case counterpoints\n"));
39663d4abf0Sagc 	      while (min <= max)
39763d4abf0Sagc 		{
39863d4abf0Sagc 		  if (tre_islower(min))
39963d4abf0Sagc 		    {
40063d4abf0Sagc 		      cmin = ccurr = tre_toupper(min++);
40163d4abf0Sagc 		      while (tre_islower(min) && tre_toupper(min) == ccurr + 1
40263d4abf0Sagc 			     && min <= max)
40363d4abf0Sagc 			ccurr = tre_toupper(min++);
40463d4abf0Sagc 		      status = tre_new_item(ctx->mem, cmin, ccurr,
40563d4abf0Sagc 					    &i, &max_i, items);
40663d4abf0Sagc 		    }
40763d4abf0Sagc 		  else if (tre_isupper(min))
40863d4abf0Sagc 		    {
40963d4abf0Sagc 		      cmin = ccurr = tre_tolower(min++);
41063d4abf0Sagc 		      while (tre_isupper(min) && tre_tolower(min) == ccurr + 1
41163d4abf0Sagc 			     && min <= max)
41263d4abf0Sagc 			ccurr = tre_tolower(min++);
41363d4abf0Sagc 		      status = tre_new_item(ctx->mem, cmin, ccurr,
41463d4abf0Sagc 					    &i, &max_i, items);
41563d4abf0Sagc 		    }
41663d4abf0Sagc 		  else min++;
41763d4abf0Sagc 		  if (status != REG_OK)
41863d4abf0Sagc 		    break;
41963d4abf0Sagc 		}
42063d4abf0Sagc 	      if (status != REG_OK)
42163d4abf0Sagc 		break;
42263d4abf0Sagc 	    }
42363d4abf0Sagc 	}
42463d4abf0Sagc     }
42563d4abf0Sagc   *num_items = i;
42663d4abf0Sagc   *items_size = max_i;
42763d4abf0Sagc   ctx->re = re;
42863d4abf0Sagc   return status;
42963d4abf0Sagc }
43063d4abf0Sagc 
43163d4abf0Sagc static reg_errcode_t
tre_parse_bracket(tre_parse_ctx_t * ctx,tre_ast_node_t ** result)43263d4abf0Sagc tre_parse_bracket(tre_parse_ctx_t *ctx, tre_ast_node_t **result)
43363d4abf0Sagc {
43463d4abf0Sagc   tre_ast_node_t *node = NULL;
43563d4abf0Sagc   int negate = 0;
43663d4abf0Sagc   reg_errcode_t status = REG_OK;
43763d4abf0Sagc   tre_ast_node_t **items, *u, *n;
43863d4abf0Sagc   int i = 0, j, max_i = 32, curr_max, curr_min;
43963d4abf0Sagc   tre_ctype_t neg_classes[MAX_NEG_CLASSES];
44063d4abf0Sagc   int num_neg_classes = 0;
44163d4abf0Sagc 
44263d4abf0Sagc   /* Start off with an array of `max_i' elements. */
44363d4abf0Sagc   items = xmalloc(sizeof(*items) * max_i);
44463d4abf0Sagc   if (items == NULL)
44563d4abf0Sagc     return REG_ESPACE;
44663d4abf0Sagc 
44763d4abf0Sagc   if (*ctx->re == CHAR_CARET)
44863d4abf0Sagc     {
44963d4abf0Sagc       DPRINT(("tre_parse_bracket: negate: '%.*" STRF "'\n", REST(ctx->re)));
45063d4abf0Sagc       negate = 1;
45163d4abf0Sagc       ctx->re++;
45263d4abf0Sagc     }
45363d4abf0Sagc 
45463d4abf0Sagc   status = tre_parse_bracket_items(ctx, negate, neg_classes, &num_neg_classes,
45563d4abf0Sagc 				   &items, &i, &max_i);
45663d4abf0Sagc 
45763d4abf0Sagc   if (status != REG_OK)
45863d4abf0Sagc     goto parse_bracket_done;
45963d4abf0Sagc 
46063d4abf0Sagc   /* Sort the array if we need to negate it. */
46163d4abf0Sagc   if (negate)
46263d4abf0Sagc     qsort(items, (unsigned)i, sizeof(*items), tre_compare_items);
46363d4abf0Sagc 
46463d4abf0Sagc   curr_max = curr_min = 0;
46563d4abf0Sagc   /* Build a union of the items in the array, negated if necessary. */
46663d4abf0Sagc   for (j = 0; j < i && status == REG_OK; j++)
46763d4abf0Sagc     {
46863d4abf0Sagc       int min, max;
46963d4abf0Sagc       tre_literal_t *l = items[j]->obj;
47063d4abf0Sagc       min = l->code_min;
47163d4abf0Sagc       max = l->code_max;
47263d4abf0Sagc 
47363d4abf0Sagc       DPRINT(("item: %d - %d, class %ld, curr_max = %d\n",
47463d4abf0Sagc 	      (int)l->code_min, (int)l->code_max, (long)l->u.class, curr_max));
47563d4abf0Sagc 
47663d4abf0Sagc       if (negate)
47763d4abf0Sagc 	{
47863d4abf0Sagc 	  if (min < curr_max)
47963d4abf0Sagc 	    {
48063d4abf0Sagc 	      /* Overlap. */
48163d4abf0Sagc 	      curr_max = MAX(max + 1, curr_max);
48263d4abf0Sagc 	      DPRINT(("overlap, curr_max = %d\n", curr_max));
48363d4abf0Sagc 	      l = NULL;
48463d4abf0Sagc 	    }
48563d4abf0Sagc 	  else
48663d4abf0Sagc 	    {
48763d4abf0Sagc 	      /* No overlap. */
48863d4abf0Sagc 	      curr_max = min - 1;
48963d4abf0Sagc 	      if (curr_max >= curr_min)
49063d4abf0Sagc 		{
49163d4abf0Sagc 		  DPRINT(("no overlap\n"));
49263d4abf0Sagc 		  l->code_min = curr_min;
49363d4abf0Sagc 		  l->code_max = curr_max;
49463d4abf0Sagc 		}
49563d4abf0Sagc 	      else
49663d4abf0Sagc 		{
49763d4abf0Sagc 		  DPRINT(("no overlap, zero room\n"));
49863d4abf0Sagc 		  l = NULL;
49963d4abf0Sagc 		}
50063d4abf0Sagc 	      curr_min = curr_max = max + 1;
50163d4abf0Sagc 	    }
50263d4abf0Sagc 	}
50363d4abf0Sagc 
50463d4abf0Sagc       if (l != NULL)
50563d4abf0Sagc 	{
50663d4abf0Sagc 	  int k;
50763d4abf0Sagc 	  DPRINT(("creating %d - %d\n", (int)l->code_min, (int)l->code_max));
50863d4abf0Sagc 	  l->position = ctx->position;
50963d4abf0Sagc 	  if (num_neg_classes > 0)
51063d4abf0Sagc 	    {
51163d4abf0Sagc 	      l->neg_classes = tre_mem_alloc(ctx->mem,
512*f6cec12cSrin 					     (sizeof(*l->neg_classes)
51363d4abf0Sagc 					      * (num_neg_classes + 1)));
51463d4abf0Sagc 	      if (l->neg_classes == NULL)
51563d4abf0Sagc 		{
51663d4abf0Sagc 		  status = REG_ESPACE;
51763d4abf0Sagc 		  break;
51863d4abf0Sagc 		}
51963d4abf0Sagc 	      for (k = 0; k < num_neg_classes; k++)
52063d4abf0Sagc 		l->neg_classes[k] = neg_classes[k];
52163d4abf0Sagc 	      l->neg_classes[k] = (tre_ctype_t)0;
52263d4abf0Sagc 	    }
52363d4abf0Sagc 	  else
52463d4abf0Sagc 	    l->neg_classes = NULL;
52563d4abf0Sagc 	  if (node == NULL)
52663d4abf0Sagc 	    node = items[j];
52763d4abf0Sagc 	  else
52863d4abf0Sagc 	    {
52963d4abf0Sagc 	      u = tre_ast_new_union(ctx->mem, node, items[j]);
53063d4abf0Sagc 	      if (u == NULL)
53163d4abf0Sagc 		status = REG_ESPACE;
53263d4abf0Sagc 	      node = u;
53363d4abf0Sagc 	    }
53463d4abf0Sagc 	}
53563d4abf0Sagc     }
53663d4abf0Sagc 
53763d4abf0Sagc   if (status != REG_OK)
53863d4abf0Sagc     goto parse_bracket_done;
53963d4abf0Sagc 
54063d4abf0Sagc   if (negate)
54163d4abf0Sagc     {
54263d4abf0Sagc       int k;
54363d4abf0Sagc       DPRINT(("final: creating %d - %d\n", curr_min, (int)TRE_CHAR_MAX));
54463d4abf0Sagc       n = tre_ast_new_literal(ctx->mem, curr_min, TRE_CHAR_MAX, ctx->position);
54563d4abf0Sagc       if (n == NULL)
54663d4abf0Sagc 	status = REG_ESPACE;
54763d4abf0Sagc       else
54863d4abf0Sagc 	{
54963d4abf0Sagc 	  tre_literal_t *l = n->obj;
55063d4abf0Sagc 	  if (num_neg_classes > 0)
55163d4abf0Sagc 	    {
55263d4abf0Sagc 	      l->neg_classes = tre_mem_alloc(ctx->mem,
553*f6cec12cSrin 					     (sizeof(*l->neg_classes)
55463d4abf0Sagc 					      * (num_neg_classes + 1)));
55563d4abf0Sagc 	      if (l->neg_classes == NULL)
55663d4abf0Sagc 		{
55763d4abf0Sagc 		  status = REG_ESPACE;
55863d4abf0Sagc 		  goto parse_bracket_done;
55963d4abf0Sagc 		}
56063d4abf0Sagc 	      for (k = 0; k < num_neg_classes; k++)
56163d4abf0Sagc 		l->neg_classes[k] = neg_classes[k];
56263d4abf0Sagc 	      l->neg_classes[k] = (tre_ctype_t)0;
56363d4abf0Sagc 	    }
56463d4abf0Sagc 	  else
56563d4abf0Sagc 	    l->neg_classes = NULL;
56663d4abf0Sagc 	  if (node == NULL)
56763d4abf0Sagc 	    node = n;
56863d4abf0Sagc 	  else
56963d4abf0Sagc 	    {
57063d4abf0Sagc 	      u = tre_ast_new_union(ctx->mem, node, n);
57163d4abf0Sagc 	      if (u == NULL)
57263d4abf0Sagc 		status = REG_ESPACE;
57363d4abf0Sagc 	      node = u;
57463d4abf0Sagc 	    }
57563d4abf0Sagc 	}
57663d4abf0Sagc     }
57763d4abf0Sagc 
57863d4abf0Sagc   if (status != REG_OK)
57963d4abf0Sagc     goto parse_bracket_done;
58063d4abf0Sagc 
58163d4abf0Sagc #ifdef TRE_DEBUG
58263d4abf0Sagc   tre_ast_print(node);
58363d4abf0Sagc #endif /* TRE_DEBUG */
58463d4abf0Sagc 
58563d4abf0Sagc  parse_bracket_done:
58663d4abf0Sagc   xfree(items);
58763d4abf0Sagc   ctx->position++;
58863d4abf0Sagc   *result = node;
58963d4abf0Sagc   return status;
59063d4abf0Sagc }
59163d4abf0Sagc 
59263d4abf0Sagc 
59363d4abf0Sagc /* Parses a positive decimal integer.  Returns -1 if the string does not
59463d4abf0Sagc    contain a valid number. */
59563d4abf0Sagc static int
tre_parse_int(const tre_char_t ** regex,const tre_char_t * regex_end)59663d4abf0Sagc tre_parse_int(const tre_char_t **regex, const tre_char_t *regex_end)
59763d4abf0Sagc {
59863d4abf0Sagc   int num = -1;
59963d4abf0Sagc   const tre_char_t *r = *regex;
60063d4abf0Sagc   while (r < regex_end && *r >= L'0' && *r <= L'9')
60163d4abf0Sagc     {
60263d4abf0Sagc       if (num < 0)
60363d4abf0Sagc 	num = 0;
60463d4abf0Sagc       num = num * 10 + *r - L'0';
60563d4abf0Sagc       r++;
60663d4abf0Sagc     }
60763d4abf0Sagc   *regex = r;
60863d4abf0Sagc   return num;
60963d4abf0Sagc }
61063d4abf0Sagc 
61163d4abf0Sagc 
61263d4abf0Sagc static reg_errcode_t
tre_parse_bound(tre_parse_ctx_t * ctx,tre_ast_node_t ** result)61363d4abf0Sagc tre_parse_bound(tre_parse_ctx_t *ctx, tre_ast_node_t **result)
61463d4abf0Sagc {
61563d4abf0Sagc   int min, max, i;
61663d4abf0Sagc   int cost_ins, cost_del, cost_subst, cost_max;
61763d4abf0Sagc   int limit_ins, limit_del, limit_subst, limit_err;
61863d4abf0Sagc   const tre_char_t *r = ctx->re;
61963d4abf0Sagc   const tre_char_t *start;
62063d4abf0Sagc   int minimal = (ctx->cflags & REG_UNGREEDY) ? 1 : 0;
62163d4abf0Sagc   int approx = 0;
62263d4abf0Sagc   int costs_set = 0;
62363d4abf0Sagc   int counts_set = 0;
62463d4abf0Sagc 
62563d4abf0Sagc   cost_ins = cost_del = cost_subst = cost_max = TRE_PARAM_UNSET;
62663d4abf0Sagc   limit_ins = limit_del = limit_subst = limit_err = TRE_PARAM_UNSET;
62763d4abf0Sagc 
62863d4abf0Sagc   /* Parse number (minimum repetition count). */
62963d4abf0Sagc   min = -1;
63063d4abf0Sagc   if (r < ctx->re_end && *r >= L'0' && *r <= L'9') {
63163d4abf0Sagc     DPRINT(("tre_parse:	  min count: '%.*" STRF "'\n", REST(r)));
63263d4abf0Sagc     min = tre_parse_int(&r, ctx->re_end);
63363d4abf0Sagc   }
63463d4abf0Sagc 
63563d4abf0Sagc   /* Parse comma and second number (maximum repetition count). */
63663d4abf0Sagc   max = min;
63763d4abf0Sagc   if (r < ctx->re_end && *r == CHAR_COMMA)
63863d4abf0Sagc     {
63963d4abf0Sagc       r++;
64063d4abf0Sagc       DPRINT(("tre_parse:   max count: '%.*" STRF "'\n", REST(r)));
64163d4abf0Sagc       max = tre_parse_int(&r, ctx->re_end);
64263d4abf0Sagc     }
64363d4abf0Sagc 
64463d4abf0Sagc   /* Check that the repeat counts are sane. */
64563d4abf0Sagc   if ((max >= 0 && min > max) || max > RE_DUP_MAX)
64663d4abf0Sagc     return REG_BADBR;
64763d4abf0Sagc 
64863d4abf0Sagc 
64963d4abf0Sagc   /*
65063d4abf0Sagc    '{'
65163d4abf0Sagc      optionally followed immediately by a number == minimum repcount
65263d4abf0Sagc      optionally followed by , then a number == maximum repcount
65363d4abf0Sagc       + then a number == maximum insertion count
65463d4abf0Sagc       - then a number == maximum deletion count
65563d4abf0Sagc       # then a number == maximum substitution count
65663d4abf0Sagc       ~ then a number == maximum number of errors
65763d4abf0Sagc       Any of +, -, # or ~ without followed by a number means that
65863d4abf0Sagc       the maximum count/number of errors is infinite.
65963d4abf0Sagc 
66063d4abf0Sagc       An equation of the form
66163d4abf0Sagc 	Xi + Yd + Zs < C
66263d4abf0Sagc       can be specified to set costs and the cost limit to a value
66363d4abf0Sagc       different from the default value:
66463d4abf0Sagc 	- X is the cost of an insertion
66563d4abf0Sagc 	- Y is the cost of a deletion
66663d4abf0Sagc 	- Z is the cost of a substitution
66763d4abf0Sagc 	- C is the maximum cost
66863d4abf0Sagc 
66963d4abf0Sagc       If no count limit or cost is set for an operation, the operation
67063d4abf0Sagc       is not allowed at all.
67163d4abf0Sagc   */
67263d4abf0Sagc 
67363d4abf0Sagc 
67463d4abf0Sagc   do {
67563d4abf0Sagc     int done;
67663d4abf0Sagc     start = r;
67763d4abf0Sagc 
67863d4abf0Sagc     /* Parse count limit settings */
67963d4abf0Sagc     done = 0;
68063d4abf0Sagc     if (!counts_set)
68163d4abf0Sagc       while (r + 1 < ctx->re_end && !done)
68263d4abf0Sagc 	{
68363d4abf0Sagc 	  switch (*r)
68463d4abf0Sagc 	    {
68563d4abf0Sagc 	    case CHAR_PLUS:  /* Insert limit */
68663d4abf0Sagc 	      DPRINT(("tre_parse:   ins limit: '%.*" STRF "'\n", REST(r)));
68763d4abf0Sagc 	      r++;
68863d4abf0Sagc 	      limit_ins = tre_parse_int(&r, ctx->re_end);
68963d4abf0Sagc 	      if (limit_ins < 0)
69063d4abf0Sagc 		limit_ins = INT_MAX;
69163d4abf0Sagc 	      counts_set = 1;
69263d4abf0Sagc 	      break;
69363d4abf0Sagc 	    case CHAR_MINUS: /* Delete limit */
69463d4abf0Sagc 	      DPRINT(("tre_parse:   del limit: '%.*" STRF "'\n", REST(r)));
69563d4abf0Sagc 	      r++;
69663d4abf0Sagc 	      limit_del = tre_parse_int(&r, ctx->re_end);
69763d4abf0Sagc 	      if (limit_del < 0)
69863d4abf0Sagc 		limit_del = INT_MAX;
69963d4abf0Sagc 	      counts_set = 1;
70063d4abf0Sagc 	      break;
70163d4abf0Sagc 	    case CHAR_HASH:  /* Substitute limit */
70263d4abf0Sagc 	      DPRINT(("tre_parse: subst limit: '%.*" STRF "'\n", REST(r)));
70363d4abf0Sagc 	      r++;
70463d4abf0Sagc 	      limit_subst = tre_parse_int(&r, ctx->re_end);
70563d4abf0Sagc 	      if (limit_subst < 0)
70663d4abf0Sagc 		limit_subst = INT_MAX;
70763d4abf0Sagc 	      counts_set = 1;
70863d4abf0Sagc 	      break;
70963d4abf0Sagc 	    case CHAR_TILDE: /* Maximum number of changes */
71063d4abf0Sagc 	      DPRINT(("tre_parse: count limit: '%.*" STRF "'\n", REST(r)));
71163d4abf0Sagc 	      r++;
71263d4abf0Sagc 	      limit_err = tre_parse_int(&r, ctx->re_end);
71363d4abf0Sagc 	      if (limit_err < 0)
71463d4abf0Sagc 		limit_err = INT_MAX;
71563d4abf0Sagc 	      approx = 1;
71663d4abf0Sagc 	      break;
71763d4abf0Sagc 	    case CHAR_COMMA:
71863d4abf0Sagc 	      r++;
71963d4abf0Sagc 	      break;
72063d4abf0Sagc 	    case L' ':
72163d4abf0Sagc 	      r++;
72263d4abf0Sagc 	      break;
72363d4abf0Sagc 	    case L'}':
72463d4abf0Sagc 	      done = 1;
72563d4abf0Sagc 	      break;
72663d4abf0Sagc 	    default:
72763d4abf0Sagc 	      done = 1;
72863d4abf0Sagc 	      break;
72963d4abf0Sagc 	    }
73063d4abf0Sagc 	}
73163d4abf0Sagc 
73263d4abf0Sagc     /* Parse cost restriction equation. */
73363d4abf0Sagc     done = 0;
73463d4abf0Sagc     if (!costs_set)
73563d4abf0Sagc       while (r + 1 < ctx->re_end && !done)
73663d4abf0Sagc 	{
73763d4abf0Sagc 	  switch (*r)
73863d4abf0Sagc 	    {
73963d4abf0Sagc 	    case CHAR_PLUS:
74063d4abf0Sagc 	    case L' ':
74163d4abf0Sagc 	      r++;
74263d4abf0Sagc 	      break;
74363d4abf0Sagc 	    case L'<':
74463d4abf0Sagc 	      DPRINT(("tre_parse:    max cost: '%.*" STRF "'\n", REST(r)));
74563d4abf0Sagc 	      r++;
74663d4abf0Sagc 	      while (*r == L' ')
74763d4abf0Sagc 		r++;
74863d4abf0Sagc 	      cost_max = tre_parse_int(&r, ctx->re_end);
74963d4abf0Sagc 	      if (cost_max < 0)
75063d4abf0Sagc 		cost_max = INT_MAX;
75163d4abf0Sagc 	      else
75263d4abf0Sagc 		cost_max--;
75363d4abf0Sagc 	      approx = 1;
75463d4abf0Sagc 	      break;
75563d4abf0Sagc 	    case CHAR_COMMA:
75663d4abf0Sagc 	      r++;
75763d4abf0Sagc 	      done = 1;
75863d4abf0Sagc 	      break;
75963d4abf0Sagc 	    default:
76063d4abf0Sagc 	      if (*r >= L'0' && *r <= L'9')
76163d4abf0Sagc 		{
76263d4abf0Sagc #ifdef TRE_DEBUG
76363d4abf0Sagc 		  const tre_char_t *sr = r;
76463d4abf0Sagc #endif /* TRE_DEBUG */
76563d4abf0Sagc 		  int cost = tre_parse_int(&r, ctx->re_end);
76663d4abf0Sagc 		  /* XXX - make sure r is not past end. */
76763d4abf0Sagc 		  switch (*r)
76863d4abf0Sagc 		    {
76963d4abf0Sagc 		    case L'i':	/* Insert cost */
77063d4abf0Sagc 		      DPRINT(("tre_parse:    ins cost: '%.*" STRF "'\n",
77163d4abf0Sagc 			      REST(sr)));
77263d4abf0Sagc 		      r++;
77363d4abf0Sagc 		      cost_ins = cost;
77463d4abf0Sagc 		      costs_set = 1;
77563d4abf0Sagc 		      break;
77663d4abf0Sagc 		    case L'd':	/* Delete cost */
77763d4abf0Sagc 		      DPRINT(("tre_parse:    del cost: '%.*" STRF "'\n",
77863d4abf0Sagc 			      REST(sr)));
77963d4abf0Sagc 		      r++;
78063d4abf0Sagc 		      cost_del = cost;
78163d4abf0Sagc 		      costs_set = 1;
78263d4abf0Sagc 		      break;
78363d4abf0Sagc 		    case L's':	/* Substitute cost */
78463d4abf0Sagc 		      DPRINT(("tre_parse:  subst cost: '%.*" STRF "'\n",
78563d4abf0Sagc 			      REST(sr)));
78663d4abf0Sagc 		      r++;
78763d4abf0Sagc 		      cost_subst = cost;
78863d4abf0Sagc 		      costs_set = 1;
78963d4abf0Sagc 		      break;
79063d4abf0Sagc 		    default:
79163d4abf0Sagc 		      return REG_BADBR;
79263d4abf0Sagc 		    }
79363d4abf0Sagc 		}
79463d4abf0Sagc 	      else
79563d4abf0Sagc 		{
79663d4abf0Sagc 		  done = 1;
79763d4abf0Sagc 		  break;
79863d4abf0Sagc 		}
79963d4abf0Sagc 	    }
80063d4abf0Sagc 	}
80163d4abf0Sagc   } while (start != r);
80263d4abf0Sagc 
80363d4abf0Sagc   /* Missing }. */
80463d4abf0Sagc   if (r >= ctx->re_end)
80563d4abf0Sagc     return REG_EBRACE;
80663d4abf0Sagc 
80763d4abf0Sagc   /* Empty contents of {}. */
80863d4abf0Sagc   if (r == ctx->re)
80963d4abf0Sagc     return REG_BADBR;
81063d4abf0Sagc 
81163d4abf0Sagc   /* Parse the ending '}' or '\}'.*/
81263d4abf0Sagc   if (ctx->cflags & REG_EXTENDED)
81363d4abf0Sagc     {
81463d4abf0Sagc       if (r >= ctx->re_end || *r != CHAR_RBRACE)
81563d4abf0Sagc 	return REG_BADBR;
81663d4abf0Sagc       r++;
81763d4abf0Sagc     }
81863d4abf0Sagc   else
81963d4abf0Sagc     {
82063d4abf0Sagc       if (r + 1 >= ctx->re_end
82163d4abf0Sagc 	  || *r != CHAR_BACKSLASH
82263d4abf0Sagc 	  || *(r + 1) != CHAR_RBRACE)
82363d4abf0Sagc 	return REG_BADBR;
82463d4abf0Sagc       r += 2;
82563d4abf0Sagc     }
82663d4abf0Sagc 
82763d4abf0Sagc 
82863d4abf0Sagc   /* Parse trailing '?' marking minimal repetition. */
82963d4abf0Sagc   if (r < ctx->re_end)
83063d4abf0Sagc     {
83163d4abf0Sagc       if (*r == CHAR_QUESTIONMARK)
83263d4abf0Sagc 	{
83363d4abf0Sagc 	  minimal = !(ctx->cflags & REG_UNGREEDY);
83463d4abf0Sagc 	  r++;
83563d4abf0Sagc 	}
83663d4abf0Sagc       else if (*r == CHAR_STAR || *r == CHAR_PLUS)
83763d4abf0Sagc 	{
83863d4abf0Sagc 	  /* These are reserved for future extensions. */
83963d4abf0Sagc 	  return REG_BADRPT;
84063d4abf0Sagc 	}
84163d4abf0Sagc     }
84263d4abf0Sagc 
84363d4abf0Sagc   /* Create the AST node(s). */
84463d4abf0Sagc   if (min == 0 && max == 0)
84563d4abf0Sagc     {
84663d4abf0Sagc       *result = tre_ast_new_literal(ctx->mem, EMPTY, -1, -1);
84763d4abf0Sagc       if (*result == NULL)
84863d4abf0Sagc 	return REG_ESPACE;
84963d4abf0Sagc     }
85063d4abf0Sagc   else
85163d4abf0Sagc     {
85263d4abf0Sagc       if (min < 0 && max < 0)
85363d4abf0Sagc 	/* Only approximate parameters set, no repetitions. */
85463d4abf0Sagc 	min = max = 1;
85563d4abf0Sagc 
85663d4abf0Sagc       *result = tre_ast_new_iter(ctx->mem, *result, min, max, minimal);
85763d4abf0Sagc       if (!*result)
85863d4abf0Sagc 	return REG_ESPACE;
85963d4abf0Sagc 
86063d4abf0Sagc       /* If approximate matching parameters are set, add them to the
86163d4abf0Sagc 	 iteration node. */
86263d4abf0Sagc       if (approx || costs_set || counts_set)
86363d4abf0Sagc 	{
86463d4abf0Sagc 	  int *params;
86563d4abf0Sagc 	  tre_iteration_t *iter = (*result)->obj;
86663d4abf0Sagc 
86763d4abf0Sagc 	  if (costs_set || counts_set)
86863d4abf0Sagc 	    {
86963d4abf0Sagc 	      if (limit_ins == TRE_PARAM_UNSET)
87063d4abf0Sagc 		{
87163d4abf0Sagc 		  if (cost_ins == TRE_PARAM_UNSET)
87263d4abf0Sagc 		    limit_ins = 0;
87363d4abf0Sagc 		  else
87463d4abf0Sagc 		    limit_ins = INT_MAX;
87563d4abf0Sagc 		}
87663d4abf0Sagc 
87763d4abf0Sagc 	      if (limit_del == TRE_PARAM_UNSET)
87863d4abf0Sagc 		{
87963d4abf0Sagc 		  if (cost_del == TRE_PARAM_UNSET)
88063d4abf0Sagc 		    limit_del = 0;
88163d4abf0Sagc 		  else
88263d4abf0Sagc 		    limit_del = INT_MAX;
88363d4abf0Sagc 		}
88463d4abf0Sagc 
88563d4abf0Sagc 	      if (limit_subst == TRE_PARAM_UNSET)
88663d4abf0Sagc 		{
88763d4abf0Sagc 		  if (cost_subst == TRE_PARAM_UNSET)
88863d4abf0Sagc 		    limit_subst = 0;
88963d4abf0Sagc 		  else
89063d4abf0Sagc 		    limit_subst = INT_MAX;
89163d4abf0Sagc 		}
89263d4abf0Sagc 	    }
89363d4abf0Sagc 
89463d4abf0Sagc 	  if (cost_max == TRE_PARAM_UNSET)
89563d4abf0Sagc 	    cost_max = INT_MAX;
89663d4abf0Sagc 	  if (limit_err == TRE_PARAM_UNSET)
89763d4abf0Sagc 	    limit_err = INT_MAX;
89863d4abf0Sagc 
89963d4abf0Sagc 	  ctx->have_approx = 1;
90063d4abf0Sagc 	  params = tre_mem_alloc(ctx->mem, sizeof(*params) * TRE_PARAM_LAST);
90163d4abf0Sagc 	  if (!params)
90263d4abf0Sagc 	    return REG_ESPACE;
90363d4abf0Sagc 	  for (i = 0; i < TRE_PARAM_LAST; i++)
90463d4abf0Sagc 	    params[i] = TRE_PARAM_UNSET;
90563d4abf0Sagc 	  params[TRE_PARAM_COST_INS] = cost_ins;
90663d4abf0Sagc 	  params[TRE_PARAM_COST_DEL] = cost_del;
90763d4abf0Sagc 	  params[TRE_PARAM_COST_SUBST] = cost_subst;
90863d4abf0Sagc 	  params[TRE_PARAM_COST_MAX] = cost_max;
90963d4abf0Sagc 	  params[TRE_PARAM_MAX_INS] = limit_ins;
91063d4abf0Sagc 	  params[TRE_PARAM_MAX_DEL] = limit_del;
91163d4abf0Sagc 	  params[TRE_PARAM_MAX_SUBST] = limit_subst;
91263d4abf0Sagc 	  params[TRE_PARAM_MAX_ERR] = limit_err;
91363d4abf0Sagc 	  iter->params = params;
91463d4abf0Sagc 	}
91563d4abf0Sagc     }
91663d4abf0Sagc 
91763d4abf0Sagc   DPRINT(("tre_parse_bound: min %d, max %d, costs [%d,%d,%d, total %d], "
91863d4abf0Sagc 	  "limits [%d,%d,%d, total %d]\n",
91963d4abf0Sagc 	  min, max, cost_ins, cost_del, cost_subst, cost_max,
92063d4abf0Sagc 	  limit_ins, limit_del, limit_subst, limit_err));
92163d4abf0Sagc 
92263d4abf0Sagc 
92363d4abf0Sagc   ctx->re = r;
92463d4abf0Sagc   return REG_OK;
92563d4abf0Sagc }
92663d4abf0Sagc 
92763d4abf0Sagc typedef enum {
92863d4abf0Sagc   PARSE_RE = 0,
92963d4abf0Sagc   PARSE_ATOM,
93063d4abf0Sagc   PARSE_MARK_FOR_SUBMATCH,
93163d4abf0Sagc   PARSE_BRANCH,
93263d4abf0Sagc   PARSE_PIECE,
93363d4abf0Sagc   PARSE_CATENATION,
93463d4abf0Sagc   PARSE_POST_CATENATION,
93563d4abf0Sagc   PARSE_UNION,
93663d4abf0Sagc   PARSE_POST_UNION,
93763d4abf0Sagc   PARSE_POSTFIX,
93863d4abf0Sagc   PARSE_RESTORE_CFLAGS
93963d4abf0Sagc } tre_parse_re_stack_symbol_t;
94063d4abf0Sagc 
94163d4abf0Sagc 
94263d4abf0Sagc reg_errcode_t
tre_parse(tre_parse_ctx_t * ctx)94363d4abf0Sagc tre_parse(tre_parse_ctx_t *ctx)
94463d4abf0Sagc {
94563d4abf0Sagc   tre_ast_node_t *result = NULL;
94663d4abf0Sagc   tre_parse_re_stack_symbol_t symbol;
94763d4abf0Sagc   reg_errcode_t status = REG_OK;
94863d4abf0Sagc   tre_stack_t *stack = ctx->stack;
94963d4abf0Sagc   int bottom = tre_stack_num_objects(stack);
95063d4abf0Sagc   int depth = 0;
95163d4abf0Sagc   int temporary_cflags = 0;
95263d4abf0Sagc 
95363d4abf0Sagc   DPRINT(("tre_parse: parsing '%.*" STRF "', len = %d\n",
95463d4abf0Sagc 	  ctx->len, ctx->re, ctx->len));
95563d4abf0Sagc 
95663d4abf0Sagc   if (!ctx->nofirstsub)
95763d4abf0Sagc     {
958f2a3d147Schristos       STACK_PUSH(stack, long, ctx->submatch_id);
959f2a3d147Schristos       STACK_PUSH(stack, long, PARSE_MARK_FOR_SUBMATCH);
96063d4abf0Sagc       ctx->submatch_id++;
96163d4abf0Sagc     }
962f2a3d147Schristos   STACK_PUSH(stack, long, PARSE_RE);
96363d4abf0Sagc   ctx->re_start = ctx->re;
96463d4abf0Sagc   ctx->re_end = ctx->re + ctx->len;
96563d4abf0Sagc 
96663d4abf0Sagc 
96763d4abf0Sagc   /* The following is basically just a recursive descent parser.  I use
96863d4abf0Sagc      an explicit stack instead of recursive functions mostly because of
96963d4abf0Sagc      two reasons: compatibility with systems which have an overflowable
97063d4abf0Sagc      call stack, and efficiency (both in lines of code and speed).  */
97163d4abf0Sagc   while (tre_stack_num_objects(stack) > bottom && status == REG_OK)
97263d4abf0Sagc     {
97363d4abf0Sagc       if (status != REG_OK)
97463d4abf0Sagc 	break;
97563d4abf0Sagc       symbol = tre_stack_pop_int(stack);
97663d4abf0Sagc       switch (symbol)
97763d4abf0Sagc 	{
97863d4abf0Sagc 	case PARSE_RE:
97963d4abf0Sagc 	  /* Parse a full regexp.  A regexp is one or more branches,
98063d4abf0Sagc 	     separated by the union operator `|'. */
98163d4abf0Sagc #ifdef REG_LITERAL
98263d4abf0Sagc 	  if (!(ctx->cflags & REG_LITERAL)
98363d4abf0Sagc 	      && ctx->cflags & REG_EXTENDED)
98463d4abf0Sagc #endif /* REG_LITERAL */
985f2a3d147Schristos 	    STACK_PUSHX(stack, long, PARSE_UNION);
986f2a3d147Schristos 	  STACK_PUSHX(stack, long, PARSE_BRANCH);
98763d4abf0Sagc 	  break;
98863d4abf0Sagc 
98963d4abf0Sagc 	case PARSE_BRANCH:
99063d4abf0Sagc 	  /* Parse a branch.  A branch is one or more pieces, concatenated.
99163d4abf0Sagc 	     A piece is an atom possibly followed by a postfix operator. */
992f2a3d147Schristos 	  STACK_PUSHX(stack, long, PARSE_CATENATION);
993f2a3d147Schristos 	  STACK_PUSHX(stack, long, PARSE_PIECE);
99463d4abf0Sagc 	  break;
99563d4abf0Sagc 
99663d4abf0Sagc 	case PARSE_PIECE:
99763d4abf0Sagc 	  /* Parse a piece.  A piece is an atom possibly followed by one
99863d4abf0Sagc 	     or more postfix operators. */
99963d4abf0Sagc #ifdef REG_LITERAL
100063d4abf0Sagc 	  if (!(ctx->cflags & REG_LITERAL))
100163d4abf0Sagc #endif /* REG_LITERAL */
1002f2a3d147Schristos 	    STACK_PUSHX(stack, long, PARSE_POSTFIX);
1003f2a3d147Schristos 	  STACK_PUSHX(stack, long, PARSE_ATOM);
100463d4abf0Sagc 	  break;
100563d4abf0Sagc 
100663d4abf0Sagc 	case PARSE_CATENATION:
100763d4abf0Sagc 	  /* If the expression has not ended, parse another piece. */
100863d4abf0Sagc 	  {
100963d4abf0Sagc 	    tre_char_t c;
101063d4abf0Sagc 	    if (ctx->re >= ctx->re_end)
101163d4abf0Sagc 	      break;
101263d4abf0Sagc 	    c = *ctx->re;
101363d4abf0Sagc #ifdef REG_LITERAL
101463d4abf0Sagc 	    if (!(ctx->cflags & REG_LITERAL))
101563d4abf0Sagc 	      {
101663d4abf0Sagc #endif /* REG_LITERAL */
101763d4abf0Sagc 		if (ctx->cflags & REG_EXTENDED && c == CHAR_PIPE)
101863d4abf0Sagc 		  break;
101963d4abf0Sagc 		if ((ctx->cflags & REG_EXTENDED
102063d4abf0Sagc 		     && c == CHAR_RPAREN && depth > 0)
102163d4abf0Sagc 		    || (!(ctx->cflags & REG_EXTENDED)
102263d4abf0Sagc 			&& (c == CHAR_BACKSLASH
102363d4abf0Sagc 			    && *(ctx->re + 1) == CHAR_RPAREN)))
102463d4abf0Sagc 		  {
102563d4abf0Sagc 		    if (!(ctx->cflags & REG_EXTENDED) && depth == 0)
102663d4abf0Sagc 		      status = REG_EPAREN;
102763d4abf0Sagc 		    DPRINT(("tre_parse:	  group end: '%.*" STRF "'\n",
102863d4abf0Sagc 			    REST(ctx->re)));
102963d4abf0Sagc 		    depth--;
103063d4abf0Sagc 		    if (!(ctx->cflags & REG_EXTENDED))
103163d4abf0Sagc 		      ctx->re += 2;
103263d4abf0Sagc 		    break;
103363d4abf0Sagc 		  }
103463d4abf0Sagc #ifdef REG_LITERAL
103563d4abf0Sagc 	      }
103663d4abf0Sagc #endif /* REG_LITERAL */
103763d4abf0Sagc 
103863d4abf0Sagc #ifdef REG_RIGHT_ASSOC
103963d4abf0Sagc 	    if (ctx->cflags & REG_RIGHT_ASSOC)
104063d4abf0Sagc 	      {
104163d4abf0Sagc 		/* Right associative concatenation. */
104263d4abf0Sagc 		STACK_PUSHX(stack, voidptr, result);
1043f2a3d147Schristos 		STACK_PUSHX(stack, long, PARSE_POST_CATENATION);
1044f2a3d147Schristos 		STACK_PUSHX(stack, long, PARSE_CATENATION);
1045f2a3d147Schristos 		STACK_PUSHX(stack, long, PARSE_PIECE);
104663d4abf0Sagc 	      }
104763d4abf0Sagc 	    else
104863d4abf0Sagc #endif /* REG_RIGHT_ASSOC */
104963d4abf0Sagc 	      {
105063d4abf0Sagc 		/* Default case, left associative concatenation. */
1051f2a3d147Schristos 		STACK_PUSHX(stack, long, PARSE_CATENATION);
105263d4abf0Sagc 		STACK_PUSHX(stack, voidptr, result);
1053f2a3d147Schristos 		STACK_PUSHX(stack, long, PARSE_POST_CATENATION);
1054f2a3d147Schristos 		STACK_PUSHX(stack, long, PARSE_PIECE);
105563d4abf0Sagc 	      }
105663d4abf0Sagc 	    break;
105763d4abf0Sagc 	  }
105863d4abf0Sagc 
105963d4abf0Sagc 	case PARSE_POST_CATENATION:
106063d4abf0Sagc 	  {
106163d4abf0Sagc 	    tre_ast_node_t *tree = tre_stack_pop_voidptr(stack);
106263d4abf0Sagc 	    tre_ast_node_t *tmp_node;
106363d4abf0Sagc 	    tmp_node = tre_ast_new_catenation(ctx->mem, tree, result);
106463d4abf0Sagc 	    if (!tmp_node)
106563d4abf0Sagc 	      return REG_ESPACE;
106663d4abf0Sagc 	    result = tmp_node;
106763d4abf0Sagc 	    break;
106863d4abf0Sagc 	  }
106963d4abf0Sagc 
107063d4abf0Sagc 	case PARSE_UNION:
107163d4abf0Sagc 	  if (ctx->re >= ctx->re_end)
107263d4abf0Sagc 	    break;
107363d4abf0Sagc #ifdef REG_LITERAL
107463d4abf0Sagc 	  if (ctx->cflags & REG_LITERAL)
107563d4abf0Sagc 	    break;
107663d4abf0Sagc #endif /* REG_LITERAL */
107763d4abf0Sagc 	  switch (*ctx->re)
107863d4abf0Sagc 	    {
107963d4abf0Sagc 	    case CHAR_PIPE:
108063d4abf0Sagc 	      DPRINT(("tre_parse:	union: '%.*" STRF "'\n",
108163d4abf0Sagc 		      REST(ctx->re)));
1082f2a3d147Schristos 	      STACK_PUSHX(stack, long, PARSE_UNION);
108363d4abf0Sagc 	      STACK_PUSHX(stack, voidptr, result);
1084f2a3d147Schristos 	      STACK_PUSHX(stack, long, PARSE_POST_UNION);
1085f2a3d147Schristos 	      STACK_PUSHX(stack, long, PARSE_BRANCH);
108663d4abf0Sagc 	      ctx->re++;
108763d4abf0Sagc 	      break;
108863d4abf0Sagc 
108963d4abf0Sagc 	    case CHAR_RPAREN:
109063d4abf0Sagc 	      ctx->re++;
109163d4abf0Sagc 	      break;
109263d4abf0Sagc 
109363d4abf0Sagc 	    default:
109463d4abf0Sagc 	      break;
109563d4abf0Sagc 	    }
109663d4abf0Sagc 	  break;
109763d4abf0Sagc 
109863d4abf0Sagc 	case PARSE_POST_UNION:
109963d4abf0Sagc 	  {
110063d4abf0Sagc 	    tre_ast_node_t *tmp_node;
110163d4abf0Sagc 	    tre_ast_node_t *tree = tre_stack_pop_voidptr(stack);
110263d4abf0Sagc 	    tmp_node = tre_ast_new_union(ctx->mem, tree, result);
110363d4abf0Sagc 	    if (!tmp_node)
110463d4abf0Sagc 	      return REG_ESPACE;
110563d4abf0Sagc 	    result = tmp_node;
110663d4abf0Sagc 	    break;
110763d4abf0Sagc 	  }
110863d4abf0Sagc 
110963d4abf0Sagc 	case PARSE_POSTFIX:
111063d4abf0Sagc 	  /* Parse postfix operators. */
111163d4abf0Sagc 	  if (ctx->re >= ctx->re_end)
111263d4abf0Sagc 	    break;
111363d4abf0Sagc #ifdef REG_LITERAL
111463d4abf0Sagc 	  if (ctx->cflags & REG_LITERAL)
111563d4abf0Sagc 	    break;
111663d4abf0Sagc #endif /* REG_LITERAL */
111763d4abf0Sagc 	  switch (*ctx->re)
111863d4abf0Sagc 	    {
111963d4abf0Sagc 	    case CHAR_PLUS:
112063d4abf0Sagc 	    case CHAR_QUESTIONMARK:
112163d4abf0Sagc 	      if (!(ctx->cflags & REG_EXTENDED))
112263d4abf0Sagc 		break;
112363d4abf0Sagc 		/*FALLTHROUGH*/
112463d4abf0Sagc 	    case CHAR_STAR:
112563d4abf0Sagc 	      {
112663d4abf0Sagc 		tre_ast_node_t *tmp_node;
112763d4abf0Sagc 		int minimal = (ctx->cflags & REG_UNGREEDY) ? 1 : 0;
112863d4abf0Sagc 		int rep_min = 0;
112963d4abf0Sagc 		int rep_max = -1;
113063d4abf0Sagc #ifdef TRE_DEBUG
113163d4abf0Sagc 		const tre_char_t *tmp_re;
113263d4abf0Sagc #endif
113363d4abf0Sagc 
113463d4abf0Sagc 		if (*ctx->re == CHAR_PLUS)
113563d4abf0Sagc 		  rep_min = 1;
113663d4abf0Sagc 		if (*ctx->re == CHAR_QUESTIONMARK)
113763d4abf0Sagc 		  rep_max = 1;
113863d4abf0Sagc #ifdef TRE_DEBUG
113963d4abf0Sagc 		tmp_re = ctx->re;
114063d4abf0Sagc #endif
114163d4abf0Sagc 
114263d4abf0Sagc 		if (ctx->re + 1 < ctx->re_end)
114363d4abf0Sagc 		  {
114463d4abf0Sagc 		    if (*(ctx->re + 1) == CHAR_QUESTIONMARK)
114563d4abf0Sagc 		      {
114663d4abf0Sagc 			minimal = !(ctx->cflags & REG_UNGREEDY);
114763d4abf0Sagc 			ctx->re++;
114863d4abf0Sagc 		      }
114963d4abf0Sagc 		    else if (*(ctx->re + 1) == CHAR_STAR
115063d4abf0Sagc 			     || *(ctx->re + 1) == CHAR_PLUS)
115163d4abf0Sagc 		      {
115263d4abf0Sagc 			/* These are reserved for future extensions. */
115363d4abf0Sagc 			return REG_BADRPT;
115463d4abf0Sagc 		      }
115563d4abf0Sagc 		  }
115663d4abf0Sagc 
115763d4abf0Sagc 		DPRINT(("tre_parse: %s star: '%.*" STRF "'\n",
115863d4abf0Sagc 			minimal ? "  minimal" : "greedy", REST(tmp_re)));
115963d4abf0Sagc 		ctx->re++;
116063d4abf0Sagc 		tmp_node = tre_ast_new_iter(ctx->mem, result, rep_min, rep_max,
116163d4abf0Sagc 					    minimal);
116263d4abf0Sagc 		if (tmp_node == NULL)
116363d4abf0Sagc 		  return REG_ESPACE;
116463d4abf0Sagc 		result = tmp_node;
1165f2a3d147Schristos 		STACK_PUSHX(stack, long, PARSE_POSTFIX);
116663d4abf0Sagc 	      }
116763d4abf0Sagc 	      break;
116863d4abf0Sagc 
116963d4abf0Sagc 	    case CHAR_BACKSLASH:
117063d4abf0Sagc 	      /* "\{" is special without REG_EXTENDED */
117163d4abf0Sagc 	      if (!(ctx->cflags & REG_EXTENDED)
117263d4abf0Sagc 		  && ctx->re + 1 < ctx->re_end
117363d4abf0Sagc 		  && *(ctx->re + 1) == CHAR_LBRACE)
117463d4abf0Sagc 		{
117563d4abf0Sagc 		  ctx->re++;
117663d4abf0Sagc 		  goto parse_brace;
117763d4abf0Sagc 		}
117863d4abf0Sagc 	      else
117963d4abf0Sagc 		break;
118063d4abf0Sagc 
118163d4abf0Sagc 	    case CHAR_LBRACE:
118263d4abf0Sagc 	      /* "{" is literal without REG_EXTENDED */
118363d4abf0Sagc 	      if (!(ctx->cflags & REG_EXTENDED))
118463d4abf0Sagc 		break;
118563d4abf0Sagc 
118663d4abf0Sagc 	    parse_brace:
118763d4abf0Sagc 	      DPRINT(("tre_parse:	bound: '%.*" STRF "'\n",
118863d4abf0Sagc 		      REST(ctx->re)));
118963d4abf0Sagc 	      ctx->re++;
119063d4abf0Sagc 
119163d4abf0Sagc 	      status = tre_parse_bound(ctx, &result);
119263d4abf0Sagc 	      if (status != REG_OK)
119363d4abf0Sagc 		return status;
1194f2a3d147Schristos 	      STACK_PUSHX(stack, long, PARSE_POSTFIX);
119563d4abf0Sagc 	      break;
119663d4abf0Sagc 	    }
119763d4abf0Sagc 	  break;
119863d4abf0Sagc 
119963d4abf0Sagc 	case PARSE_ATOM:
120063d4abf0Sagc 	  /* Parse an atom.  An atom is a regular expression enclosed in `()',
120163d4abf0Sagc 	     an empty set of `()', a bracket expression, `.', `^', `$',
120263d4abf0Sagc 	     a `\' followed by a character, or a single character. */
120363d4abf0Sagc 
120463d4abf0Sagc 	  /* End of regexp? (empty string). */
120563d4abf0Sagc 	  if (ctx->re >= ctx->re_end)
120663d4abf0Sagc 	    goto parse_literal;
120763d4abf0Sagc 
120863d4abf0Sagc #ifdef REG_LITERAL
120963d4abf0Sagc 	  if (ctx->cflags & REG_LITERAL)
121063d4abf0Sagc 	    goto parse_literal;
121163d4abf0Sagc #endif /* REG_LITERAL */
121263d4abf0Sagc 
121363d4abf0Sagc 	  switch (*ctx->re)
121463d4abf0Sagc 	    {
121563d4abf0Sagc 	    case CHAR_LPAREN:  /* parenthesized subexpression */
121663d4abf0Sagc 
121763d4abf0Sagc 	      /* Handle "(?...)" extensions.  They work in a way similar
121863d4abf0Sagc 		 to Perls corresponding extensions. */
121963d4abf0Sagc 	      if (ctx->cflags & REG_EXTENDED
122063d4abf0Sagc 		  && *(ctx->re + 1) == CHAR_QUESTIONMARK)
122163d4abf0Sagc 		{
122263d4abf0Sagc 		  int new_cflags = ctx->cflags;
122363d4abf0Sagc 		  int bit = 1;
122463d4abf0Sagc 		  DPRINT(("tre_parse:	extension: '%.*" STRF "\n",
122563d4abf0Sagc 			  REST(ctx->re)));
122663d4abf0Sagc 		  ctx->re += 2;
122713498f30Srin 		  while (/*CONSTCOND*/(void)1,1)
122863d4abf0Sagc 		    {
122963d4abf0Sagc 		      if (*ctx->re == L'i')
123063d4abf0Sagc 			{
123163d4abf0Sagc 			  DPRINT(("tre_parse:	    icase: '%.*" STRF "\n",
123263d4abf0Sagc 				  REST(ctx->re)));
123363d4abf0Sagc 			  if (bit)
123463d4abf0Sagc 			    new_cflags |= REG_ICASE;
123563d4abf0Sagc 			  else
123663d4abf0Sagc 			    new_cflags &= ~REG_ICASE;
123763d4abf0Sagc 			  ctx->re++;
123863d4abf0Sagc 			}
123963d4abf0Sagc 		      else if (*ctx->re == L'n')
124063d4abf0Sagc 			{
124163d4abf0Sagc 			  DPRINT(("tre_parse:	  newline: '%.*" STRF "\n",
124263d4abf0Sagc 				  REST(ctx->re)));
124363d4abf0Sagc 			  if (bit)
124463d4abf0Sagc 			    new_cflags |= REG_NEWLINE;
124563d4abf0Sagc 			  else
124663d4abf0Sagc 			    new_cflags &= ~REG_NEWLINE;
124763d4abf0Sagc 			  ctx->re++;
124863d4abf0Sagc 			}
124963d4abf0Sagc #ifdef REG_RIGHT_ASSOC
125063d4abf0Sagc 		      else if (*ctx->re == L'r')
125163d4abf0Sagc 			{
125263d4abf0Sagc 			  DPRINT(("tre_parse: right assoc: '%.*" STRF "\n",
125363d4abf0Sagc 				  REST(ctx->re)));
125463d4abf0Sagc 			  if (bit)
125563d4abf0Sagc 			    new_cflags |= REG_RIGHT_ASSOC;
125663d4abf0Sagc 			  else
125763d4abf0Sagc 			    new_cflags &= ~REG_RIGHT_ASSOC;
125863d4abf0Sagc 			  ctx->re++;
125963d4abf0Sagc 			}
126063d4abf0Sagc #endif /* REG_RIGHT_ASSOC */
126163d4abf0Sagc #ifdef REG_UNGREEDY
126263d4abf0Sagc 		      else if (*ctx->re == L'U')
126363d4abf0Sagc 			{
126463d4abf0Sagc 			  DPRINT(("tre_parse:    ungreedy: '%.*" STRF "\n",
126563d4abf0Sagc 				  REST(ctx->re)));
126663d4abf0Sagc 			  if (bit)
126763d4abf0Sagc 			    new_cflags |= REG_UNGREEDY;
126863d4abf0Sagc 			  else
126963d4abf0Sagc 			    new_cflags &= ~REG_UNGREEDY;
127063d4abf0Sagc 			  ctx->re++;
127163d4abf0Sagc 			}
127263d4abf0Sagc #endif /* REG_UNGREEDY */
127363d4abf0Sagc 		      else if (*ctx->re == CHAR_MINUS)
127463d4abf0Sagc 			{
127563d4abf0Sagc 			  DPRINT(("tre_parse:	 turn off: '%.*" STRF "\n",
127663d4abf0Sagc 				  REST(ctx->re)));
127763d4abf0Sagc 			  ctx->re++;
127863d4abf0Sagc 			  bit = 0;
127963d4abf0Sagc 			}
128063d4abf0Sagc 		      else if (*ctx->re == CHAR_COLON)
128163d4abf0Sagc 			{
128263d4abf0Sagc 			  DPRINT(("tre_parse:	 no group: '%.*" STRF "\n",
128363d4abf0Sagc 				  REST(ctx->re)));
128463d4abf0Sagc 			  ctx->re++;
128563d4abf0Sagc 			  depth++;
128663d4abf0Sagc 			  break;
128763d4abf0Sagc 			}
128863d4abf0Sagc 		      else if (*ctx->re == CHAR_HASH)
128963d4abf0Sagc 			{
129063d4abf0Sagc 			  DPRINT(("tre_parse:    comment: '%.*" STRF "\n",
129163d4abf0Sagc 				  REST(ctx->re)));
129263d4abf0Sagc 			  /* A comment can contain any character except a
129363d4abf0Sagc 			     right parenthesis */
129463d4abf0Sagc 			  while (*ctx->re != CHAR_RPAREN
129563d4abf0Sagc 				 && ctx->re < ctx->re_end)
129663d4abf0Sagc 			    ctx->re++;
129763d4abf0Sagc 			  if (*ctx->re == CHAR_RPAREN && ctx->re < ctx->re_end)
129863d4abf0Sagc 			    {
129963d4abf0Sagc 			      ctx->re++;
130063d4abf0Sagc 			      break;
130163d4abf0Sagc 			    }
130263d4abf0Sagc 			  else
130363d4abf0Sagc 			    return REG_BADPAT;
130463d4abf0Sagc 			}
130563d4abf0Sagc 		      else if (*ctx->re == CHAR_RPAREN)
130663d4abf0Sagc 			{
130763d4abf0Sagc 			  ctx->re++;
130863d4abf0Sagc 			  break;
130963d4abf0Sagc 			}
131063d4abf0Sagc 		      else
131163d4abf0Sagc 			return REG_BADPAT;
131263d4abf0Sagc 		    }
131363d4abf0Sagc 
131463d4abf0Sagc 		  /* Turn on the cflags changes for the rest of the
131563d4abf0Sagc 		     enclosing group. */
1316f2a3d147Schristos 		  STACK_PUSHX(stack, long, ctx->cflags);
1317f2a3d147Schristos 		  STACK_PUSHX(stack, long, PARSE_RESTORE_CFLAGS);
1318f2a3d147Schristos 		  STACK_PUSHX(stack, long, PARSE_RE);
131963d4abf0Sagc 		  ctx->cflags = new_cflags;
132063d4abf0Sagc 		  break;
132163d4abf0Sagc 		}
132263d4abf0Sagc 
132363d4abf0Sagc 	      if (ctx->cflags & REG_EXTENDED
132463d4abf0Sagc 		  || (ctx->re > ctx->re_start
132563d4abf0Sagc 		      && *(ctx->re - 1) == CHAR_BACKSLASH))
132663d4abf0Sagc 		{
132763d4abf0Sagc 		  depth++;
132863d4abf0Sagc 		  if (ctx->re + 2 < ctx->re_end
132963d4abf0Sagc 		      && *(ctx->re + 1) == CHAR_QUESTIONMARK
133063d4abf0Sagc 		      && *(ctx->re + 2) == CHAR_COLON)
133163d4abf0Sagc 		    {
133263d4abf0Sagc 		      DPRINT(("tre_parse: group begin: '%.*" STRF
133363d4abf0Sagc 			      "', no submatch\n", REST(ctx->re)));
133463d4abf0Sagc 		      /* Don't mark for submatching. */
133563d4abf0Sagc 		      ctx->re += 3;
1336f2a3d147Schristos 		      STACK_PUSHX(stack, long, PARSE_RE);
133763d4abf0Sagc 		    }
133863d4abf0Sagc 		  else
133963d4abf0Sagc 		    {
134063d4abf0Sagc 		      DPRINT(("tre_parse: group begin: '%.*" STRF
134163d4abf0Sagc 			      "', submatch %d\n", REST(ctx->re),
134263d4abf0Sagc 			      ctx->submatch_id));
134363d4abf0Sagc 		      ctx->re++;
134463d4abf0Sagc 		      /* First parse a whole RE, then mark the resulting tree
134563d4abf0Sagc 			 for submatching. */
1346f2a3d147Schristos 		      STACK_PUSHX(stack, long, ctx->submatch_id);
1347f2a3d147Schristos 		      STACK_PUSHX(stack, long, PARSE_MARK_FOR_SUBMATCH);
1348f2a3d147Schristos 		      STACK_PUSHX(stack, long, PARSE_RE);
134963d4abf0Sagc 		      ctx->submatch_id++;
135063d4abf0Sagc 		    }
135163d4abf0Sagc 		}
135263d4abf0Sagc 	      else
135363d4abf0Sagc 		goto parse_literal;
135463d4abf0Sagc 	      break;
135563d4abf0Sagc 
135663d4abf0Sagc 	    case CHAR_RPAREN:  /* end of current subexpression */
135763d4abf0Sagc 	      if ((ctx->cflags & REG_EXTENDED && depth > 0)
135863d4abf0Sagc 		  || (ctx->re > ctx->re_start
135963d4abf0Sagc 		      && *(ctx->re - 1) == CHAR_BACKSLASH))
136063d4abf0Sagc 		{
136163d4abf0Sagc 		  DPRINT(("tre_parse:	    empty: '%.*" STRF "'\n",
136263d4abf0Sagc 			  REST(ctx->re)));
136363d4abf0Sagc 		  /* We were expecting an atom, but instead the current
136463d4abf0Sagc 		     subexpression was closed.	POSIX leaves the meaning of
136563d4abf0Sagc 		     this to be implementation-defined.	 We interpret this as
136663d4abf0Sagc 		     an empty expression (which matches an empty string).  */
136763d4abf0Sagc 		  result = tre_ast_new_literal(ctx->mem, EMPTY, -1, -1);
136863d4abf0Sagc 		  if (result == NULL)
136963d4abf0Sagc 		    return REG_ESPACE;
137063d4abf0Sagc 		  if (!(ctx->cflags & REG_EXTENDED))
137163d4abf0Sagc 		    ctx->re--;
137263d4abf0Sagc 		}
137363d4abf0Sagc 	      else
137463d4abf0Sagc 		goto parse_literal;
137563d4abf0Sagc 	      break;
137663d4abf0Sagc 
137763d4abf0Sagc 	    case CHAR_LBRACKET: /* bracket expression */
137863d4abf0Sagc 	      DPRINT(("tre_parse:     bracket: '%.*" STRF "'\n",
137963d4abf0Sagc 		      REST(ctx->re)));
138063d4abf0Sagc 	      ctx->re++;
138163d4abf0Sagc 	      status = tre_parse_bracket(ctx, &result);
138263d4abf0Sagc 	      if (status != REG_OK)
138363d4abf0Sagc 		return status;
138463d4abf0Sagc 	      break;
138563d4abf0Sagc 
138663d4abf0Sagc 	    case CHAR_BACKSLASH:
138763d4abf0Sagc 	      /* If this is "\(" or "\)" chew off the backslash and
138863d4abf0Sagc 		 try again. */
138963d4abf0Sagc 	      if (!(ctx->cflags & REG_EXTENDED)
139063d4abf0Sagc 		  && ctx->re + 1 < ctx->re_end
139163d4abf0Sagc 		  && (*(ctx->re + 1) == CHAR_LPAREN
139263d4abf0Sagc 		      || *(ctx->re + 1) == CHAR_RPAREN))
139363d4abf0Sagc 		{
139463d4abf0Sagc 		  ctx->re++;
1395f2a3d147Schristos 		  STACK_PUSHX(stack, long, PARSE_ATOM);
139663d4abf0Sagc 		  break;
139763d4abf0Sagc 		}
139863d4abf0Sagc 
139963d4abf0Sagc 	      /* If a macro is used, parse the expanded macro recursively. */
140063d4abf0Sagc 	      {
140163d4abf0Sagc 		tre_char_t buf[64];
140263d4abf0Sagc 		tre_expand_macro(ctx->re + 1, ctx->re_end,
140363d4abf0Sagc 				 buf, elementsof(buf));
140463d4abf0Sagc 		if (buf[0] != 0)
140563d4abf0Sagc 		  {
140663d4abf0Sagc 		    tre_parse_ctx_t subctx;
140763d4abf0Sagc 		    memcpy(&subctx, ctx, sizeof(subctx));
140863d4abf0Sagc 		    subctx.re = buf;
140963d4abf0Sagc 		    subctx.len = tre_strlen(buf);
141063d4abf0Sagc 		    subctx.nofirstsub = 1;
141163d4abf0Sagc 		    status = tre_parse(&subctx);
141263d4abf0Sagc 		    if (status != REG_OK)
141363d4abf0Sagc 		      return status;
141463d4abf0Sagc 		    ctx->re += 2;
141563d4abf0Sagc 		    ctx->position = subctx.position;
141663d4abf0Sagc 		    result = subctx.result;
141763d4abf0Sagc 		    break;
141863d4abf0Sagc 		  }
141963d4abf0Sagc 	      }
142063d4abf0Sagc 
142163d4abf0Sagc 	      if (ctx->re + 1 >= ctx->re_end)
142263d4abf0Sagc 		/* Trailing backslash. */
142363d4abf0Sagc 		return REG_EESCAPE;
142463d4abf0Sagc 
142563d4abf0Sagc #ifdef REG_LITERAL
142663d4abf0Sagc 	      if (*(ctx->re + 1) == L'Q')
142763d4abf0Sagc 		{
142863d4abf0Sagc 		  DPRINT(("tre_parse: tmp literal: '%.*" STRF "'\n",
142963d4abf0Sagc 			  REST(ctx->re)));
143063d4abf0Sagc 		  ctx->cflags |= REG_LITERAL;
143163d4abf0Sagc 		  temporary_cflags |= REG_LITERAL;
143263d4abf0Sagc 		  ctx->re += 2;
1433f2a3d147Schristos 		  STACK_PUSHX(stack, long, PARSE_ATOM);
143463d4abf0Sagc 		  break;
143563d4abf0Sagc 		}
143663d4abf0Sagc #endif /* REG_LITERAL */
143763d4abf0Sagc 
143863d4abf0Sagc 	      DPRINT(("tre_parse:  bleep: '%.*" STRF "'\n", REST(ctx->re)));
143963d4abf0Sagc 	      ctx->re++;
144063d4abf0Sagc 	      switch (*ctx->re)
144163d4abf0Sagc 		{
144263d4abf0Sagc 		case L'b':
144363d4abf0Sagc 		  result = tre_ast_new_literal(ctx->mem, ASSERTION,
144463d4abf0Sagc 					       ASSERT_AT_WB, -1);
144563d4abf0Sagc 		  ctx->re++;
144663d4abf0Sagc 		  break;
144763d4abf0Sagc 		case L'B':
144863d4abf0Sagc 		  result = tre_ast_new_literal(ctx->mem, ASSERTION,
144963d4abf0Sagc 					       ASSERT_AT_WB_NEG, -1);
145063d4abf0Sagc 		  ctx->re++;
145163d4abf0Sagc 		  break;
145263d4abf0Sagc 		case L'<':
145363d4abf0Sagc 		  result = tre_ast_new_literal(ctx->mem, ASSERTION,
145463d4abf0Sagc 					       ASSERT_AT_BOW, -1);
145563d4abf0Sagc 		  ctx->re++;
145663d4abf0Sagc 		  break;
145763d4abf0Sagc 		case L'>':
145863d4abf0Sagc 		  result = tre_ast_new_literal(ctx->mem, ASSERTION,
145963d4abf0Sagc 					       ASSERT_AT_EOW, -1);
146063d4abf0Sagc 		  ctx->re++;
146163d4abf0Sagc 		  break;
146263d4abf0Sagc 		case L'x':
146363d4abf0Sagc 		  ctx->re++;
146463d4abf0Sagc 		  if (ctx->re[0] != CHAR_LBRACE && ctx->re < ctx->re_end)
146563d4abf0Sagc 		    {
146663d4abf0Sagc 		      /* 8 bit hex char. */
146763d4abf0Sagc 		      char tmp[3] = {0, 0, 0};
146863d4abf0Sagc 		      long val;
146963d4abf0Sagc 		      DPRINT(("tre_parse:  8 bit hex: '%.*" STRF "'\n",
147063d4abf0Sagc 			      REST(ctx->re - 2)));
147163d4abf0Sagc 
147263d4abf0Sagc 		      if (tre_isxdigit(ctx->re[0]) && ctx->re < ctx->re_end)
147363d4abf0Sagc 			{
147463d4abf0Sagc 			  tmp[0] = (char)ctx->re[0];
147563d4abf0Sagc 			  ctx->re++;
147663d4abf0Sagc 			}
147763d4abf0Sagc 		      if (tre_isxdigit(ctx->re[0]) && ctx->re < ctx->re_end)
147863d4abf0Sagc 			{
147963d4abf0Sagc 			  tmp[1] = (char)ctx->re[0];
148063d4abf0Sagc 			  ctx->re++;
148163d4abf0Sagc 			}
148263d4abf0Sagc 		      val = strtol(tmp, NULL, 16);
148363d4abf0Sagc 		      result = tre_ast_new_literal(ctx->mem, (int)val,
148463d4abf0Sagc 						   (int)val, ctx->position);
148563d4abf0Sagc 		      ctx->position++;
148663d4abf0Sagc 		      break;
148763d4abf0Sagc 		    }
148863d4abf0Sagc 		  else if (ctx->re < ctx->re_end)
148963d4abf0Sagc 		    {
149063d4abf0Sagc 		      /* Wide char. */
149163d4abf0Sagc 		      char tmp[32];
149263d4abf0Sagc 		      long val;
149363d4abf0Sagc 		      int i = 0;
149463d4abf0Sagc 		      ctx->re++;
149563d4abf0Sagc 		      while (ctx->re_end - ctx->re >= 0)
149663d4abf0Sagc 			{
149763d4abf0Sagc 			  if (ctx->re[0] == CHAR_RBRACE)
149863d4abf0Sagc 			    break;
149963d4abf0Sagc 			  if (tre_isxdigit(ctx->re[0]))
150063d4abf0Sagc 			    {
150163d4abf0Sagc 			      tmp[i] = (char)ctx->re[0];
150263d4abf0Sagc 			      i++;
150363d4abf0Sagc 			      ctx->re++;
150463d4abf0Sagc 			      continue;
150563d4abf0Sagc 			    }
150663d4abf0Sagc 			  return REG_EBRACE;
150763d4abf0Sagc 			}
150863d4abf0Sagc 		      ctx->re++;
150963d4abf0Sagc 		      tmp[i] = 0;
151063d4abf0Sagc 		      val = strtol(tmp, NULL, 16);
151163d4abf0Sagc 		      result = tre_ast_new_literal(ctx->mem, (int)val, (int)val,
151263d4abf0Sagc 						   ctx->position);
151363d4abf0Sagc 		      ctx->position++;
151463d4abf0Sagc 		      break;
151563d4abf0Sagc 		    }
151663d4abf0Sagc 		  /*FALLTHROUGH*/
151763d4abf0Sagc 
151863d4abf0Sagc 		default:
151963d4abf0Sagc 		  if (tre_isdigit(*ctx->re))
152063d4abf0Sagc 		    {
152163d4abf0Sagc 		      /* Back reference. */
152263d4abf0Sagc 		      int val = *ctx->re - L'0';
152363d4abf0Sagc 		      DPRINT(("tre_parse:     backref: '%.*" STRF "'\n",
152463d4abf0Sagc 			      REST(ctx->re - 1)));
152563d4abf0Sagc 		      result = tre_ast_new_literal(ctx->mem, BACKREF, val,
152663d4abf0Sagc 						   ctx->position);
152763d4abf0Sagc 		      if (result == NULL)
152863d4abf0Sagc 			return REG_ESPACE;
152963d4abf0Sagc 		      ctx->position++;
153063d4abf0Sagc 		      ctx->max_backref = MAX(val, ctx->max_backref);
153163d4abf0Sagc 		      ctx->re++;
153263d4abf0Sagc 		    }
153363d4abf0Sagc 		  else
153463d4abf0Sagc 		    {
153563d4abf0Sagc 		      /* Escaped character. */
153663d4abf0Sagc 		      DPRINT(("tre_parse:     escaped: '%.*" STRF "'\n",
153763d4abf0Sagc 			      REST(ctx->re - 1)));
153863d4abf0Sagc 		      result = tre_ast_new_literal(ctx->mem, *ctx->re, *ctx->re,
153963d4abf0Sagc 						   ctx->position);
154063d4abf0Sagc 		      ctx->position++;
154163d4abf0Sagc 		      ctx->re++;
154263d4abf0Sagc 		    }
154363d4abf0Sagc 		  break;
154463d4abf0Sagc 		}
154563d4abf0Sagc 	      if (result == NULL)
154663d4abf0Sagc 		return REG_ESPACE;
154763d4abf0Sagc 	      break;
154863d4abf0Sagc 
154963d4abf0Sagc 	    case CHAR_PERIOD:	 /* the any-symbol */
155063d4abf0Sagc 	      DPRINT(("tre_parse:	  any: '%.*" STRF "'\n",
155163d4abf0Sagc 		      REST(ctx->re)));
155263d4abf0Sagc 	      if (ctx->cflags & REG_NEWLINE)
155363d4abf0Sagc 		{
155463d4abf0Sagc 		  tre_ast_node_t *tmp1;
155563d4abf0Sagc 		  tre_ast_node_t *tmp2;
155663d4abf0Sagc 		  tmp1 = tre_ast_new_literal(ctx->mem, 0, L'\n' - 1,
155763d4abf0Sagc 					     ctx->position);
155863d4abf0Sagc 		  if (!tmp1)
155963d4abf0Sagc 		    return REG_ESPACE;
156063d4abf0Sagc 		  tmp2 = tre_ast_new_literal(ctx->mem, L'\n' + 1, TRE_CHAR_MAX,
156163d4abf0Sagc 					     ctx->position + 1);
156263d4abf0Sagc 		  if (!tmp2)
156363d4abf0Sagc 		    return REG_ESPACE;
156463d4abf0Sagc 		  result = tre_ast_new_union(ctx->mem, tmp1, tmp2);
156563d4abf0Sagc 		  if (!result)
156663d4abf0Sagc 		    return REG_ESPACE;
156763d4abf0Sagc 		  ctx->position += 2;
156863d4abf0Sagc 		}
156963d4abf0Sagc 	      else
157063d4abf0Sagc 		{
157163d4abf0Sagc 		  result = tre_ast_new_literal(ctx->mem, 0, TRE_CHAR_MAX,
157263d4abf0Sagc 					       ctx->position);
157363d4abf0Sagc 		  if (!result)
157463d4abf0Sagc 		    return REG_ESPACE;
157563d4abf0Sagc 		  ctx->position++;
157663d4abf0Sagc 		}
157763d4abf0Sagc 	      ctx->re++;
157863d4abf0Sagc 	      break;
157963d4abf0Sagc 
158063d4abf0Sagc 	    case CHAR_CARET:	 /* beginning of line assertion */
158163d4abf0Sagc 	      /* '^' has a special meaning everywhere in EREs, and in the
158263d4abf0Sagc 		 beginning of the RE and after \( is BREs. */
158363d4abf0Sagc 	      if (ctx->cflags & REG_EXTENDED
158463d4abf0Sagc 		  || (ctx->re - 2 >= ctx->re_start
158563d4abf0Sagc 		      && *(ctx->re - 2) == CHAR_BACKSLASH
158663d4abf0Sagc 		      && *(ctx->re - 1) == CHAR_LPAREN)
158763d4abf0Sagc 		  || ctx->re == ctx->re_start)
158863d4abf0Sagc 		{
158963d4abf0Sagc 		  DPRINT(("tre_parse:	      BOL: '%.*" STRF "'\n",
159063d4abf0Sagc 			  REST(ctx->re)));
159163d4abf0Sagc 		  result = tre_ast_new_literal(ctx->mem, ASSERTION,
159263d4abf0Sagc 					       ASSERT_AT_BOL, -1);
159363d4abf0Sagc 		  if (result == NULL)
159463d4abf0Sagc 		    return REG_ESPACE;
159563d4abf0Sagc 		  ctx->re++;
159663d4abf0Sagc 		}
159763d4abf0Sagc 	      else
159863d4abf0Sagc 		goto parse_literal;
159963d4abf0Sagc 	      break;
160063d4abf0Sagc 
160163d4abf0Sagc 	    case CHAR_DOLLAR:	 /* end of line assertion. */
160263d4abf0Sagc 	      /* '$' is special everywhere in EREs, and in the end of the
160363d4abf0Sagc 		 string and before \) is BREs. */
160463d4abf0Sagc 	      if (ctx->cflags & REG_EXTENDED
160563d4abf0Sagc 		  || (ctx->re + 2 < ctx->re_end
160663d4abf0Sagc 		      && *(ctx->re + 1) == CHAR_BACKSLASH
160763d4abf0Sagc 		      && *(ctx->re + 2) == CHAR_RPAREN)
160863d4abf0Sagc 		  || ctx->re + 1 == ctx->re_end)
160963d4abf0Sagc 		{
161063d4abf0Sagc 		  DPRINT(("tre_parse:	      EOL: '%.*" STRF "'\n",
161163d4abf0Sagc 			  REST(ctx->re)));
161263d4abf0Sagc 		  result = tre_ast_new_literal(ctx->mem, ASSERTION,
161363d4abf0Sagc 					       ASSERT_AT_EOL, -1);
161463d4abf0Sagc 		  if (result == NULL)
161563d4abf0Sagc 		    return REG_ESPACE;
161663d4abf0Sagc 		  ctx->re++;
161763d4abf0Sagc 		}
161863d4abf0Sagc 	      else
161963d4abf0Sagc 		goto parse_literal;
162063d4abf0Sagc 	      break;
162163d4abf0Sagc 
162263d4abf0Sagc 	    default:
162363d4abf0Sagc 	    parse_literal:
162463d4abf0Sagc 
162563d4abf0Sagc 	      if (temporary_cflags && ctx->re + 1 < ctx->re_end
162663d4abf0Sagc 		  && *ctx->re == CHAR_BACKSLASH && *(ctx->re + 1) == L'E')
162763d4abf0Sagc 		{
162863d4abf0Sagc 		  DPRINT(("tre_parse:	 end tmps: '%.*" STRF "'\n",
162963d4abf0Sagc 			  REST(ctx->re)));
163063d4abf0Sagc 		  ctx->cflags &= ~temporary_cflags;
163163d4abf0Sagc 		  temporary_cflags = 0;
163263d4abf0Sagc 		  ctx->re += 2;
1633f2a3d147Schristos 		  STACK_PUSHX(stack, long, PARSE_PIECE);
163463d4abf0Sagc 		  break;
163563d4abf0Sagc 		}
163663d4abf0Sagc 
163763d4abf0Sagc 
163863d4abf0Sagc 	      /* We are expecting an atom.  If the subexpression (or the whole
163963d4abf0Sagc 		 regexp ends here, we interpret it as an empty expression
164063d4abf0Sagc 		 (which matches an empty string).  */
164163d4abf0Sagc 	      if (
164263d4abf0Sagc #ifdef REG_LITERAL
164363d4abf0Sagc 		  !(ctx->cflags & REG_LITERAL) &&
164463d4abf0Sagc #endif /* REG_LITERAL */
164563d4abf0Sagc 		  (ctx->re >= ctx->re_end
164663d4abf0Sagc 		   || *ctx->re == CHAR_STAR
164763d4abf0Sagc 		   || (ctx->cflags & REG_EXTENDED
164863d4abf0Sagc 		       && (*ctx->re == CHAR_PIPE
164963d4abf0Sagc 			   || *ctx->re == CHAR_LBRACE
165063d4abf0Sagc 			   || *ctx->re == CHAR_PLUS
165163d4abf0Sagc 			   || *ctx->re == CHAR_QUESTIONMARK))
165263d4abf0Sagc 		   /* Test for "\)" in BRE mode. */
165363d4abf0Sagc 		   || (!(ctx->cflags & REG_EXTENDED)
165463d4abf0Sagc 		       && ctx->re + 1 < ctx->re_end
165563d4abf0Sagc 		       && *ctx->re == CHAR_BACKSLASH
165663d4abf0Sagc 		       && *(ctx->re + 1) == CHAR_LBRACE)))
165763d4abf0Sagc 		{
165863d4abf0Sagc 		  DPRINT(("tre_parse:	    empty: '%.*" STRF "'\n",
165963d4abf0Sagc 			  REST(ctx->re)));
166063d4abf0Sagc 		  result = tre_ast_new_literal(ctx->mem, EMPTY, -1, -1);
166163d4abf0Sagc 		  if (!result)
166263d4abf0Sagc 		    return REG_ESPACE;
166363d4abf0Sagc 		  break;
166463d4abf0Sagc 		}
166563d4abf0Sagc 
166663d4abf0Sagc 	      DPRINT(("tre_parse:     literal: '%.*" STRF "'\n",
166763d4abf0Sagc 		      REST(ctx->re)));
166863d4abf0Sagc 	      /* Note that we can't use an tre_isalpha() test here, since there
166963d4abf0Sagc 		 may be characters which are alphabetic but neither upper or
167063d4abf0Sagc 		 lower case. */
167163d4abf0Sagc 	      if (ctx->cflags & REG_ICASE
167263d4abf0Sagc 		  && (tre_isupper(*ctx->re) || tre_islower(*ctx->re)))
167363d4abf0Sagc 		{
167463d4abf0Sagc 		  tre_ast_node_t *tmp1;
167563d4abf0Sagc 		  tre_ast_node_t *tmp2;
167663d4abf0Sagc 
167763d4abf0Sagc 		  /* XXX - Can there be more than one opposite-case
167863d4abf0Sagc 		     counterpoints for some character in some locale?  Or
167963d4abf0Sagc 		     more than two characters which all should be regarded
168063d4abf0Sagc 		     the same character if case is ignored?  If yes, there
168163d4abf0Sagc 		     does not seem to be a portable way to detect it.  I guess
168263d4abf0Sagc 		     that at least for multi-character collating elements there
168363d4abf0Sagc 		     could be several opposite-case counterpoints, but they
168463d4abf0Sagc 		     cannot be supported portably anyway. */
168563d4abf0Sagc 		  tmp1 = tre_ast_new_literal(ctx->mem, tre_toupper(*ctx->re),
168663d4abf0Sagc 					     tre_toupper(*ctx->re),
168763d4abf0Sagc 					     ctx->position);
168863d4abf0Sagc 		  if (!tmp1)
168963d4abf0Sagc 		    return REG_ESPACE;
169063d4abf0Sagc 		  tmp2 = tre_ast_new_literal(ctx->mem, tre_tolower(*ctx->re),
169163d4abf0Sagc 					     tre_tolower(*ctx->re),
169263d4abf0Sagc 					     ctx->position);
169363d4abf0Sagc 		  if (!tmp2)
169463d4abf0Sagc 		    return REG_ESPACE;
169563d4abf0Sagc 		  result = tre_ast_new_union(ctx->mem, tmp1, tmp2);
169663d4abf0Sagc 		  if (!result)
169763d4abf0Sagc 		    return REG_ESPACE;
169863d4abf0Sagc 		}
169963d4abf0Sagc 	      else
170063d4abf0Sagc 		{
170163d4abf0Sagc 		  result = tre_ast_new_literal(ctx->mem, *ctx->re, *ctx->re,
170263d4abf0Sagc 					       ctx->position);
170363d4abf0Sagc 		  if (!result)
170463d4abf0Sagc 		    return REG_ESPACE;
170563d4abf0Sagc 		}
170663d4abf0Sagc 	      ctx->position++;
170763d4abf0Sagc 	      ctx->re++;
170863d4abf0Sagc 	      break;
170963d4abf0Sagc 	    }
171063d4abf0Sagc 	  break;
171163d4abf0Sagc 
171263d4abf0Sagc 	case PARSE_MARK_FOR_SUBMATCH:
171363d4abf0Sagc 	  {
171463d4abf0Sagc 	    int submatch_id = tre_stack_pop_int(stack);
171563d4abf0Sagc 
171663d4abf0Sagc 	    if (result->submatch_id >= 0)
171763d4abf0Sagc 	      {
171863d4abf0Sagc 		tre_ast_node_t *n, *tmp_node;
171963d4abf0Sagc 		n = tre_ast_new_literal(ctx->mem, EMPTY, -1, -1);
172063d4abf0Sagc 		if (n == NULL)
172163d4abf0Sagc 		  return REG_ESPACE;
172263d4abf0Sagc 		tmp_node = tre_ast_new_catenation(ctx->mem, n, result);
172363d4abf0Sagc 		if (tmp_node == NULL)
172463d4abf0Sagc 		  return REG_ESPACE;
172563d4abf0Sagc 		tmp_node->num_submatches = result->num_submatches;
172663d4abf0Sagc 		result = tmp_node;
172763d4abf0Sagc 	      }
172863d4abf0Sagc 	    result->submatch_id = submatch_id;
172963d4abf0Sagc 	    result->num_submatches++;
173063d4abf0Sagc 	    break;
173163d4abf0Sagc 	  }
173263d4abf0Sagc 
173363d4abf0Sagc 	case PARSE_RESTORE_CFLAGS:
173463d4abf0Sagc 	  ctx->cflags = tre_stack_pop_int(stack);
173563d4abf0Sagc 	  break;
173663d4abf0Sagc 
173763d4abf0Sagc 	default:
173863d4abf0Sagc 	  assert(0);
173963d4abf0Sagc 	  break;
174063d4abf0Sagc 	}
174163d4abf0Sagc     }
174263d4abf0Sagc 
174363d4abf0Sagc   /* Check for missing closing parentheses. */
174463d4abf0Sagc   if (depth > 0)
174563d4abf0Sagc     return REG_EPAREN;
174663d4abf0Sagc 
174763d4abf0Sagc   if (status == REG_OK)
174863d4abf0Sagc     ctx->result = result;
174963d4abf0Sagc 
175063d4abf0Sagc   return status;
175163d4abf0Sagc }
175263d4abf0Sagc 
175363d4abf0Sagc /* EOF */
1754