xref: /dflybsd-src/contrib/tre/lib/tre-parse.c (revision 122b686e384c0fb6cfc3dd8cf3a16b87651fc609)
15f2eab64SJohn Marino /*
25f2eab64SJohn Marino   tre-parse.c - Regexp parser
35f2eab64SJohn Marino 
45f2eab64SJohn Marino   This software is released under a BSD-style license.
55f2eab64SJohn Marino   See the file LICENSE for details and copyright.
65f2eab64SJohn Marino 
75f2eab64SJohn Marino */
85f2eab64SJohn Marino 
95f2eab64SJohn Marino /*
105f2eab64SJohn Marino   This parser is just a simple recursive descent parser for POSIX.2
115f2eab64SJohn Marino   regexps.  The parser supports both the obsolete default syntax and
125f2eab64SJohn Marino   the "extended" syntax, and some nonstandard extensions.
135f2eab64SJohn Marino */
145f2eab64SJohn Marino 
155f2eab64SJohn Marino 
165f2eab64SJohn Marino #ifdef HAVE_CONFIG_H
175f2eab64SJohn Marino #include <config.h>
185f2eab64SJohn Marino #endif /* HAVE_CONFIG_H */
195f2eab64SJohn Marino #include <string.h>
205f2eab64SJohn Marino #include <assert.h>
215f2eab64SJohn Marino #include <limits.h>
22d5f8dde1SJohn Marino #include <stddef.h>
235f2eab64SJohn Marino 
245f2eab64SJohn Marino #include "xmalloc.h"
255f2eab64SJohn Marino #include "tre-mem.h"
265f2eab64SJohn Marino #include "tre-ast.h"
275f2eab64SJohn Marino #include "tre-stack.h"
285f2eab64SJohn Marino #include "tre-parse.h"
295f2eab64SJohn Marino 
30d5f8dde1SJohn Marino #include "xlocale_private.h"
31d5f8dde1SJohn Marino #include "collate.h"
32d5f8dde1SJohn Marino 
33d5f8dde1SJohn Marino /* BSD compatibility:
34d5f8dde1SJohn Marino      Before looking up a collating symbol, check if the name matches in
35d5f8dde1SJohn Marino      the character names (cnames) array; if so, use the corresponding
36d5f8dde1SJohn Marino      character.
37d5f8dde1SJohn Marino 
38d5f8dde1SJohn Marino      Also set ERE_LITERAL_LBRACE_ON_NON_NUMERIC_BOUND, which will preserve
39d5f8dde1SJohn Marino      the implementation choice that for ERE, a non-numeric character following
40d5f8dde1SJohn Marino      a left brace that would normally be a bound, causes the left brace to be
41d5f8dde1SJohn Marino      literal. */
42d5f8dde1SJohn Marino #define BSD_COMPATIBILITY
43d5f8dde1SJohn Marino #ifdef BSD_COMPATIBILITY
44d5f8dde1SJohn Marino #include "cname.h"
45d5f8dde1SJohn Marino #define ERE_LITERAL_LBRACE_ON_NON_NUMERIC_BOUND
46d5f8dde1SJohn Marino #endif /* BSD_COMPATIBILITY */
475f2eab64SJohn Marino 
/* Characters with special meanings in regexp syntax. */

/* Paired delimiters. */
#define CHAR_LPAREN        L'('
#define CHAR_RPAREN        L')'
#define CHAR_LBRACE        L'{'
#define CHAR_RBRACE        L'}'
#define CHAR_LBRACKET      L'['
#define CHAR_RBRACKET      L']'

/* Remaining single characters. */
#define CHAR_PIPE          L'|'
#define CHAR_MINUS         L'-'
#define CHAR_STAR          L'*'
#define CHAR_QUESTIONMARK  L'?'
#define CHAR_PLUS          L'+'
#define CHAR_PERIOD        L'.'
#define CHAR_COLON         L':'
#define CHAR_EQUAL         L'='
#define CHAR_COMMA         L','
#define CHAR_CARET         L'^'
#define CHAR_DOLLAR        L'$'
#define CHAR_BACKSLASH     L'\\'
#define CHAR_HASH          L'#'
#define CHAR_TILDE         L'~'
695f2eab64SJohn Marino 
705f2eab64SJohn Marino 
/* Table of single-character backslash macros (\w, \s, ...) and the text
   each one expands to.  The table is terminated by an entry whose
   `expansion' is NULL. */
static const struct tre_macro_struct {
  const char c;
  const char *expansion;
} tre_macros[] = {
  /* Macros that expand to a single literal character. */
  {'t', "\t"},
  {'n', "\n"},
  {'r', "\r"},
  {'f', "\f"},
  {'a', "\a"},
  {'e', "\033"},
  /* Macros that expand to a bracket expression. */
  {'w', "[[:alnum:]_]"},
  {'W', "[^[:alnum:]_]"},
  {'s', "[[:space:]]"},
  {'S', "[^[:space:]]"},
  {'d', "[[:digit:]]"},
  {'D', "[^[:digit:]]"},
  /* Terminator. */
  { 0, NULL }
};
825f2eab64SJohn Marino 
835f2eab64SJohn Marino 
845f2eab64SJohn Marino /* Expands a macro delimited by `regex' and `regex_end' to `buf', which
855f2eab64SJohn Marino    must have at least `len' items.  Sets buf[0] to zero if the there
865f2eab64SJohn Marino    is no match in `tre_macros'. */
875f2eab64SJohn Marino static void
tre_expand_macro(const tre_char_t * regex,const tre_char_t * regex_end,tre_char_t * buf,size_t buf_len)885f2eab64SJohn Marino tre_expand_macro(const tre_char_t *regex, const tre_char_t *regex_end,
895f2eab64SJohn Marino 		 tre_char_t *buf, size_t buf_len)
905f2eab64SJohn Marino {
915f2eab64SJohn Marino   int i;
925f2eab64SJohn Marino 
935f2eab64SJohn Marino   buf[0] = 0;
945f2eab64SJohn Marino   if (regex >= regex_end)
955f2eab64SJohn Marino     return;
965f2eab64SJohn Marino 
975f2eab64SJohn Marino   for (i = 0; tre_macros[i].expansion; i++)
985f2eab64SJohn Marino     {
995f2eab64SJohn Marino       if (tre_macros[i].c == *regex)
1005f2eab64SJohn Marino 	{
1015f2eab64SJohn Marino 	  unsigned int j;
1025f2eab64SJohn Marino 	  DPRINT(("Expanding macro '%c' => '%s'\n",
1035f2eab64SJohn Marino 		  tre_macros[i].c, tre_macros[i].expansion));
1045f2eab64SJohn Marino 	  for (j = 0; tre_macros[i].expansion[j] && j < buf_len; j++)
1055f2eab64SJohn Marino 	    buf[j] = tre_macros[i].expansion[j];
1065f2eab64SJohn Marino 	  buf[j] = 0;
1075f2eab64SJohn Marino 	  break;
1085f2eab64SJohn Marino 	}
1095f2eab64SJohn Marino     }
1105f2eab64SJohn Marino }
1115f2eab64SJohn Marino 
1125f2eab64SJohn Marino static reg_errcode_t
tre_new_item(tre_mem_t mem,int type,int val,int * max_i,tre_bracket_match_list_t ** items)113d5f8dde1SJohn Marino tre_new_item(tre_mem_t mem, int type, int val, int *max_i,
114d5f8dde1SJohn Marino 	 tre_bracket_match_list_t **items)
1155f2eab64SJohn Marino {
116d5f8dde1SJohn Marino   reg_errcode_t status = REG_OK;
117d5f8dde1SJohn Marino   tre_bracket_match_list_t *array = *items;
118d5f8dde1SJohn Marino   int i = array->num_bracket_matches;
1195f2eab64SJohn Marino   /* Allocate more space if necessary. */
120d5f8dde1SJohn Marino   if (i >= *max_i)
1215f2eab64SJohn Marino     {
122d5f8dde1SJohn Marino       tre_bracket_match_list_t *new_items;
123d5f8dde1SJohn Marino       DPRINT(("out of tre_bracket_match_list_t array space (%d)\n", i));
1245f2eab64SJohn Marino       /* If the array is already 1024 items large, give up -- there's
1255f2eab64SJohn Marino 	 probably an error in the regexp (e.g. not a '\0' terminated
1265f2eab64SJohn Marino 	 string and missing ']') */
127d5f8dde1SJohn Marino       if (*max_i >= 1024)
1285f2eab64SJohn Marino 	return REG_ESPACE;
1295f2eab64SJohn Marino       *max_i *= 2;
130d5f8dde1SJohn Marino       new_items = xrealloc(array, SIZEOF_BRACKET_MATCH_LIST_N(*max_i));
1315f2eab64SJohn Marino       if (new_items == NULL)
1325f2eab64SJohn Marino 	return REG_ESPACE;
1335f2eab64SJohn Marino       *items = array = new_items;
1345f2eab64SJohn Marino     }
135d5f8dde1SJohn Marino   array->bracket_matches[i].type = type;
136d5f8dde1SJohn Marino   array->bracket_matches[i].value = val;
137d5f8dde1SJohn Marino   array->num_bracket_matches++;
1385f2eab64SJohn Marino   return status;
1395f2eab64SJohn Marino }
1405f2eab64SJohn Marino 
1415f2eab64SJohn Marino #ifndef TRE_USE_SYSTEM_WCTYPE
1425f2eab64SJohn Marino 
/* The tre_is*() classifiers may be macros, so each one is wrapped in a
   real function here; tre_ctype_map stores pointers to these wrappers. */

int
tre_isalnum_func(tre_cint_t c)
{
  return tre_isalnum(c);
}

int
tre_isalpha_func(tre_cint_t c)
{
  return tre_isalpha(c);
}

/* Uses tre_isascii() when it is defined as a macro; otherwise treats a
   value with no bits set above the low seven as ASCII. */
#ifdef tre_isascii
int
tre_isascii_func(tre_cint_t c)
{
  return tre_isascii(c);
}
#else /* !tre_isascii */
int
tre_isascii_func(tre_cint_t c)
{
  return !(c >> 7);
}
#endif /* !tre_isascii */

/* Uses tre_isblank() when it is defined as a macro; otherwise accepts
   only space and horizontal tab. */
#ifdef tre_isblank
int
tre_isblank_func(tre_cint_t c)
{
  return tre_isblank(c);
}
#else /* !tre_isblank */
int
tre_isblank_func(tre_cint_t c)
{
  return ((c == ' ') || (c == '\t'));
}
#endif /* !tre_isblank */

int
tre_iscntrl_func(tre_cint_t c)
{
  return tre_iscntrl(c);
}

int
tre_isdigit_func(tre_cint_t c)
{
  return tre_isdigit(c);
}

int
tre_isgraph_func(tre_cint_t c)
{
  return tre_isgraph(c);
}

int
tre_islower_func(tre_cint_t c)
{
  return tre_islower(c);
}

int
tre_isprint_func(tre_cint_t c)
{
  return tre_isprint(c);
}

int
tre_ispunct_func(tre_cint_t c)
{
  return tre_ispunct(c);
}

int
tre_isspace_func(tre_cint_t c)
{
  return tre_isspace(c);
}

int
tre_isupper_func(tre_cint_t c)
{
  return tre_isupper(c);
}

int
tre_isxdigit_func(tre_cint_t c)
{
  return tre_isxdigit(c);
}
1685f2eab64SJohn Marino 
1695f2eab64SJohn Marino struct {
1705f2eab64SJohn Marino   char *name;
1715f2eab64SJohn Marino   int (*func)(tre_cint_t);
1725f2eab64SJohn Marino } tre_ctype_map[] = {
1735f2eab64SJohn Marino   { "alnum", &tre_isalnum_func },
1745f2eab64SJohn Marino   { "alpha", &tre_isalpha_func },
1755f2eab64SJohn Marino #ifdef tre_isascii
1765f2eab64SJohn Marino   { "ascii", &tre_isascii_func },
1775f2eab64SJohn Marino #endif /* tre_isascii */
1785f2eab64SJohn Marino #ifdef tre_isblank
1795f2eab64SJohn Marino   { "blank", &tre_isblank_func },
1805f2eab64SJohn Marino #endif /* tre_isblank */
1815f2eab64SJohn Marino   { "cntrl", &tre_iscntrl_func },
1825f2eab64SJohn Marino   { "digit", &tre_isdigit_func },
1835f2eab64SJohn Marino   { "graph", &tre_isgraph_func },
1845f2eab64SJohn Marino   { "lower", &tre_islower_func },
1855f2eab64SJohn Marino   { "print", &tre_isprint_func },
1865f2eab64SJohn Marino   { "punct", &tre_ispunct_func },
1875f2eab64SJohn Marino   { "space", &tre_isspace_func },
1885f2eab64SJohn Marino   { "upper", &tre_isupper_func },
1895f2eab64SJohn Marino   { "xdigit", &tre_isxdigit_func },
1905f2eab64SJohn Marino   { NULL, NULL}
1915f2eab64SJohn Marino };
1925f2eab64SJohn Marino 
tre_ctype(const char * name)1935f2eab64SJohn Marino tre_ctype_t tre_ctype(const char *name)
1945f2eab64SJohn Marino {
1955f2eab64SJohn Marino   int i;
1965f2eab64SJohn Marino   for (i = 0; tre_ctype_map[i].name != NULL; i++)
1975f2eab64SJohn Marino     {
1985f2eab64SJohn Marino       if (strcmp(name, tre_ctype_map[i].name) == 0)
1995f2eab64SJohn Marino 	return tre_ctype_map[i].func;
2005f2eab64SJohn Marino     }
2015f2eab64SJohn Marino   return (tre_ctype_t)0;
2025f2eab64SJohn Marino }
2035f2eab64SJohn Marino #endif /* !TRE_USE_SYSTEM_WCTYPE */
2045f2eab64SJohn Marino 
/* Expands to TWO comma-separated arguments for a "%.*s"-style debug
   format: the number of items left between `re' and ctx->re_end, then
   `re' itself.  Requires a variable named `ctx' in scope at the point of
   use. */
#define REST(re) (int)(ctx->re_end - (re)), (re)

/* Number of collating-symbol slots allocated for the first block. */
#define START_COLLATING_SYMBOLS         16
#define MAX_COLLATING_SYMBOL_LEN        4
209d5f8dde1SJohn Marino 
210d5f8dde1SJohn Marino typedef struct {
211d5f8dde1SJohn Marino   const tre_char_t *start;
212d5f8dde1SJohn Marino   int len;
213d5f8dde1SJohn Marino } tre_collating_symbol;
214d5f8dde1SJohn Marino 
215d5f8dde1SJohn Marino #ifdef BSD_COMPATIBILITY
216d5f8dde1SJohn Marino static wchar_t
tre_search_cnames(const wchar_t * name,size_t len)217d5f8dde1SJohn Marino tre_search_cnames(const wchar_t *name, size_t len)
218d5f8dde1SJohn Marino {
219d5f8dde1SJohn Marino   size_t low = 0;
220d5f8dde1SJohn Marino   size_t high = NCNAMES - 1;
221d5f8dde1SJohn Marino   size_t cur;
222d5f8dde1SJohn Marino   int cmp;
223d5f8dde1SJohn Marino 
224d5f8dde1SJohn Marino   while(low <= high)
225d5f8dde1SJohn Marino     {
226d5f8dde1SJohn Marino       cur = (low + high) / 2;
227d5f8dde1SJohn Marino       cmp = wcsncmp(name, cnames[cur].name, len);
228d5f8dde1SJohn Marino       if (cmp == 0 && cnames[cur].name[len] == 0) return cnames[cur].code;
229d5f8dde1SJohn Marino       if (cmp > 0) low = cur + 1;
230d5f8dde1SJohn Marino       else high = cur - 1;
231d5f8dde1SJohn Marino     }
232d5f8dde1SJohn Marino   return (wchar_t)-1;
233d5f8dde1SJohn Marino }
234d5f8dde1SJohn Marino #endif /* BSD_COMPATIBILITY */
235d5f8dde1SJohn Marino 
/* Scan the contents of a bracket expression, and create a
 * tre_bracket_match_list_t encoding the bracket expression.  If during
 * the scan, multi-character collating symbols are detected, switch
 * into a mode to collect those MCCSs into a tre_collating_symbol
 * list and pass them back.  tre_parse_bracket will use that to
 * create a new string composed of a union of the bracket expression
 * without the MCCSs and the MCCSs (e.g., [x[.ch.]] => [x]|ch), and
 * call tre_parse (recursive) to parse that new string (which will
 * call tre_parse_bracket and tre_parse_bracket_items again). */
2455f2eab64SJohn Marino static reg_errcode_t
tre_parse_bracket_items(tre_parse_ctx_t * ctx,tre_bracket_match_list_t ** items,int * items_size,tre_collating_symbol ** result)246d5f8dde1SJohn Marino tre_parse_bracket_items(tre_parse_ctx_t *ctx, tre_bracket_match_list_t **items,
247d5f8dde1SJohn Marino 			int *items_size, tre_collating_symbol **result)
2485f2eab64SJohn Marino {
2495f2eab64SJohn Marino   const tre_char_t *re = ctx->re;
250d5f8dde1SJohn Marino   const tre_char_t *re_end = ctx->re_end;
251d5f8dde1SJohn Marino   tre_collating_symbol *col_syms = NULL;
252d5f8dde1SJohn Marino   tre_collating_symbol *cp = NULL;
253d5f8dde1SJohn Marino   int n_col_syms = 0;
254d5f8dde1SJohn Marino   reg_errcode_t status;
2555f2eab64SJohn Marino   int max_i = *items_size;
256d5f8dde1SJohn Marino   int other = 0;  /* contains content other than multi-character collating
257d5f8dde1SJohn Marino 		   * symbols */
258d5f8dde1SJohn Marino   int range = -1; /* -1 unset, 0 begin range set, +1 end range expected */
259d5f8dde1SJohn Marino   tre_cint_t min, c;
260d5f8dde1SJohn Marino   int invert = ((*items)->flags & TRE_BRACKET_MATCH_FLAG_NEGATE);
261d5f8dde1SJohn Marino   int collect_MCCS = 0;
262d5f8dde1SJohn Marino   const tre_char_t *start;
2635f2eab64SJohn Marino 
264d5f8dde1SJohn Marino   for ( ;re < re_end; re++)
2655f2eab64SJohn Marino     {
266d5f8dde1SJohn Marino       switch (*re)
267d5f8dde1SJohn Marino 	{
268d5f8dde1SJohn Marino 	case CHAR_MINUS:
269d5f8dde1SJohn Marino 	  /* A first hyphen */
270d5f8dde1SJohn Marino 	  if (re == ctx->re)
271d5f8dde1SJohn Marino 	    {
272d5f8dde1SJohn Marino 	      DPRINT(("tre_parse_bracket:   char: '%.*" STRF "'\n", REST(re)));
273d5f8dde1SJohn Marino 	      min = CHAR_MINUS;
274d5f8dde1SJohn Marino 	      other++;
275d5f8dde1SJohn Marino 	      range = 0;
276d5f8dde1SJohn Marino 	      break;
277d5f8dde1SJohn Marino 	    }
278d5f8dde1SJohn Marino 	  /* The hyphen is the end range */
279d5f8dde1SJohn Marino 	  if (range > 0)
280d5f8dde1SJohn Marino 	    {
281d5f8dde1SJohn Marino 	      DPRINT(("tre_parse_bracket:   char: '%.*" STRF "'\n", REST(re)));
282d5f8dde1SJohn Marino 	      c = CHAR_MINUS;
283d5f8dde1SJohn Marino 	      goto process_end_range;
284d5f8dde1SJohn Marino 	    }
285d5f8dde1SJohn Marino 	  if (re + 1 >= re_end)
2865f2eab64SJohn Marino 	    {
2875f2eab64SJohn Marino 	      status = REG_EBRACK;
288d5f8dde1SJohn Marino 	      goto error;
2895f2eab64SJohn Marino 	    }
290d5f8dde1SJohn Marino 	  /* The hyphen is at the end */
291d5f8dde1SJohn Marino 	  if (re[1] == CHAR_RBRACKET)
2925f2eab64SJohn Marino 	    {
293d5f8dde1SJohn Marino 	      DPRINT(("tre_parse_bracket:   char: '%.*" STRF "'\n", REST(re)));
294d5f8dde1SJohn Marino 	      c = CHAR_MINUS;
295d5f8dde1SJohn Marino 	      goto process_begin_range;
296d5f8dde1SJohn Marino 	    }
297d5f8dde1SJohn Marino 	  /* Two ranges are not allowed to share an endpoint, or begin
298d5f8dde1SJohn Marino 	   * range is illegal. */
299d5f8dde1SJohn Marino 	  if (range < 0)
300d5f8dde1SJohn Marino 	    {
301d5f8dde1SJohn Marino 	      status = REG_ERANGE;
302d5f8dde1SJohn Marino 	      goto error;
303d5f8dde1SJohn Marino 	    }
304d5f8dde1SJohn Marino 	  range = 1; /* Expect end range */
305d5f8dde1SJohn Marino 	  DPRINT(("tre_parse_bracket:   range: '%.*" STRF "'\n", REST(re)));
306d5f8dde1SJohn Marino 	  break;
307d5f8dde1SJohn Marino 
308d5f8dde1SJohn Marino 	case CHAR_LBRACKET:
309d5f8dde1SJohn Marino 	  if (re + 1 >= re_end)
310d5f8dde1SJohn Marino 	    {
311d5f8dde1SJohn Marino 	      status = REG_EBRACK;
312d5f8dde1SJohn Marino 	      goto error;
313d5f8dde1SJohn Marino 	    }
314d5f8dde1SJohn Marino 	  switch (re[1])
315d5f8dde1SJohn Marino 	    {
316d5f8dde1SJohn Marino 	    case CHAR_PERIOD:
317d5f8dde1SJohn Marino 	      {
318d5f8dde1SJohn Marino 		re += 2;
319d5f8dde1SJohn Marino 		start = re;
320d5f8dde1SJohn Marino 		for (;; re++)
321d5f8dde1SJohn Marino 		  {
322d5f8dde1SJohn Marino 		    if (re >= re_end)
323d5f8dde1SJohn Marino 		      {
324d5f8dde1SJohn Marino 			status = REG_ECOLLATE;
325d5f8dde1SJohn Marino 			goto error;
326d5f8dde1SJohn Marino 		      }
327d5f8dde1SJohn Marino 		    if (*re == CHAR_PERIOD)
328d5f8dde1SJohn Marino 		      {
329d5f8dde1SJohn Marino 			if (re + 1 >= re_end)
330d5f8dde1SJohn Marino 			  {
331d5f8dde1SJohn Marino 			    status = REG_ECOLLATE;
332d5f8dde1SJohn Marino 			    goto error;
333d5f8dde1SJohn Marino 			  }
334d5f8dde1SJohn Marino 			/* Found end */
335d5f8dde1SJohn Marino 			if (re[1] == CHAR_RBRACKET)
336d5f8dde1SJohn Marino 			  {
337d5f8dde1SJohn Marino 			    DPRINT(("tre_parse_bracket:   collating "
338d5f8dde1SJohn Marino 				    "symbol: '%.*" STRF "'\n",
339d5f8dde1SJohn Marino 				    REST(start - 2)));
340d5f8dde1SJohn Marino 			    /* Empty name */
341d5f8dde1SJohn Marino 			    if (re == start)
342d5f8dde1SJohn Marino 			      {
343d5f8dde1SJohn Marino 				status = REG_ECOLLATE;
344d5f8dde1SJohn Marino 				goto error;
345d5f8dde1SJohn Marino 			      }
346d5f8dde1SJohn Marino #ifdef BSD_COMPATIBILITY
347d5f8dde1SJohn Marino 			    /* Check if the name is in cnames; if so, use
348d5f8dde1SJohn Marino 			       the corresponding code */
349d5f8dde1SJohn Marino 			    c = tre_search_cnames(start, re - start);
350d5f8dde1SJohn Marino 			    if (c != (wchar_t)-1)
351d5f8dde1SJohn Marino 			      {
352d5f8dde1SJohn Marino 				re++;
353d5f8dde1SJohn Marino 				goto process_single_character;
354d5f8dde1SJohn Marino 			      }
355d5f8dde1SJohn Marino #endif /* BSD_COMPATIBILITY */
356d5f8dde1SJohn Marino 			    /* Verify this is a known sequence */
357d5f8dde1SJohn Marino 			    if (__collate_equiv_value(ctx->loc, start,
358d5f8dde1SJohn Marino 							  re - start) <= 0)
359d5f8dde1SJohn Marino 			      {
360d5f8dde1SJohn Marino 				status = REG_ECOLLATE;
361d5f8dde1SJohn Marino 				goto error;
362d5f8dde1SJohn Marino 			      }
363d5f8dde1SJohn Marino 			    /* Process single character collating symbols */
364d5f8dde1SJohn Marino 			    if (re - start == 1)
365d5f8dde1SJohn Marino 			      {
366d5f8dde1SJohn Marino 				c = *start;
367d5f8dde1SJohn Marino 				re++;
368d5f8dde1SJohn Marino 				goto process_single_character;
369d5f8dde1SJohn Marino 			      }
370d5f8dde1SJohn Marino 			    /* Inverted MCCSs are undefined */
371d5f8dde1SJohn Marino 			    if (invert)
372d5f8dde1SJohn Marino 			      {
373d5f8dde1SJohn Marino 				status = REG_ECOLLATE;
374d5f8dde1SJohn Marino 				goto error;
375d5f8dde1SJohn Marino 			      }
376d5f8dde1SJohn Marino 			    /* Can't have MCCSs as an endpoint to a range */
377d5f8dde1SJohn Marino 			    if (range > 0)
378d5f8dde1SJohn Marino 			      {
379d5f8dde1SJohn Marino 				status = REG_ERANGE;
380d5f8dde1SJohn Marino 				goto error;
381d5f8dde1SJohn Marino 			      }
382d5f8dde1SJohn Marino 			    range = -1;
383d5f8dde1SJohn Marino 			    /* Switch into MCCS collection mode (if not
384d5f8dde1SJohn Marino 			     * already there */
385d5f8dde1SJohn Marino #if TRE_DEBUG
386d5f8dde1SJohn Marino 			    if (!collect_MCCS)
387d5f8dde1SJohn Marino 			      {
388d5f8dde1SJohn Marino 				collect_MCCS = 1;
389d5f8dde1SJohn Marino 				DPRINT(("tre_parse_bracket: Detected MCCS\n"));
390d5f8dde1SJohn Marino 			      }
391d5f8dde1SJohn Marino #else /* !TRE_DEBUG */
392d5f8dde1SJohn Marino 			    collect_MCCS = 1;
393d5f8dde1SJohn Marino #endif /* !TRE_DEBUG */
394d5f8dde1SJohn Marino 			    /* Allocate a memory block the first time */
395d5f8dde1SJohn Marino 			    if (!cp)
396d5f8dde1SJohn Marino 			      {
397d5f8dde1SJohn Marino 				if ((col_syms = xmalloc(sizeof(*col_syms) *
398d5f8dde1SJohn Marino 					    (START_COLLATING_SYMBOLS + 2)))
399d5f8dde1SJohn Marino 					    == NULL)
400d5f8dde1SJohn Marino 				  return REG_ESPACE;
401d5f8dde1SJohn Marino 				cp = col_syms + 1;
402d5f8dde1SJohn Marino 				n_col_syms = START_COLLATING_SYMBOLS;
403d5f8dde1SJohn Marino 			      }
404d5f8dde1SJohn Marino 			    /* Enlarge the memory block is more is needed */
405d5f8dde1SJohn Marino 			    if ((cp - col_syms) - 1 >= n_col_syms)
406d5f8dde1SJohn Marino 			      {
407d5f8dde1SJohn Marino 				int i = n_col_syms;
408d5f8dde1SJohn Marino 				tre_collating_symbol *tmp =
409d5f8dde1SJohn Marino 				    xrealloc(col_syms, sizeof(*col_syms) *
410d5f8dde1SJohn Marino 					     ((n_col_syms *= 2) + 2));
411d5f8dde1SJohn Marino 				if (tmp == NULL)
412d5f8dde1SJohn Marino 				  {
413d5f8dde1SJohn Marino 				    xfree(col_syms);
414d5f8dde1SJohn Marino 				    return REG_ESPACE;
415d5f8dde1SJohn Marino 				  }
416d5f8dde1SJohn Marino 				DPRINT(("tre_list_collating_symbols: "
417d5f8dde1SJohn Marino 					"Enlarging col_syms to %d\n",
418d5f8dde1SJohn Marino 					n_col_syms));
419d5f8dde1SJohn Marino 				col_syms = tmp;
420d5f8dde1SJohn Marino 				cp = col_syms + i + 1;
421d5f8dde1SJohn Marino 			      }
422d5f8dde1SJohn Marino 			    cp->start = start;
423d5f8dde1SJohn Marino 			    cp->len = re - start;
424d5f8dde1SJohn Marino 			    cp++;
4255f2eab64SJohn Marino 			    re++;
4265f2eab64SJohn Marino 			    break;
4275f2eab64SJohn Marino 			  }
428d5f8dde1SJohn Marino 		      }
429d5f8dde1SJohn Marino 		  }
430d5f8dde1SJohn Marino 		break;
431d5f8dde1SJohn Marino 	      }
432d5f8dde1SJohn Marino 
433d5f8dde1SJohn Marino 	    case CHAR_EQUAL:
434d5f8dde1SJohn Marino 	    case CHAR_COLON:
435d5f8dde1SJohn Marino 	      {
436d5f8dde1SJohn Marino 		/* Process equivalence and character classes */
437d5f8dde1SJohn Marino 		tre_char_t kind = re[1];
438d5f8dde1SJohn Marino 
439d5f8dde1SJohn Marino 		/* Can't have a class as an endpoint to a range */
440d5f8dde1SJohn Marino 		if (range > 0)
441d5f8dde1SJohn Marino 		  {
442d5f8dde1SJohn Marino 		    status = REG_ERANGE;
443d5f8dde1SJohn Marino 		    goto error;
444d5f8dde1SJohn Marino 		  }
445d5f8dde1SJohn Marino 		if (!collect_MCCS && range == 0)
446d5f8dde1SJohn Marino 		  {
447d5f8dde1SJohn Marino 		    status = tre_new_item(ctx->mem, TRE_BRACKET_MATCH_TYPE_CHAR,
448d5f8dde1SJohn Marino 					  min, &max_i, items);
449d5f8dde1SJohn Marino 		    if (status != REG_OK)
450d5f8dde1SJohn Marino 		      goto error;
451d5f8dde1SJohn Marino 		  }
452d5f8dde1SJohn Marino 		range = -1;
453d5f8dde1SJohn Marino 		re += 2;
454d5f8dde1SJohn Marino 		start = re;
455d5f8dde1SJohn Marino 		for (;; re++)
456d5f8dde1SJohn Marino 		  {
457d5f8dde1SJohn Marino 		    if (re >= re_end)
458d5f8dde1SJohn Marino 		      {
459d5f8dde1SJohn Marino 			status = kind == CHAR_EQUAL ? REG_ECOLLATE : REG_ECTYPE;
460d5f8dde1SJohn Marino 			goto error;
461d5f8dde1SJohn Marino 		      }
462d5f8dde1SJohn Marino 		    if (*re == kind)
463d5f8dde1SJohn Marino 		      {
464d5f8dde1SJohn Marino 			if (re + 1 >= re_end)
465d5f8dde1SJohn Marino 			  {
466d5f8dde1SJohn Marino 			    status = kind == CHAR_EQUAL ? REG_ECOLLATE :
467d5f8dde1SJohn Marino 							  REG_ECTYPE;
468d5f8dde1SJohn Marino 			    goto error;
469d5f8dde1SJohn Marino 			  }
470d5f8dde1SJohn Marino 			/* Found end */
471d5f8dde1SJohn Marino 			if (re[1] == CHAR_RBRACKET)
472d5f8dde1SJohn Marino 			  {
473d5f8dde1SJohn Marino 			    if (re == start)
474d5f8dde1SJohn Marino 			      {
475d5f8dde1SJohn Marino 				/* Empty class name */
476d5f8dde1SJohn Marino 				status = kind == CHAR_EQUAL ? REG_ECOLLATE :
477d5f8dde1SJohn Marino 							      REG_ECTYPE;
478d5f8dde1SJohn Marino 				goto error;
479d5f8dde1SJohn Marino 			      }
480d5f8dde1SJohn Marino 			    /* Process equivalence class */
481d5f8dde1SJohn Marino 			    if (kind == CHAR_EQUAL)
482d5f8dde1SJohn Marino 			      {
483d5f8dde1SJohn Marino 				int equiv;
484d5f8dde1SJohn Marino 
485d5f8dde1SJohn Marino 				DPRINT(("tre_parse_bracket:   equivalence: '%.*"
486d5f8dde1SJohn Marino 					STRF "'\n", REST(start - 2)));
487d5f8dde1SJohn Marino 
488d5f8dde1SJohn Marino 				/* While we find the collation value even for
489d5f8dde1SJohn Marino 				   multi-character collating elements , we
490d5f8dde1SJohn Marino 				   don't (yet) match any collation values
491d5f8dde1SJohn Marino 				   against multi-character sequences.  We'd have
492d5f8dde1SJohn Marino 				   to enumerate those multi-character sequences
493d5f8dde1SJohn Marino 				   and like multi-character collating symbols,
494d5f8dde1SJohn Marino 				   create a union of those sequences with the
495d5f8dde1SJohn Marino 				   rest of the bracket expression.  While
496d5f8dde1SJohn Marino 				   doable, a bracket expression matching
497d5f8dde1SJohn Marino 				   multiple characters, that doesn't explicitly
498d5f8dde1SJohn Marino 				   contain multi-character sequences, might
499d5f8dde1SJohn Marino 				   be unexpected, so we punt for now. */
500d5f8dde1SJohn Marino 				if ((equiv = __collate_equiv_value(ctx->loc,
501d5f8dde1SJohn Marino 					     start, re - start)) <= 0)
502d5f8dde1SJohn Marino 				  {
503d5f8dde1SJohn Marino 				    /* The standard says that if no collating
504d5f8dde1SJohn Marino 				       element if found, we use the collating
505d5f8dde1SJohn Marino 				       symbol itself.  But __collate_equiv_value
506d5f8dde1SJohn Marino 				       doesn't make a distinction between
507d5f8dde1SJohn Marino 				       an element that is in a equvalence
508d5f8dde1SJohn Marino 				       class with others, or is the only member,
509d5f8dde1SJohn Marino 				       so we already know there is no collating
510d5f8dde1SJohn Marino 				       symbol.  (Note that in the case of a
511d5f8dde1SJohn Marino 				       collating element whose collation value
512d5f8dde1SJohn Marino 				       is unique, matching against the
513d5f8dde1SJohn Marino 				       collating element itself, or against
514d5f8dde1SJohn Marino 				       its collation value, is equivalent.) */
515d5f8dde1SJohn Marino #ifdef BSD_COMPATIBILITY
516d5f8dde1SJohn Marino 				    /* Check if the name is in cnames; if so,
517d5f8dde1SJohn Marino 				       use the corresponding code */
518d5f8dde1SJohn Marino 				    c = tre_search_cnames(start, re - start);
519d5f8dde1SJohn Marino 				    if (c != (wchar_t)-1)
520d5f8dde1SJohn Marino 				      {
521d5f8dde1SJohn Marino 					re++;
522d5f8dde1SJohn Marino 					goto process_single_character;
523d5f8dde1SJohn Marino 				      }
524d5f8dde1SJohn Marino #endif /* BSD_COMPATIBILITY */
525d5f8dde1SJohn Marino 				    status = REG_ECOLLATE;
526d5f8dde1SJohn Marino 				    goto error;
527d5f8dde1SJohn Marino 				  }
528d5f8dde1SJohn Marino 				if (!collect_MCCS)
529d5f8dde1SJohn Marino 				  {
530d5f8dde1SJohn Marino 				    status = tre_new_item(ctx->mem,
531d5f8dde1SJohn Marino 					     TRE_BRACKET_MATCH_TYPE_EQUIVALENCE,
532d5f8dde1SJohn Marino 					     equiv, &max_i, items);
533d5f8dde1SJohn Marino 				    if (status != REG_OK)
534d5f8dde1SJohn Marino 				      goto error;
535d5f8dde1SJohn Marino 				  }
536d5f8dde1SJohn Marino 			      }
5375f2eab64SJohn Marino 			    else
5385f2eab64SJohn Marino 			      {
539d5f8dde1SJohn Marino 				/* Process character class */
540d5f8dde1SJohn Marino 				DPRINT(("tre_parse_bracket:  class: '%.*" STRF
541d5f8dde1SJohn Marino 					"'\n", REST(start - 2)));
542d5f8dde1SJohn Marino 				if (!collect_MCCS)
5435f2eab64SJohn Marino 				  {
5445f2eab64SJohn Marino 				    char tmp_str[64];
545d5f8dde1SJohn Marino 				    tre_ctype_t class;
546d5f8dde1SJohn Marino 				    int len = MIN(re - start, 63);
5475f2eab64SJohn Marino #ifdef TRE_WCHAR
5485f2eab64SJohn Marino 				    {
5495f2eab64SJohn Marino 				      tre_char_t tmp_wcs[64];
550d5f8dde1SJohn Marino 				      wcsncpy(tmp_wcs, start, (size_t)len);
5515f2eab64SJohn Marino 				      tmp_wcs[len] = L'\0';
5525f2eab64SJohn Marino #if defined HAVE_WCSRTOMBS
5535f2eab64SJohn Marino 				      {
5545f2eab64SJohn Marino 					mbstate_t state;
5555f2eab64SJohn Marino 					const tre_char_t *src = tmp_wcs;
5565f2eab64SJohn Marino 					memset(&state, '\0', sizeof(state));
557d5f8dde1SJohn Marino 					len = wcsrtombs_l(tmp_str, &src,
558d5f8dde1SJohn Marino 						      sizeof(tmp_str), &state,
559d5f8dde1SJohn Marino 						      ctx->loc);
5605f2eab64SJohn Marino 				      }
5615f2eab64SJohn Marino #elif defined HAVE_WCSTOMBS
5625f2eab64SJohn Marino 				      len = wcstombs(tmp_str, tmp_wcs, 63);
5635f2eab64SJohn Marino #endif /* defined HAVE_WCSTOMBS */
5645f2eab64SJohn Marino 				    }
5655f2eab64SJohn Marino #else /* !TRE_WCHAR */
566d5f8dde1SJohn Marino 				    strncpy(tmp_str, (const char*)start, len);
5675f2eab64SJohn Marino #endif /* !TRE_WCHAR */
5685f2eab64SJohn Marino 				    tmp_str[len] = '\0';
5695f2eab64SJohn Marino 				    DPRINT(("  class name: %s\n", tmp_str));
570d5f8dde1SJohn Marino 				    class = tre_ctype_l(tmp_str, ctx->loc);
5715f2eab64SJohn Marino 				    if (!class)
5725f2eab64SJohn Marino 				      {
5735f2eab64SJohn Marino 					status = REG_ECTYPE;
574d5f8dde1SJohn Marino 					goto error;
5755f2eab64SJohn Marino 				      }
576d5f8dde1SJohn Marino 				    status = tre_new_item(ctx->mem,
577d5f8dde1SJohn Marino 					     TRE_BRACKET_MATCH_TYPE_CLASS,
578d5f8dde1SJohn Marino 					     class, &max_i, items);
579d5f8dde1SJohn Marino 				    if (status != REG_OK)
580d5f8dde1SJohn Marino 				      goto error;
581d5f8dde1SJohn Marino 				  }
582d5f8dde1SJohn Marino 			      }
583d5f8dde1SJohn Marino 			    re++;
584d5f8dde1SJohn Marino 			    break;
585d5f8dde1SJohn Marino 			  }
586d5f8dde1SJohn Marino 		      }
587d5f8dde1SJohn Marino 		  }
588d5f8dde1SJohn Marino 		other++;
589d5f8dde1SJohn Marino 		break;
590d5f8dde1SJohn Marino 	      }
591d5f8dde1SJohn Marino 
592d5f8dde1SJohn Marino 	    default:
593d5f8dde1SJohn Marino 	      DPRINT(("tre_parse_bracket:   char: '%.*" STRF "'\n", REST(re)));
594d5f8dde1SJohn Marino 	      c = CHAR_LBRACKET;
595d5f8dde1SJohn Marino 	      goto process_single_character;
596d5f8dde1SJohn Marino 	      break;
597d5f8dde1SJohn Marino 	    }
598d5f8dde1SJohn Marino 	  break;
599d5f8dde1SJohn Marino 
600d5f8dde1SJohn Marino 	case CHAR_RBRACKET:
601d5f8dde1SJohn Marino 	  /* A first right bracket */
602d5f8dde1SJohn Marino 	  if (re == ctx->re)
6035f2eab64SJohn Marino 	    {
6045f2eab64SJohn Marino 	      DPRINT(("tre_parse_bracket:   char: '%.*" STRF "'\n", REST(re)));
605d5f8dde1SJohn Marino 	      min = CHAR_RBRACKET;
606d5f8dde1SJohn Marino 	      range = 0;
607d5f8dde1SJohn Marino 	      other++;
6085f2eab64SJohn Marino 	      break;
6095f2eab64SJohn Marino 	    }
610d5f8dde1SJohn Marino 	  /* Done */
611d5f8dde1SJohn Marino 	  if (collect_MCCS)
612d5f8dde1SJohn Marino 	    {
613d5f8dde1SJohn Marino 	      DPRINT(("tre_parse_bracket:	done: '%.*" STRF "'\n",
614d5f8dde1SJohn Marino 		      REST(re)));
615d5f8dde1SJohn Marino 	      if (col_syms)
616d5f8dde1SJohn Marino 		{
617d5f8dde1SJohn Marino 		  /* Mark the character following the right bracket.  Set len
618d5f8dde1SJohn Marino 		   * to whether there are other things besides the
619d5f8dde1SJohn Marino 		   * multi-character collating symbols */
620d5f8dde1SJohn Marino 		  col_syms->start = re + 1;
621d5f8dde1SJohn Marino 		  col_syms->len = other;
622d5f8dde1SJohn Marino 		  /* Mark the end of the list */
623d5f8dde1SJohn Marino 		  cp->start = NULL;
624d5f8dde1SJohn Marino 		}
625d5f8dde1SJohn Marino 	      *result = col_syms;
626d5f8dde1SJohn Marino 	      return REG_OK;
627d5f8dde1SJohn Marino 	    }
628d5f8dde1SJohn Marino 	  /* range > 0 is not possible, since we did a lookahead after the
629d5f8dde1SJohn Marino 	   * hyphen */
630d5f8dde1SJohn Marino 	  if (range == 0)
631d5f8dde1SJohn Marino 	    {
632d5f8dde1SJohn Marino 	      status = tre_new_item(ctx->mem, TRE_BRACKET_MATCH_TYPE_CHAR,
633d5f8dde1SJohn Marino 				    min, &max_i, items);
6345f2eab64SJohn Marino 	      if (status != REG_OK)
635d5f8dde1SJohn Marino 		goto error;
6365f2eab64SJohn Marino 	    }
637d5f8dde1SJohn Marino 	  DPRINT(("tre_parse_bracket:	done: '%.*" STRF "'\n", REST(re)));
6385f2eab64SJohn Marino 	  *items_size = max_i;
639d5f8dde1SJohn Marino 	  ctx->re = re + 1;
640d5f8dde1SJohn Marino 	  return REG_OK;
641d5f8dde1SJohn Marino 
642d5f8dde1SJohn Marino 	default:
643d5f8dde1SJohn Marino 	  DPRINT(("tre_parse_bracket:   char: '%.*" STRF "'\n", REST(re)));
644d5f8dde1SJohn Marino 	  c = *re;
645d5f8dde1SJohn Marino process_single_character:
646d5f8dde1SJohn Marino 	  /* Process single character */
647d5f8dde1SJohn Marino 	  if (range > 0)
648d5f8dde1SJohn Marino 	    {
649d5f8dde1SJohn Marino 	      int mine, maxe;
650d5f8dde1SJohn Marino 
651d5f8dde1SJohn Marino process_end_range:
652d5f8dde1SJohn Marino 	      /* Get collation equivalence values */
653d5f8dde1SJohn Marino 	      mine = __collate_equiv_value(ctx->loc, &min, 1);
654d5f8dde1SJohn Marino 	      maxe = __collate_equiv_value(ctx->loc, &c, 1);
655d5f8dde1SJohn Marino 	      if (maxe < mine)
656d5f8dde1SJohn Marino 		{
657d5f8dde1SJohn Marino 		  status = REG_ERANGE;
658d5f8dde1SJohn Marino 		  goto error;
659d5f8dde1SJohn Marino 		}
660d5f8dde1SJohn Marino 	      if (!collect_MCCS)
661d5f8dde1SJohn Marino 		{
662d5f8dde1SJohn Marino 		  status = tre_new_item(ctx->mem,
663d5f8dde1SJohn Marino 					TRE_BRACKET_MATCH_TYPE_RANGE_BEGIN,
664d5f8dde1SJohn Marino 					mine, &max_i, items);
665d5f8dde1SJohn Marino 		  if (status != REG_OK)
666d5f8dde1SJohn Marino 		    goto error;
667d5f8dde1SJohn Marino 		  status = tre_new_item(ctx->mem,
668d5f8dde1SJohn Marino 					TRE_BRACKET_MATCH_TYPE_RANGE_END,
669d5f8dde1SJohn Marino 					maxe, &max_i, items);
670d5f8dde1SJohn Marino 		  if (status != REG_OK)
671d5f8dde1SJohn Marino 		    goto error;
672d5f8dde1SJohn Marino 		}
673d5f8dde1SJohn Marino 	      range = -1;
674d5f8dde1SJohn Marino 	    }
675d5f8dde1SJohn Marino 	  else
676d5f8dde1SJohn Marino 	    {
677d5f8dde1SJohn Marino process_begin_range:
678d5f8dde1SJohn Marino 	      if (!collect_MCCS)
679d5f8dde1SJohn Marino 		{
680d5f8dde1SJohn Marino 		  if (range == 0)
681d5f8dde1SJohn Marino 		    {
682d5f8dde1SJohn Marino 		      status = tre_new_item(ctx->mem,
683d5f8dde1SJohn Marino 					    TRE_BRACKET_MATCH_TYPE_CHAR,
684d5f8dde1SJohn Marino 					    min, &max_i, items);
685d5f8dde1SJohn Marino 		      if (status != REG_OK)
686d5f8dde1SJohn Marino 			goto error;
687d5f8dde1SJohn Marino 		    }
688d5f8dde1SJohn Marino 		  min = c;
689d5f8dde1SJohn Marino 		}
690d5f8dde1SJohn Marino 	      range = 0;
691d5f8dde1SJohn Marino 	    }
692d5f8dde1SJohn Marino 	  other++;
693d5f8dde1SJohn Marino 	  break;
694d5f8dde1SJohn Marino 	}
695d5f8dde1SJohn Marino     }
696d5f8dde1SJohn Marino   status = REG_EBRACK;
697d5f8dde1SJohn Marino error:
698d5f8dde1SJohn Marino   DPRINT(("tre_parse_bracket:	error: '%.*" STRF "', status=%d\n",
699d5f8dde1SJohn Marino 	  REST(re), status));
700d5f8dde1SJohn Marino   if (col_syms)
701d5f8dde1SJohn Marino     xfree(col_syms);
7025f2eab64SJohn Marino   return status;
7035f2eab64SJohn Marino }
7045f2eab64SJohn Marino 
705d5f8dde1SJohn Marino #ifdef TRE_DEBUG
/* Printable names for bracket match item types.  The TRE_DEBUG dump in
   tre_parse_bracket() indexes this table with each item's `type' field,
   so the order of the entries must follow the numeric type values. */
static const char *bracket_match_type_str[] = {
  "unused",             /* 0 */
  "char",               /* 1 */
  "range begin",        /* 2 */
  "range end",          /* 3 */
  "class",              /* 4 */
  "equivalence value",  /* 5 */
};
714d5f8dde1SJohn Marino #endif /* TRE_DEBUG */
715d5f8dde1SJohn Marino 
7165f2eab64SJohn Marino static reg_errcode_t
tre_parse_bracket(tre_parse_ctx_t * ctx,tre_ast_node_t ** result)7175f2eab64SJohn Marino tre_parse_bracket(tre_parse_ctx_t *ctx, tre_ast_node_t **result)
7185f2eab64SJohn Marino {
719d5f8dde1SJohn Marino   tre_ast_node_t *node;
7205f2eab64SJohn Marino   reg_errcode_t status = REG_OK;
721d5f8dde1SJohn Marino   tre_bracket_match_list_t *items;
722d5f8dde1SJohn Marino   int max_i = 32;
723d5f8dde1SJohn Marino   tre_collating_symbol *col_syms = NULL;
724d5f8dde1SJohn Marino 
725d5f8dde1SJohn Marino   /* Handle special cases [[:<:]] and [[:>:]] */
726d5f8dde1SJohn Marino   if (ctx->re_end - ctx->re >= 6 && ctx->re[0] == CHAR_LBRACKET
727d5f8dde1SJohn Marino       && ctx->re[1] == CHAR_COLON && (ctx->re[2] == L'<' || ctx->re[2] == L'>')
728d5f8dde1SJohn Marino       && ctx->re[3] == CHAR_COLON && ctx->re[4] == CHAR_RBRACKET
729d5f8dde1SJohn Marino       && ctx->re[5] == CHAR_RBRACKET)
730d5f8dde1SJohn Marino     {
731d5f8dde1SJohn Marino       *result = tre_ast_new_literal(ctx->mem, ASSERTION,
732d5f8dde1SJohn Marino 		      (ctx->re[2] == L'<') ? ASSERT_AT_BOW : ASSERT_AT_EOW,
733d5f8dde1SJohn Marino 		      -1);
734d5f8dde1SJohn Marino       DPRINT(("tre_parse_bracket: special case %s\n", (ctx->re[2] == L'<') ?
735d5f8dde1SJohn Marino 	      "[[:<:]]" : "[[:>:]]"));
736d5f8dde1SJohn Marino       ctx->re += 6;
737d5f8dde1SJohn Marino       return *result ? REG_OK : REG_ESPACE;
738d5f8dde1SJohn Marino     }
7395f2eab64SJohn Marino 
7405f2eab64SJohn Marino   /* Start off with an array of `max_i' elements. */
741d5f8dde1SJohn Marino   items = xcalloc(1, SIZEOF_BRACKET_MATCH_LIST_N(max_i));
7425f2eab64SJohn Marino   if (items == NULL)
7435f2eab64SJohn Marino     return REG_ESPACE;
7445f2eab64SJohn Marino 
7455f2eab64SJohn Marino   if (*ctx->re == CHAR_CARET)
7465f2eab64SJohn Marino     {
7475f2eab64SJohn Marino       DPRINT(("tre_parse_bracket: negate: '%.*" STRF "'\n", REST(ctx->re)));
748d5f8dde1SJohn Marino       items->flags |= TRE_BRACKET_MATCH_FLAG_NEGATE;
7495f2eab64SJohn Marino       ctx->re++;
7505f2eab64SJohn Marino     }
7515f2eab64SJohn Marino 
752d5f8dde1SJohn Marino   status = tre_parse_bracket_items(ctx, &items, &max_i, &col_syms);
7535f2eab64SJohn Marino 
7545f2eab64SJohn Marino   if (status != REG_OK)
7555f2eab64SJohn Marino     goto parse_bracket_done;
7565f2eab64SJohn Marino 
757d5f8dde1SJohn Marino   /* If there are collating symbols, split off the multi-character ones
758d5f8dde1SJohn Marino    * into a union of the bracket expression (without the collating symbols)
759d5f8dde1SJohn Marino    * and the multiple-character sequences.  We create an equivalent input
760d5f8dde1SJohn Marino    * string and run tre_parse() recursively */
761d5f8dde1SJohn Marino   if (col_syms)
762d5f8dde1SJohn Marino     {
763d5f8dde1SJohn Marino       tre_char_t *str, *sp;
764d5f8dde1SJohn Marino       tre_collating_symbol *cp;
765d5f8dde1SJohn Marino       tre_parse_ctx_t subctx;
7665f2eab64SJohn Marino 
767d5f8dde1SJohn Marino       /* Allocate a new string.  We start with the size of the original
768d5f8dde1SJohn Marino        * bracket expression (minus 1) and add 2 (for a leading "[" and
769d5f8dde1SJohn Marino        * a trailing nil; don't need a "^", since it is illegal to have
770d5f8dde1SJohn Marino        * inverted MCCSs).  Since a multi-character collating symbols
771d5f8dde1SJohn Marino        * will be converted from "[.xx.]" to "|xx" (n+4 to n+1), we don't
772d5f8dde1SJohn Marino        * need to worry about the new string getting too long. */
773d5f8dde1SJohn Marino       xfree(items);
774d5f8dde1SJohn Marino       str = xmalloc(sizeof(*str) * ((col_syms->start - ctx->re) + 2));
775d5f8dde1SJohn Marino       if (str == NULL)
7765f2eab64SJohn Marino 	{
777d5f8dde1SJohn Marino 	  xfree(col_syms);
778d5f8dde1SJohn Marino 	  return REG_ESPACE;
779d5f8dde1SJohn Marino 	}
780d5f8dde1SJohn Marino       sp = str;
781d5f8dde1SJohn Marino       if (col_syms->len > 0)
782d5f8dde1SJohn Marino 	{
783d5f8dde1SJohn Marino 	  /* There are other items in the bracket expression besides the
784d5f8dde1SJohn Marino 	   * multi-character collating symbols, so create a new bracket
785d5f8dde1SJohn Marino 	   * expression with only those other itmes. */
786d5f8dde1SJohn Marino 	  const tre_char_t *re;
787d5f8dde1SJohn Marino 	  ptrdiff_t i;
7885f2eab64SJohn Marino 
789d5f8dde1SJohn Marino 	  *sp++ = '[';
790d5f8dde1SJohn Marino 	  re = ctx->re;
791d5f8dde1SJohn Marino 	  for (cp = col_syms + 1; cp->start; cp++)
792d5f8dde1SJohn Marino 	    {
793d5f8dde1SJohn Marino 	      /* The "- 2" is to account for the "[." */
794d5f8dde1SJohn Marino 	      if ((i = ((cp->start - re) - 2)) > 0)
795d5f8dde1SJohn Marino 		{
796d5f8dde1SJohn Marino 		  memcpy(sp, re, sizeof(*sp) * i);
797d5f8dde1SJohn Marino 		  sp += i;
798d5f8dde1SJohn Marino 		}
799d5f8dde1SJohn Marino 	      /* The "+ 2" is to account for the ".]" */
800d5f8dde1SJohn Marino 	      re = cp->start + cp->len + 2;
801d5f8dde1SJohn Marino 	    }
802d5f8dde1SJohn Marino 	    i = col_syms->start - re; /* Includes the trailing right bracket */
803d5f8dde1SJohn Marino 	    memcpy(sp, re, sizeof(*sp) * i);
804d5f8dde1SJohn Marino 	    sp += i;
805d5f8dde1SJohn Marino 	    *sp++ = '|';
806d5f8dde1SJohn Marino 	}
807d5f8dde1SJohn Marino       for (cp = col_syms + 1; cp->start; cp++)
808d5f8dde1SJohn Marino 	{
809d5f8dde1SJohn Marino 	  memcpy(sp, cp->start, sizeof(*sp) * cp->len);
810d5f8dde1SJohn Marino 	  sp += cp->len;
811d5f8dde1SJohn Marino 	  if (cp[1].start)
812d5f8dde1SJohn Marino 	    *sp++ = '|';
813d5f8dde1SJohn Marino 	}
814d5f8dde1SJohn Marino       *sp = 0;
815d5f8dde1SJohn Marino       DPRINT(("tre_parse_bracket: Reparsing bracket expression with '%ls'\n",
816d5f8dde1SJohn Marino 	      str));
8175f2eab64SJohn Marino 
818d5f8dde1SJohn Marino       memcpy(&subctx, ctx, sizeof(subctx));
819d5f8dde1SJohn Marino       subctx.re = str;
820d5f8dde1SJohn Marino       subctx.len = sp - str;
821d5f8dde1SJohn Marino       subctx.nofirstsub = 1;
822d5f8dde1SJohn Marino       subctx.cflags |= REG_EXTENDED; /* Force extended mode for parsing */
823d5f8dde1SJohn Marino       status = tre_parse(&subctx);
824d5f8dde1SJohn Marino       xfree(str);
825d5f8dde1SJohn Marino       if (status != REG_OK)
8265f2eab64SJohn Marino 	{
827d5f8dde1SJohn Marino 	  xfree(col_syms);
828d5f8dde1SJohn Marino 	  return status;
8295f2eab64SJohn Marino 	}
830d5f8dde1SJohn Marino       ctx->re = col_syms->start;
831d5f8dde1SJohn Marino       ctx->position = subctx.position;
832d5f8dde1SJohn Marino       xfree(col_syms);
833d5f8dde1SJohn Marino       *result = subctx.result;
834d5f8dde1SJohn Marino       DPRINT(("tre_parse_bracket: Returning to original string\n"));
835d5f8dde1SJohn Marino       return REG_OK;
8365f2eab64SJohn Marino     }
8375f2eab64SJohn Marino 
838d5f8dde1SJohn Marino   DPRINT(("tre_parse_bracket: creating bracket expression literal\n"));
839d5f8dde1SJohn Marino   node = tre_ast_new_literal(ctx->mem, 0, TRE_CHAR_MAX, ctx->position);
8405f2eab64SJohn Marino   if (node == NULL)
8415f2eab64SJohn Marino     {
8425f2eab64SJohn Marino       status = REG_ESPACE;
8435f2eab64SJohn Marino       goto parse_bracket_done;
8445f2eab64SJohn Marino     }
8455f2eab64SJohn Marino   else
8465f2eab64SJohn Marino     {
847d5f8dde1SJohn Marino       tre_literal_t *l = node->obj;
848d5f8dde1SJohn Marino       l->u.bracket_match_list = tre_mem_alloc(ctx->mem,
849d5f8dde1SJohn Marino 					 SIZEOF_BRACKET_MATCH_LIST(items));
850d5f8dde1SJohn Marino       if (l->u.bracket_match_list == NULL)
851d5f8dde1SJohn Marino 	{
8525f2eab64SJohn Marino 	  status = REG_ESPACE;
8535f2eab64SJohn Marino 	  goto parse_bracket_done;
854d5f8dde1SJohn Marino 	}
855d5f8dde1SJohn Marino       memcpy(l->u.bracket_match_list, items, SIZEOF_BRACKET_MATCH_LIST(items));
856d5f8dde1SJohn Marino     }
8575f2eab64SJohn Marino 
8585f2eab64SJohn Marino #ifdef TRE_DEBUG
859d5f8dde1SJohn Marino   {
860d5f8dde1SJohn Marino     int i;
861d5f8dde1SJohn Marino     tre_bracket_match_t *b;
862d5f8dde1SJohn Marino     DPRINT(("tre_parse_bracket: %d bracket match items, flags 0x%x\n",
863d5f8dde1SJohn Marino 	    items->num_bracket_matches, items->flags));
864d5f8dde1SJohn Marino     for (i = 0, b = items->bracket_matches;
865d5f8dde1SJohn Marino 	 i < items->num_bracket_matches; i++, b++)
866d5f8dde1SJohn Marino       {
867d5f8dde1SJohn Marino 	DPRINT(("   %d: %s %d\n", i, bracket_match_type_str[b->type],
868d5f8dde1SJohn Marino 		b->value));
869d5f8dde1SJohn Marino       }
870d5f8dde1SJohn Marino   }
8715f2eab64SJohn Marino #endif /* TRE_DEBUG */
8725f2eab64SJohn Marino 
8735f2eab64SJohn Marino  parse_bracket_done:
8745f2eab64SJohn Marino   xfree(items);
8755f2eab64SJohn Marino   ctx->position++;
8765f2eab64SJohn Marino   *result = node;
8775f2eab64SJohn Marino   return status;
8785f2eab64SJohn Marino }
8795f2eab64SJohn Marino 
8805f2eab64SJohn Marino 
8815f2eab64SJohn Marino /* Parses a positive decimal integer.  Returns -1 if the string does not
8825f2eab64SJohn Marino    contain a valid number. */
8835f2eab64SJohn Marino static int
tre_parse_int(const tre_char_t ** regex,const tre_char_t * regex_end)8845f2eab64SJohn Marino tre_parse_int(const tre_char_t **regex, const tre_char_t *regex_end)
8855f2eab64SJohn Marino {
8865f2eab64SJohn Marino   int num = -1;
8875f2eab64SJohn Marino   const tre_char_t *r = *regex;
8885f2eab64SJohn Marino   while (r < regex_end && *r >= L'0' && *r <= L'9')
8895f2eab64SJohn Marino     {
8905f2eab64SJohn Marino       if (num < 0)
8915f2eab64SJohn Marino 	num = 0;
8925f2eab64SJohn Marino       num = num * 10 + *r - L'0';
8935f2eab64SJohn Marino       r++;
8945f2eab64SJohn Marino     }
8955f2eab64SJohn Marino   *regex = r;
8965f2eab64SJohn Marino   return num;
8975f2eab64SJohn Marino }
8985f2eab64SJohn Marino 
8995f2eab64SJohn Marino 
9005f2eab64SJohn Marino static reg_errcode_t
tre_parse_bound(tre_parse_ctx_t * ctx,tre_ast_node_t ** result)9015f2eab64SJohn Marino tre_parse_bound(tre_parse_ctx_t *ctx, tre_ast_node_t **result)
9025f2eab64SJohn Marino {
903d5f8dde1SJohn Marino   int min, max;
904d5f8dde1SJohn Marino #ifdef TRE_APPROX
905d5f8dde1SJohn Marino   int i;
9065f2eab64SJohn Marino   int cost_ins, cost_del, cost_subst, cost_max;
9075f2eab64SJohn Marino   int limit_ins, limit_del, limit_subst, limit_err;
9085f2eab64SJohn Marino   const tre_char_t *start;
909d5f8dde1SJohn Marino #endif /* TRE_APPROX */
910d5f8dde1SJohn Marino   const tre_char_t *r = ctx->re;
9115f2eab64SJohn Marino   int minimal = (ctx->cflags & REG_UNGREEDY) ? 1 : 0;
912d5f8dde1SJohn Marino #ifdef TRE_APPROX
9135f2eab64SJohn Marino   int approx = 0;
9145f2eab64SJohn Marino   int costs_set = 0;
9155f2eab64SJohn Marino   int counts_set = 0;
9165f2eab64SJohn Marino 
9175f2eab64SJohn Marino   cost_ins = cost_del = cost_subst = cost_max = TRE_PARAM_UNSET;
9185f2eab64SJohn Marino   limit_ins = limit_del = limit_subst = limit_err = TRE_PARAM_UNSET;
919d5f8dde1SJohn Marino #endif /* TRE_APPROX */
9205f2eab64SJohn Marino 
9215f2eab64SJohn Marino   /* Parse number (minimum repetition count). */
9225f2eab64SJohn Marino   min = -1;
923d5f8dde1SJohn Marino   if (r >= ctx->re_end)
924d5f8dde1SJohn Marino #ifdef ERE_LITERAL_LBRACE_ON_NON_NUMERIC_BOUND
925d5f8dde1SJohn Marino     return (ctx->cflags & REG_EXTENDED) ? REG_NOMATCH : REG_EBRACE;
926d5f8dde1SJohn Marino #else /* !ERE_LITERAL_LBRACE_ON_NON_NUMERIC_BOUND */
927d5f8dde1SJohn Marino     return REG_EBRACE;
928d5f8dde1SJohn Marino #endif /* !ERE_LITERAL_LBRACE_ON_NON_NUMERIC_BOUND */
929d5f8dde1SJohn Marino   if (*r >= L'0' && *r <= L'9') {
9305f2eab64SJohn Marino     DPRINT(("tre_parse:	  min count: '%.*" STRF "'\n", REST(r)));
9315f2eab64SJohn Marino     min = tre_parse_int(&r, ctx->re_end);
9325f2eab64SJohn Marino   }
933d5f8dde1SJohn Marino #ifndef TRE_APPROX
934d5f8dde1SJohn Marino   else
935d5f8dde1SJohn Marino #ifdef ERE_LITERAL_LBRACE_ON_NON_NUMERIC_BOUND
936d5f8dde1SJohn Marino       /* For ERE, return REG_NOMATCH to signal that the lbrace should
937d5f8dde1SJohn Marino          be treated as a literal */
938d5f8dde1SJohn Marino       return (ctx->cflags & REG_EXTENDED) ? REG_NOMATCH : REG_BADBR;
939d5f8dde1SJohn Marino #else /* !ERE_LITERAL_LBRACE_ON_NON_NUMERIC_BOUND */
940d5f8dde1SJohn Marino       return REG_BADBR;
941d5f8dde1SJohn Marino #endif /* !ERE_LITERAL_LBRACE_ON_NON_NUMERIC_BOUND */
942d5f8dde1SJohn Marino #endif /* !TRE_APPROX */
9435f2eab64SJohn Marino 
9445f2eab64SJohn Marino   /* Parse comma and second number (maximum repetition count). */
9455f2eab64SJohn Marino   max = min;
9465f2eab64SJohn Marino   if (r < ctx->re_end && *r == CHAR_COMMA)
9475f2eab64SJohn Marino     {
9485f2eab64SJohn Marino       r++;
9495f2eab64SJohn Marino       DPRINT(("tre_parse:   max count: '%.*" STRF "'\n", REST(r)));
9505f2eab64SJohn Marino       max = tre_parse_int(&r, ctx->re_end);
9515f2eab64SJohn Marino     }
9525f2eab64SJohn Marino 
9535f2eab64SJohn Marino   /* Check that the repeat counts are sane. */
954d5f8dde1SJohn Marino   if ((max >= 0 && min > max) || min > RE_DUP_MAX || max > RE_DUP_MAX)
9555f2eab64SJohn Marino     return REG_BADBR;
9565f2eab64SJohn Marino 
9575f2eab64SJohn Marino 
958d5f8dde1SJohn Marino #ifdef TRE_APPROX
9595f2eab64SJohn Marino   /*
9605f2eab64SJohn Marino    '{'
9615f2eab64SJohn Marino      optionally followed immediately by a number == minimum repcount
9625f2eab64SJohn Marino      optionally followed by , then a number == maximum repcount
9635f2eab64SJohn Marino       + then a number == maximum insertion count
9645f2eab64SJohn Marino       - then a number == maximum deletion count
9655f2eab64SJohn Marino       # then a number == maximum substitution count
9665f2eab64SJohn Marino       ~ then a number == maximum number of errors
9675f2eab64SJohn Marino       Any of +, -, # or ~ without followed by a number means that
9685f2eab64SJohn Marino       the maximum count/number of errors is infinite.
9695f2eab64SJohn Marino 
9705f2eab64SJohn Marino       An equation of the form
9715f2eab64SJohn Marino 	Xi + Yd + Zs < C
9725f2eab64SJohn Marino       can be specified to set costs and the cost limit to a value
9735f2eab64SJohn Marino       different from the default value:
9745f2eab64SJohn Marino 	- X is the cost of an insertion
9755f2eab64SJohn Marino 	- Y is the cost of a deletion
9765f2eab64SJohn Marino 	- Z is the cost of a substitution
9775f2eab64SJohn Marino 	- C is the maximum cost
9785f2eab64SJohn Marino 
9795f2eab64SJohn Marino       If no count limit or cost is set for an operation, the operation
9805f2eab64SJohn Marino       is not allowed at all.
9815f2eab64SJohn Marino   */
9825f2eab64SJohn Marino 
9835f2eab64SJohn Marino 
9845f2eab64SJohn Marino   do {
9855f2eab64SJohn Marino     int done;
9865f2eab64SJohn Marino     start = r;
9875f2eab64SJohn Marino 
9885f2eab64SJohn Marino     /* Parse count limit settings */
9895f2eab64SJohn Marino     done = 0;
9905f2eab64SJohn Marino     if (!counts_set)
9915f2eab64SJohn Marino       while (r + 1 < ctx->re_end && !done)
9925f2eab64SJohn Marino 	{
9935f2eab64SJohn Marino 	  switch (*r)
9945f2eab64SJohn Marino 	    {
9955f2eab64SJohn Marino 	    case CHAR_PLUS:  /* Insert limit */
9965f2eab64SJohn Marino 	      DPRINT(("tre_parse:   ins limit: '%.*" STRF "'\n", REST(r)));
9975f2eab64SJohn Marino 	      r++;
9985f2eab64SJohn Marino 	      limit_ins = tre_parse_int(&r, ctx->re_end);
9995f2eab64SJohn Marino 	      if (limit_ins < 0)
10005f2eab64SJohn Marino 		limit_ins = INT_MAX;
10015f2eab64SJohn Marino 	      counts_set = 1;
10025f2eab64SJohn Marino 	      break;
10035f2eab64SJohn Marino 	    case CHAR_MINUS: /* Delete limit */
10045f2eab64SJohn Marino 	      DPRINT(("tre_parse:   del limit: '%.*" STRF "'\n", REST(r)));
10055f2eab64SJohn Marino 	      r++;
10065f2eab64SJohn Marino 	      limit_del = tre_parse_int(&r, ctx->re_end);
10075f2eab64SJohn Marino 	      if (limit_del < 0)
10085f2eab64SJohn Marino 		limit_del = INT_MAX;
10095f2eab64SJohn Marino 	      counts_set = 1;
10105f2eab64SJohn Marino 	      break;
10115f2eab64SJohn Marino 	    case CHAR_HASH:  /* Substitute limit */
10125f2eab64SJohn Marino 	      DPRINT(("tre_parse: subst limit: '%.*" STRF "'\n", REST(r)));
10135f2eab64SJohn Marino 	      r++;
10145f2eab64SJohn Marino 	      limit_subst = tre_parse_int(&r, ctx->re_end);
10155f2eab64SJohn Marino 	      if (limit_subst < 0)
10165f2eab64SJohn Marino 		limit_subst = INT_MAX;
10175f2eab64SJohn Marino 	      counts_set = 1;
10185f2eab64SJohn Marino 	      break;
10195f2eab64SJohn Marino 	    case CHAR_TILDE: /* Maximum number of changes */
10205f2eab64SJohn Marino 	      DPRINT(("tre_parse: count limit: '%.*" STRF "'\n", REST(r)));
10215f2eab64SJohn Marino 	      r++;
10225f2eab64SJohn Marino 	      limit_err = tre_parse_int(&r, ctx->re_end);
10235f2eab64SJohn Marino 	      if (limit_err < 0)
10245f2eab64SJohn Marino 		limit_err = INT_MAX;
10255f2eab64SJohn Marino 	      approx = 1;
10265f2eab64SJohn Marino 	      break;
10275f2eab64SJohn Marino 	    case CHAR_COMMA:
10285f2eab64SJohn Marino 	      r++;
10295f2eab64SJohn Marino 	      break;
10305f2eab64SJohn Marino 	    case L' ':
10315f2eab64SJohn Marino 	      r++;
10325f2eab64SJohn Marino 	      break;
10335f2eab64SJohn Marino 	    case L'}':
10345f2eab64SJohn Marino 	      done = 1;
10355f2eab64SJohn Marino 	      break;
10365f2eab64SJohn Marino 	    default:
10375f2eab64SJohn Marino 	      done = 1;
10385f2eab64SJohn Marino 	      break;
10395f2eab64SJohn Marino 	    }
10405f2eab64SJohn Marino 	}
10415f2eab64SJohn Marino 
10425f2eab64SJohn Marino     /* Parse cost restriction equation. */
10435f2eab64SJohn Marino     done = 0;
10445f2eab64SJohn Marino     if (!costs_set)
10455f2eab64SJohn Marino       while (r + 1 < ctx->re_end && !done)
10465f2eab64SJohn Marino 	{
10475f2eab64SJohn Marino 	  switch (*r)
10485f2eab64SJohn Marino 	    {
10495f2eab64SJohn Marino 	    case CHAR_PLUS:
10505f2eab64SJohn Marino 	    case L' ':
10515f2eab64SJohn Marino 	      r++;
10525f2eab64SJohn Marino 	      break;
10535f2eab64SJohn Marino 	    case L'<':
10545f2eab64SJohn Marino 	      DPRINT(("tre_parse:    max cost: '%.*" STRF "'\n", REST(r)));
10555f2eab64SJohn Marino 	      r++;
10565f2eab64SJohn Marino 	      while (*r == L' ')
10575f2eab64SJohn Marino 		r++;
10585f2eab64SJohn Marino 	      cost_max = tre_parse_int(&r, ctx->re_end);
10595f2eab64SJohn Marino 	      if (cost_max < 0)
10605f2eab64SJohn Marino 		cost_max = INT_MAX;
10615f2eab64SJohn Marino 	      else
10625f2eab64SJohn Marino 		cost_max--;
10635f2eab64SJohn Marino 	      approx = 1;
10645f2eab64SJohn Marino 	      break;
10655f2eab64SJohn Marino 	    case CHAR_COMMA:
10665f2eab64SJohn Marino 	      r++;
10675f2eab64SJohn Marino 	      done = 1;
10685f2eab64SJohn Marino 	      break;
10695f2eab64SJohn Marino 	    default:
10705f2eab64SJohn Marino 	      if (*r >= L'0' && *r <= L'9')
10715f2eab64SJohn Marino 		{
10725f2eab64SJohn Marino #ifdef TRE_DEBUG
10735f2eab64SJohn Marino 		  const tre_char_t *sr = r;
10745f2eab64SJohn Marino #endif /* TRE_DEBUG */
10755f2eab64SJohn Marino 		  int cost = tre_parse_int(&r, ctx->re_end);
10765f2eab64SJohn Marino 		  /* XXX - make sure r is not past end. */
10775f2eab64SJohn Marino 		  switch (*r)
10785f2eab64SJohn Marino 		    {
10795f2eab64SJohn Marino 		    case L'i':	/* Insert cost */
10805f2eab64SJohn Marino 		      DPRINT(("tre_parse:    ins cost: '%.*" STRF "'\n",
10815f2eab64SJohn Marino 			      REST(sr)));
10825f2eab64SJohn Marino 		      r++;
10835f2eab64SJohn Marino 		      cost_ins = cost;
10845f2eab64SJohn Marino 		      costs_set = 1;
10855f2eab64SJohn Marino 		      break;
10865f2eab64SJohn Marino 		    case L'd':	/* Delete cost */
10875f2eab64SJohn Marino 		      DPRINT(("tre_parse:    del cost: '%.*" STRF "'\n",
10885f2eab64SJohn Marino 			      REST(sr)));
10895f2eab64SJohn Marino 		      r++;
10905f2eab64SJohn Marino 		      cost_del = cost;
10915f2eab64SJohn Marino 		      costs_set = 1;
10925f2eab64SJohn Marino 		      break;
10935f2eab64SJohn Marino 		    case L's':	/* Substitute cost */
10945f2eab64SJohn Marino 		      DPRINT(("tre_parse:  subst cost: '%.*" STRF "'\n",
10955f2eab64SJohn Marino 			      REST(sr)));
10965f2eab64SJohn Marino 		      r++;
10975f2eab64SJohn Marino 		      cost_subst = cost;
10985f2eab64SJohn Marino 		      costs_set = 1;
10995f2eab64SJohn Marino 		      break;
11005f2eab64SJohn Marino 		    default:
11015f2eab64SJohn Marino 		      return REG_BADBR;
11025f2eab64SJohn Marino 		    }
11035f2eab64SJohn Marino 		}
11045f2eab64SJohn Marino 	      else
11055f2eab64SJohn Marino 		{
11065f2eab64SJohn Marino 		  done = 1;
11075f2eab64SJohn Marino 		  break;
11085f2eab64SJohn Marino 		}
11095f2eab64SJohn Marino 	    }
11105f2eab64SJohn Marino 	}
11115f2eab64SJohn Marino   } while (start != r);
1112d5f8dde1SJohn Marino #endif /* TRE_APPROX */
11135f2eab64SJohn Marino 
1114d5f8dde1SJohn Marino   /*{*//* Missing }. */
11155f2eab64SJohn Marino   if (r >= ctx->re_end)
11165f2eab64SJohn Marino     return REG_EBRACE;
11175f2eab64SJohn Marino 
11185f2eab64SJohn Marino   /* Empty contents of {}. */
11195f2eab64SJohn Marino   if (r == ctx->re)
11205f2eab64SJohn Marino     return REG_BADBR;
11215f2eab64SJohn Marino 
11225f2eab64SJohn Marino   /* Parse the ending '}' or '\}'.*/
11235f2eab64SJohn Marino   if (ctx->cflags & REG_EXTENDED)
11245f2eab64SJohn Marino     {
11255f2eab64SJohn Marino       if (r >= ctx->re_end || *r != CHAR_RBRACE)
11265f2eab64SJohn Marino 	return REG_BADBR;
11275f2eab64SJohn Marino       r++;
1128d5f8dde1SJohn Marino       /* Parse trailing '?' marking minimal repetition. */
1129d5f8dde1SJohn Marino       if (r < ctx->re_end)
1130d5f8dde1SJohn Marino 	{
1131d5f8dde1SJohn Marino 	  if (*r == CHAR_QUESTIONMARK)
1132d5f8dde1SJohn Marino 	    {
1133d5f8dde1SJohn Marino 	      /* Process the question mark only in enhanced mode.
1134d5f8dde1SJohn Marino 		 Otherwise, the question mark is an error in ERE
1135d5f8dde1SJohn Marino 		 or a literal in BRE */
1136d5f8dde1SJohn Marino 	      if (ctx->cflags & REG_ENHANCED)
1137d5f8dde1SJohn Marino 		{
1138d5f8dde1SJohn Marino 		  minimal = !(ctx->cflags & REG_UNGREEDY);
1139d5f8dde1SJohn Marino 		  r++;
1140d5f8dde1SJohn Marino 		}
1141d5f8dde1SJohn Marino 	      else return REG_BADRPT;
1142d5f8dde1SJohn Marino 	    }
1143d5f8dde1SJohn Marino 	  else if (*r == CHAR_STAR || *r == CHAR_PLUS)
1144d5f8dde1SJohn Marino 	    {
1145d5f8dde1SJohn Marino 	      /* These are reserved for future extensions. */
1146d5f8dde1SJohn Marino 	      return REG_BADRPT;
1147d5f8dde1SJohn Marino 	    }
1148d5f8dde1SJohn Marino 	}
11495f2eab64SJohn Marino     }
11505f2eab64SJohn Marino   else
11515f2eab64SJohn Marino     {
11525f2eab64SJohn Marino       if (r + 1 >= ctx->re_end
11535f2eab64SJohn Marino 	  || *r != CHAR_BACKSLASH
11545f2eab64SJohn Marino 	  || *(r + 1) != CHAR_RBRACE)
11555f2eab64SJohn Marino 	return REG_BADBR;
11565f2eab64SJohn Marino       r += 2;
1157d5f8dde1SJohn Marino       if (r < ctx->re_end && *r == CHAR_STAR)
11585f2eab64SJohn Marino 	{
1159d5f8dde1SJohn Marino 	  /* This is reserved for future extensions. */
11605f2eab64SJohn Marino 	  return REG_BADRPT;
11615f2eab64SJohn Marino 	}
11625f2eab64SJohn Marino     }
11635f2eab64SJohn Marino 
1164d5f8dde1SJohn Marino   if (minimal)
1165d5f8dde1SJohn Marino     ctx->num_reorder_tags++;
1166d5f8dde1SJohn Marino 
1167d5f8dde1SJohn Marino   if (!result) goto parse_bound_exit;
11685f2eab64SJohn Marino   /* Create the AST node(s). */
1169d5f8dde1SJohn Marino   /* Originally, if min == 0 && max == 0, we immediately replace the whole
1170d5f8dde1SJohn Marino      iteration with EMPTY.  This unfortunately drops any submatches, and
1171d5f8dde1SJohn Marino      messes up setting the pmatch values (we can get tags of -1, and
1172d5f8dde1SJohn Marino      tag values in the billions).  So we leave it and process this case as
1173d5f8dde1SJohn Marino      usual, and wait until tre_expand_ast() to replace with EMPTY */
1174d5f8dde1SJohn Marino #ifdef TRE_APPROX
11755f2eab64SJohn Marino   if (min < 0 && max < 0)
11765f2eab64SJohn Marino     /* Only approximate parameters set, no repetitions. */
11775f2eab64SJohn Marino     min = max = 1;
1178d5f8dde1SJohn Marino #endif /* TRE_APPROX */
11795f2eab64SJohn Marino 
11805f2eab64SJohn Marino   *result = tre_ast_new_iter(ctx->mem, *result, min, max, minimal);
11815f2eab64SJohn Marino   if (!*result)
11825f2eab64SJohn Marino     return REG_ESPACE;
11835f2eab64SJohn Marino 
1184d5f8dde1SJohn Marino #ifdef TRE_APPROX
11855f2eab64SJohn Marino   /* If approximate matching parameters are set, add them to the
11865f2eab64SJohn Marino      iteration node. */
11875f2eab64SJohn Marino   if (approx || costs_set || counts_set)
11885f2eab64SJohn Marino     {
11895f2eab64SJohn Marino       int *params;
11905f2eab64SJohn Marino       tre_iteration_t *iter = (*result)->obj;
11915f2eab64SJohn Marino 
11925f2eab64SJohn Marino       if (costs_set || counts_set)
11935f2eab64SJohn Marino 	{
11945f2eab64SJohn Marino 	  if (limit_ins == TRE_PARAM_UNSET)
11955f2eab64SJohn Marino 	    {
11965f2eab64SJohn Marino 	      if (cost_ins == TRE_PARAM_UNSET)
11975f2eab64SJohn Marino 		limit_ins = 0;
11985f2eab64SJohn Marino 	      else
11995f2eab64SJohn Marino 		limit_ins = INT_MAX;
12005f2eab64SJohn Marino 	    }
12015f2eab64SJohn Marino 
12025f2eab64SJohn Marino 	  if (limit_del == TRE_PARAM_UNSET)
12035f2eab64SJohn Marino 	    {
12045f2eab64SJohn Marino 	      if (cost_del == TRE_PARAM_UNSET)
12055f2eab64SJohn Marino 		limit_del = 0;
12065f2eab64SJohn Marino 	      else
12075f2eab64SJohn Marino 		limit_del = INT_MAX;
12085f2eab64SJohn Marino 	    }
12095f2eab64SJohn Marino 
12105f2eab64SJohn Marino 	  if (limit_subst == TRE_PARAM_UNSET)
12115f2eab64SJohn Marino 	    {
12125f2eab64SJohn Marino 	      if (cost_subst == TRE_PARAM_UNSET)
12135f2eab64SJohn Marino 		limit_subst = 0;
12145f2eab64SJohn Marino 	      else
12155f2eab64SJohn Marino 		limit_subst = INT_MAX;
12165f2eab64SJohn Marino 	    }
12175f2eab64SJohn Marino 	}
12185f2eab64SJohn Marino 
12195f2eab64SJohn Marino       if (cost_max == TRE_PARAM_UNSET)
12205f2eab64SJohn Marino 	cost_max = INT_MAX;
12215f2eab64SJohn Marino       if (limit_err == TRE_PARAM_UNSET)
12225f2eab64SJohn Marino 	limit_err = INT_MAX;
12235f2eab64SJohn Marino 
12245f2eab64SJohn Marino       ctx->have_approx = 1;
12255f2eab64SJohn Marino       params = tre_mem_alloc(ctx->mem, sizeof(*params) * TRE_PARAM_LAST);
12265f2eab64SJohn Marino       if (!params)
12275f2eab64SJohn Marino 	return REG_ESPACE;
12285f2eab64SJohn Marino       for (i = 0; i < TRE_PARAM_LAST; i++)
12295f2eab64SJohn Marino 	params[i] = TRE_PARAM_UNSET;
12305f2eab64SJohn Marino       params[TRE_PARAM_COST_INS] = cost_ins;
12315f2eab64SJohn Marino       params[TRE_PARAM_COST_DEL] = cost_del;
12325f2eab64SJohn Marino       params[TRE_PARAM_COST_SUBST] = cost_subst;
12335f2eab64SJohn Marino       params[TRE_PARAM_COST_MAX] = cost_max;
12345f2eab64SJohn Marino       params[TRE_PARAM_MAX_INS] = limit_ins;
12355f2eab64SJohn Marino       params[TRE_PARAM_MAX_DEL] = limit_del;
12365f2eab64SJohn Marino       params[TRE_PARAM_MAX_SUBST] = limit_subst;
12375f2eab64SJohn Marino       params[TRE_PARAM_MAX_ERR] = limit_err;
12385f2eab64SJohn Marino       iter->params = params;
12395f2eab64SJohn Marino     }
1240d5f8dde1SJohn Marino #endif /* TRE_APPROX */
12415f2eab64SJohn Marino 
1242d5f8dde1SJohn Marino parse_bound_exit:
1243d5f8dde1SJohn Marino #ifdef TRE_APPROX
12445f2eab64SJohn Marino   DPRINT(("tre_parse_bound: min %d, max %d, costs [%d,%d,%d, total %d], "
12455f2eab64SJohn Marino 	  "limits [%d,%d,%d, total %d]\n",
12465f2eab64SJohn Marino 	  min, max, cost_ins, cost_del, cost_subst, cost_max,
12475f2eab64SJohn Marino 	  limit_ins, limit_del, limit_subst, limit_err));
1248d5f8dde1SJohn Marino #else /* !TRE_APPROX */
1249d5f8dde1SJohn Marino   DPRINT(("tre_parse_bound: min %d, max %d\n", min, max));
1250d5f8dde1SJohn Marino #endif /* !TRE_APPROX */
12515f2eab64SJohn Marino 
12525f2eab64SJohn Marino 
12535f2eab64SJohn Marino   ctx->re = r;
12545f2eab64SJohn Marino   return REG_OK;
12555f2eab64SJohn Marino }
12565f2eab64SJohn Marino 
/* Previously, we had PARSE_RESTORE_CFLAGS restore the cflags, but for
   non-self-contained options, like (?i), this causes ((?i)fu)bar to be
   treated more like ((?i)fu(?-i)bar), so the pmatch value is incorrect.
   Because we now set up tags for even non-capturing parenthesized
   subexpressions, we always call PARSE_MARK_FOR_SUBMATCH.  So if we
   pass the unmodified version of cflags to PARSE_MARK_FOR_SUBMATCH and
   have it restore cflags after the subexpression, we don't need to have
   a separate PARSE_RESTORE_CFLAGS, and then after processing the
   non-self-contained option, we can call PARSE_ATOM instead of PARSE_RE.
   This has the side-benefit of now matching the Perl behavior: the RE
   foo(?i)bar|zap is foo(?i)bar OR (?i)zap instead of TRE's previous
   behavior of foo AND (?i) (bar OR zap). */
/* Symbols for the explicit parse stack used by tre_parse().  The parser
   pops one symbol per loop iteration and dispatches on it; a symbol's
   operands (if any) are pushed before the symbol itself, so they are
   popped right after it.  The numeric values follow declaration order,
   starting at 0. */
typedef enum {
  /* Parse a full regexp: one or more branches separated by `|'.
     Operand: bre_branch_begin flag. */
  PARSE_RE = 0,
  /* Parse an atom: a parenthesized subexpression, a bracket expression,
     `.', `^', `$', an escaped character, or a single character.
     Operand: bre_branch_begin flag. */
  PARSE_ATOM,
  /* Pushed on top of a submatch id and the cflags in effect when the
     subexpression was opened. */
  PARSE_MARK_FOR_SUBMATCH,
  /* Parse a branch: one or more concatenated pieces.
     Operand: bre_branch_begin flag. */
  PARSE_BRANCH,
  /* Parse a piece: an atom possibly followed by postfix operators.
     Operand: bre_branch_begin flag. */
  PARSE_PIECE,
  /* If the expression has not ended, parse another piece. */
  PARSE_CATENATION,
  /* Join the saved left-hand tree with the current result into a
     catenation node.  Operand: the saved tree. */
  PARSE_POST_CATENATION,
  /* If a union operator follows, parse another branch. */
  PARSE_UNION,
  /* Join the saved left-hand tree with the current result into a union
     node.  Operands: the saved tree and the position of the `|'. */
  PARSE_POST_UNION,
  /* Parse postfix operators (`*', `+', `?', bounds). */
  PARSE_POSTFIX,
} tre_parse_re_stack_symbol_t;
12815f2eab64SJohn Marino 
12825f2eab64SJohn Marino 
12835f2eab64SJohn Marino reg_errcode_t
tre_parse(tre_parse_ctx_t * ctx)12845f2eab64SJohn Marino tre_parse(tre_parse_ctx_t *ctx)
12855f2eab64SJohn Marino {
12865f2eab64SJohn Marino   tre_ast_node_t *result = NULL;
12875f2eab64SJohn Marino   tre_parse_re_stack_symbol_t symbol;
12885f2eab64SJohn Marino   reg_errcode_t status = REG_OK;
12895f2eab64SJohn Marino   tre_stack_t *stack = ctx->stack;
12905f2eab64SJohn Marino   int bottom = tre_stack_num_objects(stack);
12915f2eab64SJohn Marino   int depth = 0;
12925f2eab64SJohn Marino   int temporary_cflags = 0;
1293d5f8dde1SJohn Marino   int bre_branch_begin;
1294d5f8dde1SJohn Marino #ifdef TRE_DEBUG
1295d5f8dde1SJohn Marino   const tre_char_t *tmp_re;
1296d5f8dde1SJohn Marino #endif
12975f2eab64SJohn Marino 
1298d5f8dde1SJohn Marino   DPRINT(("tre_parse: parsing '%.*" STRF "', len = %d cflags = 0%o\n",
1299d5f8dde1SJohn Marino 	  ctx->len, ctx->re, ctx->len, ctx->cflags));
13005f2eab64SJohn Marino 
1301d5f8dde1SJohn Marino   if (ctx->len <= 0) return REG_EMPTY;
13025f2eab64SJohn Marino   if (!ctx->nofirstsub)
13035f2eab64SJohn Marino     {
1304d5f8dde1SJohn Marino       STACK_PUSH(stack, int, ctx->cflags);
13055f2eab64SJohn Marino       STACK_PUSH(stack, int, ctx->submatch_id);
13065f2eab64SJohn Marino       STACK_PUSH(stack, int, PARSE_MARK_FOR_SUBMATCH);
13075f2eab64SJohn Marino       ctx->submatch_id++;
13085f2eab64SJohn Marino     }
1309d5f8dde1SJohn Marino   STACK_PUSH(stack, int, 0); // bre_branch_begin
13105f2eab64SJohn Marino   STACK_PUSH(stack, int, PARSE_RE);
13115f2eab64SJohn Marino   ctx->re_start = ctx->re;
13125f2eab64SJohn Marino   ctx->re_end = ctx->re + ctx->len;
13135f2eab64SJohn Marino 
13145f2eab64SJohn Marino 
13155f2eab64SJohn Marino   /* The following is basically just a recursive descent parser.  I use
13165f2eab64SJohn Marino      an explicit stack instead of recursive functions mostly because of
13175f2eab64SJohn Marino      two reasons: compatibility with systems which have an overflowable
13185f2eab64SJohn Marino      call stack, and efficiency (both in lines of code and speed).  */
1319d5f8dde1SJohn Marino   while (tre_stack_num_objects(stack) > bottom)
13205f2eab64SJohn Marino     {
13215f2eab64SJohn Marino       symbol = tre_stack_pop_int(stack);
13225f2eab64SJohn Marino       switch (symbol)
13235f2eab64SJohn Marino 	{
13245f2eab64SJohn Marino 	case PARSE_RE:
13255f2eab64SJohn Marino 	  /* Parse a full regexp.  A regexp is one or more branches,
13265f2eab64SJohn Marino 	     separated by the union operator `|'. */
1327d5f8dde1SJohn Marino 	  bre_branch_begin = tre_stack_pop_int(stack);
1328d5f8dde1SJohn Marino 	  if (
13295f2eab64SJohn Marino #ifdef REG_LITERAL
1330d5f8dde1SJohn Marino 	      !(ctx->cflags & REG_LITERAL) &&
13315f2eab64SJohn Marino #endif /* REG_LITERAL */
1332d5f8dde1SJohn Marino 	      ctx->cflags & (REG_EXTENDED | REG_ENHANCED))
13335f2eab64SJohn Marino 	    STACK_PUSHX(stack, int, PARSE_UNION);
1334d5f8dde1SJohn Marino 	  STACK_PUSHX(stack, int, bre_branch_begin);
13355f2eab64SJohn Marino 	  STACK_PUSHX(stack, int, PARSE_BRANCH);
13365f2eab64SJohn Marino 	  break;
13375f2eab64SJohn Marino 
13385f2eab64SJohn Marino 	case PARSE_BRANCH:
13395f2eab64SJohn Marino 	  /* Parse a branch.  A branch is one or more pieces, concatenated.
13405f2eab64SJohn Marino 	     A piece is an atom possibly followed by a postfix operator. */
1341d5f8dde1SJohn Marino 	  bre_branch_begin = tre_stack_pop_int(stack);
13425f2eab64SJohn Marino 	  STACK_PUSHX(stack, int, PARSE_CATENATION);
1343d5f8dde1SJohn Marino 	  STACK_PUSHX(stack, int, bre_branch_begin);
13445f2eab64SJohn Marino 	  STACK_PUSHX(stack, int, PARSE_PIECE);
13455f2eab64SJohn Marino 	  break;
13465f2eab64SJohn Marino 
13475f2eab64SJohn Marino 	case PARSE_PIECE:
13485f2eab64SJohn Marino 	  /* Parse a piece.  A piece is an atom possibly followed by one
13495f2eab64SJohn Marino 	     or more postfix operators. */
1350d5f8dde1SJohn Marino 	  bre_branch_begin = tre_stack_pop_int(stack);
13515f2eab64SJohn Marino 	  STACK_PUSHX(stack, int, PARSE_POSTFIX);
1352d5f8dde1SJohn Marino 	  STACK_PUSHX(stack, int, bre_branch_begin);
13535f2eab64SJohn Marino 	  STACK_PUSHX(stack, int, PARSE_ATOM);
13545f2eab64SJohn Marino 	  break;
13555f2eab64SJohn Marino 
13565f2eab64SJohn Marino 	case PARSE_CATENATION:
13575f2eab64SJohn Marino 	  /* If the expression has not ended, parse another piece. */
13585f2eab64SJohn Marino 	  {
13595f2eab64SJohn Marino 	    tre_char_t c;
13605f2eab64SJohn Marino 	    if (ctx->re >= ctx->re_end)
13615f2eab64SJohn Marino 	      break;
13625f2eab64SJohn Marino 	    c = *ctx->re;
13635f2eab64SJohn Marino #ifdef REG_LITERAL
13645f2eab64SJohn Marino 	    if (!(ctx->cflags & REG_LITERAL))
13655f2eab64SJohn Marino 	      {
13665f2eab64SJohn Marino #endif /* REG_LITERAL */
1367d5f8dde1SJohn Marino 		if ((ctx->cflags & REG_EXTENDED && c == CHAR_PIPE) ||
1368d5f8dde1SJohn Marino 		    ((ctx->cflags & (REG_EXTENDED | REG_ENHANCED)) == REG_ENHANCED
1369d5f8dde1SJohn Marino 		    && ctx->re + 1 < ctx->re_end && c == CHAR_BACKSLASH &&
1370d5f8dde1SJohn Marino 		    *(ctx->re + 1) == CHAR_PIPE))
13715f2eab64SJohn Marino 		  break;
13725f2eab64SJohn Marino 		if ((ctx->cflags & REG_EXTENDED
13735f2eab64SJohn Marino 		     && c == CHAR_RPAREN && depth > 0)
13745f2eab64SJohn Marino 		    || (!(ctx->cflags & REG_EXTENDED)
1375d5f8dde1SJohn Marino 			&& ctx->re + 1 < ctx->re_end && c == CHAR_BACKSLASH
1376d5f8dde1SJohn Marino 			    && *(ctx->re + 1) == CHAR_RPAREN))
13775f2eab64SJohn Marino 		  {
13785f2eab64SJohn Marino 		    if (!(ctx->cflags & REG_EXTENDED) && depth == 0)
1379d5f8dde1SJohn Marino 		      return REG_EPAREN;
13805f2eab64SJohn Marino 		    DPRINT(("tre_parse:	  group end: '%.*" STRF "'\n",
13815f2eab64SJohn Marino 			    REST(ctx->re)));
13825f2eab64SJohn Marino 		    depth--;
1383d5f8dde1SJohn Marino 		    if (!(ctx->cflags & (REG_EXTENDED | REG_ENHANCED)))
13845f2eab64SJohn Marino 		      ctx->re += 2;
13855f2eab64SJohn Marino 		    break;
13865f2eab64SJohn Marino 		  }
13875f2eab64SJohn Marino #ifdef REG_LITERAL
13885f2eab64SJohn Marino 	      }
13895f2eab64SJohn Marino #endif /* REG_LITERAL */
13905f2eab64SJohn Marino 
1391d5f8dde1SJohn Marino #ifdef REG_LEFT_ASSOC
1392d5f8dde1SJohn Marino 	    if (ctx->cflags & REG_LEFT_ASSOC)
13935f2eab64SJohn Marino 	      {
1394d5f8dde1SJohn Marino 		/* Left associative concatenation. */
1395d5f8dde1SJohn Marino 		STACK_PUSHX(stack, int, PARSE_CATENATION);
13965f2eab64SJohn Marino 		STACK_PUSHX(stack, voidptr, result);
13975f2eab64SJohn Marino 		STACK_PUSHX(stack, int, PARSE_POST_CATENATION);
1398d5f8dde1SJohn Marino 		STACK_PUSHX(stack, int, 0); // bre_branch_begin
13995f2eab64SJohn Marino 		STACK_PUSHX(stack, int, PARSE_PIECE);
14005f2eab64SJohn Marino 	      }
14015f2eab64SJohn Marino 	    else
1402d5f8dde1SJohn Marino #endif /* REG_LEFT_ASSOC */
14035f2eab64SJohn Marino 	      {
1404d5f8dde1SJohn Marino 		/* Default case, right associative concatenation. */
14055f2eab64SJohn Marino 		STACK_PUSHX(stack, voidptr, result);
14065f2eab64SJohn Marino 		STACK_PUSHX(stack, int, PARSE_POST_CATENATION);
1407d5f8dde1SJohn Marino 		STACK_PUSHX(stack, int, PARSE_CATENATION);
1408d5f8dde1SJohn Marino 		STACK_PUSHX(stack, int, 0); // bre_branch_begin
14095f2eab64SJohn Marino 		STACK_PUSHX(stack, int, PARSE_PIECE);
14105f2eab64SJohn Marino 	      }
14115f2eab64SJohn Marino 	    break;
14125f2eab64SJohn Marino 	  }
14135f2eab64SJohn Marino 
14145f2eab64SJohn Marino 	case PARSE_POST_CATENATION:
14155f2eab64SJohn Marino 	  {
14165f2eab64SJohn Marino 	    tre_ast_node_t *tree = tre_stack_pop_voidptr(stack);
14175f2eab64SJohn Marino 	    tre_ast_node_t *tmp_node;
14185f2eab64SJohn Marino 	    tmp_node = tre_ast_new_catenation(ctx->mem, tree, result);
14195f2eab64SJohn Marino 	    if (!tmp_node)
14205f2eab64SJohn Marino 	      return REG_ESPACE;
14215f2eab64SJohn Marino 	    result = tmp_node;
14225f2eab64SJohn Marino 	    break;
14235f2eab64SJohn Marino 	  }
14245f2eab64SJohn Marino 
14255f2eab64SJohn Marino 	case PARSE_UNION:
14265f2eab64SJohn Marino 	  if (ctx->re >= ctx->re_end)
14275f2eab64SJohn Marino 	    break;
14285f2eab64SJohn Marino #ifdef REG_LITERAL
14295f2eab64SJohn Marino 	  if (ctx->cflags & REG_LITERAL)
14305f2eab64SJohn Marino 	    break;
14315f2eab64SJohn Marino #endif /* REG_LITERAL */
1432d5f8dde1SJohn Marino 	  if (!(ctx->cflags & REG_EXTENDED))
1433d5f8dde1SJohn Marino 	    {
1434d5f8dde1SJohn Marino 	      if (*ctx->re != CHAR_BACKSLASH || ctx->re + 1 >= ctx->re_end)
1435d5f8dde1SJohn Marino 		break;
1436d5f8dde1SJohn Marino 	      ctx->re++;
1437d5f8dde1SJohn Marino 	    }
14385f2eab64SJohn Marino 	  switch (*ctx->re)
14395f2eab64SJohn Marino 	    {
14405f2eab64SJohn Marino 	    case CHAR_PIPE:
14415f2eab64SJohn Marino 	      DPRINT(("tre_parse:	union: '%.*" STRF "'\n",
14425f2eab64SJohn Marino 		      REST(ctx->re)));
14435f2eab64SJohn Marino 	      STACK_PUSHX(stack, int, PARSE_UNION);
1444d5f8dde1SJohn Marino 	      STACK_PUSHX(stack, voidptr, (void *)ctx->re);
14455f2eab64SJohn Marino 	      STACK_PUSHX(stack, voidptr, result);
14465f2eab64SJohn Marino 	      STACK_PUSHX(stack, int, PARSE_POST_UNION);
1447d5f8dde1SJohn Marino 	      /* We need to pass a boolean (eventually) to PARSE_ATOM to
1448d5f8dde1SJohn Marino 		 indicate if this is the beginning of a BRE extended branch. */
1449d5f8dde1SJohn Marino 	      STACK_PUSHX(stack, int, (ctx->cflags & (REG_EXTENDED | REG_ENHANCED)) == REG_ENHANCED); // bre_branch_begin
14505f2eab64SJohn Marino 	      STACK_PUSHX(stack, int, PARSE_BRANCH);
14515f2eab64SJohn Marino 	      ctx->re++;
14525f2eab64SJohn Marino 	      break;
14535f2eab64SJohn Marino 
14545f2eab64SJohn Marino 	    case CHAR_RPAREN:
14555f2eab64SJohn Marino 	      ctx->re++;
14565f2eab64SJohn Marino 	      break;
14575f2eab64SJohn Marino 
14585f2eab64SJohn Marino 	    default:
1459d5f8dde1SJohn Marino 	      if (!(ctx->cflags & REG_EXTENDED))
1460d5f8dde1SJohn Marino 		ctx->re--;
14615f2eab64SJohn Marino 	      break;
14625f2eab64SJohn Marino 	    }
14635f2eab64SJohn Marino 	  break;
14645f2eab64SJohn Marino 
14655f2eab64SJohn Marino 	case PARSE_POST_UNION:
14665f2eab64SJohn Marino 	  {
14675f2eab64SJohn Marino 	    tre_ast_node_t *tmp_node;
14685f2eab64SJohn Marino 	    tre_ast_node_t *tree = tre_stack_pop_voidptr(stack);
1469d5f8dde1SJohn Marino 	    const tre_char_t *pipechar = tre_stack_pop_voidptr(stack);
1470d5f8dde1SJohn Marino 	    /* error on empty expression at end of union */
1471d5f8dde1SJohn Marino 	    if (pipechar == ctx->re - 1)
1472d5f8dde1SJohn Marino 	      {
1473d5f8dde1SJohn Marino 		return REG_EMPTY;
1474d5f8dde1SJohn Marino 	      }
14755f2eab64SJohn Marino 	    tmp_node = tre_ast_new_union(ctx->mem, tree, result);
14765f2eab64SJohn Marino 	    if (!tmp_node)
14775f2eab64SJohn Marino 	      return REG_ESPACE;
14785f2eab64SJohn Marino 	    result = tmp_node;
14795f2eab64SJohn Marino 	    break;
14805f2eab64SJohn Marino 	  }
14815f2eab64SJohn Marino 
14825f2eab64SJohn Marino 	case PARSE_POSTFIX:
14835f2eab64SJohn Marino 	  /* Parse postfix operators. */
14845f2eab64SJohn Marino 	  if (ctx->re >= ctx->re_end)
14855f2eab64SJohn Marino 	    break;
14865f2eab64SJohn Marino #ifdef REG_LITERAL
14875f2eab64SJohn Marino 	  if (ctx->cflags & REG_LITERAL)
14885f2eab64SJohn Marino 	    break;
14895f2eab64SJohn Marino #endif /* REG_LITERAL */
1490d5f8dde1SJohn Marino 	  int minimal = (ctx->cflags & REG_UNGREEDY) ? 1 : 0;
1491d5f8dde1SJohn Marino 	  int rep_min = 0;
1492d5f8dde1SJohn Marino 	  int rep_max = -1;
1493d5f8dde1SJohn Marino #ifdef TRE_DEBUG
1494d5f8dde1SJohn Marino 	  int lbrace_off;
1495d5f8dde1SJohn Marino #endif
14965f2eab64SJohn Marino 	  switch (*ctx->re)
14975f2eab64SJohn Marino 	    {
14985f2eab64SJohn Marino 	    case CHAR_PLUS:
14995f2eab64SJohn Marino 	    case CHAR_QUESTIONMARK:
15005f2eab64SJohn Marino 	      if (!(ctx->cflags & REG_EXTENDED))
15015f2eab64SJohn Marino 		break;
15025f2eab64SJohn Marino 		/*FALLTHROUGH*/
15035f2eab64SJohn Marino 	    case CHAR_STAR:
15045f2eab64SJohn Marino 	      {
15055f2eab64SJohn Marino 		tre_ast_node_t *tmp_node;
15065f2eab64SJohn Marino #ifdef TRE_DEBUG
1507d5f8dde1SJohn Marino 		const char *tstr = "star";
15085f2eab64SJohn Marino 		tmp_re = ctx->re;
15095f2eab64SJohn Marino #endif
15105f2eab64SJohn Marino 
1511d5f8dde1SJohn Marino 	handle_plus_or_question:
1512d5f8dde1SJohn Marino 		/* error on iteration of raw assertion (not in subexpression) */
1513d5f8dde1SJohn Marino 		if (result->type == LITERAL && result->submatch_id < 0 &&
1514d5f8dde1SJohn Marino 		    IS_ASSERTION((tre_literal_t *)result->obj))
1515d5f8dde1SJohn Marino 		  {
1516d5f8dde1SJohn Marino 		    if (!(ctx->cflags & REG_EXTENDED)) break;
1517d5f8dde1SJohn Marino 		    return REG_BADRPT;
1518d5f8dde1SJohn Marino 		  }
1519d5f8dde1SJohn Marino 		if (*ctx->re == CHAR_PLUS)
1520d5f8dde1SJohn Marino 		  {
1521d5f8dde1SJohn Marino 		    rep_min = 1;
1522d5f8dde1SJohn Marino #ifdef TRE_DEBUG
1523d5f8dde1SJohn Marino 		    tstr = "plus";
1524d5f8dde1SJohn Marino #endif
1525d5f8dde1SJohn Marino 		  }
1526d5f8dde1SJohn Marino 		if (*ctx->re == CHAR_QUESTIONMARK)
1527d5f8dde1SJohn Marino 		  {
1528d5f8dde1SJohn Marino 		    rep_max = 1;
1529d5f8dde1SJohn Marino #ifdef TRE_DEBUG
1530d5f8dde1SJohn Marino 		    tstr = "questionmark";
1531d5f8dde1SJohn Marino #endif
1532d5f8dde1SJohn Marino 		  }
1533d5f8dde1SJohn Marino 
1534d5f8dde1SJohn Marino 		if (ctx->cflags & REG_EXTENDED)
1535d5f8dde1SJohn Marino 		  {
15365f2eab64SJohn Marino 		    if (ctx->re + 1 < ctx->re_end)
15375f2eab64SJohn Marino 		      {
15385f2eab64SJohn Marino 			if (*(ctx->re + 1) == CHAR_QUESTIONMARK)
15395f2eab64SJohn Marino 			  {
1540d5f8dde1SJohn Marino 			    /* Process the question mark only in enhanced mode.
1541d5f8dde1SJohn Marino 			       Otherwise, the question mark is an error in ERE */
1542d5f8dde1SJohn Marino 			    if (ctx->cflags & REG_ENHANCED)
1543d5f8dde1SJohn Marino 			      {
15445f2eab64SJohn Marino 				minimal = !(ctx->cflags & REG_UNGREEDY);
15455f2eab64SJohn Marino 				ctx->re++;
15465f2eab64SJohn Marino 			      }
1547d5f8dde1SJohn Marino 			    else return REG_BADRPT;
1548d5f8dde1SJohn Marino 			  }
15495f2eab64SJohn Marino 			else if (*(ctx->re + 1) == CHAR_STAR
15505f2eab64SJohn Marino 				 || *(ctx->re + 1) == CHAR_PLUS)
15515f2eab64SJohn Marino 			  {
15525f2eab64SJohn Marino 			    /* These are reserved for future extensions. */
15535f2eab64SJohn Marino 			    return REG_BADRPT;
15545f2eab64SJohn Marino 			  }
15555f2eab64SJohn Marino 		      }
1556d5f8dde1SJohn Marino 		  }
1557d5f8dde1SJohn Marino 		else
1558d5f8dde1SJohn Marino 		  {
1559d5f8dde1SJohn Marino 		    if (ctx->re + 1 < ctx->re_end && *(ctx->re + 1) == CHAR_STAR)
1560d5f8dde1SJohn Marino 		      {
1561d5f8dde1SJohn Marino 			/* This is reserved for future extensions. */
1562d5f8dde1SJohn Marino 			return REG_BADRPT;
1563d5f8dde1SJohn Marino 		      }
1564d5f8dde1SJohn Marino 		    if (ctx->re + 2 < ctx->re_end)
1565d5f8dde1SJohn Marino 		      {
1566*122b686eSSascha Wildner 			if (*(ctx->re + 1) == CHAR_BACKSLASH && *(ctx->re + 2) == CHAR_QUESTIONMARK)
1567d5f8dde1SJohn Marino 			  {
1568d5f8dde1SJohn Marino 			    /* Process the question mark only in enhanced mode.
1569d5f8dde1SJohn Marino 			       Otherwise, the question mark is a literal in BRE */
1570d5f8dde1SJohn Marino 			    if (ctx->cflags & REG_ENHANCED)
1571d5f8dde1SJohn Marino 			      {
1572d5f8dde1SJohn Marino 				minimal = !(ctx->cflags & REG_UNGREEDY);
1573d5f8dde1SJohn Marino 				ctx->re += 2;
1574d5f8dde1SJohn Marino 			      }
1575d5f8dde1SJohn Marino 			  }
1576d5f8dde1SJohn Marino 			else if (*(ctx->re + 1) == CHAR_BACKSLASH && *(ctx->re + 2) == CHAR_PLUS)
1577d5f8dde1SJohn Marino 			  {
1578d5f8dde1SJohn Marino 			    /* This is reserved for future extensions. */
1579d5f8dde1SJohn Marino 			    return REG_BADRPT;
1580d5f8dde1SJohn Marino 			  }
1581d5f8dde1SJohn Marino 		      }
1582d5f8dde1SJohn Marino 		  }
15835f2eab64SJohn Marino 
1584d5f8dde1SJohn Marino 		if (minimal)
1585d5f8dde1SJohn Marino 		  ctx->num_reorder_tags++;
1586d5f8dde1SJohn Marino 
1587d5f8dde1SJohn Marino 		DPRINT(("tre_parse: %s %s: '%.*" STRF "'\n",
1588d5f8dde1SJohn Marino 			minimal ? "  minimal" : "greedy", tstr, REST(tmp_re)));
1589d5f8dde1SJohn Marino 		if (result == NULL)
1590d5f8dde1SJohn Marino 		  {
1591d5f8dde1SJohn Marino 		    if (ctx->cflags & REG_EXTENDED) return REG_BADRPT;
1592d5f8dde1SJohn Marino 		    else goto parse_literal;
1593d5f8dde1SJohn Marino 		  }
15945f2eab64SJohn Marino 		ctx->re++;
15955f2eab64SJohn Marino 		tmp_node = tre_ast_new_iter(ctx->mem, result, rep_min, rep_max,
15965f2eab64SJohn Marino 					    minimal);
15975f2eab64SJohn Marino 		if (tmp_node == NULL)
15985f2eab64SJohn Marino 		  return REG_ESPACE;
15995f2eab64SJohn Marino 		result = tmp_node;
1600d5f8dde1SJohn Marino 
1601d5f8dde1SJohn Marino 		/* Set the iterator with a submatch id in the invisible range
1602d5f8dde1SJohn Marino 		 * (which will be overridden if a real submatch is needed) */
1603d5f8dde1SJohn Marino 		result->submatch_id = ctx->submatch_id_invisible++;
1604d5f8dde1SJohn Marino 
1605d5f8dde1SJohn Marino #if 0
1606d5f8dde1SJohn Marino 		/* We don't allow multiple postfixes, but this might be needed
1607d5f8dde1SJohn Marino 		   to support approximate matching */
16085f2eab64SJohn Marino 		STACK_PUSHX(stack, int, PARSE_POSTFIX);
1609d5f8dde1SJohn Marino #endif
16105f2eab64SJohn Marino 	      }
16115f2eab64SJohn Marino 	      break;
16125f2eab64SJohn Marino 
16135f2eab64SJohn Marino 	    case CHAR_BACKSLASH:
16145f2eab64SJohn Marino 	      /* "\{" is special without REG_EXTENDED */
1615d5f8dde1SJohn Marino 	      /* "\+" and "\?" are special with REG_ENHANCED for BRE */
16165f2eab64SJohn Marino 	      if (!(ctx->cflags & REG_EXTENDED)
1617d5f8dde1SJohn Marino 		  && ctx->re + 1 < ctx->re_end)
16185f2eab64SJohn Marino 		{
1619d5f8dde1SJohn Marino 		  switch (*(ctx->re + 1))
1620d5f8dde1SJohn Marino 		    {
1621d5f8dde1SJohn Marino 		    case CHAR_LBRACE:
16225f2eab64SJohn Marino 		      ctx->re++;
1623d5f8dde1SJohn Marino #ifdef TRE_DEBUG
1624d5f8dde1SJohn Marino 		      lbrace_off = 2;
1625d5f8dde1SJohn Marino #endif
16265f2eab64SJohn Marino 		      goto parse_brace;
1627d5f8dde1SJohn Marino 		    case CHAR_PLUS:
1628d5f8dde1SJohn Marino 		    case CHAR_QUESTIONMARK:
1629d5f8dde1SJohn Marino 		      if (ctx->cflags & REG_ENHANCED)
1630d5f8dde1SJohn Marino 			{
1631d5f8dde1SJohn Marino #ifdef TRE_DEBUG
1632d5f8dde1SJohn Marino 			  tmp_re = ctx->re;
1633d5f8dde1SJohn Marino #endif
1634d5f8dde1SJohn Marino 			  ctx->re++;
1635d5f8dde1SJohn Marino 			  goto handle_plus_or_question;
1636d5f8dde1SJohn Marino 			}
1637d5f8dde1SJohn Marino 		      break;
1638d5f8dde1SJohn Marino 		    }
1639d5f8dde1SJohn Marino 		  break;
16405f2eab64SJohn Marino 		}
16415f2eab64SJohn Marino 	      else
16425f2eab64SJohn Marino 		break;
16435f2eab64SJohn Marino 
16445f2eab64SJohn Marino 	    case CHAR_LBRACE:
1645d5f8dde1SJohn Marino 	      {
1646d5f8dde1SJohn Marino 		int raw_assertion;
1647d5f8dde1SJohn Marino 
16485f2eab64SJohn Marino 		/* "{" is literal without REG_EXTENDED */
16495f2eab64SJohn Marino 		if (!(ctx->cflags & REG_EXTENDED))
16505f2eab64SJohn Marino 		  break;
1651d5f8dde1SJohn Marino #ifdef TRE_DEBUG
1652d5f8dde1SJohn Marino 		lbrace_off = 1;
1653d5f8dde1SJohn Marino #endif
16545f2eab64SJohn Marino 
16555f2eab64SJohn Marino 	    parse_brace:
1656d5f8dde1SJohn Marino 		/* error on iteration of raw assertion (not in subexpression),
1657d5f8dde1SJohn Marino 		   but wait until after parsing bounds */
1658d5f8dde1SJohn Marino 		raw_assertion = (result->type == LITERAL
1659d5f8dde1SJohn Marino 				 && result->submatch_id < 0
1660d5f8dde1SJohn Marino 				 && IS_ASSERTION((tre_literal_t *)result->obj));
16615f2eab64SJohn Marino 		ctx->re++;
16625f2eab64SJohn Marino 
16635f2eab64SJohn Marino 		status = tre_parse_bound(ctx, &result);
1664d5f8dde1SJohn Marino #ifdef ERE_LITERAL_LBRACE_ON_NON_NUMERIC_BOUND
1665d5f8dde1SJohn Marino 		/* For ERE, if status is REG_NOMATCH, this mean the lbrace
1666d5f8dde1SJohn Marino 		   is to be treated as a literal. */
1667d5f8dde1SJohn Marino 		if (status == REG_NOMATCH)
1668d5f8dde1SJohn Marino 		  {
1669d5f8dde1SJohn Marino 		    ctx->re--;
1670d5f8dde1SJohn Marino 		    break;
1671d5f8dde1SJohn Marino 		  }
1672d5f8dde1SJohn Marino #endif /* ERE_LITERAL_LBRACE_ON_NON_NUMERIC_BOUND */
1673d5f8dde1SJohn Marino 		DPRINT(("tre_parse:	bound: '%.*" STRF "'\n",
1674d5f8dde1SJohn Marino 			REST(ctx->re - lbrace_off)));
16755f2eab64SJohn Marino 		if (status != REG_OK)
16765f2eab64SJohn Marino 		  return status;
1677d5f8dde1SJohn Marino 		if (raw_assertion) return REG_BADRPT;
1678d5f8dde1SJohn Marino 
1679d5f8dde1SJohn Marino 		/* Set the iterator with a submatch id in the invisible range
1680d5f8dde1SJohn Marino 		 * (which will be overridden if a real submatch is needed) */
1681d5f8dde1SJohn Marino 		if (result->type == ITERATION)
1682d5f8dde1SJohn Marino 		  result->submatch_id = ctx->submatch_id_invisible++;
1683d5f8dde1SJohn Marino 
1684d5f8dde1SJohn Marino #if 0
1685d5f8dde1SJohn Marino 		/* We don't allow multiple postfixes, but this might be needed
1686d5f8dde1SJohn Marino 		   to support approximate matching */
16875f2eab64SJohn Marino 		STACK_PUSHX(stack, int, PARSE_POSTFIX);
1688d5f8dde1SJohn Marino #endif
16895f2eab64SJohn Marino 		break;
16905f2eab64SJohn Marino 	      }
1691d5f8dde1SJohn Marino 	    }
16925f2eab64SJohn Marino 	  break;
16935f2eab64SJohn Marino 
16945f2eab64SJohn Marino 	case PARSE_ATOM:
1695d5f8dde1SJohn Marino 	  {
16965f2eab64SJohn Marino 	    /* Parse an atom.  An atom is a regular expression enclosed in `()',
16975f2eab64SJohn Marino 	       an empty set of `()', a bracket expression, `.', `^', `$',
16985f2eab64SJohn Marino 	       a `\' followed by a character, or a single character. */
16995f2eab64SJohn Marino 
1700d5f8dde1SJohn Marino 	    /* The stack contains a boolean value, whether PARSE_ATOM is
1701d5f8dde1SJohn Marino 	       being called just after the start of a group (left paren)
1702d5f8dde1SJohn Marino 	       in a BRE */
1703d5f8dde1SJohn Marino 	    bre_branch_begin = tre_stack_pop_int(stack);
1704d5f8dde1SJohn Marino 
17055f2eab64SJohn Marino 	    /* End of regexp? (empty string). */
17065f2eab64SJohn Marino 	    if (ctx->re >= ctx->re_end)
17075f2eab64SJohn Marino 	      goto parse_literal;
17085f2eab64SJohn Marino 
17095f2eab64SJohn Marino #ifdef REG_LITERAL
17105f2eab64SJohn Marino 	    if (ctx->cflags & REG_LITERAL)
17115f2eab64SJohn Marino 	      goto parse_literal;
17125f2eab64SJohn Marino #endif /* REG_LITERAL */
17135f2eab64SJohn Marino 
17145f2eab64SJohn Marino 	    switch (*ctx->re)
17155f2eab64SJohn Marino 	      {
17165f2eab64SJohn Marino 	      case CHAR_LPAREN:  /* parenthesized subexpression */
17175f2eab64SJohn Marino 
17185f2eab64SJohn Marino 		/* Handle "(?...)" extensions.  They work in a way similar
17195f2eab64SJohn Marino 		   to Perls corresponding extensions. */
1720d5f8dde1SJohn Marino 		if ((ctx->cflags & (REG_EXTENDED|REG_ENHANCED)) ==
1721d5f8dde1SJohn Marino 		    (REG_EXTENDED|REG_ENHANCED)
17225f2eab64SJohn Marino 		    && *(ctx->re + 1) == CHAR_QUESTIONMARK)
17235f2eab64SJohn Marino 		  {
17245f2eab64SJohn Marino 		    int new_cflags = ctx->cflags;
17255f2eab64SJohn Marino 		    int bit = 1;
1726d5f8dde1SJohn Marino 		    int invisible_submatch = 0;
1727d5f8dde1SJohn Marino 		    DPRINT(("tre_parse:	extension: '%.*" STRF "'\n",
17285f2eab64SJohn Marino 			    REST(ctx->re)));
17295f2eab64SJohn Marino 		    ctx->re += 2;
17305f2eab64SJohn Marino 		    while (/*CONSTCOND*/1)
17315f2eab64SJohn Marino 		      {
17325f2eab64SJohn Marino 			if (*ctx->re == L'i')
17335f2eab64SJohn Marino 			  {
1734d5f8dde1SJohn Marino 			    DPRINT(("tre_parse:	    icase: '%.*" STRF "'\n",
17355f2eab64SJohn Marino 				    REST(ctx->re)));
17365f2eab64SJohn Marino 			    if (bit)
17375f2eab64SJohn Marino 			      new_cflags |= REG_ICASE;
17385f2eab64SJohn Marino 			    else
17395f2eab64SJohn Marino 			      new_cflags &= ~REG_ICASE;
17405f2eab64SJohn Marino 			    ctx->re++;
17415f2eab64SJohn Marino 			  }
17425f2eab64SJohn Marino 			else if (*ctx->re == L'n')
17435f2eab64SJohn Marino 			  {
1744d5f8dde1SJohn Marino 			    DPRINT(("tre_parse:	  newline: '%.*" STRF "'\n",
17455f2eab64SJohn Marino 				    REST(ctx->re)));
17465f2eab64SJohn Marino 			    if (bit)
17475f2eab64SJohn Marino 			      new_cflags |= REG_NEWLINE;
17485f2eab64SJohn Marino 			    else
17495f2eab64SJohn Marino 			      new_cflags &= ~REG_NEWLINE;
17505f2eab64SJohn Marino 			    ctx->re++;
17515f2eab64SJohn Marino 			  }
1752d5f8dde1SJohn Marino #ifdef REG_LEFT_ASSOC
1753d5f8dde1SJohn Marino 			else if (*ctx->re == L'l')
17545f2eab64SJohn Marino 			  {
1755d5f8dde1SJohn Marino 			    DPRINT(("tre_parse: left assoc: '%.*" STRF "'\n",
17565f2eab64SJohn Marino 				    REST(ctx->re)));
17575f2eab64SJohn Marino 			    if (bit)
1758d5f8dde1SJohn Marino 			      new_cflags |= REG_LEFT_ASSOC;
17595f2eab64SJohn Marino 			    else
1760d5f8dde1SJohn Marino 			      new_cflags &= ~REG_LEFT_ASSOC;
17615f2eab64SJohn Marino 			    ctx->re++;
17625f2eab64SJohn Marino 			  }
1763d5f8dde1SJohn Marino #endif /* REG_LEFT_ASSOC */
17645f2eab64SJohn Marino #ifdef REG_UNGREEDY
17655f2eab64SJohn Marino 			else if (*ctx->re == L'U')
17665f2eab64SJohn Marino 			  {
1767d5f8dde1SJohn Marino 			    DPRINT(("tre_parse:    ungreedy: '%.*" STRF "'\n",
17685f2eab64SJohn Marino 				    REST(ctx->re)));
17695f2eab64SJohn Marino 			    if (bit)
17705f2eab64SJohn Marino 			      new_cflags |= REG_UNGREEDY;
17715f2eab64SJohn Marino 			    else
17725f2eab64SJohn Marino 			      new_cflags &= ~REG_UNGREEDY;
17735f2eab64SJohn Marino 			    ctx->re++;
17745f2eab64SJohn Marino 			  }
17755f2eab64SJohn Marino #endif /* REG_UNGREEDY */
17765f2eab64SJohn Marino 			else if (*ctx->re == CHAR_MINUS)
17775f2eab64SJohn Marino 			  {
1778d5f8dde1SJohn Marino 			    DPRINT(("tre_parse:	 turn off: '%.*" STRF "'\n",
17795f2eab64SJohn Marino 				    REST(ctx->re)));
17805f2eab64SJohn Marino 			    ctx->re++;
17815f2eab64SJohn Marino 			    bit = 0;
17825f2eab64SJohn Marino 			  }
17835f2eab64SJohn Marino 			else if (*ctx->re == CHAR_COLON)
17845f2eab64SJohn Marino 			  {
1785d5f8dde1SJohn Marino 			    DPRINT(("tre_parse:	 no group: '%.*" STRF
1786d5f8dde1SJohn Marino 				    "', (invisible submatch %d)\n",
1787d5f8dde1SJohn Marino 				    REST(ctx->re), ctx->submatch_id_invisible));
17885f2eab64SJohn Marino 			    ctx->re++;
17895f2eab64SJohn Marino 			    depth++;
1790d5f8dde1SJohn Marino 			    invisible_submatch = 1;
17915f2eab64SJohn Marino 			    break;
17925f2eab64SJohn Marino 			  }
17935f2eab64SJohn Marino 			else if (*ctx->re == CHAR_HASH)
17945f2eab64SJohn Marino 			  {
1795d5f8dde1SJohn Marino 			    DPRINT(("tre_parse:    comment: '%.*" STRF "'\n",
17965f2eab64SJohn Marino 				    REST(ctx->re)));
17975f2eab64SJohn Marino 			    /* A comment can contain any character except a
17985f2eab64SJohn Marino 			       right parenthesis */
17995f2eab64SJohn Marino 			    while (*ctx->re != CHAR_RPAREN
18005f2eab64SJohn Marino 				   && ctx->re < ctx->re_end)
18015f2eab64SJohn Marino 			      ctx->re++;
18025f2eab64SJohn Marino 			    if (*ctx->re == CHAR_RPAREN && ctx->re < ctx->re_end)
18035f2eab64SJohn Marino 			      {
18045f2eab64SJohn Marino 				ctx->re++;
18055f2eab64SJohn Marino 				break;
18065f2eab64SJohn Marino 			      }
18075f2eab64SJohn Marino 			    else
18085f2eab64SJohn Marino 			      return REG_BADPAT;
18095f2eab64SJohn Marino 			  }
18105f2eab64SJohn Marino 			else if (*ctx->re == CHAR_RPAREN)
18115f2eab64SJohn Marino 			  {
18125f2eab64SJohn Marino 			    ctx->re++;
18135f2eab64SJohn Marino 			    break;
18145f2eab64SJohn Marino 			  }
18155f2eab64SJohn Marino 			else
1816d5f8dde1SJohn Marino 			  return REG_BADRPT;
18175f2eab64SJohn Marino 		      }
18185f2eab64SJohn Marino 
18195f2eab64SJohn Marino 		    /* Turn on the cflags changes for the rest of the
18205f2eab64SJohn Marino 		       enclosing group. */
1821d5f8dde1SJohn Marino 		    if (invisible_submatch)
1822d5f8dde1SJohn Marino 		      {
18235f2eab64SJohn Marino 			STACK_PUSHX(stack, int, ctx->cflags);
1824d5f8dde1SJohn Marino 			STACK_PUSHX(stack, int, ctx->submatch_id_invisible);
1825d5f8dde1SJohn Marino 			STACK_PUSHX(stack, int, PARSE_MARK_FOR_SUBMATCH);
1826d5f8dde1SJohn Marino 			ctx->submatch_id_invisible++;
1827d5f8dde1SJohn Marino 			STACK_PUSHX(stack, int, 0); // bre_branch_begin
18285f2eab64SJohn Marino 			STACK_PUSHX(stack, int, PARSE_RE);
1829d5f8dde1SJohn Marino 		      }
1830d5f8dde1SJohn Marino 		    else {
1831d5f8dde1SJohn Marino 			STACK_PUSHX(stack, int, 0); // bre_branch_begin
1832d5f8dde1SJohn Marino 			STACK_PUSHX(stack, int, PARSE_ATOM);
1833d5f8dde1SJohn Marino 		    }
18345f2eab64SJohn Marino 		    ctx->cflags = new_cflags;
18355f2eab64SJohn Marino 		    break;
18365f2eab64SJohn Marino 		  }
18375f2eab64SJohn Marino 
1838d5f8dde1SJohn Marino 		if (ctx->cflags & REG_EXTENDED)
18395f2eab64SJohn Marino 		  {
1840d5f8dde1SJohn Marino 		parse_bre_lparen:
18415f2eab64SJohn Marino 		    DPRINT(("tre_parse: group begin: '%.*" STRF
18425f2eab64SJohn Marino 			    "', submatch %d\n", REST(ctx->re),
18435f2eab64SJohn Marino 			    ctx->submatch_id));
18445f2eab64SJohn Marino 		    ctx->re++;
18455f2eab64SJohn Marino 		    /* First parse a whole RE, then mark the resulting tree
18465f2eab64SJohn Marino 		       for submatching. */
1847d5f8dde1SJohn Marino 		    STACK_PUSHX(stack, int, ctx->cflags);
18485f2eab64SJohn Marino 		    STACK_PUSHX(stack, int, ctx->submatch_id);
18495f2eab64SJohn Marino 		    STACK_PUSHX(stack, int, PARSE_MARK_FOR_SUBMATCH);
1850d5f8dde1SJohn Marino 		    /* We need to pass a boolean (eventually) to PARSE_ATOM to
1851d5f8dde1SJohn Marino 		       indicate if this is the beginning of a BRE group. */
1852d5f8dde1SJohn Marino 		    STACK_PUSHX(stack, int, !(ctx->cflags & REG_EXTENDED));
18535f2eab64SJohn Marino 		    STACK_PUSHX(stack, int, PARSE_RE);
18545f2eab64SJohn Marino 		    ctx->submatch_id++;
1855d5f8dde1SJohn Marino 		    depth++;
18565f2eab64SJohn Marino 		  }
18575f2eab64SJohn Marino 		else
18585f2eab64SJohn Marino 		  goto parse_literal;
18595f2eab64SJohn Marino 		break;
18605f2eab64SJohn Marino 
18615f2eab64SJohn Marino 	      case CHAR_RPAREN:  /* end of current subexpression */
1862d5f8dde1SJohn Marino 		if (ctx->cflags & REG_EXTENDED && depth > 0)
18635f2eab64SJohn Marino 		  {
1864d5f8dde1SJohn Marino 	      parse_bre_rparen_empty:
1865d5f8dde1SJohn Marino 		    if (!(ctx->cflags & REG_EXTENDED) && depth == 0)
1866d5f8dde1SJohn Marino 		      return REG_EPAREN;
18675f2eab64SJohn Marino 		    DPRINT(("tre_parse:	    empty: '%.*" STRF "'\n",
18685f2eab64SJohn Marino 			    REST(ctx->re)));
18695f2eab64SJohn Marino 		    /* We were expecting an atom, but instead the current
18705f2eab64SJohn Marino 		       subexpression was closed.  POSIX leaves the meaning of
18715f2eab64SJohn Marino 		       this to be implementation-defined.  We interpret this as
18725f2eab64SJohn Marino 		       an empty expression (which matches an empty string).  */
18735f2eab64SJohn Marino 		    result = tre_ast_new_literal(ctx->mem, EMPTY, -1, -1);
18745f2eab64SJohn Marino 		    if (result == NULL)
18755f2eab64SJohn Marino 		      return REG_ESPACE;
18765f2eab64SJohn Marino 		    if (!(ctx->cflags & REG_EXTENDED))
18775f2eab64SJohn Marino 		      ctx->re--;
18785f2eab64SJohn Marino 		  }
18795f2eab64SJohn Marino 		else
18805f2eab64SJohn Marino 		  goto parse_literal;
18815f2eab64SJohn Marino 		break;
18825f2eab64SJohn Marino 
18835f2eab64SJohn Marino 	      case CHAR_LBRACKET: /* bracket expression */
18845f2eab64SJohn Marino 		DPRINT(("tre_parse:     bracket: '%.*" STRF "'\n",
18855f2eab64SJohn Marino 			REST(ctx->re)));
18865f2eab64SJohn Marino 		ctx->re++;
18875f2eab64SJohn Marino 		status = tre_parse_bracket(ctx, &result);
18885f2eab64SJohn Marino 		if (status != REG_OK)
18895f2eab64SJohn Marino 		  return status;
18905f2eab64SJohn Marino 		break;
18915f2eab64SJohn Marino 
18925f2eab64SJohn Marino 	      case CHAR_BACKSLASH:
1893d5f8dde1SJohn Marino 		/* Deal with "\(", "\)" or "\{" for BREs */
18945f2eab64SJohn Marino 		if (!(ctx->cflags & REG_EXTENDED)
1895d5f8dde1SJohn Marino 		    && ctx->re + 1 < ctx->re_end)
1896d5f8dde1SJohn Marino 		  {
1897d5f8dde1SJohn Marino 		    if (*(ctx->re + 1) == CHAR_LPAREN)
18985f2eab64SJohn Marino 		      {
18995f2eab64SJohn Marino 			ctx->re++;
1900d5f8dde1SJohn Marino 			goto parse_bre_lparen;
1901d5f8dde1SJohn Marino 		      }
1902d5f8dde1SJohn Marino 		    else if (*(ctx->re + 1) == CHAR_RPAREN)
1903d5f8dde1SJohn Marino 		      {
1904d5f8dde1SJohn Marino 			ctx->re++;
1905d5f8dde1SJohn Marino 			goto parse_bre_rparen_empty;
1906d5f8dde1SJohn Marino 		      }
1907d5f8dde1SJohn Marino 		    if (*(ctx->re + 1) == CHAR_LBRACE) goto parse_literal;
1908d5f8dde1SJohn Marino 		  }
1909d5f8dde1SJohn Marino 
1910d5f8dde1SJohn Marino 		if (ctx->re + 1 >= ctx->re_end)
1911d5f8dde1SJohn Marino 		  /* Trailing backslash. */
1912d5f8dde1SJohn Marino 		  return REG_EESCAPE;
1913d5f8dde1SJohn Marino 
1914d5f8dde1SJohn Marino 		if (!(ctx->cflags & REG_ENHANCED))
1915d5f8dde1SJohn Marino 		  {
1916d5f8dde1SJohn Marino 		    DPRINT(("tre_parse:  unenhanced bleep: '%.*" STRF "'\n", REST(ctx->re)));
1917d5f8dde1SJohn Marino 		    ctx->re++;
1918d5f8dde1SJohn Marino 		    goto unenhanced_backslash;
19195f2eab64SJohn Marino 		  }
19205f2eab64SJohn Marino 
19215f2eab64SJohn Marino 		/* If a macro is used, parse the expanded macro recursively. */
19225f2eab64SJohn Marino 		{
19235f2eab64SJohn Marino 		  tre_char_t buf[64];
19245f2eab64SJohn Marino 		  tre_expand_macro(ctx->re + 1, ctx->re_end,
19255f2eab64SJohn Marino 				   buf, elementsof(buf));
19265f2eab64SJohn Marino 		  if (buf[0] != 0)
19275f2eab64SJohn Marino 		    {
19285f2eab64SJohn Marino 		      tre_parse_ctx_t subctx;
19295f2eab64SJohn Marino 		      memcpy(&subctx, ctx, sizeof(subctx));
19305f2eab64SJohn Marino 		      subctx.re = buf;
19315f2eab64SJohn Marino 		      subctx.len = tre_strlen(buf);
19325f2eab64SJohn Marino 		      subctx.nofirstsub = 1;
19335f2eab64SJohn Marino 		      status = tre_parse(&subctx);
19345f2eab64SJohn Marino 		      if (status != REG_OK)
19355f2eab64SJohn Marino 			return status;
19365f2eab64SJohn Marino 		      ctx->re += 2;
19375f2eab64SJohn Marino 		      ctx->position = subctx.position;
19385f2eab64SJohn Marino 		      result = subctx.result;
19395f2eab64SJohn Marino 		      break;
19405f2eab64SJohn Marino 		    }
19415f2eab64SJohn Marino 		}
19425f2eab64SJohn Marino 
19435f2eab64SJohn Marino #ifdef REG_LITERAL
19445f2eab64SJohn Marino 		if (*(ctx->re + 1) == L'Q')
19455f2eab64SJohn Marino 		  {
19465f2eab64SJohn Marino 		    DPRINT(("tre_parse: tmp literal: '%.*" STRF "'\n",
19475f2eab64SJohn Marino 			    REST(ctx->re)));
19485f2eab64SJohn Marino 		    ctx->cflags |= REG_LITERAL;
19495f2eab64SJohn Marino 		    temporary_cflags |= REG_LITERAL;
19505f2eab64SJohn Marino 		    ctx->re += 2;
1951d5f8dde1SJohn Marino 		    STACK_PUSHX(stack, int, 0);
19525f2eab64SJohn Marino 		    STACK_PUSHX(stack, int, PARSE_ATOM);
19535f2eab64SJohn Marino 		    break;
19545f2eab64SJohn Marino 		  }
19555f2eab64SJohn Marino #endif /* REG_LITERAL */
19565f2eab64SJohn Marino 
19575f2eab64SJohn Marino 		DPRINT(("tre_parse:  bleep: '%.*" STRF "'\n", REST(ctx->re)));
19585f2eab64SJohn Marino 		ctx->re++;
19595f2eab64SJohn Marino 		switch (*ctx->re)
19605f2eab64SJohn Marino 		  {
19615f2eab64SJohn Marino 		  case L'b':
19625f2eab64SJohn Marino 		    result = tre_ast_new_literal(ctx->mem, ASSERTION,
19635f2eab64SJohn Marino 						 ASSERT_AT_WB, -1);
19645f2eab64SJohn Marino 		    ctx->re++;
19655f2eab64SJohn Marino 		    break;
19665f2eab64SJohn Marino 		  case L'B':
19675f2eab64SJohn Marino 		    result = tre_ast_new_literal(ctx->mem, ASSERTION,
19685f2eab64SJohn Marino 						 ASSERT_AT_WB_NEG, -1);
19695f2eab64SJohn Marino 		    ctx->re++;
19705f2eab64SJohn Marino 		    break;
19715f2eab64SJohn Marino 		  case L'<':
19725f2eab64SJohn Marino 		    result = tre_ast_new_literal(ctx->mem, ASSERTION,
19735f2eab64SJohn Marino 						 ASSERT_AT_BOW, -1);
19745f2eab64SJohn Marino 		    ctx->re++;
19755f2eab64SJohn Marino 		    break;
19765f2eab64SJohn Marino 		  case L'>':
19775f2eab64SJohn Marino 		    result = tre_ast_new_literal(ctx->mem, ASSERTION,
19785f2eab64SJohn Marino 						 ASSERT_AT_EOW, -1);
19795f2eab64SJohn Marino 		    ctx->re++;
19805f2eab64SJohn Marino 		    break;
19815f2eab64SJohn Marino 		  case L'x':
19825f2eab64SJohn Marino 		    ctx->re++;
19835f2eab64SJohn Marino 		    if (ctx->re[0] != CHAR_LBRACE && ctx->re < ctx->re_end)
19845f2eab64SJohn Marino 		      {
19855f2eab64SJohn Marino 			/* 8 bit hex char. */
19865f2eab64SJohn Marino 			char tmp[3] = {0, 0, 0};
19875f2eab64SJohn Marino 			long val;
19885f2eab64SJohn Marino 			DPRINT(("tre_parse:  8 bit hex: '%.*" STRF "'\n",
19895f2eab64SJohn Marino 				REST(ctx->re - 2)));
19905f2eab64SJohn Marino 
1991d5f8dde1SJohn Marino 			if (tre_isxdigit_l(ctx->re[0], ctx->loc) &&
1992d5f8dde1SJohn Marino 			    ctx->re < ctx->re_end)
19935f2eab64SJohn Marino 			  {
19945f2eab64SJohn Marino 			    tmp[0] = (char)ctx->re[0];
19955f2eab64SJohn Marino 			    ctx->re++;
19965f2eab64SJohn Marino 			  }
1997d5f8dde1SJohn Marino 			if (tre_isxdigit_l(ctx->re[0], ctx->loc) &&
1998d5f8dde1SJohn Marino 			    ctx->re < ctx->re_end)
19995f2eab64SJohn Marino 			  {
20005f2eab64SJohn Marino 			    tmp[1] = (char)ctx->re[0];
20015f2eab64SJohn Marino 			    ctx->re++;
20025f2eab64SJohn Marino 			  }
20035f2eab64SJohn Marino 			val = strtol(tmp, NULL, 16);
20045f2eab64SJohn Marino 			result = tre_ast_new_literal(ctx->mem, (int)val,
20055f2eab64SJohn Marino 						     (int)val, ctx->position);
20065f2eab64SJohn Marino 			ctx->position++;
20075f2eab64SJohn Marino 			break;
20085f2eab64SJohn Marino 		      }
20095f2eab64SJohn Marino 		    else if (ctx->re < ctx->re_end)
20105f2eab64SJohn Marino 		      {
20115f2eab64SJohn Marino 			/* Wide char. */
20125f2eab64SJohn Marino 			char tmp[32];
20135f2eab64SJohn Marino 			long val;
20145f2eab64SJohn Marino 			int i = 0;
20155f2eab64SJohn Marino 			ctx->re++;
20165f2eab64SJohn Marino 			while (ctx->re_end - ctx->re >= 0)
20175f2eab64SJohn Marino 			  {
20185f2eab64SJohn Marino 			    if (ctx->re[0] == CHAR_RBRACE)
20195f2eab64SJohn Marino 			      break;
2020d5f8dde1SJohn Marino 			    if (tre_isxdigit_l(ctx->re[0], ctx->loc))
20215f2eab64SJohn Marino 			      {
20225f2eab64SJohn Marino 				tmp[i] = (char)ctx->re[0];
20235f2eab64SJohn Marino 				i++;
20245f2eab64SJohn Marino 				ctx->re++;
20255f2eab64SJohn Marino 				continue;
20265f2eab64SJohn Marino 			      }
20275f2eab64SJohn Marino 			    return REG_EBRACE;
20285f2eab64SJohn Marino 			  }
20295f2eab64SJohn Marino 			ctx->re++;
20305f2eab64SJohn Marino 			tmp[i] = 0;
20315f2eab64SJohn Marino 			val = strtol(tmp, NULL, 16);
20325f2eab64SJohn Marino 			result = tre_ast_new_literal(ctx->mem, (int)val, (int)val,
20335f2eab64SJohn Marino 						     ctx->position);
20345f2eab64SJohn Marino 			ctx->position++;
20355f2eab64SJohn Marino 			break;
20365f2eab64SJohn Marino 		      }
20375f2eab64SJohn Marino 		    /*FALLTHROUGH*/
20385f2eab64SJohn Marino 
20395f2eab64SJohn Marino 		  default:
2040d5f8dde1SJohn Marino 		  unenhanced_backslash:
2041d5f8dde1SJohn Marino 		    if ((ctx->cflags & (REG_EXTENDED | REG_ENHANCED)) !=
2042d5f8dde1SJohn Marino 			REG_EXTENDED &&
2043d5f8dde1SJohn Marino 			tre_isdigit_l(*ctx->re, ctx->loc) && *ctx->re != L'0')
20445f2eab64SJohn Marino 		      {
2045d5f8dde1SJohn Marino 			/* Back reference (only in BRE or enhanced). */
20465f2eab64SJohn Marino 			int val = *ctx->re - L'0';
20475f2eab64SJohn Marino 			DPRINT(("tre_parse:     backref: '%.*" STRF "'\n",
20485f2eab64SJohn Marino 				REST(ctx->re - 1)));
20495f2eab64SJohn Marino 			result = tre_ast_new_literal(ctx->mem, BACKREF, val,
20505f2eab64SJohn Marino 						     ctx->position);
20515f2eab64SJohn Marino 			if (result == NULL)
20525f2eab64SJohn Marino 			  return REG_ESPACE;
2053d5f8dde1SJohn Marino 
2054d5f8dde1SJohn Marino 			/* Set the backref with a submatch id in the invisible
2055d5f8dde1SJohn Marino 			 * range (which will be overridden if a real submatch
2056d5f8dde1SJohn Marino 			 * is needed) */
2057d5f8dde1SJohn Marino 			result->submatch_id = ctx->submatch_id_invisible++;
2058d5f8dde1SJohn Marino 
20595f2eab64SJohn Marino 			ctx->position++;
2060d5f8dde1SJohn Marino 			ctx->num_reorder_tags++;
20615f2eab64SJohn Marino 			ctx->max_backref = MAX(val, ctx->max_backref);
20625f2eab64SJohn Marino 			ctx->re++;
20635f2eab64SJohn Marino 		      }
20645f2eab64SJohn Marino 		    else
20655f2eab64SJohn Marino 		      {
20665f2eab64SJohn Marino 			/* Escaped character. */
20675f2eab64SJohn Marino 			DPRINT(("tre_parse:     escaped: '%.*" STRF "'\n",
20685f2eab64SJohn Marino 				REST(ctx->re - 1)));
20695f2eab64SJohn Marino 			result = tre_ast_new_literal(ctx->mem, *ctx->re, *ctx->re,
20705f2eab64SJohn Marino 						     ctx->position);
20715f2eab64SJohn Marino 			ctx->position++;
20725f2eab64SJohn Marino 			ctx->re++;
20735f2eab64SJohn Marino 		      }
20745f2eab64SJohn Marino 		    break;
20755f2eab64SJohn Marino 		  }
20765f2eab64SJohn Marino 		if (result == NULL)
20775f2eab64SJohn Marino 		  return REG_ESPACE;
20785f2eab64SJohn Marino 		break;
20795f2eab64SJohn Marino 
20805f2eab64SJohn Marino 	      case CHAR_PERIOD:	 /* the any-symbol */
20815f2eab64SJohn Marino 		DPRINT(("tre_parse:	  any: '%.*" STRF "'\n",
20825f2eab64SJohn Marino 			REST(ctx->re)));
20835f2eab64SJohn Marino 		if (ctx->cflags & REG_NEWLINE)
20845f2eab64SJohn Marino 		  {
20855f2eab64SJohn Marino 		    tre_ast_node_t *tmp1;
20865f2eab64SJohn Marino 		    tre_ast_node_t *tmp2;
20875f2eab64SJohn Marino 		    tmp1 = tre_ast_new_literal(ctx->mem, 0, L'\n' - 1,
20885f2eab64SJohn Marino 					       ctx->position);
20895f2eab64SJohn Marino 		    if (!tmp1)
20905f2eab64SJohn Marino 		      return REG_ESPACE;
20915f2eab64SJohn Marino 		    tmp2 = tre_ast_new_literal(ctx->mem, L'\n' + 1, TRE_CHAR_MAX,
20925f2eab64SJohn Marino 					       ctx->position + 1);
20935f2eab64SJohn Marino 		    if (!tmp2)
20945f2eab64SJohn Marino 		      return REG_ESPACE;
20955f2eab64SJohn Marino 		    result = tre_ast_new_union(ctx->mem, tmp1, tmp2);
20965f2eab64SJohn Marino 		    if (!result)
20975f2eab64SJohn Marino 		      return REG_ESPACE;
20985f2eab64SJohn Marino 		    ctx->position += 2;
20995f2eab64SJohn Marino 		  }
21005f2eab64SJohn Marino 		else
21015f2eab64SJohn Marino 		  {
21025f2eab64SJohn Marino 		    result = tre_ast_new_literal(ctx->mem, 0, TRE_CHAR_MAX,
21035f2eab64SJohn Marino 						 ctx->position);
21045f2eab64SJohn Marino 		    if (!result)
21055f2eab64SJohn Marino 		      return REG_ESPACE;
21065f2eab64SJohn Marino 		    ctx->position++;
21075f2eab64SJohn Marino 		  }
21085f2eab64SJohn Marino 		ctx->re++;
21095f2eab64SJohn Marino 		break;
21105f2eab64SJohn Marino 
21115f2eab64SJohn Marino 	      case CHAR_CARET:	 /* beginning of line assertion */
2112d5f8dde1SJohn Marino 		/* '^' has a special meaning everywhere in EREs, at the
2113d5f8dde1SJohn Marino 		   beginning of the RE and after \( in BREs.  It is also
2114d5f8dde1SJohn Marino 		   special in enhanced BREs at the beginning of each branch
2115d5f8dde1SJohn Marino 		   of a union */
21165f2eab64SJohn Marino 		if (ctx->cflags & REG_EXTENDED
2117d5f8dde1SJohn Marino 		    || bre_branch_begin
21185f2eab64SJohn Marino 		    || ctx->re == ctx->re_start)
21195f2eab64SJohn Marino 		  {
21205f2eab64SJohn Marino 		    DPRINT(("tre_parse:	      BOL: '%.*" STRF "'\n",
21215f2eab64SJohn Marino 			    REST(ctx->re)));
21225f2eab64SJohn Marino 		    result = tre_ast_new_literal(ctx->mem, ASSERTION,
21235f2eab64SJohn Marino 						 ASSERT_AT_BOL, -1);
21245f2eab64SJohn Marino 		    if (result == NULL)
21255f2eab64SJohn Marino 		      return REG_ESPACE;
21265f2eab64SJohn Marino 		    ctx->re++;
21275f2eab64SJohn Marino 		  }
21285f2eab64SJohn Marino 		else
21295f2eab64SJohn Marino 		  goto parse_literal;
21305f2eab64SJohn Marino 		break;
21315f2eab64SJohn Marino 
21325f2eab64SJohn Marino 	      case CHAR_DOLLAR:	 /* end of line assertion. */
21335f2eab64SJohn Marino 		/* '$' is special everywhere in EREs, and at the end of the
21345f2eab64SJohn Marino 		   string and before \) in BREs. */
21355f2eab64SJohn Marino 		if (ctx->cflags & REG_EXTENDED
21365f2eab64SJohn Marino 		    || (ctx->re + 2 < ctx->re_end
21375f2eab64SJohn Marino 			&& *(ctx->re + 1) == CHAR_BACKSLASH
21385f2eab64SJohn Marino 			&& *(ctx->re + 2) == CHAR_RPAREN)
21395f2eab64SJohn Marino 		    || ctx->re + 1 == ctx->re_end)
21405f2eab64SJohn Marino 		  {
21415f2eab64SJohn Marino 		    DPRINT(("tre_parse:	      EOL: '%.*" STRF "'\n",
21425f2eab64SJohn Marino 			    REST(ctx->re)));
21435f2eab64SJohn Marino 		    result = tre_ast_new_literal(ctx->mem, ASSERTION,
21445f2eab64SJohn Marino 						 ASSERT_AT_EOL, -1);
21455f2eab64SJohn Marino 		    if (result == NULL)
21465f2eab64SJohn Marino 		      return REG_ESPACE;
21475f2eab64SJohn Marino 		    ctx->re++;
21485f2eab64SJohn Marino 		  }
21495f2eab64SJohn Marino 		else
21505f2eab64SJohn Marino 		  goto parse_literal;
21515f2eab64SJohn Marino 		break;
21525f2eab64SJohn Marino 
21535f2eab64SJohn Marino 	      default:
21545f2eab64SJohn Marino 	      parse_literal:
21555f2eab64SJohn Marino 
21565f2eab64SJohn Marino 		if (temporary_cflags && ctx->re + 1 < ctx->re_end
21575f2eab64SJohn Marino 		    && *ctx->re == CHAR_BACKSLASH && *(ctx->re + 1) == L'E')
21585f2eab64SJohn Marino 		  {
21595f2eab64SJohn Marino 		    DPRINT(("tre_parse:	 end tmps: '%.*" STRF "'\n",
21605f2eab64SJohn Marino 			    REST(ctx->re)));
21615f2eab64SJohn Marino 		    ctx->cflags &= ~temporary_cflags;
21625f2eab64SJohn Marino 		    temporary_cflags = 0;
21635f2eab64SJohn Marino 		    ctx->re += 2;
2164d5f8dde1SJohn Marino 		    if (ctx->re < ctx->re_end)
2165d5f8dde1SJohn Marino 		      {
2166d5f8dde1SJohn Marino 			STACK_PUSHX(stack, int, 0);
2167d5f8dde1SJohn Marino 			STACK_PUSHX(stack, int, PARSE_ATOM);
2168d5f8dde1SJohn Marino 		      }
2169d5f8dde1SJohn Marino 		    else
2170d5f8dde1SJohn Marino 		      {
2171d5f8dde1SJohn Marino 			result = tre_ast_new_literal(ctx->mem, EMPTY, -1, -1);
2172d5f8dde1SJohn Marino 			if (!result) return REG_ESPACE;
2173d5f8dde1SJohn Marino 		      }
21745f2eab64SJohn Marino 		    break;
21755f2eab64SJohn Marino 		  }
21765f2eab64SJohn Marino 
21775f2eab64SJohn Marino 
21785f2eab64SJohn Marino 		/* We are expecting an atom.  If the subexpression (or the whole
21795f2eab64SJohn Marino 		   regexp) ends here, we interpret it as an empty expression
2180d5f8dde1SJohn Marino 		   (which matches an empty string), which is an error.
2181d5f8dde1SJohn Marino 		   Iteration of an empty expression is also an error. */
21825f2eab64SJohn Marino #ifdef REG_LITERAL
2183d5f8dde1SJohn Marino 		if (!(ctx->cflags & REG_LITERAL))
21845f2eab64SJohn Marino 		  {
2185d5f8dde1SJohn Marino #endif /* REG_LITERAL */
2186d5f8dde1SJohn Marino 		    /* error on end of string */
2187d5f8dde1SJohn Marino 		    if (ctx->re >= ctx->re_end) return depth > 0 ? REG_EPAREN
2188d5f8dde1SJohn Marino 						       : REG_EMPTY;
2189d5f8dde1SJohn Marino 		    /* error on unions and iterations of empty expressions */
2190d5f8dde1SJohn Marino 		    if (ctx->cflags & REG_EXTENDED)
2191d5f8dde1SJohn Marino 		      {
2192d5f8dde1SJohn Marino 			if (ctx->re < ctx->re_end)
2193d5f8dde1SJohn Marino 			  {
2194d5f8dde1SJohn Marino 			    if (*ctx->re == CHAR_PIPE) return REG_EMPTY;
2195d5f8dde1SJohn Marino 			    if (*ctx->re == CHAR_LBRACE)
2196d5f8dde1SJohn Marino 			      {
2197d5f8dde1SJohn Marino 				ctx->re++;
2198d5f8dde1SJohn Marino 		  empty_parse_bound:
2199d5f8dde1SJohn Marino 				/* We need to parse the bound first and return
2200d5f8dde1SJohn Marino 				   any error, before returning REG_BADRPT */
2201d5f8dde1SJohn Marino 				status = tre_parse_bound(ctx, NULL);
2202d5f8dde1SJohn Marino #ifdef ERE_LITERAL_LBRACE_ON_NON_NUMERIC_BOUND
2203d5f8dde1SJohn Marino 				/* For ERE, if REG_NOMATCH is returned, we
2204d5f8dde1SJohn Marino 				   treat the lbrace as a literal. */
2205d5f8dde1SJohn Marino 				if (status == REG_NOMATCH)
2206d5f8dde1SJohn Marino 				  {
2207d5f8dde1SJohn Marino 				    ctx->re--;
2208d5f8dde1SJohn Marino 				    /* Drop down to literal-handling code */
22095f2eab64SJohn Marino 				  }
2210d5f8dde1SJohn Marino 				else
2211d5f8dde1SJohn Marino 				  {
2212d5f8dde1SJohn Marino #endif /* ERE_LITERAL_LBRACE_ON_NON_NUMERIC_BOUND */
2213d5f8dde1SJohn Marino 				    if (status != REG_OK)
2214d5f8dde1SJohn Marino 				      return status;
2215d5f8dde1SJohn Marino 				    return REG_BADRPT;
2216d5f8dde1SJohn Marino #ifdef ERE_LITERAL_LBRACE_ON_NON_NUMERIC_BOUND
2217d5f8dde1SJohn Marino 				  }
2218d5f8dde1SJohn Marino #endif /* ERE_LITERAL_LBRACE_ON_NON_NUMERIC_BOUND */
2219d5f8dde1SJohn Marino 			      }
2220d5f8dde1SJohn Marino #ifdef ERE_LITERAL_LBRACE_ON_NON_NUMERIC_BOUND
2221d5f8dde1SJohn Marino 			    else
2222d5f8dde1SJohn Marino #endif /* ERE_LITERAL_LBRACE_ON_NON_NUMERIC_BOUND */
2223d5f8dde1SJohn Marino 			    if (*ctx->re == CHAR_STAR
2224d5f8dde1SJohn Marino 				|| *ctx->re == CHAR_PLUS
2225d5f8dde1SJohn Marino 				|| *ctx->re == CHAR_QUESTIONMARK)
2226d5f8dde1SJohn Marino 			      {
2227d5f8dde1SJohn Marino 				return REG_BADRPT;
2228d5f8dde1SJohn Marino 			      }
2229d5f8dde1SJohn Marino 			  }
2230d5f8dde1SJohn Marino 		      }
2231d5f8dde1SJohn Marino 		    else if (ctx->re + 1 < ctx->re_end
2232d5f8dde1SJohn Marino 			     && *ctx->re == CHAR_BACKSLASH
2233d5f8dde1SJohn Marino 			     && *(ctx->re + 1) == CHAR_LBRACE)
2234d5f8dde1SJohn Marino 		      {
2235d5f8dde1SJohn Marino 			ctx->re += 2;
2236d5f8dde1SJohn Marino 			goto empty_parse_bound;
2237d5f8dde1SJohn Marino 		      }
2238d5f8dde1SJohn Marino #ifdef REG_LITERAL
2239d5f8dde1SJohn Marino 		  }
2240d5f8dde1SJohn Marino #endif /* REG_LITERAL */
22415f2eab64SJohn Marino 
22425f2eab64SJohn Marino 		DPRINT(("tre_parse:     literal: '%.*" STRF "'\n",
22435f2eab64SJohn Marino 			REST(ctx->re)));
22445f2eab64SJohn Marino 		/* Note that we can't use a tre_isalpha() test here, since there
22455f2eab64SJohn Marino 		   may be characters which are alphabetic but neither upper nor
22465f2eab64SJohn Marino 		   lower case. */
22475f2eab64SJohn Marino 		if (ctx->cflags & REG_ICASE
2248d5f8dde1SJohn Marino 		    && (tre_isupper_l(*ctx->re, ctx->loc) ||
2249d5f8dde1SJohn Marino 		    tre_islower_l(*ctx->re, ctx->loc)))
22505f2eab64SJohn Marino 		  {
22515f2eab64SJohn Marino 		    tre_ast_node_t *tmp1;
22525f2eab64SJohn Marino 		    tre_ast_node_t *tmp2;
22535f2eab64SJohn Marino 
22545f2eab64SJohn Marino 		    /* XXX - Can there be more than one opposite-case
22555f2eab64SJohn Marino 		       counterparts for some character in some locale?  Or
22565f2eab64SJohn Marino 		       more than two characters which all should be regarded
22575f2eab64SJohn Marino 		       as the same character if case is ignored?  If yes, there
22585f2eab64SJohn Marino 		       does not seem to be a portable way to detect it.  I guess
22595f2eab64SJohn Marino 		       that at least for multi-character collating elements there
22605f2eab64SJohn Marino 		       could be several opposite-case counterparts, but they
22615f2eab64SJohn Marino 		       cannot be supported portably anyway. */
2262d5f8dde1SJohn Marino 		    tmp1 = tre_ast_new_literal(ctx->mem,
2263d5f8dde1SJohn Marino 					       tre_toupper_l(*ctx->re, ctx->loc),
2264d5f8dde1SJohn Marino 					       tre_toupper_l(*ctx->re, ctx->loc),
22655f2eab64SJohn Marino 					       ctx->position);
22665f2eab64SJohn Marino 		    if (!tmp1)
22675f2eab64SJohn Marino 		      return REG_ESPACE;
2268d5f8dde1SJohn Marino 		    tmp2 = tre_ast_new_literal(ctx->mem,
2269d5f8dde1SJohn Marino 					       tre_tolower_l(*ctx->re, ctx->loc),
2270d5f8dde1SJohn Marino 					       tre_tolower_l(*ctx->re, ctx->loc),
22715f2eab64SJohn Marino 					       ctx->position);
22725f2eab64SJohn Marino 		    if (!tmp2)
22735f2eab64SJohn Marino 		      return REG_ESPACE;
22745f2eab64SJohn Marino 		    result = tre_ast_new_union(ctx->mem, tmp1, tmp2);
22755f2eab64SJohn Marino 		    if (!result)
22765f2eab64SJohn Marino 		      return REG_ESPACE;
22775f2eab64SJohn Marino 		  }
22785f2eab64SJohn Marino 		else
22795f2eab64SJohn Marino 		  {
22805f2eab64SJohn Marino 		    result = tre_ast_new_literal(ctx->mem, *ctx->re, *ctx->re,
22815f2eab64SJohn Marino 						 ctx->position);
22825f2eab64SJohn Marino 		    if (!result)
22835f2eab64SJohn Marino 		      return REG_ESPACE;
22845f2eab64SJohn Marino 		  }
22855f2eab64SJohn Marino 		ctx->position++;
22865f2eab64SJohn Marino 		ctx->re++;
22875f2eab64SJohn Marino 		break;
22885f2eab64SJohn Marino 	      }
22895f2eab64SJohn Marino 	    break;
2290d5f8dde1SJohn Marino 	  }
22915f2eab64SJohn Marino 
22925f2eab64SJohn Marino 	case PARSE_MARK_FOR_SUBMATCH:
22935f2eab64SJohn Marino 	  {
22945f2eab64SJohn Marino 	    int submatch_id = tre_stack_pop_int(stack);
22955f2eab64SJohn Marino 
2296d5f8dde1SJohn Marino 	    ctx->cflags = tre_stack_pop_int(stack); /* restore cflags */
2297d5f8dde1SJohn Marino 	    if (result->submatch_id >= 0 &&
2298d5f8dde1SJohn Marino 		result->submatch_id < SUBMATCH_ID_INVISIBLE_START)
22995f2eab64SJohn Marino 	      {
23005f2eab64SJohn Marino 		tre_ast_node_t *n, *tmp_node;
2301d5f8dde1SJohn Marino 		if (submatch_id >= SUBMATCH_ID_INVISIBLE_START)
2302d5f8dde1SJohn Marino 		  break;
23035f2eab64SJohn Marino 		n = tre_ast_new_literal(ctx->mem, EMPTY, -1, -1);
23045f2eab64SJohn Marino 		if (n == NULL)
23055f2eab64SJohn Marino 		  return REG_ESPACE;
23065f2eab64SJohn Marino 		tmp_node = tre_ast_new_catenation(ctx->mem, n, result);
23075f2eab64SJohn Marino 		if (tmp_node == NULL)
23085f2eab64SJohn Marino 		  return REG_ESPACE;
23095f2eab64SJohn Marino 		tmp_node->num_submatches = result->num_submatches;
23105f2eab64SJohn Marino 		result = tmp_node;
23115f2eab64SJohn Marino 	      }
23125f2eab64SJohn Marino 	    result->submatch_id = submatch_id;
2313d5f8dde1SJohn Marino 	    if (submatch_id < SUBMATCH_ID_INVISIBLE_START)
23145f2eab64SJohn Marino 	      result->num_submatches++;
23155f2eab64SJohn Marino 	    break;
23165f2eab64SJohn Marino 	  }
23175f2eab64SJohn Marino 
23185f2eab64SJohn Marino 	default:
23195f2eab64SJohn Marino 	  assert(0);
23205f2eab64SJohn Marino 	  break;
23215f2eab64SJohn Marino 	}
23225f2eab64SJohn Marino     }
23235f2eab64SJohn Marino 
23245f2eab64SJohn Marino   /* Check for missing closing parentheses. */
23255f2eab64SJohn Marino   if (depth > 0)
23265f2eab64SJohn Marino     return REG_EPAREN;
23275f2eab64SJohn Marino 
23285f2eab64SJohn Marino   ctx->result = result;
23295f2eab64SJohn Marino 
2330d5f8dde1SJohn Marino   return REG_OK;
23315f2eab64SJohn Marino }
23325f2eab64SJohn Marino 
23335f2eab64SJohn Marino /* EOF */
2334