xref: /dflybsd-src/contrib/tre/lib/tre-compile.c (revision d5f8dde1e2bae450329b3ee8c25ef109b5713f07)
15f2eab64SJohn Marino /*
25f2eab64SJohn Marino   tre-compile.c - TRE regex compiler
35f2eab64SJohn Marino 
45f2eab64SJohn Marino   This software is released under a BSD-style license.
55f2eab64SJohn Marino   See the file LICENSE for details and copyright.
65f2eab64SJohn Marino 
75f2eab64SJohn Marino */
85f2eab64SJohn Marino 
95f2eab64SJohn Marino /*
105f2eab64SJohn Marino   TODO:
115f2eab64SJohn Marino    - Fix tre_ast_to_tnfa() to recurse using a stack instead of recursive
125f2eab64SJohn Marino      function calls.
135f2eab64SJohn Marino */
145f2eab64SJohn Marino 
155f2eab64SJohn Marino 
165f2eab64SJohn Marino #ifdef HAVE_CONFIG_H
175f2eab64SJohn Marino #include <config.h>
185f2eab64SJohn Marino #endif /* HAVE_CONFIG_H */
195f2eab64SJohn Marino #include <stdio.h>
205f2eab64SJohn Marino #include <assert.h>
215f2eab64SJohn Marino #include <string.h>
22*d5f8dde1SJohn Marino #include <limits.h>
235f2eab64SJohn Marino 
245f2eab64SJohn Marino #include "tre-internal.h"
255f2eab64SJohn Marino #include "tre-mem.h"
265f2eab64SJohn Marino #include "tre-stack.h"
275f2eab64SJohn Marino #include "tre-ast.h"
285f2eab64SJohn Marino #include "tre-parse.h"
295f2eab64SJohn Marino #include "tre-compile.h"
305f2eab64SJohn Marino #include "tre.h"
315f2eab64SJohn Marino #include "xmalloc.h"
325f2eab64SJohn Marino 
335f2eab64SJohn Marino /*
34*d5f8dde1SJohn Marino   The bit_ffs() macro in bitstring.h is flawed.  Replace it with a working one.
35*d5f8dde1SJohn Marino */
36*d5f8dde1SJohn Marino #undef bit_ffs
37*d5f8dde1SJohn Marino #define	bit_ffs(name, nbits, value) { \
38*d5f8dde1SJohn Marino 	register bitstr_t *_name = name; \
39*d5f8dde1SJohn Marino 	register int _byte, _nbits = nbits; \
40*d5f8dde1SJohn Marino 	register int _stopbyte = _bit_byte(_nbits), _value = -1; \
41*d5f8dde1SJohn Marino 	for (_byte = 0; _byte <= _stopbyte; ++_byte) \
42*d5f8dde1SJohn Marino 		if (_name[_byte]) { \
43*d5f8dde1SJohn Marino 			_value = _byte << 3; \
44*d5f8dde1SJohn Marino 			for (_stopbyte = _name[_byte]; !(_stopbyte&0x1); \
45*d5f8dde1SJohn Marino 			    ++_value, _stopbyte >>= 1); \
46*d5f8dde1SJohn Marino 			break; \
47*d5f8dde1SJohn Marino 		} \
48*d5f8dde1SJohn Marino 	*(value) = _value; \
49*d5f8dde1SJohn Marino }
50*d5f8dde1SJohn Marino 
51*d5f8dde1SJohn Marino /*
525f2eab64SJohn Marino   Algorithms to setup tags so that submatch addressing can be done.
535f2eab64SJohn Marino */
545f2eab64SJohn Marino 
555f2eab64SJohn Marino 
56*d5f8dde1SJohn Marino #ifdef TRE_DEBUG
57*d5f8dde1SJohn Marino static const char *tag_dir_str[] = {
58*d5f8dde1SJohn Marino   "minimize",
59*d5f8dde1SJohn Marino   "maximize",
60*d5f8dde1SJohn Marino   "left-maximize"
61*d5f8dde1SJohn Marino };
62*d5f8dde1SJohn Marino 
63*d5f8dde1SJohn Marino static const char _indent[] = "  ";
64*d5f8dde1SJohn Marino 
65*d5f8dde1SJohn Marino static void
print_indent(int indent)66*d5f8dde1SJohn Marino print_indent(int indent)
67*d5f8dde1SJohn Marino {
68*d5f8dde1SJohn Marino   while (indent-- > 0)
69*d5f8dde1SJohn Marino     DPRINT((_indent));
70*d5f8dde1SJohn Marino }
71*d5f8dde1SJohn Marino 
72*d5f8dde1SJohn Marino static void print_last_matched_pre(tre_last_matched_pre_t *lm, int indent,
73*d5f8dde1SJohn Marino 				   int num_tags);
74*d5f8dde1SJohn Marino static void
print_last_match_branch_pre(tre_last_matched_branch_pre_t * branch,int indent,int num_tags)75*d5f8dde1SJohn Marino print_last_match_branch_pre(tre_last_matched_branch_pre_t *branch, int indent,
76*d5f8dde1SJohn Marino 			    int num_tags)
77*d5f8dde1SJohn Marino {
78*d5f8dde1SJohn Marino   tre_last_matched_pre_t *u = branch->last_matched;
79*d5f8dde1SJohn Marino   int n_last_matched = 0;
80*d5f8dde1SJohn Marino 
81*d5f8dde1SJohn Marino   while (u)
82*d5f8dde1SJohn Marino     {
83*d5f8dde1SJohn Marino       n_last_matched++;
84*d5f8dde1SJohn Marino       u = u->next;
85*d5f8dde1SJohn Marino     }
86*d5f8dde1SJohn Marino 
87*d5f8dde1SJohn Marino   print_indent(indent);
88*d5f8dde1SJohn Marino   DPRINT(("BRANCH: tot_branches=%d tot_last_matched=%d tot_tags=%d\n",
89*d5f8dde1SJohn Marino 	  branch->tot_branches, branch->tot_last_matched, branch->tot_tags));
90*d5f8dde1SJohn Marino   print_indent(indent);
91*d5f8dde1SJohn Marino   DPRINT(("..n_last_matched=%d last_matched=%d\n", branch->n_last_matched,
92*d5f8dde1SJohn Marino 	  n_last_matched));
93*d5f8dde1SJohn Marino   if (branch->n_last_matched != n_last_matched)
94*d5f8dde1SJohn Marino     DPRINT(("*** mismatch between n_last_matched and unions ***\n"));
95*d5f8dde1SJohn Marino   if (branch->cmp_tag > 0)
96*d5f8dde1SJohn Marino     {
97*d5f8dde1SJohn Marino       int i;
98*d5f8dde1SJohn Marino       const char *sep = " tags=";
99*d5f8dde1SJohn Marino       print_indent(indent);
100*d5f8dde1SJohn Marino       DPRINT(("..cmp_tag=%d n_tags=%d", branch->cmp_tag, branch->n_tags));
101*d5f8dde1SJohn Marino       for (i = 0; i < num_tags; i++)
102*d5f8dde1SJohn Marino 	if (bit_test(branch->tags, i))
103*d5f8dde1SJohn Marino 	  {
104*d5f8dde1SJohn Marino 	    DPRINT(("%s%d", sep, i));
105*d5f8dde1SJohn Marino 	    sep = ",";
106*d5f8dde1SJohn Marino 	  }
107*d5f8dde1SJohn Marino       DPRINT(("\n"));
108*d5f8dde1SJohn Marino     }
109*d5f8dde1SJohn Marino 
110*d5f8dde1SJohn Marino   u = branch->last_matched;
111*d5f8dde1SJohn Marino   indent++;
112*d5f8dde1SJohn Marino   while (u)
113*d5f8dde1SJohn Marino     {
114*d5f8dde1SJohn Marino       print_last_matched_pre(u, indent, num_tags);
115*d5f8dde1SJohn Marino       u = u->next;
116*d5f8dde1SJohn Marino     }
117*d5f8dde1SJohn Marino }
118*d5f8dde1SJohn Marino 
119*d5f8dde1SJohn Marino static void
print_last_matched_pre(tre_last_matched_pre_t * lm,int indent,int num_tags)120*d5f8dde1SJohn Marino print_last_matched_pre(tre_last_matched_pre_t *lm, int indent, int num_tags)
121*d5f8dde1SJohn Marino {
122*d5f8dde1SJohn Marino   tre_last_matched_branch_pre_t *b = lm->branches;
123*d5f8dde1SJohn Marino   int n_branches = 0;
124*d5f8dde1SJohn Marino 
125*d5f8dde1SJohn Marino   while (b)
126*d5f8dde1SJohn Marino     {
127*d5f8dde1SJohn Marino       n_branches++;
128*d5f8dde1SJohn Marino       b = b->next;
129*d5f8dde1SJohn Marino     }
130*d5f8dde1SJohn Marino 
131*d5f8dde1SJohn Marino   print_indent(indent);
132*d5f8dde1SJohn Marino   DPRINT(("LAST_MATCHED: tot_branches=%d tot_last_matched=%d tot_tags=%d\n",
133*d5f8dde1SJohn Marino 	  lm->tot_branches, lm->tot_last_matched, lm->tot_tags));
134*d5f8dde1SJohn Marino   print_indent(indent);
135*d5f8dde1SJohn Marino   DPRINT(("..start_tag=%d n_branches=%d branches=%d\n", lm->start_tag,
136*d5f8dde1SJohn Marino 	  lm->n_branches, n_branches));
137*d5f8dde1SJohn Marino   if (lm->n_branches != n_branches)
138*d5f8dde1SJohn Marino     DPRINT(("*** mismatch between n and branches ***\n"));
139*d5f8dde1SJohn Marino 
140*d5f8dde1SJohn Marino   b = lm->branches;
141*d5f8dde1SJohn Marino   indent++;
142*d5f8dde1SJohn Marino   while (b)
143*d5f8dde1SJohn Marino     {
144*d5f8dde1SJohn Marino       print_last_match_branch_pre(b, indent, num_tags);
145*d5f8dde1SJohn Marino       b = b->next;
146*d5f8dde1SJohn Marino     }
147*d5f8dde1SJohn Marino }
148*d5f8dde1SJohn Marino 
149*d5f8dde1SJohn Marino static void print_last_matched(tre_last_matched_t *lm, int indent);
150*d5f8dde1SJohn Marino static void
print_last_match_branch(tre_last_matched_branch_t * branch,int indent)151*d5f8dde1SJohn Marino print_last_match_branch(tre_last_matched_branch_t *branch, int indent)
152*d5f8dde1SJohn Marino {
153*d5f8dde1SJohn Marino   tre_last_matched_t *u;
154*d5f8dde1SJohn Marino   int i;
155*d5f8dde1SJohn Marino 
156*d5f8dde1SJohn Marino   print_indent(indent);
157*d5f8dde1SJohn Marino   DPRINT(("BRANCH: n_last_matched=%d\n", branch->n_last_matched));
158*d5f8dde1SJohn Marino   if (branch->cmp_tag > 0)
159*d5f8dde1SJohn Marino     {
160*d5f8dde1SJohn Marino       print_indent(indent);
161*d5f8dde1SJohn Marino       DPRINT(("..cmp_tag=%d n_tags=%d", branch->cmp_tag, branch->n_tags));
162*d5f8dde1SJohn Marino       if (branch->n_tags > 0)
163*d5f8dde1SJohn Marino 	{
164*d5f8dde1SJohn Marino 	  const char *sep = " tags=";
165*d5f8dde1SJohn Marino 	  for (i = 0; i < branch->n_tags; i++)
166*d5f8dde1SJohn Marino 	    {
167*d5f8dde1SJohn Marino 	      DPRINT(("%s%d", sep, branch->tags[i]));
168*d5f8dde1SJohn Marino 	      sep = ",";
169*d5f8dde1SJohn Marino 	    }
170*d5f8dde1SJohn Marino 	}
171*d5f8dde1SJohn Marino       DPRINT(("\n"));
172*d5f8dde1SJohn Marino     }
173*d5f8dde1SJohn Marino 
174*d5f8dde1SJohn Marino   u = branch->last_matched;
175*d5f8dde1SJohn Marino   indent++;
176*d5f8dde1SJohn Marino   for (i = branch->n_last_matched; i > 0; i--, u++)
177*d5f8dde1SJohn Marino     print_last_matched(u, indent);
178*d5f8dde1SJohn Marino }
179*d5f8dde1SJohn Marino 
180*d5f8dde1SJohn Marino static void
print_last_matched(tre_last_matched_t * lm,int indent)181*d5f8dde1SJohn Marino print_last_matched(tre_last_matched_t *lm, int indent)
182*d5f8dde1SJohn Marino {
183*d5f8dde1SJohn Marino   int i;
184*d5f8dde1SJohn Marino   tre_last_matched_branch_t *b;
185*d5f8dde1SJohn Marino 
186*d5f8dde1SJohn Marino   print_indent(indent);
187*d5f8dde1SJohn Marino   DPRINT(("LAST_MATCHED: n_branches=%d start_tag=%d\n", lm->n_branches,
188*d5f8dde1SJohn Marino 	  lm->start_tag));
189*d5f8dde1SJohn Marino 
190*d5f8dde1SJohn Marino   b = lm->branches;
191*d5f8dde1SJohn Marino   indent++;
192*d5f8dde1SJohn Marino   for (i = lm->n_branches; i > 0; i--, b++)
193*d5f8dde1SJohn Marino     print_last_match_branch(b, indent);
194*d5f8dde1SJohn Marino }
195*d5f8dde1SJohn Marino #endif /* TRE_DEBUG */
196*d5f8dde1SJohn Marino 
197*d5f8dde1SJohn Marino 
198*d5f8dde1SJohn Marino /* Merge the tre_last_matched_branch_pre_t of src into dst, creating a new
199*d5f8dde1SJohn Marino    one if needed.  If tag_id > 0, add that tag as well (a negative tag_id will
200*d5f8dde1SJohn Marino    create an unset tre_last_matched_branch_pre_t. */
201*d5f8dde1SJohn Marino static reg_errcode_t
tre_merge_branches(tre_mem_t mem,tre_ast_node_t * dst,tre_ast_node_t * src,int tag_id,int num_tags)202*d5f8dde1SJohn Marino tre_merge_branches(tre_mem_t mem, tre_ast_node_t *dst, tre_ast_node_t *src,
203*d5f8dde1SJohn Marino 		   int tag_id, int num_tags)
204*d5f8dde1SJohn Marino {
205*d5f8dde1SJohn Marino   tre_last_matched_branch_pre_t *db = dst->last_matched_branch;
206*d5f8dde1SJohn Marino   tre_last_matched_branch_pre_t *sb = (src ? src->last_matched_branch : NULL);
207*d5f8dde1SJohn Marino 
208*d5f8dde1SJohn Marino   if (db)
209*d5f8dde1SJohn Marino     {
210*d5f8dde1SJohn Marino       if (sb)
211*d5f8dde1SJohn Marino 	{
212*d5f8dde1SJohn Marino 	  bitstr_t *l = db->tags;
213*d5f8dde1SJohn Marino 	  bitstr_t *r = sb->tags;
214*d5f8dde1SJohn Marino 	  int i = bitstr_size(num_tags);
215*d5f8dde1SJohn Marino 
216*d5f8dde1SJohn Marino 	  while(i-- > 0)
217*d5f8dde1SJohn Marino 	    *l++ |= *r++;
218*d5f8dde1SJohn Marino 	  /* db and sb are the info from two parallel sub-trees, so the tags
219*d5f8dde1SJohn Marino 	     must be mutually exclusive, and we can just add their numbers */
220*d5f8dde1SJohn Marino 	  db->n_tags += sb->n_tags;
221*d5f8dde1SJohn Marino 	  db->tot_tags += sb->tot_tags;
222*d5f8dde1SJohn Marino 	  if (db->last_matched)
223*d5f8dde1SJohn Marino 	    {
224*d5f8dde1SJohn Marino 	      if (sb->last_matched)
225*d5f8dde1SJohn Marino 		{
226*d5f8dde1SJohn Marino 		  tre_last_matched_pre_t *u = db->last_matched;
227*d5f8dde1SJohn Marino 
228*d5f8dde1SJohn Marino 		  while(u->next)
229*d5f8dde1SJohn Marino 		    u = u->next;
230*d5f8dde1SJohn Marino 		  u->next = sb->last_matched;
231*d5f8dde1SJohn Marino 		  db->n_last_matched += sb->n_last_matched;
232*d5f8dde1SJohn Marino 		  db->tot_branches += sb->tot_branches;
233*d5f8dde1SJohn Marino 		  db->tot_last_matched += sb->tot_last_matched;
234*d5f8dde1SJohn Marino 		}
235*d5f8dde1SJohn Marino 	    }
236*d5f8dde1SJohn Marino 	    else if (sb->last_matched)
237*d5f8dde1SJohn Marino 	      {
238*d5f8dde1SJohn Marino 		db->last_matched = sb->last_matched;
239*d5f8dde1SJohn Marino 		db->n_last_matched = sb->n_last_matched;
240*d5f8dde1SJohn Marino 		db->tot_branches = sb->tot_branches;
241*d5f8dde1SJohn Marino 		db->tot_last_matched = sb->tot_last_matched;
242*d5f8dde1SJohn Marino 	      }
243*d5f8dde1SJohn Marino 	}
244*d5f8dde1SJohn Marino     }
245*d5f8dde1SJohn Marino   else
246*d5f8dde1SJohn Marino     db = sb;
247*d5f8dde1SJohn Marino 
248*d5f8dde1SJohn Marino   if (tag_id != 0)
249*d5f8dde1SJohn Marino     {
250*d5f8dde1SJohn Marino       if (!db)
251*d5f8dde1SJohn Marino 	{
252*d5f8dde1SJohn Marino 	  db = tre_mem_calloc(mem, sizeof(tre_last_matched_branch_pre_t)
253*d5f8dde1SJohn Marino 			      + bitstr_size(num_tags));
254*d5f8dde1SJohn Marino 	  if (db == NULL)
255*d5f8dde1SJohn Marino 	    return REG_ESPACE;
256*d5f8dde1SJohn Marino 	  db->tot_branches = 1;
257*d5f8dde1SJohn Marino 	}
258*d5f8dde1SJohn Marino       if (tag_id > 0)
259*d5f8dde1SJohn Marino 	{
260*d5f8dde1SJohn Marino 	  /* tag_id is a new tag, and shouldn't exist in db's tags,
261*d5f8dde1SJohn Marino 	     so we can always increment n_tags */
262*d5f8dde1SJohn Marino 	  bit_set(db->tags, tag_id);
263*d5f8dde1SJohn Marino 	  db->n_tags++;
264*d5f8dde1SJohn Marino 	  db->tot_tags++;
265*d5f8dde1SJohn Marino 	}
266*d5f8dde1SJohn Marino     }
267*d5f8dde1SJohn Marino   dst->last_matched_branch = db;
268*d5f8dde1SJohn Marino   return REG_OK;
269*d5f8dde1SJohn Marino }
270*d5f8dde1SJohn Marino 
271*d5f8dde1SJohn Marino 
2725f2eab64SJohn Marino /* Inserts a catenation node to the root of the tree given in `node'.
2735f2eab64SJohn Marino    As the left child a new tag with number `tag_id' to `node' is added,
2745f2eab64SJohn Marino    and the right child is the old root. */
2755f2eab64SJohn Marino static reg_errcode_t
tre_add_tag_left(tre_mem_t mem,tre_ast_node_t * node,int tag_id)2765f2eab64SJohn Marino tre_add_tag_left(tre_mem_t mem, tre_ast_node_t *node, int tag_id)
2775f2eab64SJohn Marino {
2785f2eab64SJohn Marino   tre_catenation_t *c;
2795f2eab64SJohn Marino 
2805f2eab64SJohn Marino   DPRINT(("add_tag_left: tag %d\n", tag_id));
2815f2eab64SJohn Marino 
2825f2eab64SJohn Marino   c = tre_mem_alloc(mem, sizeof(*c));
2835f2eab64SJohn Marino   if (c == NULL)
2845f2eab64SJohn Marino     return REG_ESPACE;
2855f2eab64SJohn Marino   c->left = tre_ast_new_literal(mem, TAG, tag_id, -1);
2865f2eab64SJohn Marino   if (c->left == NULL)
2875f2eab64SJohn Marino     return REG_ESPACE;
288*d5f8dde1SJohn Marino   c->right = tre_mem_calloc(mem, sizeof(tre_ast_node_t));
2895f2eab64SJohn Marino   if (c->right == NULL)
2905f2eab64SJohn Marino     return REG_ESPACE;
2915f2eab64SJohn Marino 
2925f2eab64SJohn Marino   c->right->obj = node->obj;
2935f2eab64SJohn Marino   c->right->type = node->type;
294*d5f8dde1SJohn Marino   c->right->last_matched_branch = node->last_matched_branch;
2955f2eab64SJohn Marino   c->right->nullable = -1;
2965f2eab64SJohn Marino   c->right->submatch_id = -1;
2975f2eab64SJohn Marino   node->obj = c;
2985f2eab64SJohn Marino   node->type = CATENATION;
299*d5f8dde1SJohn Marino   node->original = c->right;
3005f2eab64SJohn Marino   return REG_OK;
3015f2eab64SJohn Marino }
3025f2eab64SJohn Marino 
3035f2eab64SJohn Marino /* Inserts a catenation node to the root of the tree given in `node'.
3045f2eab64SJohn Marino    As the right child a new tag with number `tag_id' to `node' is added,
3055f2eab64SJohn Marino    and the left child is the old root. */
3065f2eab64SJohn Marino static reg_errcode_t
tre_add_tag_right(tre_mem_t mem,tre_ast_node_t * node,int tag_id)3075f2eab64SJohn Marino tre_add_tag_right(tre_mem_t mem, tre_ast_node_t *node, int tag_id)
3085f2eab64SJohn Marino {
3095f2eab64SJohn Marino   tre_catenation_t *c;
3105f2eab64SJohn Marino 
3115f2eab64SJohn Marino   DPRINT(("tre_add_tag_right: tag %d\n", tag_id));
3125f2eab64SJohn Marino 
3135f2eab64SJohn Marino   c = tre_mem_alloc(mem, sizeof(*c));
3145f2eab64SJohn Marino   if (c == NULL)
3155f2eab64SJohn Marino     return REG_ESPACE;
3165f2eab64SJohn Marino   c->right = tre_ast_new_literal(mem, TAG, tag_id, -1);
3175f2eab64SJohn Marino   if (c->right == NULL)
3185f2eab64SJohn Marino     return REG_ESPACE;
319*d5f8dde1SJohn Marino   c->left = tre_mem_calloc(mem, sizeof(tre_ast_node_t));
3205f2eab64SJohn Marino   if (c->left == NULL)
3215f2eab64SJohn Marino     return REG_ESPACE;
3225f2eab64SJohn Marino 
3235f2eab64SJohn Marino   c->left->obj = node->obj;
3245f2eab64SJohn Marino   c->left->type = node->type;
325*d5f8dde1SJohn Marino   c->left->last_matched_branch = node->last_matched_branch;
3265f2eab64SJohn Marino   c->left->nullable = -1;
3275f2eab64SJohn Marino   c->left->submatch_id = -1;
3285f2eab64SJohn Marino   node->obj = c;
3295f2eab64SJohn Marino   node->type = CATENATION;
330*d5f8dde1SJohn Marino   node->original = c->left;
3315f2eab64SJohn Marino   return REG_OK;
3325f2eab64SJohn Marino }
3335f2eab64SJohn Marino 
3345f2eab64SJohn Marino typedef enum {
3355f2eab64SJohn Marino   ADDTAGS_RECURSE,
336*d5f8dde1SJohn Marino   ADDTAGS_RECURSE_NOT_TOP_UNION,
3375f2eab64SJohn Marino   ADDTAGS_AFTER_ITERATION,
3385f2eab64SJohn Marino   ADDTAGS_AFTER_UNION_LEFT,
3395f2eab64SJohn Marino   ADDTAGS_AFTER_UNION_RIGHT,
3405f2eab64SJohn Marino   ADDTAGS_AFTER_CAT_LEFT,
3415f2eab64SJohn Marino   ADDTAGS_AFTER_CAT_RIGHT,
342*d5f8dde1SJohn Marino   ADDTAGS_SET_SUBMATCH_END,
343*d5f8dde1SJohn Marino   ADDTAGS_UNION_RECURSE,
344*d5f8dde1SJohn Marino   ADDTAGS_UNION_RIGHT_RECURSE,
345*d5f8dde1SJohn Marino   ADDTAGS_AFTER_UNION_TOP,
3465f2eab64SJohn Marino } tre_addtags_symbol_t;
3475f2eab64SJohn Marino 
348*d5f8dde1SJohn Marino enum {
349*d5f8dde1SJohn Marino   COPY_LAST_MATCHED_BRANCH,
350*d5f8dde1SJohn Marino   COPY_LAST_MATCHED_BRANCH_NEXT,
351*d5f8dde1SJohn Marino   COPY_LAST_MATCHED,
352*d5f8dde1SJohn Marino   COPY_LAST_MATCHED_NEXT,
353*d5f8dde1SJohn Marino };
3545f2eab64SJohn Marino 
3555f2eab64SJohn Marino 
356*d5f8dde1SJohn Marino #define REGSET_UNSET		((unsigned)-1)
3575f2eab64SJohn Marino 
3585f2eab64SJohn Marino /* Go through `regset' and set submatch data for submatches that are
3595f2eab64SJohn Marino    using this tag. */
3605f2eab64SJohn Marino static void
tre_purge_regset(unsigned * regset,tre_tnfa_t * tnfa,int tag)361*d5f8dde1SJohn Marino tre_purge_regset(unsigned *regset, tre_tnfa_t *tnfa, int tag)
3625f2eab64SJohn Marino {
3635f2eab64SJohn Marino   int i;
3645f2eab64SJohn Marino 
365*d5f8dde1SJohn Marino   for (i = 0; regset[i] != REGSET_UNSET; i++)
3665f2eab64SJohn Marino     {
3675f2eab64SJohn Marino       int id = regset[i] / 2;
3685f2eab64SJohn Marino       int start = !(regset[i] % 2);
369*d5f8dde1SJohn Marino       if (id >= SUBMATCH_ID_INVISIBLE_START)
370*d5f8dde1SJohn Marino 	continue;
3715f2eab64SJohn Marino       DPRINT(("  Using tag %d for %s offset of "
3725f2eab64SJohn Marino 	      "submatch %d\n", tag,
3735f2eab64SJohn Marino 	      start ? "start" : "end", id));
3745f2eab64SJohn Marino       if (start)
3755f2eab64SJohn Marino 	tnfa->submatch_data[id].so_tag = tag;
3765f2eab64SJohn Marino       else
3775f2eab64SJohn Marino 	tnfa->submatch_data[id].eo_tag = tag;
3785f2eab64SJohn Marino     }
3795f2eab64SJohn Marino   regset[0] = -1;
3805f2eab64SJohn Marino }
3815f2eab64SJohn Marino 
3825f2eab64SJohn Marino 
383*d5f8dde1SJohn Marino #define REGSET_HAS_STARTS	0x1
384*d5f8dde1SJohn Marino #define REGSET_HAS_ENDS		0x2
385*d5f8dde1SJohn Marino 
386*d5f8dde1SJohn Marino 
3875f2eab64SJohn Marino /* Adds tags to appropriate locations in the parse tree in `tree', so that
3885f2eab64SJohn Marino    subexpressions marked for submatch addressing can be traced. */
3895f2eab64SJohn Marino static reg_errcode_t
tre_add_tags(tre_mem_t mem,tre_stack_t * stack,tre_ast_node_t * tree,tre_tnfa_t * tnfa)3905f2eab64SJohn Marino tre_add_tags(tre_mem_t mem, tre_stack_t *stack, tre_ast_node_t *tree,
3915f2eab64SJohn Marino 	     tre_tnfa_t *tnfa)
3925f2eab64SJohn Marino {
3935f2eab64SJohn Marino   reg_errcode_t status = REG_OK;
3945f2eab64SJohn Marino   tre_addtags_symbol_t symbol;
3955f2eab64SJohn Marino   tre_ast_node_t *node = tree; /* Tree node we are currently looking at. */
3965f2eab64SJohn Marino   int bottom = tre_stack_num_objects(stack);
3975f2eab64SJohn Marino   /* True for first pass (counting number of needed tags) */
3985f2eab64SJohn Marino   int first_pass = (mem == NULL || tnfa == NULL);
399*d5f8dde1SJohn Marino   unsigned *regset, *orig_regset;
400*d5f8dde1SJohn Marino   int regset_contains = 0;
4015f2eab64SJohn Marino   int num_tags = 0; /* Total number of tags. */
4025f2eab64SJohn Marino   int num_minimals = 0;	 /* Number of special minimal tags. */
4035f2eab64SJohn Marino   int tag = 0;	    /* The tag that is to be added next. */
4045f2eab64SJohn Marino   int next_tag = 1; /* Next tag to use after this one. */
4055f2eab64SJohn Marino   int minimal_tag = -1; /* Tag that marks the beginning of a minimal match. */
406*d5f8dde1SJohn Marino   int *reorder_tags = NULL; /* Tag reorder array: a pair for each reorder,
407*d5f8dde1SJohn Marino 		             * the first is the tag to reorder, the second
408*d5f8dde1SJohn Marino 		             * is the tag after which the first is reordered */
409*d5f8dde1SJohn Marino   int *rtp;		    /* Pointer used to fill in reorder_tags and
410*d5f8dde1SJohn Marino 			     * tag_order */
411*d5f8dde1SJohn Marino   int *to_reorder;          /* Transform array converting sequential order to
412*d5f8dde1SJohn Marino 		             * that specified by reorder_tags */
413*d5f8dde1SJohn Marino   int id;
4145f2eab64SJohn Marino 
415*d5f8dde1SJohn Marino   tre_tag_direction_t direction = TRE_TAG_LEFT_MAXIMIZE;
4165f2eab64SJohn Marino   if (!first_pass)
4175f2eab64SJohn Marino     {
418*d5f8dde1SJohn Marino       DPRINT(("Initializing direction to %s\n", tag_dir_str[direction]));
4195f2eab64SJohn Marino       tnfa->end_tag = 0;
4205f2eab64SJohn Marino       tnfa->minimal_tags[0] = -1;
4215f2eab64SJohn Marino     }
4225f2eab64SJohn Marino 
423*d5f8dde1SJohn Marino   regset = xmalloc(sizeof(*regset) * ((tnfa->num_submatches
424*d5f8dde1SJohn Marino 		   + tnfa->num_submatches_invisible + 1) * 2));
4255f2eab64SJohn Marino   if (regset == NULL)
426*d5f8dde1SJohn Marino     {
427*d5f8dde1SJohn Marino       status = REG_ESPACE;
428*d5f8dde1SJohn Marino       goto error_regset;
429*d5f8dde1SJohn Marino     }
430*d5f8dde1SJohn Marino   regset[0] = REGSET_UNSET;
4315f2eab64SJohn Marino   orig_regset = regset;
4325f2eab64SJohn Marino 
433*d5f8dde1SJohn Marino   if (!first_pass)
4345f2eab64SJohn Marino     {
435*d5f8dde1SJohn Marino       /* Allocate all memory for reorder_tags, tag_order, to_seq_order and
436*d5f8dde1SJohn Marino        * to_reorder in one batch (assuming all are the same type) */
437*d5f8dde1SJohn Marino       rtp = reorder_tags = xmalloc(sizeof(*reorder_tags) *
438*d5f8dde1SJohn Marino 				   ((2 * tnfa->num_reorder_tags + 1) +
439*d5f8dde1SJohn Marino 				   tnfa->num_tags));
440*d5f8dde1SJohn Marino       if (reorder_tags == NULL)
441*d5f8dde1SJohn Marino 	{
442*d5f8dde1SJohn Marino 	  status = REG_ESPACE;
443*d5f8dde1SJohn Marino 	  goto error_reorder_tags;
4445f2eab64SJohn Marino 	}
445*d5f8dde1SJohn Marino       to_reorder = reorder_tags + (2 * tnfa->num_reorder_tags + 1);
4465f2eab64SJohn Marino     }
4475f2eab64SJohn Marino 
4485f2eab64SJohn Marino   STACK_PUSH(stack, voidptr, node);
4495f2eab64SJohn Marino   STACK_PUSH(stack, int, ADDTAGS_RECURSE);
4505f2eab64SJohn Marino 
4515f2eab64SJohn Marino   while (tre_stack_num_objects(stack) > bottom)
4525f2eab64SJohn Marino     {
4535f2eab64SJohn Marino       if (status != REG_OK)
4545f2eab64SJohn Marino 	break;
4555f2eab64SJohn Marino 
4565f2eab64SJohn Marino       symbol = (tre_addtags_symbol_t)tre_stack_pop_int(stack);
4575f2eab64SJohn Marino       switch (symbol)
4585f2eab64SJohn Marino 	{
459*d5f8dde1SJohn Marino 	  int top_union;
4605f2eab64SJohn Marino 
4615f2eab64SJohn Marino 	case ADDTAGS_SET_SUBMATCH_END:
4625f2eab64SJohn Marino 	  {
4635f2eab64SJohn Marino 	    int i;
4645f2eab64SJohn Marino 
465*d5f8dde1SJohn Marino 	    id = tre_stack_pop_int(stack);
466*d5f8dde1SJohn Marino 	    node = tre_stack_pop_voidptr(stack);
4675f2eab64SJohn Marino 	    /* Add end of this submatch to regset. */
468*d5f8dde1SJohn Marino 	    for (i = 0; regset[i] != REGSET_UNSET; i++);
4695f2eab64SJohn Marino 	    regset[i] = id * 2 + 1;
4705f2eab64SJohn Marino 	    regset[i + 1] = -1;
471*d5f8dde1SJohn Marino 	    regset_contains |= REGSET_HAS_ENDS;
4725f2eab64SJohn Marino 
473*d5f8dde1SJohn Marino 	    /* Always put a tag after a minimal iterator. */
474*d5f8dde1SJohn Marino 	    if (minimal_tag >= 0)
475*d5f8dde1SJohn Marino 	      {
476*d5f8dde1SJohn Marino 		if (first_pass)
477*d5f8dde1SJohn Marino 		  {
478*d5f8dde1SJohn Marino 		    node->num_tags++;
479*d5f8dde1SJohn Marino 		    DPRINT(("  ADDTAGS_SET_SUBMATCH_END: node->num_tags = %d\n",
480*d5f8dde1SJohn Marino 			    node->num_tags));
481*d5f8dde1SJohn Marino 		  }
482*d5f8dde1SJohn Marino 		else
483*d5f8dde1SJohn Marino 		  {
484*d5f8dde1SJohn Marino 		    int i;
485*d5f8dde1SJohn Marino 		    status = tre_merge_branches(mem, node, NULL, tag,
486*d5f8dde1SJohn Marino 						tnfa->num_tags);
487*d5f8dde1SJohn Marino 		    if (status != REG_OK)
488*d5f8dde1SJohn Marino 		      break;
489*d5f8dde1SJohn Marino 		    status = tre_add_tag_right(mem, node, tag);
490*d5f8dde1SJohn Marino 		    if (status != REG_OK)
491*d5f8dde1SJohn Marino 		      break;
492*d5f8dde1SJohn Marino 		    tnfa->tag_directions[tag] = TRE_TAG_MINIMIZE;
493*d5f8dde1SJohn Marino 		    DPRINT(("Setting t%d direction to %s\n", tag,
494*d5f8dde1SJohn Marino 			    tag_dir_str[tnfa->tag_directions[tag]]));
495*d5f8dde1SJohn Marino 		    DPRINT(("Minimal %d, %d\n", minimal_tag, tag));
496*d5f8dde1SJohn Marino 		    for (i = 0; tnfa->minimal_tags[i] >= 0; i++);
497*d5f8dde1SJohn Marino 		    tnfa->minimal_tags[i] = tag;
498*d5f8dde1SJohn Marino 		    tnfa->minimal_tags[i + 1] = minimal_tag;
499*d5f8dde1SJohn Marino 		    tnfa->minimal_tags[i + 2] = -1;
500*d5f8dde1SJohn Marino 
501*d5f8dde1SJohn Marino 		    DPRINT(("  Minimal end: t%d reordered to "
502*d5f8dde1SJohn Marino 			    "after t%d\n", tag, minimal_tag));
503*d5f8dde1SJohn Marino 		    /* Append to tag_order, move "tag" after
504*d5f8dde1SJohn Marino 		     * "minimal_tag" */
505*d5f8dde1SJohn Marino 		    *rtp++ = tag;
506*d5f8dde1SJohn Marino 		    *rtp++ = minimal_tag;
507*d5f8dde1SJohn Marino 
508*d5f8dde1SJohn Marino 		    num_minimals++;
509*d5f8dde1SJohn Marino 		    tre_purge_regset(regset, tnfa, tag);
510*d5f8dde1SJohn Marino 		  }
511*d5f8dde1SJohn Marino 
512*d5f8dde1SJohn Marino 		minimal_tag = -1;
513*d5f8dde1SJohn Marino 		DPRINT(("  ADDTAGS_SET_SUBMATCH_END num_tags++ tag=%d\n", tag));
514*d5f8dde1SJohn Marino 		regset[0] = REGSET_UNSET;
515*d5f8dde1SJohn Marino 		regset_contains = 0;
516*d5f8dde1SJohn Marino 		tag = next_tag;
517*d5f8dde1SJohn Marino 		num_tags++;
518*d5f8dde1SJohn Marino 		next_tag++;
519*d5f8dde1SJohn Marino 	      }
5205f2eab64SJohn Marino 	    break;
5215f2eab64SJohn Marino 	  }
5225f2eab64SJohn Marino 
523*d5f8dde1SJohn Marino 	case ADDTAGS_RECURSE_NOT_TOP_UNION:
524*d5f8dde1SJohn Marino 	  /* Like ADDTAGS_RECURSE, except that top_union is set to zero,
525*d5f8dde1SJohn Marino 	   * indicating that if a union is being processed, it is not the
526*d5f8dde1SJohn Marino 	   * top-most of a series */
527*d5f8dde1SJohn Marino 	  top_union = 0;
528*d5f8dde1SJohn Marino 	  goto do_addtags_recurse;
529*d5f8dde1SJohn Marino 
5305f2eab64SJohn Marino 	case ADDTAGS_RECURSE:
531*d5f8dde1SJohn Marino 	  /* Setting top_union to 1 means that if a union is begin processed,
532*d5f8dde1SJohn Marino 	   * it is the top-most of a series, and should recurse through the
533*d5f8dde1SJohn Marino 	   * series to set the left_tag and right_tag values */
534*d5f8dde1SJohn Marino 	  top_union = 1;
535*d5f8dde1SJohn Marino 
536*d5f8dde1SJohn Marino do_addtags_recurse:
5375f2eab64SJohn Marino 	  node = tre_stack_pop_voidptr(stack);
5385f2eab64SJohn Marino 
539*d5f8dde1SJohn Marino 	  id = node->submatch_id;
540*d5f8dde1SJohn Marino 	  if (id >= 0)
5415f2eab64SJohn Marino 	    {
5425f2eab64SJohn Marino 	      int i;
5435f2eab64SJohn Marino 
5445f2eab64SJohn Marino 
5455f2eab64SJohn Marino 	      /* Add start of this submatch to regset. */
546*d5f8dde1SJohn Marino 	      for (i = 0; regset[i] != REGSET_UNSET; i++);
5475f2eab64SJohn Marino 	      regset[i] = id * 2;
5485f2eab64SJohn Marino 	      regset[i + 1] = -1;
549*d5f8dde1SJohn Marino 	      regset_contains |= REGSET_HAS_STARTS;
5505f2eab64SJohn Marino 
5515f2eab64SJohn Marino 	      /* Add end of this submatch to regset after processing this
5525f2eab64SJohn Marino 		 node. */
553*d5f8dde1SJohn Marino 	      STACK_PUSH(stack, voidptr, node);
554*d5f8dde1SJohn Marino 	      STACK_PUSHX(stack, int, id);
5555f2eab64SJohn Marino 	      STACK_PUSHX(stack, int, ADDTAGS_SET_SUBMATCH_END);
5565f2eab64SJohn Marino 	    }
5575f2eab64SJohn Marino 
5585f2eab64SJohn Marino 	  switch (node->type)
5595f2eab64SJohn Marino 	    {
5605f2eab64SJohn Marino 	    case LITERAL:
5615f2eab64SJohn Marino 	      {
5625f2eab64SJohn Marino 		tre_literal_t *lit = node->obj;
5635f2eab64SJohn Marino 
564*d5f8dde1SJohn Marino 		if (!IS_SPECIAL(lit) || IS_BACKREF(lit) || IS_EMPTY(lit) || IS_ASSERTION(lit))
5655f2eab64SJohn Marino 		  {
5665f2eab64SJohn Marino 		    DPRINT(("Literal %d-%d\n",
5675f2eab64SJohn Marino 			    (int)lit->code_min, (int)lit->code_max));
568*d5f8dde1SJohn Marino 		    if (regset_contains)
5695f2eab64SJohn Marino 		      {
5705f2eab64SJohn Marino 			/* Regset is not empty, so add a tag before the
5715f2eab64SJohn Marino 			   literal or backref. */
572*d5f8dde1SJohn Marino 			if (first_pass)
5735f2eab64SJohn Marino 			  {
574*d5f8dde1SJohn Marino 			    DPRINT(("  ADDTAGS_RECURSE:LITERAL node->num_tags = 1\n"));
575*d5f8dde1SJohn Marino 			    node->num_tags = 1;
5765f2eab64SJohn Marino 			  }
5775f2eab64SJohn Marino 			else
5785f2eab64SJohn Marino 			  {
579*d5f8dde1SJohn Marino 			    status = tre_merge_branches(mem, node, NULL, tag,
580*d5f8dde1SJohn Marino 							tnfa->num_tags);
581*d5f8dde1SJohn Marino 			    if (status != REG_OK)
582*d5f8dde1SJohn Marino 			      break;
583*d5f8dde1SJohn Marino 			    status = tre_add_tag_left(mem, node, tag);
584*d5f8dde1SJohn Marino 			    if (status != REG_OK)
585*d5f8dde1SJohn Marino 			      break;
586*d5f8dde1SJohn Marino 			    if (regset_contains == REGSET_HAS_STARTS)
587*d5f8dde1SJohn Marino 			      tnfa->tag_directions[tag] = TRE_TAG_LEFT_MAXIMIZE;
588*d5f8dde1SJohn Marino 			    else
589*d5f8dde1SJohn Marino 			      tnfa->tag_directions[tag] = direction;
590*d5f8dde1SJohn Marino 			    DPRINT(("Setting t%d direction to %s\n", tag,
591*d5f8dde1SJohn Marino 				    tag_dir_str[tnfa->tag_directions[tag]]));
592*d5f8dde1SJohn Marino 			    tre_purge_regset(regset, tnfa, tag);
593*d5f8dde1SJohn Marino 
594*d5f8dde1SJohn Marino 			    if (IS_BACKREF(lit))
595*d5f8dde1SJohn Marino 			      {
596*d5f8dde1SJohn Marino 				int b = lit->code_max;
597*d5f8dde1SJohn Marino 				int t = tnfa->submatch_data[b].so_tag;
598*d5f8dde1SJohn Marino 				/* Fail if the referenced submatch hasn't been
599*d5f8dde1SJohn Marino 				 * completed yet */
600*d5f8dde1SJohn Marino 				if (tnfa->submatch_data[b].eo_tag < 0)
601*d5f8dde1SJohn Marino 				  {
602*d5f8dde1SJohn Marino 				    status = REG_ESUBREG;
603*d5f8dde1SJohn Marino 				    break;
604*d5f8dde1SJohn Marino 				  }
605*d5f8dde1SJohn Marino 				if (t < tag)
606*d5f8dde1SJohn Marino 				  {
607*d5f8dde1SJohn Marino 				    DPRINT(("  Backref %d start: "
608*d5f8dde1SJohn Marino 					    "t%d reordered to before t%d\n",
609*d5f8dde1SJohn Marino 					    b, tag, t));
610*d5f8dde1SJohn Marino 				    if(t > 0)
611*d5f8dde1SJohn Marino 				      t--;
612*d5f8dde1SJohn Marino 				    /* Append to tag_order, move "tag" after
613*d5f8dde1SJohn Marino 				     * "t" */
614*d5f8dde1SJohn Marino 				    *rtp++ = tag;
615*d5f8dde1SJohn Marino 				    *rtp++ = t;
616*d5f8dde1SJohn Marino 				  }
617*d5f8dde1SJohn Marino #if TRE_DEBUG
618*d5f8dde1SJohn Marino 				else
619*d5f8dde1SJohn Marino 				  DPRINT(("  Backref %d start: "
620*d5f8dde1SJohn Marino 					  "(t%d already before t%d)\n",
621*d5f8dde1SJohn Marino 					    b, tag, t));
622*d5f8dde1SJohn Marino #endif /* TRE_DEBUG */
623*d5f8dde1SJohn Marino 			      }
6245f2eab64SJohn Marino 			  }
6255f2eab64SJohn Marino 
626*d5f8dde1SJohn Marino 			DPRINT(("  ADDTAGS_RECURSE:LITERAL num_tags++ tag=%d\n",
627*d5f8dde1SJohn Marino 				tag));
628*d5f8dde1SJohn Marino 			regset[0] = REGSET_UNSET;
629*d5f8dde1SJohn Marino 			regset_contains = 0;
6305f2eab64SJohn Marino 			tag = next_tag;
6315f2eab64SJohn Marino 			num_tags++;
6325f2eab64SJohn Marino 			next_tag++;
6335f2eab64SJohn Marino 		      }
6345f2eab64SJohn Marino 		  }
6355f2eab64SJohn Marino 		else
6365f2eab64SJohn Marino 		  {
6375f2eab64SJohn Marino 		    assert(!IS_TAG(lit));
6385f2eab64SJohn Marino 		  }
6395f2eab64SJohn Marino 		break;
6405f2eab64SJohn Marino 	      }
6415f2eab64SJohn Marino 	    case CATENATION:
6425f2eab64SJohn Marino 	      {
6435f2eab64SJohn Marino 		tre_catenation_t *cat = node->obj;
6445f2eab64SJohn Marino 		tre_ast_node_t *left = cat->left;
6455f2eab64SJohn Marino 		tre_ast_node_t *right = cat->right;
6465f2eab64SJohn Marino 		int reserved_tag = -1;
6475f2eab64SJohn Marino 		DPRINT(("Catenation, next_tag = %d\n", next_tag));
6485f2eab64SJohn Marino 
6495f2eab64SJohn Marino 
6505f2eab64SJohn Marino 		/* After processing right child. */
6515f2eab64SJohn Marino 		STACK_PUSHX(stack, voidptr, node);
6525f2eab64SJohn Marino 		STACK_PUSHX(stack, int, ADDTAGS_AFTER_CAT_RIGHT);
6535f2eab64SJohn Marino 
6545f2eab64SJohn Marino 		/* Process right child. */
6555f2eab64SJohn Marino 		STACK_PUSHX(stack, voidptr, right);
6565f2eab64SJohn Marino 		STACK_PUSHX(stack, int, ADDTAGS_RECURSE);
6575f2eab64SJohn Marino 
6585f2eab64SJohn Marino 		/* After processing left child. */
6595f2eab64SJohn Marino 		STACK_PUSHX(stack, int, next_tag + left->num_tags);
6605f2eab64SJohn Marino 		DPRINT(("  Pushing %d for after left\n",
6615f2eab64SJohn Marino 			next_tag + left->num_tags));
6625f2eab64SJohn Marino 		if (left->num_tags > 0 && right->num_tags > 0)
6635f2eab64SJohn Marino 		  {
6645f2eab64SJohn Marino 		    /* Reserve the next tag to the right child. */
665*d5f8dde1SJohn Marino 		    DPRINT(("  ADDTAGS_RECURSE:CATENATION num_tags++ "
666*d5f8dde1SJohn Marino 			    "Reserving next_tag %d to right child\n",
6675f2eab64SJohn Marino 			    next_tag));
6685f2eab64SJohn Marino 		    reserved_tag = next_tag;
6695f2eab64SJohn Marino 		    next_tag++;
6705f2eab64SJohn Marino 		  }
6715f2eab64SJohn Marino 		STACK_PUSHX(stack, int, reserved_tag);
6725f2eab64SJohn Marino 		STACK_PUSHX(stack, int, ADDTAGS_AFTER_CAT_LEFT);
6735f2eab64SJohn Marino 
6745f2eab64SJohn Marino 		/* Process left child. */
6755f2eab64SJohn Marino 		STACK_PUSHX(stack, voidptr, left);
6765f2eab64SJohn Marino 		STACK_PUSHX(stack, int, ADDTAGS_RECURSE);
6775f2eab64SJohn Marino 
6785f2eab64SJohn Marino 		}
6795f2eab64SJohn Marino 	      break;
6805f2eab64SJohn Marino 	    case ITERATION:
6815f2eab64SJohn Marino 	      {
6825f2eab64SJohn Marino 		tre_iteration_t *iter = node->obj;
6835f2eab64SJohn Marino 		DPRINT(("Iteration\n"));
6845f2eab64SJohn Marino 
6855f2eab64SJohn Marino 		if (first_pass)
686*d5f8dde1SJohn Marino 		  STACK_PUSHX(stack, int, regset_contains != 0);
6875f2eab64SJohn Marino 		STACK_PUSHX(stack, int, tag);
6885f2eab64SJohn Marino 		STACK_PUSHX(stack, voidptr, node);
6895f2eab64SJohn Marino 		STACK_PUSHX(stack, int, ADDTAGS_AFTER_ITERATION);
6905f2eab64SJohn Marino 
6915f2eab64SJohn Marino 		STACK_PUSHX(stack, voidptr, iter->arg);
6925f2eab64SJohn Marino 		STACK_PUSHX(stack, int, ADDTAGS_RECURSE);
6935f2eab64SJohn Marino 
694*d5f8dde1SJohn Marino 		/* Regset is not empty, so add a tag here (this always happens
695*d5f8dde1SJohn Marino 		   because iterators always get submatch id, even if in the
696*d5f8dde1SJohn Marino 		   invisible range) */
697*d5f8dde1SJohn Marino 		if (regset_contains)
6985f2eab64SJohn Marino 		  {
6995f2eab64SJohn Marino 		    if (!first_pass)
7005f2eab64SJohn Marino 		      {
701*d5f8dde1SJohn Marino 			status = tre_merge_branches(mem, node, NULL, tag,
702*d5f8dde1SJohn Marino 						    tnfa->num_tags);
703*d5f8dde1SJohn Marino 			if (status != REG_OK)
704*d5f8dde1SJohn Marino 			  break;
7055f2eab64SJohn Marino 			status = tre_add_tag_left(mem, node, tag);
706*d5f8dde1SJohn Marino 			if (status != REG_OK)
707*d5f8dde1SJohn Marino 			  break;
708*d5f8dde1SJohn Marino 			if (regset_contains == REGSET_HAS_STARTS && tag != 0)
709*d5f8dde1SJohn Marino 			  tnfa->tag_directions[tag] = iter->minimal ?
710*d5f8dde1SJohn Marino 						      TRE_TAG_MINIMIZE :
711*d5f8dde1SJohn Marino 						      TRE_TAG_LEFT_MAXIMIZE;
7125f2eab64SJohn Marino 			else
7135f2eab64SJohn Marino 			  tnfa->tag_directions[tag] = direction;
714*d5f8dde1SJohn Marino 			DPRINT(("Setting t%d direction to %s\n", tag,
715*d5f8dde1SJohn Marino 				tag_dir_str[tnfa->tag_directions[tag]]));
7165f2eab64SJohn Marino 			tre_purge_regset(regset, tnfa, tag);
7175f2eab64SJohn Marino 		      }
7185f2eab64SJohn Marino 
719*d5f8dde1SJohn Marino 		    DPRINT(("  ADDTAGS_RECURSE:ITERATION num_tags++ tag=%d\n",
720*d5f8dde1SJohn Marino 			    tag));
721*d5f8dde1SJohn Marino 		    regset[0] = REGSET_UNSET;
722*d5f8dde1SJohn Marino 		    regset_contains = 0;
7235f2eab64SJohn Marino 		    tag = next_tag;
7245f2eab64SJohn Marino 		    num_tags++;
7255f2eab64SJohn Marino 		    next_tag++;
7265f2eab64SJohn Marino 		  }
727*d5f8dde1SJohn Marino 		direction = TRE_TAG_LEFT_MAXIMIZE;
728*d5f8dde1SJohn Marino 		DPRINT(("  Setting direction to %s\n", tag_dir_str[direction]));
7295f2eab64SJohn Marino 	      }
7305f2eab64SJohn Marino 	      break;
7315f2eab64SJohn Marino 	    case UNION:
7325f2eab64SJohn Marino 	      {
733*d5f8dde1SJohn Marino 		tre_union_t *uni;
734*d5f8dde1SJohn Marino 		tre_ast_node_t *left;
735*d5f8dde1SJohn Marino 		tre_ast_node_t *right;
736*d5f8dde1SJohn Marino 		int front_tag = -1;
7375f2eab64SJohn Marino 
7385f2eab64SJohn Marino 		DPRINT(("Union\n"));
7395f2eab64SJohn Marino 
740*d5f8dde1SJohn Marino 		if (regset_contains)
741*d5f8dde1SJohn Marino 		  {
742*d5f8dde1SJohn Marino 		    DPRINT(("  UNION num_tags++ tag=%d\n", tag));
743*d5f8dde1SJohn Marino 		    front_tag = tag;
744*d5f8dde1SJohn Marino 		    tag = next_tag;
745*d5f8dde1SJohn Marino 		    num_tags++;
746*d5f8dde1SJohn Marino 		    next_tag++;
747*d5f8dde1SJohn Marino 		  }
748*d5f8dde1SJohn Marino 
749*d5f8dde1SJohn Marino 		/* For the top union, walk the tree of consecutive unions,
750*d5f8dde1SJohn Marino 		 * setting the left_tag and right_tag values in increasing
751*d5f8dde1SJohn Marino 		 * order (left to right priority) */
752*d5f8dde1SJohn Marino 		if (top_union &&
753*d5f8dde1SJohn Marino 		    (node->num_submatches -
754*d5f8dde1SJohn Marino 		    (node->submatch_id >= 0 &&
755*d5f8dde1SJohn Marino 		    node->submatch_id < SUBMATCH_ID_INVISIBLE_START)) > 0)
756*d5f8dde1SJohn Marino 		  {
757*d5f8dde1SJohn Marino 		    tre_ast_node_t *n;
758*d5f8dde1SJohn Marino 		    int last = tre_stack_num_objects(stack);
759*d5f8dde1SJohn Marino 
760*d5f8dde1SJohn Marino 		    STACK_PUSH(stack, voidptr, node);
761*d5f8dde1SJohn Marino 		    STACK_PUSH(stack, int, ADDTAGS_UNION_RECURSE);
762*d5f8dde1SJohn Marino 
763*d5f8dde1SJohn Marino 		    while (tre_stack_num_objects(stack) > last)
764*d5f8dde1SJohn Marino 		      {
765*d5f8dde1SJohn Marino 			symbol = (tre_addtags_symbol_t)tre_stack_pop_int(stack);
766*d5f8dde1SJohn Marino 			switch (symbol)
767*d5f8dde1SJohn Marino 			  {
768*d5f8dde1SJohn Marino 			  case ADDTAGS_UNION_RECURSE:
769*d5f8dde1SJohn Marino 			    n = tre_stack_pop_voidptr(stack);
770*d5f8dde1SJohn Marino 			    uni = n->obj;
771*d5f8dde1SJohn Marino 			    left = uni->left;
772*d5f8dde1SJohn Marino 
773*d5f8dde1SJohn Marino 			    /* Since the top union has num_submatches > 0,
774*d5f8dde1SJohn Marino 			     * we set all the consecutive union's
775*d5f8dde1SJohn Marino 			     * make_branches to 1 to force the generation
776*d5f8dde1SJohn Marino 			     * of end tags for each union branch. */
777*d5f8dde1SJohn Marino 			    n->make_branches = 1;
778*d5f8dde1SJohn Marino 
779*d5f8dde1SJohn Marino 			    STACK_PUSH(stack, voidptr, n);
780*d5f8dde1SJohn Marino 			    STACK_PUSH(stack, int,
781*d5f8dde1SJohn Marino 				       ADDTAGS_UNION_RIGHT_RECURSE);
782*d5f8dde1SJohn Marino 
783*d5f8dde1SJohn Marino 			    if (left->type == UNION)
784*d5f8dde1SJohn Marino 			      {
785*d5f8dde1SJohn Marino 				STACK_PUSH(stack, voidptr, left);
786*d5f8dde1SJohn Marino 				STACK_PUSH(stack, int,
787*d5f8dde1SJohn Marino 					   ADDTAGS_UNION_RECURSE);
788*d5f8dde1SJohn Marino 			      }
789*d5f8dde1SJohn Marino 			    else
790*d5f8dde1SJohn Marino 			      {
791*d5f8dde1SJohn Marino 				DPRINT(("  ADDTAGS_UNION_RECURSE "
792*d5f8dde1SJohn Marino 					"num_tags++ tag=%d\n", tag));
793*d5f8dde1SJohn Marino 				uni->left_tag = tag;
794*d5f8dde1SJohn Marino 				tag = next_tag;
795*d5f8dde1SJohn Marino 				num_tags++;
796*d5f8dde1SJohn Marino 				next_tag++;
797*d5f8dde1SJohn Marino 			      }
798*d5f8dde1SJohn Marino 			    break;
799*d5f8dde1SJohn Marino 
800*d5f8dde1SJohn Marino 			  case ADDTAGS_UNION_RIGHT_RECURSE:
801*d5f8dde1SJohn Marino 			    n = tre_stack_pop_voidptr(stack);
802*d5f8dde1SJohn Marino 			    uni = n->obj;
803*d5f8dde1SJohn Marino 			    right = uni->right;
804*d5f8dde1SJohn Marino 
805*d5f8dde1SJohn Marino 			    if (right->type == UNION)
806*d5f8dde1SJohn Marino 			      {
807*d5f8dde1SJohn Marino 				STACK_PUSH(stack, voidptr, right);
808*d5f8dde1SJohn Marino 				STACK_PUSH(stack, int,
809*d5f8dde1SJohn Marino 					   ADDTAGS_UNION_RECURSE);
810*d5f8dde1SJohn Marino 			      }
811*d5f8dde1SJohn Marino 			    else
812*d5f8dde1SJohn Marino 			      {
813*d5f8dde1SJohn Marino 				DPRINT(("  ADDTAGS_UNION_RIGHT_RECURSE "
814*d5f8dde1SJohn Marino 					"num_tags++ tag=%d\n", tag));
815*d5f8dde1SJohn Marino 				uni->right_tag = tag;
816*d5f8dde1SJohn Marino 				tag = next_tag;
817*d5f8dde1SJohn Marino 				num_tags++;
818*d5f8dde1SJohn Marino 				next_tag++;
819*d5f8dde1SJohn Marino 			      }
820*d5f8dde1SJohn Marino 
821*d5f8dde1SJohn Marino 			    break;
822*d5f8dde1SJohn Marino 
823*d5f8dde1SJohn Marino 			  default:
824*d5f8dde1SJohn Marino 			    assert(0);
825*d5f8dde1SJohn Marino 			    break;
826*d5f8dde1SJohn Marino 
827*d5f8dde1SJohn Marino 			  } /* end switch(symbol) */
828*d5f8dde1SJohn Marino 		      } /* end while(tre_stack_num_objects(stack) > last */
829*d5f8dde1SJohn Marino 		    if (!first_pass)
830*d5f8dde1SJohn Marino 		      {
831*d5f8dde1SJohn Marino 			STACK_PUSHX(stack, int, front_tag);
8325f2eab64SJohn Marino 			STACK_PUSHX(stack, voidptr, node);
833*d5f8dde1SJohn Marino 			STACK_PUSHX(stack, int, ADDTAGS_AFTER_UNION_TOP);
834*d5f8dde1SJohn Marino 		      }
835*d5f8dde1SJohn Marino 		  } /* end if (top_union && ...) */
836*d5f8dde1SJohn Marino 
837*d5f8dde1SJohn Marino 		uni = node->obj;
838*d5f8dde1SJohn Marino 		left = uni->left;
839*d5f8dde1SJohn Marino 		right = uni->right;
840*d5f8dde1SJohn Marino 
841*d5f8dde1SJohn Marino 		/* After processing right child. */
842*d5f8dde1SJohn Marino 		STACK_PUSHX(stack, voidptr, regset);
843*d5f8dde1SJohn Marino 		STACK_PUSHX(stack, int, regset_contains != 0);
844*d5f8dde1SJohn Marino 		STACK_PUSHX(stack, voidptr, node);
8455f2eab64SJohn Marino 		STACK_PUSHX(stack, int, ADDTAGS_AFTER_UNION_RIGHT);
8465f2eab64SJohn Marino 
8475f2eab64SJohn Marino 		/* Process right child. */
8485f2eab64SJohn Marino 		STACK_PUSHX(stack, voidptr, right);
849*d5f8dde1SJohn Marino 		STACK_PUSHX(stack, int, ADDTAGS_RECURSE_NOT_TOP_UNION);
8505f2eab64SJohn Marino 
8515f2eab64SJohn Marino 		/* After processing left child. */
8525f2eab64SJohn Marino 		STACK_PUSHX(stack, int, ADDTAGS_AFTER_UNION_LEFT);
8535f2eab64SJohn Marino 
8545f2eab64SJohn Marino 		/* Process left child. */
8555f2eab64SJohn Marino 		STACK_PUSHX(stack, voidptr, left);
856*d5f8dde1SJohn Marino 		STACK_PUSHX(stack, int, ADDTAGS_RECURSE_NOT_TOP_UNION);
8575f2eab64SJohn Marino 
8585f2eab64SJohn Marino 		/* Regset is not empty, so add a tag here. */
859*d5f8dde1SJohn Marino 		if (regset_contains)
8605f2eab64SJohn Marino 		  {
8615f2eab64SJohn Marino 		    if (!first_pass)
8625f2eab64SJohn Marino 		      {
863*d5f8dde1SJohn Marino 			status = tre_merge_branches(mem, node, NULL, front_tag,
864*d5f8dde1SJohn Marino 						    tnfa->num_tags);
865*d5f8dde1SJohn Marino 			if (status != REG_OK)
866*d5f8dde1SJohn Marino 			  break;
867*d5f8dde1SJohn Marino 			status = tre_add_tag_left(mem, node, front_tag);
868*d5f8dde1SJohn Marino 			if (status != REG_OK)
869*d5f8dde1SJohn Marino 			  break;
870*d5f8dde1SJohn Marino 			if (regset_contains == REGSET_HAS_STARTS)
871*d5f8dde1SJohn Marino 			  tnfa->tag_directions[front_tag] = TRE_TAG_LEFT_MAXIMIZE;
872*d5f8dde1SJohn Marino 			else
873*d5f8dde1SJohn Marino 			  tnfa->tag_directions[front_tag] = direction;
874*d5f8dde1SJohn Marino 			DPRINT(("Setting t%d direction to %s\n", front_tag,
875*d5f8dde1SJohn Marino 				tag_dir_str[tnfa->tag_directions[front_tag]]));
876*d5f8dde1SJohn Marino 			tre_purge_regset(regset, tnfa, front_tag);
8775f2eab64SJohn Marino 		      }
8785f2eab64SJohn Marino 
879*d5f8dde1SJohn Marino 		    regset[0] = REGSET_UNSET;
880*d5f8dde1SJohn Marino 		    regset_contains = 0;
8815f2eab64SJohn Marino 		  }
8825f2eab64SJohn Marino 
8835f2eab64SJohn Marino 		break;
8845f2eab64SJohn Marino 	      }
885*d5f8dde1SJohn Marino 	    } /* end switch (node->type) */
8865f2eab64SJohn Marino 
8875f2eab64SJohn Marino 	  break; /* end case: ADDTAGS_RECURSE */
8885f2eab64SJohn Marino 
8895f2eab64SJohn Marino 	case ADDTAGS_AFTER_ITERATION:
8905f2eab64SJohn Marino 	  {
891*d5f8dde1SJohn Marino 	    tre_iteration_t *iter;
892*d5f8dde1SJohn Marino 	    tre_ast_node_t *orig;
8935f2eab64SJohn Marino 	    int enter_tag;
894*d5f8dde1SJohn Marino 
8955f2eab64SJohn Marino 	    node = tre_stack_pop_voidptr(stack);
896*d5f8dde1SJohn Marino 	    orig = node->original ? node->original : node;
897*d5f8dde1SJohn Marino 	    iter = (tre_iteration_t *)orig->obj;
8985f2eab64SJohn Marino 	    enter_tag = tre_stack_pop_int(stack);
899*d5f8dde1SJohn Marino 	    if (iter->minimal)
9005f2eab64SJohn Marino 	      minimal_tag = enter_tag;
9015f2eab64SJohn Marino 
9025f2eab64SJohn Marino 	    DPRINT(("After iteration\n"));
903*d5f8dde1SJohn Marino 	    if (first_pass)
9045f2eab64SJohn Marino 	      {
905*d5f8dde1SJohn Marino 		node->num_tags = iter->arg->num_tags + tre_stack_pop_int(stack);
906*d5f8dde1SJohn Marino 		DPRINT(("  ADDTAGS_AFTER_ITERATION: node->num_tags = %d\n",
907*d5f8dde1SJohn Marino 			node->num_tags));
908*d5f8dde1SJohn Marino 	      }
909*d5f8dde1SJohn Marino 	    else
910*d5f8dde1SJohn Marino 	      {
911*d5f8dde1SJohn Marino 		/* node->last_matched_branch will have the start tag (the tag
912*d5f8dde1SJohn Marino 		   just *before* the iteration).  iter->arg->last_matched_branch
913*d5f8dde1SJohn Marino 		   will have the tag(s) inside the iteration, the ones that
914*d5f8dde1SJohn Marino 		   may need to be reset if the iteration doesn't match.  So
915*d5f8dde1SJohn Marino 		   before we merge iter->arg into node, we need to set up
916*d5f8dde1SJohn Marino 		   a new tre_last_matched_t and tre_last_matched_branch_t,
917*d5f8dde1SJohn Marino 		   using any of the inside tags as cmp_tag (we choose the first
918*d5f8dde1SJohn Marino 		   tag found by bit_ffs).  If there are no inside tags, we
919*d5f8dde1SJohn Marino 		   don't bother creating the extra structures. */
920*d5f8dde1SJohn Marino 		tre_last_matched_branch_pre_t *b =
921*d5f8dde1SJohn Marino 						iter->arg->last_matched_branch;
922*d5f8dde1SJohn Marino 
923*d5f8dde1SJohn Marino 		if (b && b->n_tags > 0)
924*d5f8dde1SJohn Marino 		  {
925*d5f8dde1SJohn Marino 		    tre_last_matched_pre_t *u;
926*d5f8dde1SJohn Marino 
927*d5f8dde1SJohn Marino 		    bit_ffs(b->tags, num_tags, &b->cmp_tag);
928*d5f8dde1SJohn Marino 		    DPRINT(("  ADDTAGS_AFTER_ITERATION: n_tags=%d "
929*d5f8dde1SJohn Marino 			    "cmp_tag = %d\n", b->n_tags, b->cmp_tag));
930*d5f8dde1SJohn Marino 
931*d5f8dde1SJohn Marino 		    u = tre_mem_calloc(mem, sizeof(tre_last_matched_pre_t) +
932*d5f8dde1SJohn Marino 				       sizeof(tre_last_matched_branch_pre_t)
933*d5f8dde1SJohn Marino 				       + bitstr_size(tnfa->num_tags));
934*d5f8dde1SJohn Marino 		    if (!u)
935*d5f8dde1SJohn Marino 		      {
936*d5f8dde1SJohn Marino 			status = REG_ESPACE;
937*d5f8dde1SJohn Marino 			break;
938*d5f8dde1SJohn Marino 		      }
939*d5f8dde1SJohn Marino 		    u->branches = b;
940*d5f8dde1SJohn Marino 		    u->n_branches = 1;
941*d5f8dde1SJohn Marino 		    u->start_tag = b->cmp_tag;
942*d5f8dde1SJohn Marino 		    u->tot_branches = b->tot_branches;
943*d5f8dde1SJohn Marino 		    u->tot_last_matched = 1 + b->tot_last_matched;
944*d5f8dde1SJohn Marino 		    u->tot_tags = b->tot_tags;
945*d5f8dde1SJohn Marino 
946*d5f8dde1SJohn Marino 		    b = (tre_last_matched_branch_pre_t *)(u + 1);
947*d5f8dde1SJohn Marino 		    b->last_matched = u;
948*d5f8dde1SJohn Marino 		    b->n_last_matched = 1;
949*d5f8dde1SJohn Marino 		    b->tot_branches = 1 + u->tot_branches;
950*d5f8dde1SJohn Marino 		    b->tot_last_matched = u->tot_last_matched;
951*d5f8dde1SJohn Marino 		    b->tot_tags = u->tot_tags;
952*d5f8dde1SJohn Marino 
953*d5f8dde1SJohn Marino 		    iter->arg->last_matched_branch = b;
954*d5f8dde1SJohn Marino 		  }
955*d5f8dde1SJohn Marino 		status = tre_merge_branches(mem, node, iter->arg, 0,
956*d5f8dde1SJohn Marino 					    tnfa->num_tags);
957*d5f8dde1SJohn Marino 		if (status != REG_OK)
958*d5f8dde1SJohn Marino 		  break;
959*d5f8dde1SJohn Marino 
960*d5f8dde1SJohn Marino 		if (iter->minimal)
961*d5f8dde1SJohn Marino 		  {
962*d5f8dde1SJohn Marino 		    /* Add a union with a left EMPTY literal and the right
963*d5f8dde1SJohn Marino 		       being iter->arg.  This should force the tags inside
964*d5f8dde1SJohn Marino 		       the minimal iteration to prefer being unset */
965*d5f8dde1SJohn Marino 		    if (iter->min == 0 && iter->max <= 1)
966*d5f8dde1SJohn Marino 		      {
967*d5f8dde1SJohn Marino 			tre_ast_node_t *u, *e;
968*d5f8dde1SJohn Marino 
969*d5f8dde1SJohn Marino 			e = tre_ast_new_literal(mem, EMPTY, -1, -1);
970*d5f8dde1SJohn Marino 			if (e == NULL)
971*d5f8dde1SJohn Marino 			  {
972*d5f8dde1SJohn Marino 			    status = REG_ESPACE;
973*d5f8dde1SJohn Marino 			    break;
974*d5f8dde1SJohn Marino 			  }
975*d5f8dde1SJohn Marino 			u = tre_ast_new_union(mem, e, iter->arg);
976*d5f8dde1SJohn Marino 			if (u == NULL)
977*d5f8dde1SJohn Marino 			  {
978*d5f8dde1SJohn Marino 			    status = REG_ESPACE;
979*d5f8dde1SJohn Marino 			    break;
980*d5f8dde1SJohn Marino 			  }
981*d5f8dde1SJohn Marino 			iter->arg = u;
982*d5f8dde1SJohn Marino 		      }
983*d5f8dde1SJohn Marino 
9845f2eab64SJohn Marino 		    direction = TRE_TAG_MINIMIZE;
985*d5f8dde1SJohn Marino 		  }
9865f2eab64SJohn Marino 		else
9875f2eab64SJohn Marino 		  direction = TRE_TAG_MAXIMIZE;
988*d5f8dde1SJohn Marino 		DPRINT(("  Setting direction to %s\n", tag_dir_str[direction]));
9895f2eab64SJohn Marino 	      }
9905f2eab64SJohn Marino 	    break;
9915f2eab64SJohn Marino 	  }
9925f2eab64SJohn Marino 
9935f2eab64SJohn Marino 	case ADDTAGS_AFTER_CAT_LEFT:
9945f2eab64SJohn Marino 	  {
9955f2eab64SJohn Marino 	    int new_tag = tre_stack_pop_int(stack);
9965f2eab64SJohn Marino 	    next_tag = tre_stack_pop_int(stack);
9975f2eab64SJohn Marino 	    DPRINT(("After cat left, tag = %d, next_tag = %d\n",
9985f2eab64SJohn Marino 		    tag, next_tag));
9995f2eab64SJohn Marino 	    if (new_tag >= 0)
10005f2eab64SJohn Marino 	      {
10015f2eab64SJohn Marino 		DPRINT(("  Setting tag to %d\n", new_tag));
10025f2eab64SJohn Marino 		tag = new_tag;
10035f2eab64SJohn Marino 	      }
10045f2eab64SJohn Marino 	    break;
10055f2eab64SJohn Marino 	  }
10065f2eab64SJohn Marino 
10075f2eab64SJohn Marino 	case ADDTAGS_AFTER_CAT_RIGHT:
1008*d5f8dde1SJohn Marino 	  {
1009*d5f8dde1SJohn Marino 	    tre_catenation_t *cat;
1010*d5f8dde1SJohn Marino 
10115f2eab64SJohn Marino 	    DPRINT(("After cat right\n"));
10125f2eab64SJohn Marino 	    node = tre_stack_pop_voidptr(stack);
1013*d5f8dde1SJohn Marino 	    cat = node->obj;
10145f2eab64SJohn Marino 	    if (first_pass)
1015*d5f8dde1SJohn Marino 	      {
1016*d5f8dde1SJohn Marino 		node->num_tags = cat->left->num_tags + cat->right->num_tags;
1017*d5f8dde1SJohn Marino 		DPRINT(("  ADDTAGS_AFTER_CAT_RIGHT: node->num_tags = %d\n",
1018*d5f8dde1SJohn Marino 			node->num_tags));
1019*d5f8dde1SJohn Marino 	      }
1020*d5f8dde1SJohn Marino 	    else
1021*d5f8dde1SJohn Marino 	      {
1022*d5f8dde1SJohn Marino 		status = tre_merge_branches(mem, cat->left, cat->right, 0,
1023*d5f8dde1SJohn Marino 					    tnfa->num_tags);
1024*d5f8dde1SJohn Marino 		if (status != REG_OK)
10255f2eab64SJohn Marino 		  break;
1026*d5f8dde1SJohn Marino 		status = tre_merge_branches(mem, node, cat->left, 0,
1027*d5f8dde1SJohn Marino 					    tnfa->num_tags);
1028*d5f8dde1SJohn Marino 	      }
1029*d5f8dde1SJohn Marino 	    break;
1030*d5f8dde1SJohn Marino 	  }
10315f2eab64SJohn Marino 
10325f2eab64SJohn Marino 	case ADDTAGS_AFTER_UNION_LEFT:
10335f2eab64SJohn Marino 	  DPRINT(("After union left\n"));
10345f2eab64SJohn Marino 	  /* Lift the bottom of the `regset' array so that when processing
10355f2eab64SJohn Marino 	     the right operand the items currently in the array are
10365f2eab64SJohn Marino 	     invisible.	 The original bottom was saved at ADDTAGS_UNION and
10375f2eab64SJohn Marino 	     will be restored at ADDTAGS_AFTER_UNION_RIGHT below. */
1038*d5f8dde1SJohn Marino 	  while (*regset != REGSET_UNSET)
10395f2eab64SJohn Marino 	    regset++;
1040*d5f8dde1SJohn Marino 	  regset_contains = 0;
10415f2eab64SJohn Marino 	  break;
10425f2eab64SJohn Marino 
10435f2eab64SJohn Marino 	case ADDTAGS_AFTER_UNION_RIGHT:
10445f2eab64SJohn Marino 	  {
1045*d5f8dde1SJohn Marino 	    int added_tags;
1046*d5f8dde1SJohn Marino 	    tre_ast_node_t *orig;
1047*d5f8dde1SJohn Marino 	    tre_union_t *uni;
1048*d5f8dde1SJohn Marino 	    /* Note: node may not be a UNION, but a CATENATION with a left
1049*d5f8dde1SJohn Marino 	     * tag.  So that is why we pass the original node->obj on the
1050*d5f8dde1SJohn Marino 	     * stack, to get the union's true values. */
1051*d5f8dde1SJohn Marino 
10525f2eab64SJohn Marino 	    DPRINT(("After union right\n"));
10535f2eab64SJohn Marino 	    node = tre_stack_pop_voidptr(stack);
1054*d5f8dde1SJohn Marino 	    orig = node->original ? node->original : node;
1055*d5f8dde1SJohn Marino 	    uni = (tre_union_t *)orig->obj;
10565f2eab64SJohn Marino 	    added_tags = tre_stack_pop_int(stack);
10575f2eab64SJohn Marino 	    if (first_pass)
10585f2eab64SJohn Marino 	      {
1059*d5f8dde1SJohn Marino 		node->num_tags = uni->left->num_tags + uni->right->num_tags
1060*d5f8dde1SJohn Marino 				 + added_tags;
1061*d5f8dde1SJohn Marino 		if (uni->left_tag > 0)
1062*d5f8dde1SJohn Marino 		  node->num_tags++;
1063*d5f8dde1SJohn Marino 		if (uni->right_tag > 0)
1064*d5f8dde1SJohn Marino 		  node->num_tags++;
1065*d5f8dde1SJohn Marino 		DPRINT(("  ADDTAGS_AFTER_UNION_RIGHT: node->num_tags = %d\n",
1066*d5f8dde1SJohn Marino 			node->num_tags));
10675f2eab64SJohn Marino 	      }
10685f2eab64SJohn Marino 	    regset = tre_stack_pop_voidptr(stack);
10695f2eab64SJohn Marino 
10705f2eab64SJohn Marino 	    /* Add tags after both children, the left child gets a smaller
10715f2eab64SJohn Marino 	       tag than the right child.  This guarantees that we prefer
10725f2eab64SJohn Marino 	       the left child over the right child. */
10735f2eab64SJohn Marino 	    /* XXX - This is not always necessary (if the children have
10745f2eab64SJohn Marino 	       tags which must be seen for every match of that child). */
1075*d5f8dde1SJohn Marino 	    if (!first_pass && node->make_branches)
10765f2eab64SJohn Marino 	      {
1077*d5f8dde1SJohn Marino 		tre_last_matched_branch_pre_t *lb =
1078*d5f8dde1SJohn Marino 		  uni->left->last_matched_branch;
1079*d5f8dde1SJohn Marino 		tre_last_matched_branch_pre_t *rb =
1080*d5f8dde1SJohn Marino 		  uni->right->last_matched_branch;
1081*d5f8dde1SJohn Marino 		tre_last_matched_pre_t *lu =
1082*d5f8dde1SJohn Marino 		  uni->left->last_matched_in_progress;
1083*d5f8dde1SJohn Marino 		tre_last_matched_pre_t *ru =
1084*d5f8dde1SJohn Marino 		  uni->right->last_matched_in_progress;
1085*d5f8dde1SJohn Marino 		tre_last_matched_pre_t *u;
1086*d5f8dde1SJohn Marino 		/* We don't need to call tre_merge_branches because these
1087*d5f8dde1SJohn Marino 		 * tags don't participate in submatch ranges, so don't need
1088*d5f8dde1SJohn Marino 		 * to be recorded.  But we do set the cmp_tag entry of the
1089*d5f8dde1SJohn Marino 		 * tre_last_matched_branch_pre_t, so we might call
1090*d5f8dde1SJohn Marino 		 * tre_merge_branches if we need to create an empty
1091*d5f8dde1SJohn Marino 		 * tre_last_matched_branch_pre_t. */
1092*d5f8dde1SJohn Marino 		if (uni->left_tag > 0)
10935f2eab64SJohn Marino 		  {
1094*d5f8dde1SJohn Marino 		    DPRINT(("Setting t%d direction to maximize\n",
1095*d5f8dde1SJohn Marino 			    uni->left_tag));
1096*d5f8dde1SJohn Marino 		    status = tre_add_tag_right(mem, uni->left, uni->left_tag);
1097*d5f8dde1SJohn Marino 		    if (status != REG_OK)
1098*d5f8dde1SJohn Marino 		      break;
1099*d5f8dde1SJohn Marino 		    tnfa->tag_directions[uni->left_tag] = TRE_TAG_MAXIMIZE;
1100*d5f8dde1SJohn Marino 		    if (!lb)
1101*d5f8dde1SJohn Marino 		      {
1102*d5f8dde1SJohn Marino 			status = tre_merge_branches(mem, uni->left, NULL, -1,
1103*d5f8dde1SJohn Marino 						    tnfa->num_tags);
1104*d5f8dde1SJohn Marino 			if (status != REG_OK)
1105*d5f8dde1SJohn Marino 			  break;
1106*d5f8dde1SJohn Marino 			lb = uni->left->last_matched_branch;
11075f2eab64SJohn Marino 		      }
1108*d5f8dde1SJohn Marino 		    lb->cmp_tag = uni->left_tag;
1109*d5f8dde1SJohn Marino 		  }
1110*d5f8dde1SJohn Marino 		if (uni->right_tag > 0)
1111*d5f8dde1SJohn Marino 		  {
1112*d5f8dde1SJohn Marino 		    DPRINT(("Setting t%d direction to maximize\n",
1113*d5f8dde1SJohn Marino 			    uni->right_tag));
1114*d5f8dde1SJohn Marino 		    status = tre_add_tag_right(mem, uni->right, uni->right_tag);
1115*d5f8dde1SJohn Marino 		    if (status != REG_OK)
1116*d5f8dde1SJohn Marino 		      break;
1117*d5f8dde1SJohn Marino 		    tnfa->tag_directions[uni->right_tag] = TRE_TAG_MAXIMIZE;
1118*d5f8dde1SJohn Marino 		    if (!rb)
1119*d5f8dde1SJohn Marino 		      {
1120*d5f8dde1SJohn Marino 			status = tre_merge_branches(mem, uni->right, NULL, -1,
1121*d5f8dde1SJohn Marino 						    tnfa->num_tags);
1122*d5f8dde1SJohn Marino 			if (status != REG_OK)
1123*d5f8dde1SJohn Marino 			  break;
1124*d5f8dde1SJohn Marino 			rb = uni->right->last_matched_branch;
1125*d5f8dde1SJohn Marino 		      }
1126*d5f8dde1SJohn Marino 		    rb->cmp_tag = uni->right_tag;
1127*d5f8dde1SJohn Marino 		  }
1128*d5f8dde1SJohn Marino 		/* Now merge the tre_last_matched_branch_pre_t into a
1129*d5f8dde1SJohn Marino 		   tre_last_matched_pre_t */
1130*d5f8dde1SJohn Marino 		if (lu == NULL)
1131*d5f8dde1SJohn Marino 		  {
1132*d5f8dde1SJohn Marino 		    if (ru == NULL)
1133*d5f8dde1SJohn Marino 		      {
1134*d5f8dde1SJohn Marino 			/* Create a new tre_last_matched_pre_t */
1135*d5f8dde1SJohn Marino 			u = tre_mem_calloc(mem, sizeof(tre_last_matched_pre_t));
1136*d5f8dde1SJohn Marino 			if (!u)
1137*d5f8dde1SJohn Marino 			  {
1138*d5f8dde1SJohn Marino 			    status = REG_ESPACE;
1139*d5f8dde1SJohn Marino 			    break;
1140*d5f8dde1SJohn Marino 			  }
1141*d5f8dde1SJohn Marino 			u->tot_last_matched = 1;
1142*d5f8dde1SJohn Marino 
1143*d5f8dde1SJohn Marino 			if (lb)
1144*d5f8dde1SJohn Marino 			  {
1145*d5f8dde1SJohn Marino 			    u->branches = lb;
1146*d5f8dde1SJohn Marino 			    u->n_branches = 1;
1147*d5f8dde1SJohn Marino 			    u->tot_branches += lb->tot_branches;
1148*d5f8dde1SJohn Marino 			    u->tot_last_matched += lb->tot_last_matched;
1149*d5f8dde1SJohn Marino 			    u->tot_tags += lb->tot_tags;
1150*d5f8dde1SJohn Marino 			    if (rb)
1151*d5f8dde1SJohn Marino 			      {
1152*d5f8dde1SJohn Marino 				lb->next = rb;
1153*d5f8dde1SJohn Marino 				u->n_branches++;
1154*d5f8dde1SJohn Marino 				u->tot_branches += rb->tot_branches;
1155*d5f8dde1SJohn Marino 				u->tot_last_matched += rb->tot_last_matched;
1156*d5f8dde1SJohn Marino 				u->tot_tags += rb->tot_tags;
1157*d5f8dde1SJohn Marino 			      }
1158*d5f8dde1SJohn Marino 			  }
1159*d5f8dde1SJohn Marino 			else if (rb)
1160*d5f8dde1SJohn Marino 			  {
1161*d5f8dde1SJohn Marino 			    u->branches = rb;
1162*d5f8dde1SJohn Marino 			    u->n_branches = 1;
1163*d5f8dde1SJohn Marino 			    u->tot_branches += rb->tot_branches;
1164*d5f8dde1SJohn Marino 			    u->tot_last_matched += rb->tot_last_matched;
1165*d5f8dde1SJohn Marino 			    u->tot_tags += rb->tot_tags;
1166*d5f8dde1SJohn Marino 			  }
1167*d5f8dde1SJohn Marino 		      }
1168*d5f8dde1SJohn Marino 		    else
1169*d5f8dde1SJohn Marino 		      {
1170*d5f8dde1SJohn Marino 			/* Use ru, and add lb */
1171*d5f8dde1SJohn Marino 			u = ru;
1172*d5f8dde1SJohn Marino 			if (lb)
1173*d5f8dde1SJohn Marino 			  {
1174*d5f8dde1SJohn Marino 			    lb->next = u->branches;
1175*d5f8dde1SJohn Marino 			    u->branches = lb;
1176*d5f8dde1SJohn Marino 			    u->n_branches++;
1177*d5f8dde1SJohn Marino 			    u->tot_branches += lb->tot_branches;
1178*d5f8dde1SJohn Marino 			    u->tot_last_matched += lb->tot_last_matched;
1179*d5f8dde1SJohn Marino 			    u->tot_tags += lb->tot_tags;
1180*d5f8dde1SJohn Marino 			  }
1181*d5f8dde1SJohn Marino 		      }
1182*d5f8dde1SJohn Marino 		  }
1183*d5f8dde1SJohn Marino 		else if (ru == NULL)
1184*d5f8dde1SJohn Marino 		  {
1185*d5f8dde1SJohn Marino 		    /* Use lu, and add rb */
1186*d5f8dde1SJohn Marino 		    u = lu;
1187*d5f8dde1SJohn Marino 		    if (rb)
1188*d5f8dde1SJohn Marino 		      {
1189*d5f8dde1SJohn Marino 			rb->next = u->branches;
1190*d5f8dde1SJohn Marino 			u->branches = rb;
1191*d5f8dde1SJohn Marino 			u->n_branches++;
1192*d5f8dde1SJohn Marino 			u->tot_branches += rb->tot_branches;
1193*d5f8dde1SJohn Marino 			u->tot_last_matched += rb->tot_last_matched;
1194*d5f8dde1SJohn Marino 			u->tot_tags += rb->tot_tags;
1195*d5f8dde1SJohn Marino 		      }
1196*d5f8dde1SJohn Marino 		  }
1197*d5f8dde1SJohn Marino 		else
1198*d5f8dde1SJohn Marino 		  {
1199*d5f8dde1SJohn Marino 		    /* Merge lu and ru into lu */
1200*d5f8dde1SJohn Marino 		    if (lu->branches)
1201*d5f8dde1SJohn Marino 		      {
1202*d5f8dde1SJohn Marino 			if (ru->branches)
1203*d5f8dde1SJohn Marino 			  {
1204*d5f8dde1SJohn Marino 			    tre_last_matched_branch_pre_t *b = lu->branches;
1205*d5f8dde1SJohn Marino 			    while (b->next) b = b->next;
1206*d5f8dde1SJohn Marino 			    b->next = ru->branches;
1207*d5f8dde1SJohn Marino 			    lu->n_branches += ru->n_branches;
1208*d5f8dde1SJohn Marino 			  }
1209*d5f8dde1SJohn Marino 		      }
1210*d5f8dde1SJohn Marino 		    else if (ru->branches)
1211*d5f8dde1SJohn Marino 		      {
1212*d5f8dde1SJohn Marino 			lu->branches = ru->branches;
1213*d5f8dde1SJohn Marino 			lu->n_branches = ru->n_branches;
1214*d5f8dde1SJohn Marino 		      }
1215*d5f8dde1SJohn Marino 		    lu->tot_branches += ru->tot_branches;
1216*d5f8dde1SJohn Marino 		    lu->tot_last_matched += ru->tot_last_matched - 1;
1217*d5f8dde1SJohn Marino 		    lu->tot_tags += ru->tot_tags;
1218*d5f8dde1SJohn Marino 		    u = lu;
1219*d5f8dde1SJohn Marino 		  }
1220*d5f8dde1SJohn Marino 		node->last_matched_in_progress = u;
12215f2eab64SJohn Marino 	      }
12225f2eab64SJohn Marino 	    direction = TRE_TAG_MAXIMIZE;
12235f2eab64SJohn Marino 	    break;
12245f2eab64SJohn Marino 	  }
12255f2eab64SJohn Marino 
1226*d5f8dde1SJohn Marino 	case ADDTAGS_AFTER_UNION_TOP: /* only called when not first_pass */
1227*d5f8dde1SJohn Marino 	  {
1228*d5f8dde1SJohn Marino 	    tre_last_matched_branch_pre_t *b;
1229*d5f8dde1SJohn Marino 	    tre_last_matched_pre_t *u;
1230*d5f8dde1SJohn Marino 	    int start_tag;
1231*d5f8dde1SJohn Marino 
1232*d5f8dde1SJohn Marino 	    DPRINT(("After union top\n"));
1233*d5f8dde1SJohn Marino 	    node = tre_stack_pop_voidptr(stack);
1234*d5f8dde1SJohn Marino 	    start_tag = tre_stack_pop_int(stack);
1235*d5f8dde1SJohn Marino 	    b = tre_mem_calloc(mem, sizeof(tre_last_matched_branch_pre_t)
1236*d5f8dde1SJohn Marino 			       + bitstr_size(tnfa->num_tags));
1237*d5f8dde1SJohn Marino 	    if (!b)
1238*d5f8dde1SJohn Marino 	      {
1239*d5f8dde1SJohn Marino 		status = REG_ESPACE;
1240*d5f8dde1SJohn Marino 		break;
1241*d5f8dde1SJohn Marino 	      }
1242*d5f8dde1SJohn Marino 
1243*d5f8dde1SJohn Marino 	    u = node->last_matched_in_progress;
1244*d5f8dde1SJohn Marino 	    u->start_tag = start_tag;
1245*d5f8dde1SJohn Marino 	    b->tot_branches = 1 + u->tot_branches;
1246*d5f8dde1SJohn Marino 	    b->tot_last_matched = u->tot_last_matched;
1247*d5f8dde1SJohn Marino 	    b->tot_tags = u->tot_tags;
1248*d5f8dde1SJohn Marino 	    b->last_matched = u;
1249*d5f8dde1SJohn Marino 	    b->n_last_matched = 1;
1250*d5f8dde1SJohn Marino 	    node->last_matched_branch = b;
1251*d5f8dde1SJohn Marino 	    node->last_matched_in_progress = NULL;
1252*d5f8dde1SJohn Marino 	    break;
1253*d5f8dde1SJohn Marino 	  }
1254*d5f8dde1SJohn Marino 
12555f2eab64SJohn Marino 	default:
12565f2eab64SJohn Marino 	  assert(0);
12575f2eab64SJohn Marino 	  break;
12585f2eab64SJohn Marino 
12595f2eab64SJohn Marino 	} /* end switch(symbol) */
12605f2eab64SJohn Marino     } /* end while(tre_stack_num_objects(stack) > bottom) */
12615f2eab64SJohn Marino 
1262*d5f8dde1SJohn Marino   if (status != REG_OK)
1263*d5f8dde1SJohn Marino     {
1264*d5f8dde1SJohn Marino       DPRINT(("Error during %s pass\n", first_pass ? "first" : "second"));
1265*d5f8dde1SJohn Marino       goto error_post_compile;
1266*d5f8dde1SJohn Marino     }
12675f2eab64SJohn Marino 
1268*d5f8dde1SJohn Marino   if (!first_pass)
12695f2eab64SJohn Marino     {
12705f2eab64SJohn Marino       int i;
1271*d5f8dde1SJohn Marino       if (num_tags != tnfa->num_tags)
1272*d5f8dde1SJohn Marino 	{
1273*d5f8dde1SJohn Marino 	  DPRINT(("num_tags(%d) != tnfa->num_tags(%d)\n", num_tags,
1274*d5f8dde1SJohn Marino 		  tnfa->num_tags));
1275*d5f8dde1SJohn Marino 	  status = REG_BADPAT;
1276*d5f8dde1SJohn Marino 	  goto error_post_compile;
1277*d5f8dde1SJohn Marino 	}
1278*d5f8dde1SJohn Marino 
1279*d5f8dde1SJohn Marino       tre_purge_regset(regset, tnfa, tag);
1280*d5f8dde1SJohn Marino       DPRINT(("Setting t%d to %s\n", num_tags,
1281*d5f8dde1SJohn Marino 	      tag_dir_str[direction]));
1282*d5f8dde1SJohn Marino       tnfa->tag_directions[num_tags] = direction;
1283*d5f8dde1SJohn Marino 
1284*d5f8dde1SJohn Marino       if (rtp > reorder_tags + 2 * tnfa->num_reorder_tags)
1285*d5f8dde1SJohn Marino 	{
1286*d5f8dde1SJohn Marino 	  DPRINT(("Processed %d reorder tags instead of %d\n",
1287*d5f8dde1SJohn Marino 		  (int)(rtp - reorder_tags) / 2, tnfa->num_reorder_tags));
1288*d5f8dde1SJohn Marino 	  status = REG_BADPAT;
1289*d5f8dde1SJohn Marino 	  goto error_post_compile;
1290*d5f8dde1SJohn Marino 	}
1291*d5f8dde1SJohn Marino     *rtp = -1;
1292*d5f8dde1SJohn Marino #if TRE_DEBUG
1293*d5f8dde1SJohn Marino       if (reorder_tags[0] >= 0)
1294*d5f8dde1SJohn Marino 	{
1295*d5f8dde1SJohn Marino 	  DPRINT(("reorder_tags:\n"));
1296*d5f8dde1SJohn Marino 	  for (rtp = reorder_tags; *rtp >= 0;)
1297*d5f8dde1SJohn Marino 	    {
1298*d5f8dde1SJohn Marino 	      DPRINT(("%d after ", *rtp++));
1299*d5f8dde1SJohn Marino 	      DPRINT(("%d\n", *rtp++));
1300*d5f8dde1SJohn Marino 	    }
1301*d5f8dde1SJohn Marino 	}
1302*d5f8dde1SJohn Marino 	else
1303*d5f8dde1SJohn Marino 	  DPRINT(("No reorder_tags\n"));
1304*d5f8dde1SJohn Marino #endif /* TRE_DEBUG */
1305*d5f8dde1SJohn Marino 
1306*d5f8dde1SJohn Marino       /* Initialize to_reorder */
1307*d5f8dde1SJohn Marino       for (i = 0; i < num_tags; i++)
1308*d5f8dde1SJohn Marino 	to_reorder[i] = i;
1309*d5f8dde1SJohn Marino       /* Use to_seq_order to convert reorder_tags values, and use those to
1310*d5f8dde1SJohn Marino        * reorder to_reorder */
1311*d5f8dde1SJohn Marino       for (rtp = reorder_tags; *rtp >= 0;)
1312*d5f8dde1SJohn Marino 	{
1313*d5f8dde1SJohn Marino 	  int j, high, low;
1314*d5f8dde1SJohn Marino 	  int ti = *rtp++;
1315*d5f8dde1SJohn Marino 
1316*d5f8dde1SJohn Marino 	  /* Skip reordering the final tag */
1317*d5f8dde1SJohn Marino 	  if (ti >= num_tags)
1318*d5f8dde1SJohn Marino 	    {
1319*d5f8dde1SJohn Marino 	      DPRINT(("Skipping reorder of %d\n", ti));
1320*d5f8dde1SJohn Marino 	      rtp++;
1321*d5f8dde1SJohn Marino 	      continue;
1322*d5f8dde1SJohn Marino 	    }
1323*d5f8dde1SJohn Marino 	  /* The number of the tag to reorder */
1324*d5f8dde1SJohn Marino 	  high = to_reorder[ti];
1325*d5f8dde1SJohn Marino 	  /* Reorder after this tag */
1326*d5f8dde1SJohn Marino 	  low = to_reorder[*rtp++];
1327*d5f8dde1SJohn Marino 
1328*d5f8dde1SJohn Marino 	  DPRINT(("ti=%d high=%d low=%d\n", ti, high, low));
1329*d5f8dde1SJohn Marino 	  if (low > high)
1330*d5f8dde1SJohn Marino 	    {
1331*d5f8dde1SJohn Marino 	      DPRINT(("Tag %d already before %d\n", high, low));
1332*d5f8dde1SJohn Marino 	      continue;
1333*d5f8dde1SJohn Marino 	    }
1334*d5f8dde1SJohn Marino 	  for (j = 0; j < num_tags; j++)
1335*d5f8dde1SJohn Marino 	    if (to_reorder[j] > low && to_reorder[j] < high)
1336*d5f8dde1SJohn Marino 	      to_reorder[j]++;
1337*d5f8dde1SJohn Marino 	  to_reorder[ti] = low + 1;
1338*d5f8dde1SJohn Marino #ifdef TRE_DEBUG
1339*d5f8dde1SJohn Marino 	  DPRINT(("to_reorder=("));
1340*d5f8dde1SJohn Marino 	  for (j = 0; j < num_tags; j++)
1341*d5f8dde1SJohn Marino 	    {
1342*d5f8dde1SJohn Marino 	      DPRINT(("%d", to_reorder[j]));
1343*d5f8dde1SJohn Marino 	      if (j < num_tags - 1)
1344*d5f8dde1SJohn Marino 		DPRINT((","));
1345*d5f8dde1SJohn Marino 	    }
1346*d5f8dde1SJohn Marino 	  DPRINT((")\n"));
1347*d5f8dde1SJohn Marino #endif /* TRE_DEBUG */
1348*d5f8dde1SJohn Marino 	}
1349*d5f8dde1SJohn Marino       /* Determine if reordering in really necessary */
1350*d5f8dde1SJohn Marino       {
1351*d5f8dde1SJohn Marino 	int need_reorder = 0;
1352*d5f8dde1SJohn Marino 	for (i = 0; i < num_tags; i++)
1353*d5f8dde1SJohn Marino 	  if(to_reorder[i] != i)
1354*d5f8dde1SJohn Marino 	    {
1355*d5f8dde1SJohn Marino 	      need_reorder = 1;
1356*d5f8dde1SJohn Marino 	      break;
1357*d5f8dde1SJohn Marino 	    }
1358*d5f8dde1SJohn Marino 	/* If need_reorder is not set, free reorder_tags, and set to NULL,
1359*d5f8dde1SJohn Marino 	 * indicating no reordering is needed */
1360*d5f8dde1SJohn Marino 	if (!need_reorder)
1361*d5f8dde1SJohn Marino 	  {
1362*d5f8dde1SJohn Marino 	    DPRINT(("Don't need to reorder\n"));
1363*d5f8dde1SJohn Marino 	    xfree(reorder_tags);
1364*d5f8dde1SJohn Marino 	    reorder_tags = NULL;
1365*d5f8dde1SJohn Marino 	  }
1366*d5f8dde1SJohn Marino       }
1367*d5f8dde1SJohn Marino     }
1368*d5f8dde1SJohn Marino 
1369*d5f8dde1SJohn Marino   if (reorder_tags)
1370*d5f8dde1SJohn Marino     {
1371*d5f8dde1SJohn Marino       int i;
1372*d5f8dde1SJohn Marino       tre_tag_direction_t *new_tag_directions;
1373*d5f8dde1SJohn Marino #if TRE_DEBUG
1374*d5f8dde1SJohn Marino       DPRINT(("to_reorder:"));
1375*d5f8dde1SJohn Marino       for (i = 0; i < num_tags; i++)
1376*d5f8dde1SJohn Marino 	DPRINT((" %d->%d", i, to_reorder[i]));
1377*d5f8dde1SJohn Marino       DPRINT(("\n"));
1378*d5f8dde1SJohn Marino #endif /* TRE_DEBUG */
1379*d5f8dde1SJohn Marino 
1380*d5f8dde1SJohn Marino       DPRINT(("Reordering submatch_data\n"));
1381*d5f8dde1SJohn Marino       for (i = 0; i < tnfa->num_submatches; i++)
1382*d5f8dde1SJohn Marino 	{
1383*d5f8dde1SJohn Marino #if TRE_DEBUG
1384*d5f8dde1SJohn Marino 	  int so = tnfa->submatch_data[i].so_tag;
1385*d5f8dde1SJohn Marino 	  int eo = tnfa->submatch_data[i].eo_tag;
1386*d5f8dde1SJohn Marino #endif /* TRE_DEBUG */
1387*d5f8dde1SJohn Marino 	  tnfa->submatch_data[i].so_tag =
1388*d5f8dde1SJohn Marino 	    to_reorder[tnfa->submatch_data[i].so_tag];
1389*d5f8dde1SJohn Marino 	  tnfa->submatch_data[i].eo_tag =
1390*d5f8dde1SJohn Marino 	    tnfa->submatch_data[i].eo_tag < num_tags ?
1391*d5f8dde1SJohn Marino 	    to_reorder[tnfa->submatch_data[i].eo_tag] :
1392*d5f8dde1SJohn Marino 	    tnfa->submatch_data[i].eo_tag;
1393*d5f8dde1SJohn Marino 	  DPRINT(("pmatch[%d]: {%d, %d}->{%d, %d}\n", i, so, eo,
1394*d5f8dde1SJohn Marino 		  tnfa->submatch_data[i].so_tag,
1395*d5f8dde1SJohn Marino 		  tnfa->submatch_data[i].eo_tag));
1396*d5f8dde1SJohn Marino 	}
1397*d5f8dde1SJohn Marino 
1398*d5f8dde1SJohn Marino       DPRINT(("Reordering tag_directions\n"));
1399*d5f8dde1SJohn Marino       /* We only allocate num_tags directions and reorder them.  The
1400*d5f8dde1SJohn Marino        * num_tags-th direction (end tag) is left unchanged. */
1401*d5f8dde1SJohn Marino       new_tag_directions = xmalloc(sizeof(*new_tag_directions) * num_tags);
1402*d5f8dde1SJohn Marino       if (new_tag_directions == NULL)
1403*d5f8dde1SJohn Marino 	{
1404*d5f8dde1SJohn Marino 	  status = REG_ESPACE;
1405*d5f8dde1SJohn Marino 	  goto error_post_compile;
1406*d5f8dde1SJohn Marino 	}
1407*d5f8dde1SJohn Marino       for (i = 0; i < num_tags; i++)
1408*d5f8dde1SJohn Marino 	{
1409*d5f8dde1SJohn Marino 	  new_tag_directions[to_reorder[i]] = tnfa->tag_directions[i];
1410*d5f8dde1SJohn Marino 	}
1411*d5f8dde1SJohn Marino #if TRE_DEBUG
1412*d5f8dde1SJohn Marino       for (i = 0; i < num_tags; i++)
1413*d5f8dde1SJohn Marino 	{
1414*d5f8dde1SJohn Marino 	  DPRINT(("t%d %s->%s\n", i,
1415*d5f8dde1SJohn Marino 		  tag_dir_str[tnfa->tag_directions[i]],
1416*d5f8dde1SJohn Marino 		  tag_dir_str[new_tag_directions[i]]));
1417*d5f8dde1SJohn Marino 	}
1418*d5f8dde1SJohn Marino 	DPRINT(("t%d %s->%s\n", num_tags,
1419*d5f8dde1SJohn Marino 		tag_dir_str[tnfa->tag_directions[num_tags]],
1420*d5f8dde1SJohn Marino 		tag_dir_str[tnfa->tag_directions[num_tags]]));
1421*d5f8dde1SJohn Marino #endif /* TRE_DEBUG */
1422*d5f8dde1SJohn Marino       memcpy(tnfa->tag_directions, new_tag_directions, sizeof(*new_tag_directions) * num_tags);
1423*d5f8dde1SJohn Marino       xfree(new_tag_directions);
1424*d5f8dde1SJohn Marino 
1425*d5f8dde1SJohn Marino       DPRINT(("Reordering minimal_tags\n"));
1426*d5f8dde1SJohn Marino       for (i = 0; tnfa->minimal_tags[i] >= 0; i++)
1427*d5f8dde1SJohn Marino 	tnfa->minimal_tags[i] = tnfa->minimal_tags[i] < num_tags ?
1428*d5f8dde1SJohn Marino 				to_reorder[tnfa->minimal_tags[i]] :
1429*d5f8dde1SJohn Marino 				tnfa->minimal_tags[i];
1430*d5f8dde1SJohn Marino 
1431*d5f8dde1SJohn Marino       DPRINT(("Reordering AST tags\n"));
1432*d5f8dde1SJohn Marino       STACK_PUSH(stack, voidptr, tree);
1433*d5f8dde1SJohn Marino       while (status == REG_OK && tre_stack_num_objects(stack) > bottom)
1434*d5f8dde1SJohn Marino 	{
1435*d5f8dde1SJohn Marino 	  node = tre_stack_pop_voidptr(stack);
1436*d5f8dde1SJohn Marino 
1437*d5f8dde1SJohn Marino 	  switch (node->type)
1438*d5f8dde1SJohn Marino 	    {
1439*d5f8dde1SJohn Marino 	    case LITERAL:
1440*d5f8dde1SJohn Marino 	      {
1441*d5f8dde1SJohn Marino 		tre_literal_t *lit = (tre_literal_t *)node->obj;
1442*d5f8dde1SJohn Marino 		if (IS_TAG(lit))
1443*d5f8dde1SJohn Marino 		  lit->code_max = to_reorder[lit->code_max];
1444*d5f8dde1SJohn Marino 		break;
1445*d5f8dde1SJohn Marino 	      }
1446*d5f8dde1SJohn Marino 
1447*d5f8dde1SJohn Marino 	    case UNION:
1448*d5f8dde1SJohn Marino 	      {
1449*d5f8dde1SJohn Marino 		tre_union_t *uni = (tre_union_t *)node->obj;
1450*d5f8dde1SJohn Marino 		STACK_PUSHX(stack, voidptr, uni->right);
1451*d5f8dde1SJohn Marino 		STACK_PUSHX(stack, voidptr, uni->left);
1452*d5f8dde1SJohn Marino 		break;
1453*d5f8dde1SJohn Marino 	      }
1454*d5f8dde1SJohn Marino 
1455*d5f8dde1SJohn Marino 	    case CATENATION:
1456*d5f8dde1SJohn Marino 	      {
1457*d5f8dde1SJohn Marino 		tre_catenation_t *cat = (tre_catenation_t *)node->obj;
1458*d5f8dde1SJohn Marino 		STACK_PUSHX(stack, voidptr, cat->right);
1459*d5f8dde1SJohn Marino 		STACK_PUSHX(stack, voidptr, cat->left);
1460*d5f8dde1SJohn Marino 		break;
1461*d5f8dde1SJohn Marino 	      }
1462*d5f8dde1SJohn Marino 
1463*d5f8dde1SJohn Marino 	    case ITERATION:
1464*d5f8dde1SJohn Marino 	      {
1465*d5f8dde1SJohn Marino 		tre_iteration_t *iter = (tre_iteration_t *)node->obj;
1466*d5f8dde1SJohn Marino 		STACK_PUSHX(stack, voidptr, iter->arg);
1467*d5f8dde1SJohn Marino 		break;
1468*d5f8dde1SJohn Marino 	      }
1469*d5f8dde1SJohn Marino 
1470*d5f8dde1SJohn Marino 	    default:
1471*d5f8dde1SJohn Marino 	      assert(0);
1472*d5f8dde1SJohn Marino 	      break;
1473*d5f8dde1SJohn Marino 	    }
1474*d5f8dde1SJohn Marino 	}
1475*d5f8dde1SJohn Marino 	if (status != REG_OK)
1476*d5f8dde1SJohn Marino 	  {
1477*d5f8dde1SJohn Marino 	    DPRINT(("Error while reordering tags\n"));
1478*d5f8dde1SJohn Marino 	    goto error_post_compile;
1479*d5f8dde1SJohn Marino 	  }
1480*d5f8dde1SJohn Marino     }
1481*d5f8dde1SJohn Marino 
1482*d5f8dde1SJohn Marino 
1483*d5f8dde1SJohn Marino   if (!first_pass)
1484*d5f8dde1SJohn Marino     {
1485*d5f8dde1SJohn Marino       if (tree->last_matched_branch)
1486*d5f8dde1SJohn Marino 	{
1487*d5f8dde1SJohn Marino 	  tre_last_matched_branch_t *buf, *b, *bb;
1488*d5f8dde1SJohn Marino 	  tre_last_matched_branch_pre_t *bp;
1489*d5f8dde1SJohn Marino 	  tre_last_matched_t *u, *uu;
1490*d5f8dde1SJohn Marino 	  tre_last_matched_pre_t *up;
1491*d5f8dde1SJohn Marino 	  int *t;
1492*d5f8dde1SJohn Marino 	  int i;
1493*d5f8dde1SJohn Marino #ifdef TRE_DEBUG
1494*d5f8dde1SJohn Marino 	  tre_last_matched_branch_t *_b;
1495*d5f8dde1SJohn Marino 	  tre_last_matched_t *_u;
1496*d5f8dde1SJohn Marino 	  int *_t;
1497*d5f8dde1SJohn Marino 
1498*d5f8dde1SJohn Marino 	  DPRINT(("last_match_branch_pre:\n"));
1499*d5f8dde1SJohn Marino 	  print_last_match_branch_pre(tree->last_matched_branch, 0, num_tags);
1500*d5f8dde1SJohn Marino #endif /* TRE_DEBUG */
1501*d5f8dde1SJohn Marino 	  buf = (tre_last_matched_branch_t *)xcalloc(1,
1502*d5f8dde1SJohn Marino 				     tree->last_matched_branch->tot_branches
1503*d5f8dde1SJohn Marino 				     * sizeof(tre_last_matched_branch_t) +
1504*d5f8dde1SJohn Marino 				     tree->last_matched_branch->tot_last_matched
1505*d5f8dde1SJohn Marino 				     * sizeof(tre_last_matched_t) +
1506*d5f8dde1SJohn Marino 				     tree->last_matched_branch->tot_tags *
1507*d5f8dde1SJohn Marino 				     sizeof(int));
1508*d5f8dde1SJohn Marino 	  if (!buf)
1509*d5f8dde1SJohn Marino 	    {
1510*d5f8dde1SJohn Marino 	      status = REG_ESPACE;
1511*d5f8dde1SJohn Marino 	      goto error_post_compile;
1512*d5f8dde1SJohn Marino 	    }
1513*d5f8dde1SJohn Marino 
1514*d5f8dde1SJohn Marino 	  b = buf;
1515*d5f8dde1SJohn Marino 	  u = (tre_last_matched_t *)(b +
1516*d5f8dde1SJohn Marino 	      tree->last_matched_branch->tot_branches);
1517*d5f8dde1SJohn Marino 	  t = (int *)(u + tree->last_matched_branch->tot_last_matched);
1518*d5f8dde1SJohn Marino #ifdef TRE_DEBUG
1519*d5f8dde1SJohn Marino 	  _b = b;
1520*d5f8dde1SJohn Marino 	  _u = u;
1521*d5f8dde1SJohn Marino 	  _t = t;
1522*d5f8dde1SJohn Marino #endif /* TRE_DEBUG */
1523*d5f8dde1SJohn Marino 	  DPRINT(("Copying info_pre to info\n"));
1524*d5f8dde1SJohn Marino 	  STACK_PUSH(stack, voidptr, tree->last_matched_branch);
1525*d5f8dde1SJohn Marino 	  STACK_PUSH(stack, int, 1);
1526*d5f8dde1SJohn Marino 	  STACK_PUSH(stack, int, COPY_LAST_MATCHED_BRANCH);
1527*d5f8dde1SJohn Marino 
1528*d5f8dde1SJohn Marino 	  while (status == REG_OK && tre_stack_num_objects(stack) > bottom)
1529*d5f8dde1SJohn Marino 	    {
1530*d5f8dde1SJohn Marino 	      switch (tre_stack_pop_int(stack))
1531*d5f8dde1SJohn Marino 		{
1532*d5f8dde1SJohn Marino 		case COPY_LAST_MATCHED_BRANCH:
1533*d5f8dde1SJohn Marino 		  i = tre_stack_pop_int(stack);
1534*d5f8dde1SJohn Marino 		  /* The tre_last_matched_branch_pre_t * is still on the
1535*d5f8dde1SJohn Marino 		     stack */
1536*d5f8dde1SJohn Marino 		  STACK_PUSHX(stack, voidptr, b);
1537*d5f8dde1SJohn Marino 		  STACK_PUSHX(stack, int, COPY_LAST_MATCHED_BRANCH_NEXT);
1538*d5f8dde1SJohn Marino 		  b += i;
1539*d5f8dde1SJohn Marino 		  break;
1540*d5f8dde1SJohn Marino 
1541*d5f8dde1SJohn Marino 		case COPY_LAST_MATCHED_BRANCH_NEXT:
1542*d5f8dde1SJohn Marino 		  bb = tre_stack_pop_voidptr(stack);
1543*d5f8dde1SJohn Marino 		  bp = tre_stack_pop_voidptr(stack);
1544*d5f8dde1SJohn Marino 		  bb->n_last_matched = bp->n_last_matched;
1545*d5f8dde1SJohn Marino 		  bb->cmp_tag = bp->cmp_tag;
1546*d5f8dde1SJohn Marino 		  if (bp->n_tags > 0)
1547*d5f8dde1SJohn Marino 		    {
1548*d5f8dde1SJohn Marino 		      int n;
1549*d5f8dde1SJohn Marino 		      n = bb->n_tags = bp->n_tags;
1550*d5f8dde1SJohn Marino 		      bb->tags = t;
1551*d5f8dde1SJohn Marino 		      for (i = 0; i < num_tags; i++)
1552*d5f8dde1SJohn Marino 			if (bit_test(bp->tags, i))
1553*d5f8dde1SJohn Marino 			  {
1554*d5f8dde1SJohn Marino 			    *t++ = i;
1555*d5f8dde1SJohn Marino 			    if (--n <= 0)
1556*d5f8dde1SJohn Marino 			      break;
1557*d5f8dde1SJohn Marino 			  }
1558*d5f8dde1SJohn Marino 		    }
1559*d5f8dde1SJohn Marino 		  if (bp->next)
1560*d5f8dde1SJohn Marino 		    {
1561*d5f8dde1SJohn Marino 		      STACK_PUSHX(stack, voidptr, bp->next);
1562*d5f8dde1SJohn Marino 		      STACK_PUSHX(stack, voidptr, bb + 1);
1563*d5f8dde1SJohn Marino 		      STACK_PUSHX(stack, int, COPY_LAST_MATCHED_BRANCH_NEXT);
1564*d5f8dde1SJohn Marino 		    }
1565*d5f8dde1SJohn Marino 		  if (bp->n_last_matched > 0)
1566*d5f8dde1SJohn Marino 		    {
1567*d5f8dde1SJohn Marino 		      bb->last_matched = u;
1568*d5f8dde1SJohn Marino 		      STACK_PUSHX(stack, voidptr, bp->last_matched);
1569*d5f8dde1SJohn Marino 		      STACK_PUSHX(stack, int, bp->n_last_matched);
1570*d5f8dde1SJohn Marino 		      STACK_PUSHX(stack, int, COPY_LAST_MATCHED);
1571*d5f8dde1SJohn Marino 		    }
1572*d5f8dde1SJohn Marino 		  break;
1573*d5f8dde1SJohn Marino 
1574*d5f8dde1SJohn Marino 		case COPY_LAST_MATCHED:
1575*d5f8dde1SJohn Marino 		  i = tre_stack_pop_int(stack);
1576*d5f8dde1SJohn Marino 		  /* The tre_last_matched_pre_t * is still on the stack */
1577*d5f8dde1SJohn Marino 		  STACK_PUSHX(stack, voidptr, u);
1578*d5f8dde1SJohn Marino 		  STACK_PUSHX(stack, int, COPY_LAST_MATCHED_NEXT);
1579*d5f8dde1SJohn Marino 		  u += i;
1580*d5f8dde1SJohn Marino 		  break;
1581*d5f8dde1SJohn Marino 
1582*d5f8dde1SJohn Marino 		case COPY_LAST_MATCHED_NEXT:
1583*d5f8dde1SJohn Marino 		  uu = tre_stack_pop_voidptr(stack);
1584*d5f8dde1SJohn Marino 		  up = tre_stack_pop_voidptr(stack);
1585*d5f8dde1SJohn Marino 		  uu->n_branches = up->n_branches;
1586*d5f8dde1SJohn Marino 		  uu->branches = b;
1587*d5f8dde1SJohn Marino 		  uu->start_tag = up->start_tag;
1588*d5f8dde1SJohn Marino 		  if (up->next)
1589*d5f8dde1SJohn Marino 		    {
1590*d5f8dde1SJohn Marino 		      STACK_PUSHX(stack, voidptr, up->next);
1591*d5f8dde1SJohn Marino 		      STACK_PUSHX(stack, voidptr, uu + 1);
1592*d5f8dde1SJohn Marino 		      STACK_PUSHX(stack, int, COPY_LAST_MATCHED_NEXT);
1593*d5f8dde1SJohn Marino 		    }
1594*d5f8dde1SJohn Marino 		  STACK_PUSHX(stack, voidptr, up->branches);
1595*d5f8dde1SJohn Marino 		  STACK_PUSHX(stack, int, up->n_branches);
1596*d5f8dde1SJohn Marino 		  STACK_PUSHX(stack, int, COPY_LAST_MATCHED_BRANCH);
1597*d5f8dde1SJohn Marino 		  break;
1598*d5f8dde1SJohn Marino 		}
1599*d5f8dde1SJohn Marino 	    }
1600*d5f8dde1SJohn Marino 	  if (status != REG_OK)
1601*d5f8dde1SJohn Marino 	    goto error_post_compile;
1602*d5f8dde1SJohn Marino #ifdef TRE_DEBUG
1603*d5f8dde1SJohn Marino 	  DPRINT(("last_matched_branch:\n"));
1604*d5f8dde1SJohn Marino 	  print_last_match_branch(buf, 0);
1605*d5f8dde1SJohn Marino 	  if (b != _b + tree->last_matched_branch->tot_branches)
1606*d5f8dde1SJohn Marino 	    DPRINT(("b/%p != _b + tree->last_matched_branch->tot_branches/%p\n",
1607*d5f8dde1SJohn Marino 		    b, _b + tree->last_matched_branch->tot_branches));
1608*d5f8dde1SJohn Marino 	  if (u != _u + tree->last_matched_branch->tot_last_matched)
1609*d5f8dde1SJohn Marino 	    DPRINT(("u/%p != _u + "
1610*d5f8dde1SJohn Marino 		    "tree->last_matched_branch->tot_last_matched/%p\n",
1611*d5f8dde1SJohn Marino 		    u, _u + tree->last_matched_branch->tot_last_matched));
1612*d5f8dde1SJohn Marino 	  if (t != _t + tree->last_matched_branch->tot_tags)
1613*d5f8dde1SJohn Marino 	    DPRINT(("t/%p != _t + tree->last_matched_branch->tot_tags/%p\n",
1614*d5f8dde1SJohn Marino 		    t, _t + tree->last_matched_branch->tot_tags));
1615*d5f8dde1SJohn Marino #endif /* TRE_DEBUG */
1616*d5f8dde1SJohn Marino 	  tnfa->last_matched_branch = buf;
1617*d5f8dde1SJohn Marino 	}
1618*d5f8dde1SJohn Marino #ifdef TRE_DEBUG
1619*d5f8dde1SJohn Marino       else
1620*d5f8dde1SJohn Marino 	DPRINT(("No last_match_branch_pre\n"));
1621*d5f8dde1SJohn Marino #endif /* TRE_DEBUG */
16225f2eab64SJohn Marino     }
16235f2eab64SJohn Marino 
16245f2eab64SJohn Marino   DPRINT(("tre_add_tags: %s complete.  Number of tags %d.\n",
16255f2eab64SJohn Marino 	  first_pass? "First pass" : "Second pass", num_tags));
1626*d5f8dde1SJohn Marino #ifdef TRE_DEBUG
1627*d5f8dde1SJohn Marino   tre_ast_print(tree);
1628*d5f8dde1SJohn Marino #endif /* TRE_DEBUG */
1629*d5f8dde1SJohn Marino   DPRINT(("tre_add_tags: tree->num_tags=%d num_tags=%d\n", tree->num_tags,
1630*d5f8dde1SJohn Marino 	  num_tags));
16315f2eab64SJohn Marino   assert(tree->num_tags == num_tags);
16325f2eab64SJohn Marino   tnfa->end_tag = num_tags;
16335f2eab64SJohn Marino   tnfa->num_tags = num_tags;
16345f2eab64SJohn Marino   tnfa->num_minimals = num_minimals;
1635*d5f8dde1SJohn Marino error_post_compile:
1636*d5f8dde1SJohn Marino   xfree(reorder_tags);
1637*d5f8dde1SJohn Marino error_reorder_tags:
16385f2eab64SJohn Marino   xfree(orig_regset);
1639*d5f8dde1SJohn Marino error_regset:
16405f2eab64SJohn Marino   return status;
16415f2eab64SJohn Marino }
16425f2eab64SJohn Marino 
16435f2eab64SJohn Marino 
16445f2eab64SJohn Marino 
16455f2eab64SJohn Marino /*
16465f2eab64SJohn Marino   AST to TNFA compilation routines.
16475f2eab64SJohn Marino */
16485f2eab64SJohn Marino 
16495f2eab64SJohn Marino typedef enum {
16505f2eab64SJohn Marino   COPY_RECURSE,
16515f2eab64SJohn Marino   COPY_SET_RESULT_PTR
16525f2eab64SJohn Marino } tre_copyast_symbol_t;
16535f2eab64SJohn Marino 
16545f2eab64SJohn Marino /* Flags for tre_copy_ast(). */
16555f2eab64SJohn Marino #define COPY_REMOVE_TAGS	 1
16565f2eab64SJohn Marino #define COPY_MAXIMIZE_FIRST_TAG	 2
16575f2eab64SJohn Marino 
16585f2eab64SJohn Marino static reg_errcode_t
tre_copy_ast(tre_mem_t mem,tre_stack_t * stack,tre_ast_node_t * ast,int flags,int * pos_add,tre_tag_direction_t * tag_directions,tre_ast_node_t ** copy,int * max_pos)16595f2eab64SJohn Marino tre_copy_ast(tre_mem_t mem, tre_stack_t *stack, tre_ast_node_t *ast,
16605f2eab64SJohn Marino 	     int flags, int *pos_add, tre_tag_direction_t *tag_directions,
16615f2eab64SJohn Marino 	     tre_ast_node_t **copy, int *max_pos)
16625f2eab64SJohn Marino {
16635f2eab64SJohn Marino   reg_errcode_t status = REG_OK;
16645f2eab64SJohn Marino   int bottom = tre_stack_num_objects(stack);
16655f2eab64SJohn Marino   int num_copied = 0;
16665f2eab64SJohn Marino   int first_tag = 1;
16675f2eab64SJohn Marino   tre_ast_node_t **result = copy;
16685f2eab64SJohn Marino   tre_copyast_symbol_t symbol;
16695f2eab64SJohn Marino 
16705f2eab64SJohn Marino   STACK_PUSH(stack, voidptr, ast);
16715f2eab64SJohn Marino   STACK_PUSH(stack, int, COPY_RECURSE);
16725f2eab64SJohn Marino 
16735f2eab64SJohn Marino   while (status == REG_OK && tre_stack_num_objects(stack) > bottom)
16745f2eab64SJohn Marino     {
16755f2eab64SJohn Marino       tre_ast_node_t *node;
16765f2eab64SJohn Marino       if (status != REG_OK)
16775f2eab64SJohn Marino 	break;
16785f2eab64SJohn Marino 
16795f2eab64SJohn Marino       symbol = (tre_copyast_symbol_t)tre_stack_pop_int(stack);
16805f2eab64SJohn Marino       switch (symbol)
16815f2eab64SJohn Marino 	{
16825f2eab64SJohn Marino 	case COPY_SET_RESULT_PTR:
16835f2eab64SJohn Marino 	  result = tre_stack_pop_voidptr(stack);
16845f2eab64SJohn Marino 	  break;
16855f2eab64SJohn Marino 	case COPY_RECURSE:
16865f2eab64SJohn Marino 	  node = tre_stack_pop_voidptr(stack);
16875f2eab64SJohn Marino 	  switch (node->type)
16885f2eab64SJohn Marino 	    {
16895f2eab64SJohn Marino 	    case LITERAL:
16905f2eab64SJohn Marino 	      {
16915f2eab64SJohn Marino 		tre_literal_t *lit = node->obj;
16925f2eab64SJohn Marino 		int pos = lit->position;
16935f2eab64SJohn Marino 		int min = lit->code_min;
16945f2eab64SJohn Marino 		int max = lit->code_max;
1695*d5f8dde1SJohn Marino 		tre_bracket_match_list_t *list = !IS_SPECIAL(lit) ?
1696*d5f8dde1SJohn Marino 						  lit->u.bracket_match_list :
1697*d5f8dde1SJohn Marino 						  NULL;
16985f2eab64SJohn Marino 		if (!IS_SPECIAL(lit) || IS_BACKREF(lit))
16995f2eab64SJohn Marino 		  {
17005f2eab64SJohn Marino 		    /* XXX - e.g. [ab] has only one position but two
17015f2eab64SJohn Marino 		       nodes, so we are creating holes in the state space
17025f2eab64SJohn Marino 		       here.  Not fatal, just wastes memory. */
17035f2eab64SJohn Marino 		    pos += *pos_add;
17045f2eab64SJohn Marino 		    num_copied++;
17055f2eab64SJohn Marino 		  }
17065f2eab64SJohn Marino 		else if (IS_TAG(lit) && (flags & COPY_REMOVE_TAGS))
17075f2eab64SJohn Marino 		  {
17085f2eab64SJohn Marino 		    /* Change this tag to empty. */
17095f2eab64SJohn Marino 		    min = EMPTY;
17105f2eab64SJohn Marino 		    max = pos = -1;
17115f2eab64SJohn Marino 		  }
17125f2eab64SJohn Marino 		else if (IS_TAG(lit) && (flags & COPY_MAXIMIZE_FIRST_TAG)
17135f2eab64SJohn Marino 			 && first_tag)
17145f2eab64SJohn Marino 		  {
17155f2eab64SJohn Marino 		    /* Maximize the first tag. */
1716*d5f8dde1SJohn Marino 		    if (tag_directions[max] == TRE_TAG_LEFT_MAXIMIZE)
17175f2eab64SJohn Marino 		      tag_directions[max] = TRE_TAG_MAXIMIZE;
17185f2eab64SJohn Marino 		    first_tag = 0;
17195f2eab64SJohn Marino 		  }
17205f2eab64SJohn Marino 		*result = tre_ast_new_literal(mem, min, max, pos);
17215f2eab64SJohn Marino 		if (*result == NULL)
17225f2eab64SJohn Marino 		  status = REG_ESPACE;
17235f2eab64SJohn Marino 
17245f2eab64SJohn Marino 		if (pos > *max_pos)
17255f2eab64SJohn Marino 		  *max_pos = pos;
1726*d5f8dde1SJohn Marino 
1727*d5f8dde1SJohn Marino 		if (!IS_SPECIAL(lit))
1728*d5f8dde1SJohn Marino 		  ((tre_literal_t *)(*result)->obj)->u.bracket_match_list
1729*d5f8dde1SJohn Marino 		      = list;
17305f2eab64SJohn Marino 		break;
17315f2eab64SJohn Marino 	      }
17325f2eab64SJohn Marino 	    case UNION:
17335f2eab64SJohn Marino 	      {
17345f2eab64SJohn Marino 		tre_union_t *uni = node->obj;
17355f2eab64SJohn Marino 		tre_union_t *tmp;
17365f2eab64SJohn Marino 		*result = tre_ast_new_union(mem, uni->left, uni->right);
17375f2eab64SJohn Marino 		if (*result == NULL)
17385f2eab64SJohn Marino 		  {
17395f2eab64SJohn Marino 		    status = REG_ESPACE;
17405f2eab64SJohn Marino 		    break;
17415f2eab64SJohn Marino 		  }
17425f2eab64SJohn Marino 		tmp = (*result)->obj;
17435f2eab64SJohn Marino 		result = &tmp->left;
17445f2eab64SJohn Marino 		STACK_PUSHX(stack, voidptr, uni->right);
17455f2eab64SJohn Marino 		STACK_PUSHX(stack, int, COPY_RECURSE);
17465f2eab64SJohn Marino 		STACK_PUSHX(stack, voidptr, &tmp->right);
17475f2eab64SJohn Marino 		STACK_PUSHX(stack, int, COPY_SET_RESULT_PTR);
17485f2eab64SJohn Marino 		STACK_PUSHX(stack, voidptr, uni->left);
17495f2eab64SJohn Marino 		STACK_PUSHX(stack, int, COPY_RECURSE);
17505f2eab64SJohn Marino 		break;
17515f2eab64SJohn Marino 	      }
17525f2eab64SJohn Marino 	    case CATENATION:
17535f2eab64SJohn Marino 	      {
17545f2eab64SJohn Marino 		tre_catenation_t *cat = node->obj;
17555f2eab64SJohn Marino 		tre_catenation_t *tmp;
17565f2eab64SJohn Marino 		*result = tre_ast_new_catenation(mem, cat->left, cat->right);
17575f2eab64SJohn Marino 		if (*result == NULL)
17585f2eab64SJohn Marino 		  {
17595f2eab64SJohn Marino 		    status = REG_ESPACE;
17605f2eab64SJohn Marino 		    break;
17615f2eab64SJohn Marino 		  }
17625f2eab64SJohn Marino 		tmp = (*result)->obj;
17635f2eab64SJohn Marino 		tmp->left = NULL;
17645f2eab64SJohn Marino 		tmp->right = NULL;
17655f2eab64SJohn Marino 		result = &tmp->left;
17665f2eab64SJohn Marino 
17675f2eab64SJohn Marino 		STACK_PUSHX(stack, voidptr, cat->right);
17685f2eab64SJohn Marino 		STACK_PUSHX(stack, int, COPY_RECURSE);
17695f2eab64SJohn Marino 		STACK_PUSHX(stack, voidptr, &tmp->right);
17705f2eab64SJohn Marino 		STACK_PUSHX(stack, int, COPY_SET_RESULT_PTR);
17715f2eab64SJohn Marino 		STACK_PUSHX(stack, voidptr, cat->left);
17725f2eab64SJohn Marino 		STACK_PUSHX(stack, int, COPY_RECURSE);
17735f2eab64SJohn Marino 		break;
17745f2eab64SJohn Marino 	      }
17755f2eab64SJohn Marino 	    case ITERATION:
17765f2eab64SJohn Marino 	      {
17775f2eab64SJohn Marino 		tre_iteration_t *iter = node->obj;
17785f2eab64SJohn Marino 		STACK_PUSHX(stack, voidptr, iter->arg);
17795f2eab64SJohn Marino 		STACK_PUSHX(stack, int, COPY_RECURSE);
17805f2eab64SJohn Marino 		*result = tre_ast_new_iter(mem, iter->arg, iter->min,
17815f2eab64SJohn Marino 					   iter->max, iter->minimal);
17825f2eab64SJohn Marino 		if (*result == NULL)
17835f2eab64SJohn Marino 		  {
17845f2eab64SJohn Marino 		    status = REG_ESPACE;
17855f2eab64SJohn Marino 		    break;
17865f2eab64SJohn Marino 		  }
17875f2eab64SJohn Marino 		iter = (*result)->obj;
17885f2eab64SJohn Marino 		result = &iter->arg;
17895f2eab64SJohn Marino 		break;
17905f2eab64SJohn Marino 	      }
17915f2eab64SJohn Marino 	    default:
17925f2eab64SJohn Marino 	      assert(0);
17935f2eab64SJohn Marino 	      break;
17945f2eab64SJohn Marino 	    }
17955f2eab64SJohn Marino 	  break;
17965f2eab64SJohn Marino 	}
17975f2eab64SJohn Marino     }
17985f2eab64SJohn Marino   *pos_add += num_copied;
17995f2eab64SJohn Marino   return status;
18005f2eab64SJohn Marino }
18015f2eab64SJohn Marino 
18025f2eab64SJohn Marino typedef enum {
18035f2eab64SJohn Marino   EXPAND_RECURSE,
18045f2eab64SJohn Marino   EXPAND_AFTER_ITER
18055f2eab64SJohn Marino } tre_expand_ast_symbol_t;
18065f2eab64SJohn Marino 
18075f2eab64SJohn Marino /* Expands each iteration node that has a finite nonzero minimum or maximum
18085f2eab64SJohn Marino    iteration count to a catenated sequence of copies of the node. */
18095f2eab64SJohn Marino static reg_errcode_t
tre_expand_ast(tre_mem_t mem,tre_stack_t * stack,tre_ast_node_t * ast,int * position,tre_tag_direction_t * tag_directions,int * max_depth)18105f2eab64SJohn Marino tre_expand_ast(tre_mem_t mem, tre_stack_t *stack, tre_ast_node_t *ast,
18115f2eab64SJohn Marino 	       int *position, tre_tag_direction_t *tag_directions,
18125f2eab64SJohn Marino 	       int *max_depth)
18135f2eab64SJohn Marino {
18145f2eab64SJohn Marino   reg_errcode_t status = REG_OK;
18155f2eab64SJohn Marino   int bottom = tre_stack_num_objects(stack);
18165f2eab64SJohn Marino   int pos_add = 0;
18175f2eab64SJohn Marino   int pos_add_total = 0;
18185f2eab64SJohn Marino   int max_pos = 0;
1819*d5f8dde1SJohn Marino #ifdef TRE_APPROX
18205f2eab64SJohn Marino   /* Current approximate matching parameters. */
18215f2eab64SJohn Marino   int params[TRE_PARAM_LAST];
18225f2eab64SJohn Marino   /* Approximate parameter nesting level. */
18235f2eab64SJohn Marino   int params_depth = 0;
1824*d5f8dde1SJohn Marino #endif /* TRE_APPROX */
18255f2eab64SJohn Marino   int iter_depth = 0;
1826*d5f8dde1SJohn Marino #ifdef TRE_APPROX
18275f2eab64SJohn Marino   int i;
1828*d5f8dde1SJohn Marino #endif /* TRE_APPROX */
18295f2eab64SJohn Marino 
1830*d5f8dde1SJohn Marino #ifdef TRE_APPROX
18315f2eab64SJohn Marino   for (i = 0; i < TRE_PARAM_LAST; i++)
18325f2eab64SJohn Marino     params[i] = TRE_PARAM_DEFAULT;
1833*d5f8dde1SJohn Marino #endif /* TRE_APPROX */
18345f2eab64SJohn Marino 
18355f2eab64SJohn Marino   STACK_PUSHR(stack, voidptr, ast);
18365f2eab64SJohn Marino   STACK_PUSHR(stack, int, EXPAND_RECURSE);
18375f2eab64SJohn Marino   while (status == REG_OK && tre_stack_num_objects(stack) > bottom)
18385f2eab64SJohn Marino     {
18395f2eab64SJohn Marino       tre_ast_node_t *node;
18405f2eab64SJohn Marino       tre_expand_ast_symbol_t symbol;
18415f2eab64SJohn Marino 
18425f2eab64SJohn Marino       if (status != REG_OK)
18435f2eab64SJohn Marino 	break;
18445f2eab64SJohn Marino 
18455f2eab64SJohn Marino       DPRINT(("pos_add %d\n", pos_add));
18465f2eab64SJohn Marino 
18475f2eab64SJohn Marino       symbol = (tre_expand_ast_symbol_t)tre_stack_pop_int(stack);
18485f2eab64SJohn Marino       node = tre_stack_pop_voidptr(stack);
18495f2eab64SJohn Marino       switch (symbol)
18505f2eab64SJohn Marino 	{
18515f2eab64SJohn Marino 	case EXPAND_RECURSE:
18525f2eab64SJohn Marino 	  switch (node->type)
18535f2eab64SJohn Marino 	    {
18545f2eab64SJohn Marino 	    case LITERAL:
18555f2eab64SJohn Marino 	      {
18565f2eab64SJohn Marino 		tre_literal_t *lit= node->obj;
18575f2eab64SJohn Marino 		if (!IS_SPECIAL(lit) || IS_BACKREF(lit))
18585f2eab64SJohn Marino 		  {
18595f2eab64SJohn Marino 		    lit->position += pos_add;
18605f2eab64SJohn Marino 		    if (lit->position > max_pos)
18615f2eab64SJohn Marino 		      max_pos = lit->position;
18625f2eab64SJohn Marino 		  }
18635f2eab64SJohn Marino 		break;
18645f2eab64SJohn Marino 	      }
18655f2eab64SJohn Marino 	    case UNION:
18665f2eab64SJohn Marino 	      {
18675f2eab64SJohn Marino 		tre_union_t *uni = node->obj;
18685f2eab64SJohn Marino 		STACK_PUSHX(stack, voidptr, uni->right);
18695f2eab64SJohn Marino 		STACK_PUSHX(stack, int, EXPAND_RECURSE);
18705f2eab64SJohn Marino 		STACK_PUSHX(stack, voidptr, uni->left);
18715f2eab64SJohn Marino 		STACK_PUSHX(stack, int, EXPAND_RECURSE);
18725f2eab64SJohn Marino 		break;
18735f2eab64SJohn Marino 	      }
18745f2eab64SJohn Marino 	    case CATENATION:
18755f2eab64SJohn Marino 	      {
18765f2eab64SJohn Marino 		tre_catenation_t *cat = node->obj;
18775f2eab64SJohn Marino 		STACK_PUSHX(stack, voidptr, cat->right);
18785f2eab64SJohn Marino 		STACK_PUSHX(stack, int, EXPAND_RECURSE);
18795f2eab64SJohn Marino 		STACK_PUSHX(stack, voidptr, cat->left);
18805f2eab64SJohn Marino 		STACK_PUSHX(stack, int, EXPAND_RECURSE);
18815f2eab64SJohn Marino 		break;
18825f2eab64SJohn Marino 	      }
18835f2eab64SJohn Marino 	    case ITERATION:
18845f2eab64SJohn Marino 	      {
18855f2eab64SJohn Marino 		tre_iteration_t *iter = node->obj;
18865f2eab64SJohn Marino 		STACK_PUSHX(stack, int, pos_add);
18875f2eab64SJohn Marino 		STACK_PUSHX(stack, voidptr, node);
18885f2eab64SJohn Marino 		STACK_PUSHX(stack, int, EXPAND_AFTER_ITER);
18895f2eab64SJohn Marino 		STACK_PUSHX(stack, voidptr, iter->arg);
18905f2eab64SJohn Marino 		STACK_PUSHX(stack, int, EXPAND_RECURSE);
18915f2eab64SJohn Marino 		/* If we are going to expand this node at EXPAND_AFTER_ITER
18925f2eab64SJohn Marino 		   then don't increase the `pos' fields of the nodes now, it
18935f2eab64SJohn Marino 		   will get done when expanding. */
18945f2eab64SJohn Marino 		if (iter->min > 1 || iter->max > 1)
18955f2eab64SJohn Marino 		  pos_add = 0;
18965f2eab64SJohn Marino 		iter_depth++;
18975f2eab64SJohn Marino 		DPRINT(("iter\n"));
18985f2eab64SJohn Marino 		break;
18995f2eab64SJohn Marino 	      }
19005f2eab64SJohn Marino 	    default:
19015f2eab64SJohn Marino 	      assert(0);
19025f2eab64SJohn Marino 	      break;
19035f2eab64SJohn Marino 	    }
19045f2eab64SJohn Marino 	  break;
19055f2eab64SJohn Marino 	case EXPAND_AFTER_ITER:
19065f2eab64SJohn Marino 	  {
19075f2eab64SJohn Marino 	    tre_iteration_t *iter = node->obj;
19085f2eab64SJohn Marino 	    int pos_add_last;
19095f2eab64SJohn Marino 	    pos_add = tre_stack_pop_int(stack);
19105f2eab64SJohn Marino 	    pos_add_last = pos_add;
1911*d5f8dde1SJohn Marino 	    /* Originally (in tre_parse_bound), if min == 0 && max == 0, we
1912*d5f8dde1SJohn Marino 	       immediate replace the whole iteration with EMPTY.  This
1913*d5f8dde1SJohn Marino 	       unfortunately drops any submatches, and messes up setting the
1914*d5f8dde1SJohn Marino 	       pmatch values (we can get tags of -1, and tag values in the
1915*d5f8dde1SJohn Marino 	       billions).  So we left it there and replace with EMPTY here. */
1916*d5f8dde1SJohn Marino 	    if (iter->min == 0 && iter->max == 0)
1917*d5f8dde1SJohn Marino 	      {
1918*d5f8dde1SJohn Marino 		tre_ast_node_t *empty = tre_ast_new_literal(mem, EMPTY, -1, -1);
1919*d5f8dde1SJohn Marino 		if (empty == NULL)
1920*d5f8dde1SJohn Marino 		  return REG_ESPACE;
1921*d5f8dde1SJohn Marino 		node->obj = empty->obj;
1922*d5f8dde1SJohn Marino 		node->type = empty->type;
1923*d5f8dde1SJohn Marino 	      }
1924*d5f8dde1SJohn Marino 	    else if (iter->min > 1 || iter->max > 1)
19255f2eab64SJohn Marino 	      {
19265f2eab64SJohn Marino 		tre_ast_node_t *seq1 = NULL, *seq2 = NULL;
19275f2eab64SJohn Marino 		int j;
19285f2eab64SJohn Marino 		int pos_add_save = pos_add;
19295f2eab64SJohn Marino 
19305f2eab64SJohn Marino 		/* Create a catenated sequence of copies of the node. */
19315f2eab64SJohn Marino 		for (j = 0; j < iter->min; j++)
19325f2eab64SJohn Marino 		  {
19335f2eab64SJohn Marino 		    tre_ast_node_t *copy;
19345f2eab64SJohn Marino 		    /* Remove tags from all but the last copy. */
19355f2eab64SJohn Marino 		    int flags = ((j + 1 < iter->min)
19365f2eab64SJohn Marino 				 ? COPY_REMOVE_TAGS
19375f2eab64SJohn Marino 				 : COPY_MAXIMIZE_FIRST_TAG);
19385f2eab64SJohn Marino 		    DPRINT(("  pos_add %d\n", pos_add));
19395f2eab64SJohn Marino 		    pos_add_save = pos_add;
19405f2eab64SJohn Marino 		    status = tre_copy_ast(mem, stack, iter->arg, flags,
19415f2eab64SJohn Marino 					  &pos_add, tag_directions, &copy,
19425f2eab64SJohn Marino 					  &max_pos);
19435f2eab64SJohn Marino 		    if (status != REG_OK)
19445f2eab64SJohn Marino 		      return status;
19455f2eab64SJohn Marino 		    if (seq1 != NULL)
19465f2eab64SJohn Marino 		      seq1 = tre_ast_new_catenation(mem, seq1, copy);
19475f2eab64SJohn Marino 		    else
19485f2eab64SJohn Marino 		      seq1 = copy;
19495f2eab64SJohn Marino 		    if (seq1 == NULL)
19505f2eab64SJohn Marino 		      return REG_ESPACE;
19515f2eab64SJohn Marino 		  }
19525f2eab64SJohn Marino 
19535f2eab64SJohn Marino 		if (iter->max == -1)
19545f2eab64SJohn Marino 		  {
19555f2eab64SJohn Marino 		    /* No upper limit. */
19565f2eab64SJohn Marino 		    pos_add_save = pos_add;
19575f2eab64SJohn Marino 		    status = tre_copy_ast(mem, stack, iter->arg, 0,
19585f2eab64SJohn Marino 					  &pos_add, NULL, &seq2, &max_pos);
19595f2eab64SJohn Marino 		    if (status != REG_OK)
19605f2eab64SJohn Marino 		      return status;
19615f2eab64SJohn Marino 		    seq2 = tre_ast_new_iter(mem, seq2, 0, -1, 0);
19625f2eab64SJohn Marino 		    if (seq2 == NULL)
19635f2eab64SJohn Marino 		      return REG_ESPACE;
19645f2eab64SJohn Marino 		  }
19655f2eab64SJohn Marino 		else
19665f2eab64SJohn Marino 		  {
19675f2eab64SJohn Marino 		    for (j = iter->min; j < iter->max; j++)
19685f2eab64SJohn Marino 		      {
19695f2eab64SJohn Marino 			tre_ast_node_t *tmp, *copy;
19705f2eab64SJohn Marino 			pos_add_save = pos_add;
19715f2eab64SJohn Marino 			status = tre_copy_ast(mem, stack, iter->arg, 0,
19725f2eab64SJohn Marino 					      &pos_add, NULL, &copy, &max_pos);
19735f2eab64SJohn Marino 			if (status != REG_OK)
19745f2eab64SJohn Marino 			  return status;
19755f2eab64SJohn Marino 			if (seq2 != NULL)
19765f2eab64SJohn Marino 			  seq2 = tre_ast_new_catenation(mem, copy, seq2);
19775f2eab64SJohn Marino 			else
19785f2eab64SJohn Marino 			  seq2 = copy;
19795f2eab64SJohn Marino 			if (seq2 == NULL)
19805f2eab64SJohn Marino 			  return REG_ESPACE;
19815f2eab64SJohn Marino 			tmp = tre_ast_new_literal(mem, EMPTY, -1, -1);
19825f2eab64SJohn Marino 			if (tmp == NULL)
19835f2eab64SJohn Marino 			  return REG_ESPACE;
19845f2eab64SJohn Marino 			seq2 = tre_ast_new_union(mem, tmp, seq2);
19855f2eab64SJohn Marino 			if (seq2 == NULL)
19865f2eab64SJohn Marino 			  return REG_ESPACE;
19875f2eab64SJohn Marino 		      }
19885f2eab64SJohn Marino 		  }
19895f2eab64SJohn Marino 
19905f2eab64SJohn Marino 		pos_add = pos_add_save;
19915f2eab64SJohn Marino 		if (seq1 == NULL)
19925f2eab64SJohn Marino 		  seq1 = seq2;
19935f2eab64SJohn Marino 		else if (seq2 != NULL)
19945f2eab64SJohn Marino 		  seq1 = tre_ast_new_catenation(mem, seq1, seq2);
19955f2eab64SJohn Marino 		if (seq1 == NULL)
19965f2eab64SJohn Marino 		  return REG_ESPACE;
19975f2eab64SJohn Marino 		node->obj = seq1->obj;
19985f2eab64SJohn Marino 		node->type = seq1->type;
19995f2eab64SJohn Marino 	      }
20005f2eab64SJohn Marino 
20015f2eab64SJohn Marino 	    iter_depth--;
20025f2eab64SJohn Marino 	    pos_add_total += pos_add - pos_add_last;
20035f2eab64SJohn Marino 	    if (iter_depth == 0)
20045f2eab64SJohn Marino 	      pos_add = pos_add_total;
20055f2eab64SJohn Marino 
2006*d5f8dde1SJohn Marino #ifdef TRE_APPROX
20075f2eab64SJohn Marino 	    /* If approximate parameters are specified, surround the result
20085f2eab64SJohn Marino 	       with two parameter setting nodes.  The one on the left sets
20095f2eab64SJohn Marino 	       the specified parameters, and the one on the right restores
20105f2eab64SJohn Marino 	       the old parameters. */
20115f2eab64SJohn Marino 	    if (iter->params)
20125f2eab64SJohn Marino 	      {
20135f2eab64SJohn Marino 		tre_ast_node_t *tmp_l, *tmp_r, *tmp_node, *node_copy;
20145f2eab64SJohn Marino 		int *old_params;
20155f2eab64SJohn Marino 
20165f2eab64SJohn Marino 		tmp_l = tre_ast_new_literal(mem, PARAMETER, 0, -1);
20175f2eab64SJohn Marino 		if (!tmp_l)
20185f2eab64SJohn Marino 		  return REG_ESPACE;
20195f2eab64SJohn Marino 		((tre_literal_t *)tmp_l->obj)->u.params = iter->params;
20205f2eab64SJohn Marino 		iter->params[TRE_PARAM_DEPTH] = params_depth + 1;
20215f2eab64SJohn Marino 		tmp_r = tre_ast_new_literal(mem, PARAMETER, 0, -1);
20225f2eab64SJohn Marino 		if (!tmp_r)
20235f2eab64SJohn Marino 		  return REG_ESPACE;
20245f2eab64SJohn Marino 		old_params = tre_mem_alloc(mem, sizeof(*old_params)
20255f2eab64SJohn Marino 					   * TRE_PARAM_LAST);
20265f2eab64SJohn Marino 		if (!old_params)
20275f2eab64SJohn Marino 		  return REG_ESPACE;
20285f2eab64SJohn Marino 		for (i = 0; i < TRE_PARAM_LAST; i++)
20295f2eab64SJohn Marino 		  old_params[i] = params[i];
20305f2eab64SJohn Marino 		((tre_literal_t *)tmp_r->obj)->u.params = old_params;
20315f2eab64SJohn Marino 		old_params[TRE_PARAM_DEPTH] = params_depth;
20325f2eab64SJohn Marino 		/* XXX - this is the only place where ast_new_node is
20335f2eab64SJohn Marino 		   needed -- should be moved inside AST module. */
20345f2eab64SJohn Marino 		node_copy = tre_ast_new_node(mem, ITERATION,
20355f2eab64SJohn Marino 					     sizeof(tre_iteration_t));
20365f2eab64SJohn Marino 		if (!node_copy)
20375f2eab64SJohn Marino 		  return REG_ESPACE;
20385f2eab64SJohn Marino 		node_copy->obj = node->obj;
20395f2eab64SJohn Marino 		tmp_node = tre_ast_new_catenation(mem, tmp_l, node_copy);
20405f2eab64SJohn Marino 		if (!tmp_node)
20415f2eab64SJohn Marino 		  return REG_ESPACE;
20425f2eab64SJohn Marino 		tmp_node = tre_ast_new_catenation(mem, tmp_node, tmp_r);
20435f2eab64SJohn Marino 		if (!tmp_node)
20445f2eab64SJohn Marino 		  return REG_ESPACE;
20455f2eab64SJohn Marino 		/* Replace the contents of `node' with `tmp_node'. */
20465f2eab64SJohn Marino 		memcpy(node, tmp_node, sizeof(*node));
20475f2eab64SJohn Marino 		node->obj = tmp_node->obj;
20485f2eab64SJohn Marino 		node->type = tmp_node->type;
20495f2eab64SJohn Marino 		params_depth++;
20505f2eab64SJohn Marino 		if (params_depth > *max_depth)
20515f2eab64SJohn Marino 		  *max_depth = params_depth;
20525f2eab64SJohn Marino 	      }
2053*d5f8dde1SJohn Marino #endif /* TRE_APPROX */
20545f2eab64SJohn Marino 	    break;
20555f2eab64SJohn Marino 	  }
20565f2eab64SJohn Marino 	default:
20575f2eab64SJohn Marino 	  assert(0);
20585f2eab64SJohn Marino 	  break;
20595f2eab64SJohn Marino 	}
20605f2eab64SJohn Marino     }
20615f2eab64SJohn Marino 
20625f2eab64SJohn Marino   *position += pos_add_total;
20635f2eab64SJohn Marino 
20645f2eab64SJohn Marino   /* `max_pos' should never be larger than `*position' if the above
20655f2eab64SJohn Marino      code works, but just an extra safeguard let's make sure
20665f2eab64SJohn Marino      `*position' is set large enough so enough memory will be
20675f2eab64SJohn Marino      allocated for the transition table. */
20685f2eab64SJohn Marino   if (max_pos > *position)
20695f2eab64SJohn Marino     *position = max_pos;
20705f2eab64SJohn Marino 
20715f2eab64SJohn Marino #ifdef TRE_DEBUG
20725f2eab64SJohn Marino   DPRINT(("Expanded AST:\n"));
20735f2eab64SJohn Marino   tre_ast_print(ast);
20745f2eab64SJohn Marino   DPRINT(("*position %d, max_pos %d\n", *position, max_pos));
20755f2eab64SJohn Marino #endif
20765f2eab64SJohn Marino 
20775f2eab64SJohn Marino   return status;
20785f2eab64SJohn Marino }
20795f2eab64SJohn Marino 
20805f2eab64SJohn Marino static tre_pos_and_tags_t *
tre_set_empty(tre_mem_t mem)20815f2eab64SJohn Marino tre_set_empty(tre_mem_t mem)
20825f2eab64SJohn Marino {
20835f2eab64SJohn Marino   tre_pos_and_tags_t *new_set;
20845f2eab64SJohn Marino 
20855f2eab64SJohn Marino   new_set = tre_mem_calloc(mem, sizeof(*new_set));
20865f2eab64SJohn Marino   if (new_set == NULL)
20875f2eab64SJohn Marino     return NULL;
20885f2eab64SJohn Marino 
20895f2eab64SJohn Marino   new_set[0].position = -1;
20905f2eab64SJohn Marino   new_set[0].code_min = -1;
20915f2eab64SJohn Marino   new_set[0].code_max = -1;
20925f2eab64SJohn Marino 
20935f2eab64SJohn Marino   return new_set;
20945f2eab64SJohn Marino }
20955f2eab64SJohn Marino 
20965f2eab64SJohn Marino static tre_pos_and_tags_t *
tre_set_one(tre_mem_t mem,int position,int code_min,int code_max,tre_bracket_match_list_t * bracket_match_list,int backref)20975f2eab64SJohn Marino tre_set_one(tre_mem_t mem, int position, int code_min, int code_max,
2098*d5f8dde1SJohn Marino 	    tre_bracket_match_list_t *bracket_match_list, int backref)
20995f2eab64SJohn Marino {
21005f2eab64SJohn Marino   tre_pos_and_tags_t *new_set;
21015f2eab64SJohn Marino 
21025f2eab64SJohn Marino   new_set = tre_mem_calloc(mem, sizeof(*new_set) * 2);
21035f2eab64SJohn Marino   if (new_set == NULL)
21045f2eab64SJohn Marino     return NULL;
21055f2eab64SJohn Marino 
21065f2eab64SJohn Marino   new_set[0].position = position;
21075f2eab64SJohn Marino   new_set[0].code_min = code_min;
21085f2eab64SJohn Marino   new_set[0].code_max = code_max;
2109*d5f8dde1SJohn Marino   new_set[0].bracket_match_list = bracket_match_list;
21105f2eab64SJohn Marino   new_set[0].backref = backref;
21115f2eab64SJohn Marino   new_set[1].position = -1;
21125f2eab64SJohn Marino   new_set[1].code_min = -1;
21135f2eab64SJohn Marino   new_set[1].code_max = -1;
21145f2eab64SJohn Marino 
21155f2eab64SJohn Marino   return new_set;
21165f2eab64SJohn Marino }
21175f2eab64SJohn Marino 
21185f2eab64SJohn Marino static tre_pos_and_tags_t *
tre_set_union(tre_mem_t mem,tre_pos_and_tags_t * set1,tre_pos_and_tags_t * set2,int * tags,int assertions,int * params)21195f2eab64SJohn Marino tre_set_union(tre_mem_t mem, tre_pos_and_tags_t *set1, tre_pos_and_tags_t *set2,
21205f2eab64SJohn Marino 	      int *tags, int assertions, int *params)
21215f2eab64SJohn Marino {
21225f2eab64SJohn Marino   int s1, s2, i, j;
21235f2eab64SJohn Marino   tre_pos_and_tags_t *new_set;
21245f2eab64SJohn Marino   int *new_tags;
21255f2eab64SJohn Marino   int num_tags;
21265f2eab64SJohn Marino 
21275f2eab64SJohn Marino   for (num_tags = 0; tags != NULL && tags[num_tags] >= 0; num_tags++);
21285f2eab64SJohn Marino   for (s1 = 0; set1[s1].position >= 0; s1++);
21295f2eab64SJohn Marino   for (s2 = 0; set2[s2].position >= 0; s2++);
21305f2eab64SJohn Marino   new_set = tre_mem_calloc(mem, sizeof(*new_set) * (s1 + s2 + 1));
21315f2eab64SJohn Marino   if (!new_set )
21325f2eab64SJohn Marino     return NULL;
21335f2eab64SJohn Marino 
21345f2eab64SJohn Marino   for (s1 = 0; set1[s1].position >= 0; s1++)
21355f2eab64SJohn Marino     {
21365f2eab64SJohn Marino       new_set[s1].position = set1[s1].position;
21375f2eab64SJohn Marino       new_set[s1].code_min = set1[s1].code_min;
21385f2eab64SJohn Marino       new_set[s1].code_max = set1[s1].code_max;
21395f2eab64SJohn Marino       new_set[s1].assertions = set1[s1].assertions | assertions;
2140*d5f8dde1SJohn Marino       new_set[s1].bracket_match_list = set1[s1].bracket_match_list;
21415f2eab64SJohn Marino       new_set[s1].backref = set1[s1].backref;
21425f2eab64SJohn Marino       if (set1[s1].tags == NULL && tags == NULL)
21435f2eab64SJohn Marino 	new_set[s1].tags = NULL;
21445f2eab64SJohn Marino       else
21455f2eab64SJohn Marino 	{
21465f2eab64SJohn Marino 	  for (i = 0; set1[s1].tags != NULL && set1[s1].tags[i] >= 0; i++);
21475f2eab64SJohn Marino 	  new_tags = tre_mem_alloc(mem, (sizeof(*new_tags)
21485f2eab64SJohn Marino 					 * (i + num_tags + 1)));
21495f2eab64SJohn Marino 	  if (new_tags == NULL)
21505f2eab64SJohn Marino 	    return NULL;
21515f2eab64SJohn Marino 	  for (j = 0; j < i; j++)
21525f2eab64SJohn Marino 	    new_tags[j] = set1[s1].tags[j];
21535f2eab64SJohn Marino 	  for (i = 0; i < num_tags; i++)
21545f2eab64SJohn Marino 	    new_tags[j + i] = tags[i];
21555f2eab64SJohn Marino 	  new_tags[j + i] = -1;
21565f2eab64SJohn Marino 	  new_set[s1].tags = new_tags;
21575f2eab64SJohn Marino 	}
21585f2eab64SJohn Marino       if (set1[s1].params)
21595f2eab64SJohn Marino 	new_set[s1].params = set1[s1].params;
21605f2eab64SJohn Marino       if (params)
21615f2eab64SJohn Marino 	{
21625f2eab64SJohn Marino 	  if (!new_set[s1].params)
21635f2eab64SJohn Marino 	    new_set[s1].params = params;
21645f2eab64SJohn Marino 	  else
21655f2eab64SJohn Marino 	    {
21665f2eab64SJohn Marino 	      new_set[s1].params = tre_mem_alloc(mem, sizeof(*params) *
21675f2eab64SJohn Marino 						 TRE_PARAM_LAST);
21685f2eab64SJohn Marino 	      if (!new_set[s1].params)
21695f2eab64SJohn Marino 		return NULL;
21705f2eab64SJohn Marino 	      for (i = 0; i < TRE_PARAM_LAST; i++)
21715f2eab64SJohn Marino 		if (params[i] != TRE_PARAM_UNSET)
21725f2eab64SJohn Marino 		  new_set[s1].params[i] = params[i];
21735f2eab64SJohn Marino 	    }
21745f2eab64SJohn Marino 	}
21755f2eab64SJohn Marino     }
21765f2eab64SJohn Marino 
21775f2eab64SJohn Marino   for (s2 = 0; set2[s2].position >= 0; s2++)
21785f2eab64SJohn Marino     {
21795f2eab64SJohn Marino       new_set[s1 + s2].position = set2[s2].position;
21805f2eab64SJohn Marino       new_set[s1 + s2].code_min = set2[s2].code_min;
21815f2eab64SJohn Marino       new_set[s1 + s2].code_max = set2[s2].code_max;
21825f2eab64SJohn Marino       /* XXX - why not | assertions here as well? */
21835f2eab64SJohn Marino       new_set[s1 + s2].assertions = set2[s2].assertions;
2184*d5f8dde1SJohn Marino       new_set[s1 + s2].bracket_match_list = set2[s2].bracket_match_list;
21855f2eab64SJohn Marino       new_set[s1 + s2].backref = set2[s2].backref;
21865f2eab64SJohn Marino       if (set2[s2].tags == NULL)
21875f2eab64SJohn Marino 	new_set[s1 + s2].tags = NULL;
21885f2eab64SJohn Marino       else
21895f2eab64SJohn Marino 	{
21905f2eab64SJohn Marino 	  for (i = 0; set2[s2].tags[i] >= 0; i++);
21915f2eab64SJohn Marino 	  new_tags = tre_mem_alloc(mem, sizeof(*new_tags) * (i + 1));
21925f2eab64SJohn Marino 	  if (new_tags == NULL)
21935f2eab64SJohn Marino 	    return NULL;
21945f2eab64SJohn Marino 	  for (j = 0; j < i; j++)
21955f2eab64SJohn Marino 	    new_tags[j] = set2[s2].tags[j];
21965f2eab64SJohn Marino 	  new_tags[j] = -1;
21975f2eab64SJohn Marino 	  new_set[s1 + s2].tags = new_tags;
21985f2eab64SJohn Marino 	}
21995f2eab64SJohn Marino       if (set2[s2].params)
22005f2eab64SJohn Marino 	new_set[s1 + s2].params = set2[s2].params;
22015f2eab64SJohn Marino       if (params)
22025f2eab64SJohn Marino 	{
22035f2eab64SJohn Marino 	  if (!new_set[s1 + s2].params)
22045f2eab64SJohn Marino 	    new_set[s1 + s2].params = params;
22055f2eab64SJohn Marino 	  else
22065f2eab64SJohn Marino 	    {
22075f2eab64SJohn Marino 	      new_set[s1 + s2].params = tre_mem_alloc(mem, sizeof(*params) *
22085f2eab64SJohn Marino 						      TRE_PARAM_LAST);
22095f2eab64SJohn Marino 	      if (!new_set[s1 + s2].params)
22105f2eab64SJohn Marino 		return NULL;
22115f2eab64SJohn Marino 	      for (i = 0; i < TRE_PARAM_LAST; i++)
22125f2eab64SJohn Marino 		if (params[i] != TRE_PARAM_UNSET)
22135f2eab64SJohn Marino 		  new_set[s1 + s2].params[i] = params[i];
22145f2eab64SJohn Marino 	    }
22155f2eab64SJohn Marino 	}
22165f2eab64SJohn Marino     }
22175f2eab64SJohn Marino   new_set[s1 + s2].position = -1;
22185f2eab64SJohn Marino   return new_set;
22195f2eab64SJohn Marino }
22205f2eab64SJohn Marino 
22215f2eab64SJohn Marino /* Finds the empty path through `node' which is the one that should be
22225f2eab64SJohn Marino    taken according to POSIX.2 rules, and adds the tags on that path to
22235f2eab64SJohn Marino    `tags'.   `tags' may be NULL.  If `num_tags_seen' is not NULL, it is
22245f2eab64SJohn Marino    set to the number of tags seen on the path. */
22255f2eab64SJohn Marino static reg_errcode_t
tre_match_empty(tre_stack_t * stack,tre_ast_node_t * node,int * tags,int * assertions,int * params,int * num_tags_seen,int * params_seen)22265f2eab64SJohn Marino tre_match_empty(tre_stack_t *stack, tre_ast_node_t *node, int *tags,
22275f2eab64SJohn Marino 		int *assertions, int *params, int *num_tags_seen,
22285f2eab64SJohn Marino 		int *params_seen)
22295f2eab64SJohn Marino {
22305f2eab64SJohn Marino   tre_literal_t *lit;
22315f2eab64SJohn Marino   tre_union_t *uni;
22325f2eab64SJohn Marino   tre_catenation_t *cat;
22335f2eab64SJohn Marino   tre_iteration_t *iter;
22345f2eab64SJohn Marino   int i;
22355f2eab64SJohn Marino   int bottom = tre_stack_num_objects(stack);
22365f2eab64SJohn Marino   reg_errcode_t status = REG_OK;
22375f2eab64SJohn Marino   if (num_tags_seen)
22385f2eab64SJohn Marino     *num_tags_seen = 0;
22395f2eab64SJohn Marino   if (params_seen)
22405f2eab64SJohn Marino     *params_seen = 0;
22415f2eab64SJohn Marino 
22425f2eab64SJohn Marino   status = tre_stack_push_voidptr(stack, node);
22435f2eab64SJohn Marino 
22445f2eab64SJohn Marino   /* Walk through the tree recursively. */
22455f2eab64SJohn Marino   while (status == REG_OK && tre_stack_num_objects(stack) > bottom)
22465f2eab64SJohn Marino     {
22475f2eab64SJohn Marino       node = tre_stack_pop_voidptr(stack);
22485f2eab64SJohn Marino 
22495f2eab64SJohn Marino       switch (node->type)
22505f2eab64SJohn Marino 	{
22515f2eab64SJohn Marino 	case LITERAL:
22525f2eab64SJohn Marino 	  lit = (tre_literal_t *)node->obj;
22535f2eab64SJohn Marino 	  switch (lit->code_min)
22545f2eab64SJohn Marino 	    {
22555f2eab64SJohn Marino 	    case TAG:
22565f2eab64SJohn Marino 	      if (lit->code_max >= 0)
22575f2eab64SJohn Marino 		{
22585f2eab64SJohn Marino 		  if (tags != NULL)
22595f2eab64SJohn Marino 		    {
22605f2eab64SJohn Marino 		      /* Add the tag to `tags'. */
22615f2eab64SJohn Marino 		      for (i = 0; tags[i] >= 0; i++)
22625f2eab64SJohn Marino 			if (tags[i] == lit->code_max)
22635f2eab64SJohn Marino 			  break;
22645f2eab64SJohn Marino 		      if (tags[i] < 0)
22655f2eab64SJohn Marino 			{
22665f2eab64SJohn Marino 			  tags[i] = lit->code_max;
22675f2eab64SJohn Marino 			  tags[i + 1] = -1;
22685f2eab64SJohn Marino 			}
22695f2eab64SJohn Marino 		    }
22705f2eab64SJohn Marino 		  if (num_tags_seen)
22715f2eab64SJohn Marino 		    (*num_tags_seen)++;
22725f2eab64SJohn Marino 		}
22735f2eab64SJohn Marino 	      break;
22745f2eab64SJohn Marino 	    case ASSERTION:
22755f2eab64SJohn Marino 	      assert(lit->code_max >= 1
22765f2eab64SJohn Marino 		     || lit->code_max <= ASSERT_LAST);
22775f2eab64SJohn Marino 	      if (assertions != NULL)
22785f2eab64SJohn Marino 		*assertions |= lit->code_max;
22795f2eab64SJohn Marino 	      break;
22805f2eab64SJohn Marino 	    case PARAMETER:
22815f2eab64SJohn Marino 	      if (params != NULL)
22825f2eab64SJohn Marino 		for (i = 0; i < TRE_PARAM_LAST; i++)
22835f2eab64SJohn Marino 		  params[i] = lit->u.params[i];
22845f2eab64SJohn Marino 	      if (params_seen != NULL)
22855f2eab64SJohn Marino 		*params_seen = 1;
22865f2eab64SJohn Marino 	      break;
22875f2eab64SJohn Marino 	    case EMPTY:
22885f2eab64SJohn Marino 	      break;
22895f2eab64SJohn Marino 	    default:
22905f2eab64SJohn Marino 	      assert(0);
22915f2eab64SJohn Marino 	      break;
22925f2eab64SJohn Marino 	    }
22935f2eab64SJohn Marino 	  break;
22945f2eab64SJohn Marino 
22955f2eab64SJohn Marino 	case UNION:
22965f2eab64SJohn Marino 	  /* Subexpressions starting earlier take priority over ones
22975f2eab64SJohn Marino 	     starting later, so we prefer the left subexpression over the
22985f2eab64SJohn Marino 	     right subexpression. */
22995f2eab64SJohn Marino 	  uni = (tre_union_t *)node->obj;
23005f2eab64SJohn Marino 	  if (uni->left->nullable)
23015f2eab64SJohn Marino 	    STACK_PUSHX(stack, voidptr, uni->left)
23025f2eab64SJohn Marino 	  else if (uni->right->nullable)
23035f2eab64SJohn Marino 	    STACK_PUSHX(stack, voidptr, uni->right)
23045f2eab64SJohn Marino 	  else
23055f2eab64SJohn Marino 	    assert(0);
23065f2eab64SJohn Marino 	  break;
23075f2eab64SJohn Marino 
23085f2eab64SJohn Marino 	case CATENATION:
23095f2eab64SJohn Marino 	  /* The path must go through both children. */
23105f2eab64SJohn Marino 	  cat = (tre_catenation_t *)node->obj;
23115f2eab64SJohn Marino 	  assert(cat->left->nullable);
23125f2eab64SJohn Marino 	  assert(cat->right->nullable);
23135f2eab64SJohn Marino 	  STACK_PUSHX(stack, voidptr, cat->left);
23145f2eab64SJohn Marino 	  STACK_PUSHX(stack, voidptr, cat->right);
23155f2eab64SJohn Marino 	  break;
23165f2eab64SJohn Marino 
23175f2eab64SJohn Marino 	case ITERATION:
23185f2eab64SJohn Marino 	  /* A match with an empty string is preferred over no match at
23195f2eab64SJohn Marino 	     all, so we go through the argument if possible. */
23205f2eab64SJohn Marino 	  iter = (tre_iteration_t *)node->obj;
23215f2eab64SJohn Marino 	  if (iter->arg->nullable)
23225f2eab64SJohn Marino 	    STACK_PUSHX(stack, voidptr, iter->arg);
23235f2eab64SJohn Marino 	  break;
23245f2eab64SJohn Marino 
23255f2eab64SJohn Marino 	default:
23265f2eab64SJohn Marino 	  assert(0);
23275f2eab64SJohn Marino 	  break;
23285f2eab64SJohn Marino 	}
23295f2eab64SJohn Marino     }
23305f2eab64SJohn Marino 
23315f2eab64SJohn Marino   return status;
23325f2eab64SJohn Marino }
23335f2eab64SJohn Marino 
23345f2eab64SJohn Marino 
23355f2eab64SJohn Marino typedef enum {
23365f2eab64SJohn Marino   NFL_RECURSE,
23375f2eab64SJohn Marino   NFL_POST_UNION,
23385f2eab64SJohn Marino   NFL_POST_CATENATION,
23395f2eab64SJohn Marino   NFL_POST_ITERATION
23405f2eab64SJohn Marino } tre_nfl_stack_symbol_t;
23415f2eab64SJohn Marino 
23425f2eab64SJohn Marino 
23435f2eab64SJohn Marino /* Computes and fills in the fields `nullable', `firstpos', and `lastpos' for
23445f2eab64SJohn Marino    the nodes of the AST `tree'. */
23455f2eab64SJohn Marino static reg_errcode_t
tre_compute_nfl(tre_mem_t mem,tre_stack_t * stack,tre_ast_node_t * tree)23465f2eab64SJohn Marino tre_compute_nfl(tre_mem_t mem, tre_stack_t *stack, tre_ast_node_t *tree)
23475f2eab64SJohn Marino {
23485f2eab64SJohn Marino   int bottom = tre_stack_num_objects(stack);
23495f2eab64SJohn Marino 
23505f2eab64SJohn Marino   STACK_PUSHR(stack, voidptr, tree);
23515f2eab64SJohn Marino   STACK_PUSHR(stack, int, NFL_RECURSE);
23525f2eab64SJohn Marino 
23535f2eab64SJohn Marino   while (tre_stack_num_objects(stack) > bottom)
23545f2eab64SJohn Marino     {
23555f2eab64SJohn Marino       tre_nfl_stack_symbol_t symbol;
23565f2eab64SJohn Marino       tre_ast_node_t *node;
23575f2eab64SJohn Marino 
23585f2eab64SJohn Marino       symbol = (tre_nfl_stack_symbol_t)tre_stack_pop_int(stack);
23595f2eab64SJohn Marino       node = tre_stack_pop_voidptr(stack);
23605f2eab64SJohn Marino       switch (symbol)
23615f2eab64SJohn Marino 	{
23625f2eab64SJohn Marino 	case NFL_RECURSE:
23635f2eab64SJohn Marino 	  switch (node->type)
23645f2eab64SJohn Marino 	    {
23655f2eab64SJohn Marino 	    case LITERAL:
23665f2eab64SJohn Marino 	      {
23675f2eab64SJohn Marino 		tre_literal_t *lit = (tre_literal_t *)node->obj;
23685f2eab64SJohn Marino 		if (IS_BACKREF(lit))
23695f2eab64SJohn Marino 		  {
23705f2eab64SJohn Marino 		    /* Back references: nullable = false, firstpos = {i},
23715f2eab64SJohn Marino 		       lastpos = {i}. */
23725f2eab64SJohn Marino 		    node->nullable = 0;
23735f2eab64SJohn Marino 		    node->firstpos = tre_set_one(mem, lit->position, 0,
2374*d5f8dde1SJohn Marino 					     TRE_CHAR_MAX, NULL, -1);
23755f2eab64SJohn Marino 		    if (!node->firstpos)
23765f2eab64SJohn Marino 		      return REG_ESPACE;
23775f2eab64SJohn Marino 		    node->lastpos = tre_set_one(mem, lit->position, 0,
2378*d5f8dde1SJohn Marino 						TRE_CHAR_MAX, NULL,
23795f2eab64SJohn Marino 						(int)lit->code_max);
23805f2eab64SJohn Marino 		    if (!node->lastpos)
23815f2eab64SJohn Marino 		      return REG_ESPACE;
23825f2eab64SJohn Marino 		  }
23835f2eab64SJohn Marino 		else if (lit->code_min < 0)
23845f2eab64SJohn Marino 		  {
23855f2eab64SJohn Marino 		    /* Tags, empty strings, params, and zero width assertions:
23865f2eab64SJohn Marino 		       nullable = true, firstpos = {}, and lastpos = {}. */
23875f2eab64SJohn Marino 		    node->nullable = 1;
23885f2eab64SJohn Marino 		    node->firstpos = tre_set_empty(mem);
23895f2eab64SJohn Marino 		    if (!node->firstpos)
23905f2eab64SJohn Marino 		      return REG_ESPACE;
23915f2eab64SJohn Marino 		    node->lastpos = tre_set_empty(mem);
23925f2eab64SJohn Marino 		    if (!node->lastpos)
23935f2eab64SJohn Marino 		      return REG_ESPACE;
23945f2eab64SJohn Marino 		  }
23955f2eab64SJohn Marino 		else
23965f2eab64SJohn Marino 		  {
23975f2eab64SJohn Marino 		    /* Literal at position i: nullable = false, firstpos = {i},
23985f2eab64SJohn Marino 		       lastpos = {i}. */
23995f2eab64SJohn Marino 		    node->nullable = 0;
24005f2eab64SJohn Marino 		    node->firstpos =
24015f2eab64SJohn Marino 		      tre_set_one(mem, lit->position, (int)lit->code_min,
2402*d5f8dde1SJohn Marino 				  (int)lit->code_max, NULL, -1);
24035f2eab64SJohn Marino 		    if (!node->firstpos)
24045f2eab64SJohn Marino 		      return REG_ESPACE;
24055f2eab64SJohn Marino 		    node->lastpos = tre_set_one(mem, lit->position,
24065f2eab64SJohn Marino 						(int)lit->code_min,
24075f2eab64SJohn Marino 						(int)lit->code_max,
2408*d5f8dde1SJohn Marino 						lit->u.bracket_match_list,
24095f2eab64SJohn Marino 						-1);
24105f2eab64SJohn Marino 		    if (!node->lastpos)
24115f2eab64SJohn Marino 		      return REG_ESPACE;
24125f2eab64SJohn Marino 		  }
24135f2eab64SJohn Marino 		break;
24145f2eab64SJohn Marino 	      }
24155f2eab64SJohn Marino 
24165f2eab64SJohn Marino 	    case UNION:
24175f2eab64SJohn Marino 	      /* Compute the attributes for the two subtrees, and after that
24185f2eab64SJohn Marino 		 for this node. */
24195f2eab64SJohn Marino 	      STACK_PUSHR(stack, voidptr, node);
24205f2eab64SJohn Marino 	      STACK_PUSHR(stack, int, NFL_POST_UNION);
24215f2eab64SJohn Marino 	      STACK_PUSHR(stack, voidptr, ((tre_union_t *)node->obj)->right);
24225f2eab64SJohn Marino 	      STACK_PUSHR(stack, int, NFL_RECURSE);
24235f2eab64SJohn Marino 	      STACK_PUSHR(stack, voidptr, ((tre_union_t *)node->obj)->left);
24245f2eab64SJohn Marino 	      STACK_PUSHR(stack, int, NFL_RECURSE);
24255f2eab64SJohn Marino 	      break;
24265f2eab64SJohn Marino 
24275f2eab64SJohn Marino 	    case CATENATION:
24285f2eab64SJohn Marino 	      /* Compute the attributes for the two subtrees, and after that
24295f2eab64SJohn Marino 		 for this node. */
24305f2eab64SJohn Marino 	      STACK_PUSHR(stack, voidptr, node);
24315f2eab64SJohn Marino 	      STACK_PUSHR(stack, int, NFL_POST_CATENATION);
24325f2eab64SJohn Marino 	      STACK_PUSHR(stack, voidptr, ((tre_catenation_t *)node->obj)->right);
24335f2eab64SJohn Marino 	      STACK_PUSHR(stack, int, NFL_RECURSE);
24345f2eab64SJohn Marino 	      STACK_PUSHR(stack, voidptr, ((tre_catenation_t *)node->obj)->left);
24355f2eab64SJohn Marino 	      STACK_PUSHR(stack, int, NFL_RECURSE);
24365f2eab64SJohn Marino 	      break;
24375f2eab64SJohn Marino 
24385f2eab64SJohn Marino 	    case ITERATION:
24395f2eab64SJohn Marino 	      /* Compute the attributes for the subtree, and after that for
24405f2eab64SJohn Marino 		 this node. */
24415f2eab64SJohn Marino 	      STACK_PUSHR(stack, voidptr, node);
24425f2eab64SJohn Marino 	      STACK_PUSHR(stack, int, NFL_POST_ITERATION);
24435f2eab64SJohn Marino 	      STACK_PUSHR(stack, voidptr, ((tre_iteration_t *)node->obj)->arg);
24445f2eab64SJohn Marino 	      STACK_PUSHR(stack, int, NFL_RECURSE);
24455f2eab64SJohn Marino 	      break;
24465f2eab64SJohn Marino 	    }
24475f2eab64SJohn Marino 	  break; /* end case: NFL_RECURSE */
24485f2eab64SJohn Marino 
24495f2eab64SJohn Marino 	case NFL_POST_UNION:
24505f2eab64SJohn Marino 	  {
24515f2eab64SJohn Marino 	    tre_union_t *uni = (tre_union_t *)node->obj;
24525f2eab64SJohn Marino 	    node->nullable = uni->left->nullable || uni->right->nullable;
24535f2eab64SJohn Marino 	    node->firstpos = tre_set_union(mem, uni->left->firstpos,
24545f2eab64SJohn Marino 					   uni->right->firstpos, NULL, 0, NULL);
24555f2eab64SJohn Marino 	    if (!node->firstpos)
24565f2eab64SJohn Marino 	      return REG_ESPACE;
24575f2eab64SJohn Marino 	    node->lastpos = tre_set_union(mem, uni->left->lastpos,
24585f2eab64SJohn Marino 					  uni->right->lastpos, NULL, 0, NULL);
24595f2eab64SJohn Marino 	    if (!node->lastpos)
24605f2eab64SJohn Marino 	      return REG_ESPACE;
24615f2eab64SJohn Marino 	    break;
24625f2eab64SJohn Marino 	  }
24635f2eab64SJohn Marino 
24645f2eab64SJohn Marino 	case NFL_POST_ITERATION:
24655f2eab64SJohn Marino 	  {
2466*d5f8dde1SJohn Marino 	    int num_tags, *tags, assertions, params_seen;
2467*d5f8dde1SJohn Marino 	    int *params;
2468*d5f8dde1SJohn Marino 	    reg_errcode_t status;
24695f2eab64SJohn Marino 	    tre_iteration_t *iter = (tre_iteration_t *)node->obj;
24705f2eab64SJohn Marino 
2471*d5f8dde1SJohn Marino 	    /* From Ville Laurikari's original 2001 Master's thesis, the
2472*d5f8dde1SJohn Marino 	       firstpos(n) and lastpos(n) of an iteration is just the
2473*d5f8dde1SJohn Marino 	       corresponding values of the iteration's argument.  Unfortunately,
2474*d5f8dde1SJohn Marino 	       this isn't sufficient for the following BRE:
2475*d5f8dde1SJohn Marino 
2476*d5f8dde1SJohn Marino 	           \(a*\)*b\(\1\)    matched against    ab
2477*d5f8dde1SJohn Marino 
2478*d5f8dde1SJohn Marino 	       The backreference wants to force the first subexpression to
2479*d5f8dde1SJohn Marino 	       be the empty string, but there is no transition for this.  So
2480*d5f8dde1SJohn Marino 	       we need to modify the lastpos(n) of an iteration to be the
2481*d5f8dde1SJohn Marino 	       equivalent of that of catentation.  Using the same notation as
2482*d5f8dde1SJohn Marino 	       in the thesis, lastpos(n) is redefined as:
2483*d5f8dde1SJohn Marino 
2484*d5f8dde1SJohn Marino 	           if nullable(c1) then
2485*d5f8dde1SJohn Marino 		       lastpos(c1) U
2486*d5f8dde1SJohn Marino 		       addtags(lastpos(c1),
2487*d5f8dde1SJohn Marino 		               emptymatch(c1))
24885f2eab64SJohn Marino 		   else
2489*d5f8dde1SJohn Marino 		       lastpos(c1)
2490*d5f8dde1SJohn Marino 
2491*d5f8dde1SJohn Marino 	       where c1 is the argument node.  firstpos(n) remains the same. */
2492*d5f8dde1SJohn Marino 
2493*d5f8dde1SJohn Marino 	    /* Compute lastpos. */
2494*d5f8dde1SJohn Marino 	    if (iter->min == 0 || iter->arg->nullable)
2495*d5f8dde1SJohn Marino 	      {
2496*d5f8dde1SJohn Marino 		node->nullable = 1;
2497*d5f8dde1SJohn Marino 		if (iter->arg->nullable)
2498*d5f8dde1SJohn Marino 		  {
2499*d5f8dde1SJohn Marino 		    /* The arg matches the empty string.  Make a first pass
2500*d5f8dde1SJohn Marino 		       with tre_match_empty() to get the number of tags and
2501*d5f8dde1SJohn Marino 		       parameters. */
2502*d5f8dde1SJohn Marino 		    status = tre_match_empty(stack, iter->arg,
2503*d5f8dde1SJohn Marino 					     NULL, NULL, NULL, &num_tags,
2504*d5f8dde1SJohn Marino 					     &params_seen);
2505*d5f8dde1SJohn Marino 		    if (status != REG_OK)
2506*d5f8dde1SJohn Marino 		      return status;
2507*d5f8dde1SJohn Marino 		    /* Allocate arrays for the tags and parameters. */
2508*d5f8dde1SJohn Marino 		    tags = xmalloc(sizeof(int) * (num_tags + 1));
2509*d5f8dde1SJohn Marino 		    if (!tags)
2510*d5f8dde1SJohn Marino 		      return REG_ESPACE;
2511*d5f8dde1SJohn Marino 		    tags[0] = -1;
2512*d5f8dde1SJohn Marino 		    assertions = 0;
2513*d5f8dde1SJohn Marino 		    params = NULL;
2514*d5f8dde1SJohn Marino 		    if (params_seen)
2515*d5f8dde1SJohn Marino 		      {
2516*d5f8dde1SJohn Marino 			params = tre_mem_alloc(mem, sizeof(*params)
2517*d5f8dde1SJohn Marino 					       * TRE_PARAM_LAST);
2518*d5f8dde1SJohn Marino 			if (!params)
2519*d5f8dde1SJohn Marino 			  {
2520*d5f8dde1SJohn Marino 			    xfree(tags);
2521*d5f8dde1SJohn Marino 			    return REG_ESPACE;
2522*d5f8dde1SJohn Marino 			  }
2523*d5f8dde1SJohn Marino 		      }
2524*d5f8dde1SJohn Marino 		    /* Second pass with tre_mach_empty() to get the list of
2525*d5f8dde1SJohn Marino 		       tags and parameters. */
2526*d5f8dde1SJohn Marino 		    status = tre_match_empty(stack, iter->arg, tags,
2527*d5f8dde1SJohn Marino 					     &assertions, params, NULL, NULL);
2528*d5f8dde1SJohn Marino 		    if (status != REG_OK)
2529*d5f8dde1SJohn Marino 		      {
2530*d5f8dde1SJohn Marino 			xfree(tags);
2531*d5f8dde1SJohn Marino 			return status;
2532*d5f8dde1SJohn Marino 		      }
2533*d5f8dde1SJohn Marino 		    node->lastpos =
2534*d5f8dde1SJohn Marino 		      tre_set_union(mem, iter->arg->lastpos, iter->arg->lastpos,
2535*d5f8dde1SJohn Marino 				    tags, assertions, params);
2536*d5f8dde1SJohn Marino 		    xfree(tags);
2537*d5f8dde1SJohn Marino 		    if (!node->lastpos)
2538*d5f8dde1SJohn Marino 		      return REG_ESPACE;
2539*d5f8dde1SJohn Marino 		  }
2540*d5f8dde1SJohn Marino 		else
25415f2eab64SJohn Marino 		  node->lastpos = iter->arg->lastpos;
2542*d5f8dde1SJohn Marino 	      }
2543*d5f8dde1SJohn Marino 	    else
2544*d5f8dde1SJohn Marino 	      {
2545*d5f8dde1SJohn Marino 		node->nullable = 0;
2546*d5f8dde1SJohn Marino 		node->lastpos = iter->arg->lastpos;
2547*d5f8dde1SJohn Marino 	      }
2548*d5f8dde1SJohn Marino 	    node->firstpos = iter->arg->firstpos;
25495f2eab64SJohn Marino 	    break;
25505f2eab64SJohn Marino 	  }
25515f2eab64SJohn Marino 
25525f2eab64SJohn Marino 	case NFL_POST_CATENATION:
25535f2eab64SJohn Marino 	  {
25545f2eab64SJohn Marino 	    int num_tags, *tags, assertions, params_seen;
25555f2eab64SJohn Marino 	    int *params;
25565f2eab64SJohn Marino 	    reg_errcode_t status;
25575f2eab64SJohn Marino 	    tre_catenation_t *cat = node->obj;
25585f2eab64SJohn Marino 	    node->nullable = cat->left->nullable && cat->right->nullable;
25595f2eab64SJohn Marino 
25605f2eab64SJohn Marino 	    /* Compute firstpos. */
25615f2eab64SJohn Marino 	    if (cat->left->nullable)
25625f2eab64SJohn Marino 	      {
25635f2eab64SJohn Marino 		/* The left side matches the empty string.  Make a first pass
25645f2eab64SJohn Marino 		   with tre_match_empty() to get the number of tags and
25655f2eab64SJohn Marino 		   parameters. */
25665f2eab64SJohn Marino 		status = tre_match_empty(stack, cat->left,
25675f2eab64SJohn Marino 					 NULL, NULL, NULL, &num_tags,
25685f2eab64SJohn Marino 					 &params_seen);
25695f2eab64SJohn Marino 		if (status != REG_OK)
25705f2eab64SJohn Marino 		  return status;
25715f2eab64SJohn Marino 		/* Allocate arrays for the tags and parameters. */
25725f2eab64SJohn Marino 		tags = xmalloc(sizeof(*tags) * (num_tags + 1));
25735f2eab64SJohn Marino 		if (!tags)
25745f2eab64SJohn Marino 		  return REG_ESPACE;
25755f2eab64SJohn Marino 		tags[0] = -1;
25765f2eab64SJohn Marino 		assertions = 0;
25775f2eab64SJohn Marino 		params = NULL;
25785f2eab64SJohn Marino 		if (params_seen)
25795f2eab64SJohn Marino 		  {
25805f2eab64SJohn Marino 		    params = tre_mem_alloc(mem, sizeof(*params)
25815f2eab64SJohn Marino 					   * TRE_PARAM_LAST);
25825f2eab64SJohn Marino 		    if (!params)
25835f2eab64SJohn Marino 		      {
25845f2eab64SJohn Marino 			xfree(tags);
25855f2eab64SJohn Marino 			return REG_ESPACE;
25865f2eab64SJohn Marino 		      }
25875f2eab64SJohn Marino 		  }
25885f2eab64SJohn Marino 		/* Second pass with tre_mach_empty() to get the list of
25895f2eab64SJohn Marino 		   tags and parameters. */
25905f2eab64SJohn Marino 		status = tre_match_empty(stack, cat->left, tags,
25915f2eab64SJohn Marino 					 &assertions, params, NULL, NULL);
25925f2eab64SJohn Marino 		if (status != REG_OK)
25935f2eab64SJohn Marino 		  {
25945f2eab64SJohn Marino 		    xfree(tags);
25955f2eab64SJohn Marino 		    return status;
25965f2eab64SJohn Marino 		  }
25975f2eab64SJohn Marino 		node->firstpos =
25985f2eab64SJohn Marino 		  tre_set_union(mem, cat->right->firstpos, cat->left->firstpos,
25995f2eab64SJohn Marino 				tags, assertions, params);
26005f2eab64SJohn Marino 		xfree(tags);
26015f2eab64SJohn Marino 		if (!node->firstpos)
26025f2eab64SJohn Marino 		  return REG_ESPACE;
26035f2eab64SJohn Marino 	      }
26045f2eab64SJohn Marino 	    else
26055f2eab64SJohn Marino 	      {
26065f2eab64SJohn Marino 		node->firstpos = cat->left->firstpos;
26075f2eab64SJohn Marino 	      }
26085f2eab64SJohn Marino 
26095f2eab64SJohn Marino 	    /* Compute lastpos. */
26105f2eab64SJohn Marino 	    if (cat->right->nullable)
26115f2eab64SJohn Marino 	      {
26125f2eab64SJohn Marino 		/* The right side matches the empty string.  Make a first pass
26135f2eab64SJohn Marino 		   with tre_match_empty() to get the number of tags and
26145f2eab64SJohn Marino 		   parameters. */
26155f2eab64SJohn Marino 		status = tre_match_empty(stack, cat->right,
26165f2eab64SJohn Marino 					 NULL, NULL, NULL, &num_tags,
26175f2eab64SJohn Marino 					 &params_seen);
26185f2eab64SJohn Marino 		if (status != REG_OK)
26195f2eab64SJohn Marino 		  return status;
26205f2eab64SJohn Marino 		/* Allocate arrays for the tags and parameters. */
26215f2eab64SJohn Marino 		tags = xmalloc(sizeof(int) * (num_tags + 1));
26225f2eab64SJohn Marino 		if (!tags)
26235f2eab64SJohn Marino 		  return REG_ESPACE;
26245f2eab64SJohn Marino 		tags[0] = -1;
26255f2eab64SJohn Marino 		assertions = 0;
26265f2eab64SJohn Marino 		params = NULL;
26275f2eab64SJohn Marino 		if (params_seen)
26285f2eab64SJohn Marino 		  {
26295f2eab64SJohn Marino 		    params = tre_mem_alloc(mem, sizeof(*params)
26305f2eab64SJohn Marino 					   * TRE_PARAM_LAST);
26315f2eab64SJohn Marino 		    if (!params)
26325f2eab64SJohn Marino 		      {
26335f2eab64SJohn Marino 			xfree(tags);
26345f2eab64SJohn Marino 			return REG_ESPACE;
26355f2eab64SJohn Marino 		      }
26365f2eab64SJohn Marino 		  }
26375f2eab64SJohn Marino 		/* Second pass with tre_mach_empty() to get the list of
26385f2eab64SJohn Marino 		   tags and parameters. */
26395f2eab64SJohn Marino 		status = tre_match_empty(stack, cat->right, tags,
26405f2eab64SJohn Marino 					 &assertions, params, NULL, NULL);
26415f2eab64SJohn Marino 		if (status != REG_OK)
26425f2eab64SJohn Marino 		  {
26435f2eab64SJohn Marino 		    xfree(tags);
26445f2eab64SJohn Marino 		    return status;
26455f2eab64SJohn Marino 		  }
26465f2eab64SJohn Marino 		node->lastpos =
26475f2eab64SJohn Marino 		  tre_set_union(mem, cat->left->lastpos, cat->right->lastpos,
26485f2eab64SJohn Marino 				tags, assertions, params);
26495f2eab64SJohn Marino 		xfree(tags);
26505f2eab64SJohn Marino 		if (!node->lastpos)
26515f2eab64SJohn Marino 		  return REG_ESPACE;
26525f2eab64SJohn Marino 	      }
26535f2eab64SJohn Marino 	    else
26545f2eab64SJohn Marino 	      {
26555f2eab64SJohn Marino 		node->lastpos = cat->right->lastpos;
26565f2eab64SJohn Marino 	      }
26575f2eab64SJohn Marino 	    break;
26585f2eab64SJohn Marino 	  }
26595f2eab64SJohn Marino 
26605f2eab64SJohn Marino 	default:
26615f2eab64SJohn Marino 	  assert(0);
26625f2eab64SJohn Marino 	  break;
26635f2eab64SJohn Marino 	}
26645f2eab64SJohn Marino     }
26655f2eab64SJohn Marino 
26665f2eab64SJohn Marino   return REG_OK;
26675f2eab64SJohn Marino }
26685f2eab64SJohn Marino 
26695f2eab64SJohn Marino 
26705f2eab64SJohn Marino /* Adds a transition from each position in `p1' to each position in `p2'. */
26715f2eab64SJohn Marino static reg_errcode_t
tre_make_trans(tre_pos_and_tags_t * p1,tre_pos_and_tags_t * p2,tre_tnfa_transition_t * transitions,int * counts,int * offs)26725f2eab64SJohn Marino tre_make_trans(tre_pos_and_tags_t *p1, tre_pos_and_tags_t *p2,
26735f2eab64SJohn Marino 	       tre_tnfa_transition_t *transitions,
26745f2eab64SJohn Marino 	       int *counts, int *offs)
26755f2eab64SJohn Marino {
26765f2eab64SJohn Marino   tre_pos_and_tags_t *orig_p2 = p2;
26775f2eab64SJohn Marino   tre_tnfa_transition_t *trans;
26785f2eab64SJohn Marino   int i, j, k, l, dup, prev_p2_pos;
26795f2eab64SJohn Marino 
26805f2eab64SJohn Marino   if (transitions != NULL)
26815f2eab64SJohn Marino     while (p1->position >= 0)
26825f2eab64SJohn Marino       {
26835f2eab64SJohn Marino 	p2 = orig_p2;
26845f2eab64SJohn Marino 	prev_p2_pos = -1;
26855f2eab64SJohn Marino 	while (p2->position >= 0)
26865f2eab64SJohn Marino 	  {
26875f2eab64SJohn Marino 	    /* Optimization: if this position was already handled, skip it. */
26885f2eab64SJohn Marino 	    if (p2->position == prev_p2_pos)
26895f2eab64SJohn Marino 	      {
26905f2eab64SJohn Marino 		p2++;
26915f2eab64SJohn Marino 		continue;
26925f2eab64SJohn Marino 	      }
26935f2eab64SJohn Marino 	    prev_p2_pos = p2->position;
26945f2eab64SJohn Marino 	    /* Set `trans' to point to the next unused transition from
26955f2eab64SJohn Marino 	       position `p1->position'. */
26965f2eab64SJohn Marino 	    trans = transitions + offs[p1->position];
26975f2eab64SJohn Marino 	    while (trans->state != NULL)
26985f2eab64SJohn Marino 	      {
26995f2eab64SJohn Marino #if 0
27005f2eab64SJohn Marino 		/* If we find a previous transition from `p1->position' to
27015f2eab64SJohn Marino 		   `p2->position', it is overwritten.  This can happen only
27025f2eab64SJohn Marino 		   if there are nested loops in the regexp, like in "((a)*)*".
27035f2eab64SJohn Marino 		   In POSIX.2 repetition using the outer loop is always
27045f2eab64SJohn Marino 		   preferred over using the inner loop.	 Therefore the
27055f2eab64SJohn Marino 		   transition for the inner loop is useless and can be thrown
27065f2eab64SJohn Marino 		   away. */
27075f2eab64SJohn Marino 		/* XXX - The same position is used for all nodes in a bracket
27085f2eab64SJohn Marino 		   expression, so this optimization cannot be used (it will
27095f2eab64SJohn Marino 		   break bracket expressions) unless I figure out a way to
27105f2eab64SJohn Marino 		   detect it here. */
27115f2eab64SJohn Marino 		if (trans->state_id == p2->position)
27125f2eab64SJohn Marino 		  {
27135f2eab64SJohn Marino 		    DPRINT(("*"));
27145f2eab64SJohn Marino 		    break;
27155f2eab64SJohn Marino 		  }
27165f2eab64SJohn Marino #endif
27175f2eab64SJohn Marino 		trans++;
27185f2eab64SJohn Marino 	      }
27195f2eab64SJohn Marino 
27205f2eab64SJohn Marino 	    if (trans->state == NULL)
27215f2eab64SJohn Marino 	      (trans + 1)->state = NULL;
27225f2eab64SJohn Marino 	    /* Use the character ranges, assertions, etc. from `p1' for
27235f2eab64SJohn Marino 	       the transition from `p1' to `p2'. */
27245f2eab64SJohn Marino 	    trans->code_min = p1->code_min;
27255f2eab64SJohn Marino 	    trans->code_max = p1->code_max;
27265f2eab64SJohn Marino 	    trans->state = transitions + offs[p2->position];
27275f2eab64SJohn Marino 	    trans->state_id = p2->position;
27285f2eab64SJohn Marino 	    trans->assertions = p1->assertions | p2->assertions
2729*d5f8dde1SJohn Marino 	      | (p1->bracket_match_list != NULL ? ASSERT_BRACKET_MATCH : 0);
27305f2eab64SJohn Marino 	    if (p1->backref >= 0)
27315f2eab64SJohn Marino 	      {
2732*d5f8dde1SJohn Marino 		assert((trans->assertions & ASSERT_BRACKET_MATCH) == 0);
27335f2eab64SJohn Marino 		assert(p2->backref < 0);
27345f2eab64SJohn Marino 		trans->u.backref = p1->backref;
27355f2eab64SJohn Marino 		trans->assertions |= ASSERT_BACKREF;
27365f2eab64SJohn Marino 	      }
2737*d5f8dde1SJohn Marino 	    if (p1->bracket_match_list != NULL)
27385f2eab64SJohn Marino 	      {
2739*d5f8dde1SJohn Marino 		trans->u.bracket_match_list =
2740*d5f8dde1SJohn Marino 		  xmalloc(SIZEOF_BRACKET_MATCH_LIST(p1->bracket_match_list));
2741*d5f8dde1SJohn Marino 		if (trans->u.bracket_match_list == NULL)
27425f2eab64SJohn Marino 		  return REG_ESPACE;
2743*d5f8dde1SJohn Marino 		memcpy(trans->u.bracket_match_list, p1->bracket_match_list,
2744*d5f8dde1SJohn Marino 		       SIZEOF_BRACKET_MATCH_LIST(p1->bracket_match_list));
27455f2eab64SJohn Marino 	      }
27465f2eab64SJohn Marino 
27475f2eab64SJohn Marino 	    /* Find out how many tags this transition has. */
27485f2eab64SJohn Marino 	    i = 0;
27495f2eab64SJohn Marino 	    if (p1->tags != NULL)
27505f2eab64SJohn Marino 	      while(p1->tags[i] >= 0)
27515f2eab64SJohn Marino 		i++;
27525f2eab64SJohn Marino 	    j = 0;
27535f2eab64SJohn Marino 	    if (p2->tags != NULL)
27545f2eab64SJohn Marino 	      while(p2->tags[j] >= 0)
27555f2eab64SJohn Marino 		j++;
27565f2eab64SJohn Marino 
27575f2eab64SJohn Marino 	    /* If we are overwriting a transition, free the old tag array. */
27585f2eab64SJohn Marino 	    if (trans->tags != NULL)
27595f2eab64SJohn Marino 	      xfree(trans->tags);
27605f2eab64SJohn Marino 	    trans->tags = NULL;
27615f2eab64SJohn Marino 
27625f2eab64SJohn Marino 	    /* If there were any tags, allocate an array and fill it. */
27635f2eab64SJohn Marino 	    if (i + j > 0)
27645f2eab64SJohn Marino 	      {
27655f2eab64SJohn Marino 		trans->tags = xmalloc(sizeof(*trans->tags) * (i + j + 1));
27665f2eab64SJohn Marino 		if (!trans->tags)
27675f2eab64SJohn Marino 		  return REG_ESPACE;
27685f2eab64SJohn Marino 		i = 0;
27695f2eab64SJohn Marino 		if (p1->tags != NULL)
27705f2eab64SJohn Marino 		  while(p1->tags[i] >= 0)
27715f2eab64SJohn Marino 		    {
27725f2eab64SJohn Marino 		      trans->tags[i] = p1->tags[i];
27735f2eab64SJohn Marino 		      i++;
27745f2eab64SJohn Marino 		    }
27755f2eab64SJohn Marino 		l = i;
27765f2eab64SJohn Marino 		j = 0;
27775f2eab64SJohn Marino 		if (p2->tags != NULL)
27785f2eab64SJohn Marino 		  while (p2->tags[j] >= 0)
27795f2eab64SJohn Marino 		    {
27805f2eab64SJohn Marino 		      /* Don't add duplicates. */
27815f2eab64SJohn Marino 		      dup = 0;
27825f2eab64SJohn Marino 		      for (k = 0; k < i; k++)
27835f2eab64SJohn Marino 			if (trans->tags[k] == p2->tags[j])
27845f2eab64SJohn Marino 			  {
27855f2eab64SJohn Marino 			    dup = 1;
27865f2eab64SJohn Marino 			    break;
27875f2eab64SJohn Marino 			  }
27885f2eab64SJohn Marino 		      if (!dup)
27895f2eab64SJohn Marino 			trans->tags[l++] = p2->tags[j];
27905f2eab64SJohn Marino 		      j++;
27915f2eab64SJohn Marino 		    }
27925f2eab64SJohn Marino 		trans->tags[l] = -1;
27935f2eab64SJohn Marino 	      }
27945f2eab64SJohn Marino 
27955f2eab64SJohn Marino 	    /* Set the parameter array.	 If both `p2' and `p1' have same
27965f2eab64SJohn Marino 	       parameters, the values in `p2' override those in `p1'. */
27975f2eab64SJohn Marino 	    if (p1->params || p2->params)
27985f2eab64SJohn Marino 	      {
27995f2eab64SJohn Marino 		if (!trans->params)
28005f2eab64SJohn Marino 		  trans->params = xmalloc(sizeof(*trans->params)
28015f2eab64SJohn Marino 					  * TRE_PARAM_LAST);
28025f2eab64SJohn Marino 		if (!trans->params)
28035f2eab64SJohn Marino 		  return REG_ESPACE;
28045f2eab64SJohn Marino 		for (i = 0; i < TRE_PARAM_LAST; i++)
28055f2eab64SJohn Marino 		  {
28065f2eab64SJohn Marino 		    trans->params[i] = TRE_PARAM_UNSET;
28075f2eab64SJohn Marino 		    if (p1->params && p1->params[i] != TRE_PARAM_UNSET)
28085f2eab64SJohn Marino 		      trans->params[i] = p1->params[i];
28095f2eab64SJohn Marino 		    if (p2->params && p2->params[i] != TRE_PARAM_UNSET)
28105f2eab64SJohn Marino 		      trans->params[i] = p2->params[i];
28115f2eab64SJohn Marino 		  }
28125f2eab64SJohn Marino 	      }
28135f2eab64SJohn Marino 	    else
28145f2eab64SJohn Marino 	      {
28155f2eab64SJohn Marino 		if (trans->params)
28165f2eab64SJohn Marino 		  xfree(trans->params);
28175f2eab64SJohn Marino 		trans->params = NULL;
28185f2eab64SJohn Marino 	      }
28195f2eab64SJohn Marino 
28205f2eab64SJohn Marino 
28215f2eab64SJohn Marino #ifdef TRE_DEBUG
28225f2eab64SJohn Marino 	    {
28235f2eab64SJohn Marino 	      int *tags;
28245f2eab64SJohn Marino 
28255f2eab64SJohn Marino 	      DPRINT(("	 %2d -> %2d on %3d", p1->position, p2->position,
28265f2eab64SJohn Marino 		      p1->code_min));
28275f2eab64SJohn Marino 	      if (p1->code_max != p1->code_min)
28285f2eab64SJohn Marino 		DPRINT(("-%3d", p1->code_max));
28295f2eab64SJohn Marino 	      tags = trans->tags;
28305f2eab64SJohn Marino 	      if (tags)
28315f2eab64SJohn Marino 		{
28325f2eab64SJohn Marino 		  DPRINT((", tags ["));
28335f2eab64SJohn Marino 		  while (*tags >= 0)
28345f2eab64SJohn Marino 		    {
28355f2eab64SJohn Marino 		      DPRINT(("%d", *tags));
28365f2eab64SJohn Marino 		      tags++;
28375f2eab64SJohn Marino 		      if (*tags >= 0)
28385f2eab64SJohn Marino 			DPRINT((","));
28395f2eab64SJohn Marino 		    }
28405f2eab64SJohn Marino 		  DPRINT(("]"));
28415f2eab64SJohn Marino 		}
28425f2eab64SJohn Marino 	      if (trans->assertions)
28435f2eab64SJohn Marino 		DPRINT((", assert %d", trans->assertions));
28445f2eab64SJohn Marino 	      if (trans->assertions & ASSERT_BACKREF)
28455f2eab64SJohn Marino 		DPRINT((", backref %d", trans->u.backref));
2846*d5f8dde1SJohn Marino 	      else if (trans->assertions & ASSERT_BRACKET_MATCH)
2847*d5f8dde1SJohn Marino 		DPRINT((", bracket_match_list %p",
2848*d5f8dde1SJohn Marino 			trans->u.bracket_match_list));
28495f2eab64SJohn Marino 	      if (trans->params)
28505f2eab64SJohn Marino 		{
28515f2eab64SJohn Marino 		  DPRINT((", "));
28525f2eab64SJohn Marino 		  tre_print_params(trans->params);
28535f2eab64SJohn Marino 		}
28545f2eab64SJohn Marino 	      DPRINT(("\n"));
28555f2eab64SJohn Marino 	    }
28565f2eab64SJohn Marino #endif /* TRE_DEBUG */
28575f2eab64SJohn Marino 	    p2++;
28585f2eab64SJohn Marino 	  }
28595f2eab64SJohn Marino 	p1++;
28605f2eab64SJohn Marino       }
28615f2eab64SJohn Marino   else
28625f2eab64SJohn Marino     /* Compute a maximum limit for the number of transitions leaving
28635f2eab64SJohn Marino        from each state. */
28645f2eab64SJohn Marino     while (p1->position >= 0)
28655f2eab64SJohn Marino       {
28665f2eab64SJohn Marino 	p2 = orig_p2;
28675f2eab64SJohn Marino 	while (p2->position >= 0)
28685f2eab64SJohn Marino 	  {
28695f2eab64SJohn Marino 	    counts[p1->position]++;
28705f2eab64SJohn Marino 	    p2++;
28715f2eab64SJohn Marino 	  }
28725f2eab64SJohn Marino 	p1++;
28735f2eab64SJohn Marino       }
28745f2eab64SJohn Marino   return REG_OK;
28755f2eab64SJohn Marino }
28765f2eab64SJohn Marino 
28775f2eab64SJohn Marino /* Converts the syntax tree to a TNFA.	All the transitions in the TNFA are
28785f2eab64SJohn Marino    labelled with one character range (there are no transitions on empty
28795f2eab64SJohn Marino    strings).  The TNFA takes O(n^2) space in the worst case, `n' is size of
28805f2eab64SJohn Marino    the regexp. */
28815f2eab64SJohn Marino static reg_errcode_t
tre_ast_to_tnfa(tre_ast_node_t * node,tre_tnfa_transition_t * transitions,int * counts,int * offs)28825f2eab64SJohn Marino tre_ast_to_tnfa(tre_ast_node_t *node, tre_tnfa_transition_t *transitions,
28835f2eab64SJohn Marino 		int *counts, int *offs)
28845f2eab64SJohn Marino {
28855f2eab64SJohn Marino   tre_union_t *uni;
28865f2eab64SJohn Marino   tre_catenation_t *cat;
28875f2eab64SJohn Marino   tre_iteration_t *iter;
28885f2eab64SJohn Marino   reg_errcode_t errcode = REG_OK;
28895f2eab64SJohn Marino 
28905f2eab64SJohn Marino   /* XXX - recurse using a stack!. */
28915f2eab64SJohn Marino   switch (node->type)
28925f2eab64SJohn Marino     {
28935f2eab64SJohn Marino     case LITERAL:
28945f2eab64SJohn Marino       break;
28955f2eab64SJohn Marino     case UNION:
28965f2eab64SJohn Marino       uni = (tre_union_t *)node->obj;
28975f2eab64SJohn Marino       errcode = tre_ast_to_tnfa(uni->left, transitions, counts, offs);
28985f2eab64SJohn Marino       if (errcode != REG_OK)
28995f2eab64SJohn Marino 	return errcode;
29005f2eab64SJohn Marino       errcode = tre_ast_to_tnfa(uni->right, transitions, counts, offs);
29015f2eab64SJohn Marino       break;
29025f2eab64SJohn Marino 
29035f2eab64SJohn Marino     case CATENATION:
29045f2eab64SJohn Marino       cat = (tre_catenation_t *)node->obj;
29055f2eab64SJohn Marino       /* Add a transition from each position in cat->left->lastpos
29065f2eab64SJohn Marino 	 to each position in cat->right->firstpos. */
29075f2eab64SJohn Marino       errcode = tre_make_trans(cat->left->lastpos, cat->right->firstpos,
29085f2eab64SJohn Marino 			       transitions, counts, offs);
29095f2eab64SJohn Marino       if (errcode != REG_OK)
29105f2eab64SJohn Marino 	return errcode;
29115f2eab64SJohn Marino       errcode = tre_ast_to_tnfa(cat->left, transitions, counts, offs);
29125f2eab64SJohn Marino       if (errcode != REG_OK)
29135f2eab64SJohn Marino 	return errcode;
29145f2eab64SJohn Marino       errcode = tre_ast_to_tnfa(cat->right, transitions, counts, offs);
29155f2eab64SJohn Marino       break;
29165f2eab64SJohn Marino 
29175f2eab64SJohn Marino     case ITERATION:
29185f2eab64SJohn Marino       iter = (tre_iteration_t *)node->obj;
29195f2eab64SJohn Marino       assert(iter->max == -1 || iter->max == 1);
29205f2eab64SJohn Marino 
29215f2eab64SJohn Marino       if (iter->max == -1)
29225f2eab64SJohn Marino 	{
29235f2eab64SJohn Marino 	  assert(iter->min == 0 || iter->min == 1);
29245f2eab64SJohn Marino 	  /* Add a transition from each last position in the iterated
29255f2eab64SJohn Marino 	     expression to each first position. */
29265f2eab64SJohn Marino 	  errcode = tre_make_trans(iter->arg->lastpos, iter->arg->firstpos,
29275f2eab64SJohn Marino 				   transitions, counts, offs);
29285f2eab64SJohn Marino 	  if (errcode != REG_OK)
29295f2eab64SJohn Marino 	    return errcode;
29305f2eab64SJohn Marino 	}
29315f2eab64SJohn Marino       errcode = tre_ast_to_tnfa(iter->arg, transitions, counts, offs);
29325f2eab64SJohn Marino       break;
29335f2eab64SJohn Marino     }
29345f2eab64SJohn Marino   return errcode;
29355f2eab64SJohn Marino }
29365f2eab64SJohn Marino 
29375f2eab64SJohn Marino 
29385f2eab64SJohn Marino #define ERROR_EXIT(err)		  \
29395f2eab64SJohn Marino   do				  \
29405f2eab64SJohn Marino     {				  \
29415f2eab64SJohn Marino       errcode = err;		  \
29425f2eab64SJohn Marino       if (/*CONSTCOND*/1)	  \
29435f2eab64SJohn Marino       	goto error_exit;	  \
29445f2eab64SJohn Marino     }				  \
29455f2eab64SJohn Marino  while (/*CONSTCOND*/0)
29465f2eab64SJohn Marino 
29475f2eab64SJohn Marino 
29485f2eab64SJohn Marino int
tre_compile(regex_t * preg,const tre_char_t * regex,size_t n,int cflags,locale_t loc)2949*d5f8dde1SJohn Marino tre_compile(regex_t *preg, const tre_char_t *regex, size_t n, int cflags,
2950*d5f8dde1SJohn Marino 	    locale_t loc)
29515f2eab64SJohn Marino {
29525f2eab64SJohn Marino   tre_stack_t *stack;
29535f2eab64SJohn Marino   tre_ast_node_t *tree, *tmp_ast_l, *tmp_ast_r;
29545f2eab64SJohn Marino   tre_pos_and_tags_t *p;
29555f2eab64SJohn Marino   int *counts = NULL, *offs = NULL;
29565f2eab64SJohn Marino   int i, add = 0;
29575f2eab64SJohn Marino   tre_tnfa_transition_t *transitions, *initial;
29585f2eab64SJohn Marino   tre_tnfa_t *tnfa = NULL;
2959*d5f8dde1SJohn Marino   tre_submatch_data_t *submatch_data = NULL;
29605f2eab64SJohn Marino   tre_tag_direction_t *tag_directions = NULL;
29615f2eab64SJohn Marino   reg_errcode_t errcode;
29625f2eab64SJohn Marino   tre_mem_t mem;
29635f2eab64SJohn Marino 
29645f2eab64SJohn Marino   /* Parse context. */
29655f2eab64SJohn Marino   tre_parse_ctx_t parse_ctx;
29665f2eab64SJohn Marino 
29675f2eab64SJohn Marino   /* Allocate a stack used throughout the compilation process for various
29685f2eab64SJohn Marino      purposes. */
29695f2eab64SJohn Marino   stack = tre_stack_new(512, 10240, 128);
29705f2eab64SJohn Marino   if (!stack)
29715f2eab64SJohn Marino     return REG_ESPACE;
29725f2eab64SJohn Marino   /* Allocate a fast memory allocator. */
29735f2eab64SJohn Marino   mem = tre_mem_new();
29745f2eab64SJohn Marino   if (!mem)
29755f2eab64SJohn Marino     {
29765f2eab64SJohn Marino       tre_stack_destroy(stack);
29775f2eab64SJohn Marino       return REG_ESPACE;
29785f2eab64SJohn Marino     }
29795f2eab64SJohn Marino 
29805f2eab64SJohn Marino   /* Parse the regexp. */
29815f2eab64SJohn Marino   memset(&parse_ctx, 0, sizeof(parse_ctx));
29825f2eab64SJohn Marino   parse_ctx.mem = mem;
29835f2eab64SJohn Marino   parse_ctx.stack = stack;
29845f2eab64SJohn Marino   parse_ctx.re = regex;
29855f2eab64SJohn Marino   parse_ctx.len = n;
2986*d5f8dde1SJohn Marino   /* Only allow REG_UNGREEDY to be set if both REG_ENHANCED and REG_EXTENDED
2987*d5f8dde1SJohn Marino      are also set */
2988*d5f8dde1SJohn Marino   if ((cflags & (REG_ENHANCED | REG_EXTENDED)) != (REG_ENHANCED | REG_EXTENDED))
2989*d5f8dde1SJohn Marino     cflags &= ~REG_UNGREEDY;
29905f2eab64SJohn Marino   parse_ctx.cflags = cflags;
29915f2eab64SJohn Marino   parse_ctx.max_backref = -1;
2992*d5f8dde1SJohn Marino   parse_ctx.loc = loc;
2993*d5f8dde1SJohn Marino   parse_ctx.submatch_id_invisible = SUBMATCH_ID_INVISIBLE_START;
2994*d5f8dde1SJohn Marino 
29955f2eab64SJohn Marino   DPRINT(("tre_compile: parsing '%.*" STRF "'\n", (int)n, regex));
29965f2eab64SJohn Marino   errcode = tre_parse(&parse_ctx);
29975f2eab64SJohn Marino   if (errcode != REG_OK)
29985f2eab64SJohn Marino     ERROR_EXIT(errcode);
29995f2eab64SJohn Marino   preg->re_nsub = parse_ctx.submatch_id - 1;
30005f2eab64SJohn Marino   tree = parse_ctx.result;
30015f2eab64SJohn Marino 
30025f2eab64SJohn Marino   /* Back references and approximate matching cannot currently be used
30035f2eab64SJohn Marino      in the same regexp. */
30045f2eab64SJohn Marino   if (parse_ctx.max_backref >= 0 && parse_ctx.have_approx)
30055f2eab64SJohn Marino     ERROR_EXIT(REG_BADPAT);
30065f2eab64SJohn Marino 
30075f2eab64SJohn Marino #ifdef TRE_DEBUG
30085f2eab64SJohn Marino   tre_ast_print(tree);
30095f2eab64SJohn Marino #endif /* TRE_DEBUG */
30105f2eab64SJohn Marino 
30115f2eab64SJohn Marino   /* Referring to nonexistent subexpressions is illegal. */
30125f2eab64SJohn Marino   if (parse_ctx.max_backref > (int)preg->re_nsub)
30135f2eab64SJohn Marino     ERROR_EXIT(REG_ESUBREG);
30145f2eab64SJohn Marino 
30155f2eab64SJohn Marino   /* Allocate the TNFA struct. */
30165f2eab64SJohn Marino   tnfa = xcalloc(1, sizeof(tre_tnfa_t));
30175f2eab64SJohn Marino   if (tnfa == NULL)
30185f2eab64SJohn Marino     ERROR_EXIT(REG_ESPACE);
30195f2eab64SJohn Marino   tnfa->have_backrefs = parse_ctx.max_backref >= 0;
30205f2eab64SJohn Marino   tnfa->have_approx = parse_ctx.have_approx;
30215f2eab64SJohn Marino   tnfa->num_submatches = parse_ctx.submatch_id;
3022*d5f8dde1SJohn Marino   tnfa->num_submatches_invisible = parse_ctx.submatch_id_invisible
3023*d5f8dde1SJohn Marino 				   - SUBMATCH_ID_INVISIBLE_START;
3024*d5f8dde1SJohn Marino   tnfa->num_reorder_tags = parse_ctx.num_reorder_tags;
3025*d5f8dde1SJohn Marino   tnfa->loc = parse_ctx.loc;
30265f2eab64SJohn Marino 
30275f2eab64SJohn Marino   /* Set up tags for submatch addressing.  If REG_NOSUB is set and the
30285f2eab64SJohn Marino      regexp does not have back references, this can be skipped. */
3029*d5f8dde1SJohn Marino   if (tnfa->num_reorder_tags > 0 || !(cflags & REG_NOSUB))
30305f2eab64SJohn Marino     {
30315f2eab64SJohn Marino       DPRINT(("tre_compile: setting up tags\n"));
30325f2eab64SJohn Marino 
30335f2eab64SJohn Marino       /* Figure out how many tags we will need. */
30345f2eab64SJohn Marino       errcode = tre_add_tags(NULL, stack, tree, tnfa);
30355f2eab64SJohn Marino       if (errcode != REG_OK)
30365f2eab64SJohn Marino 	ERROR_EXIT(errcode);
30375f2eab64SJohn Marino #ifdef TRE_DEBUG
30385f2eab64SJohn Marino       tre_ast_print(tree);
30395f2eab64SJohn Marino #endif /* TRE_DEBUG */
30405f2eab64SJohn Marino 
30415f2eab64SJohn Marino       if (tnfa->num_tags > 0)
30425f2eab64SJohn Marino 	{
30435f2eab64SJohn Marino 	  tag_directions = xmalloc(sizeof(*tag_directions)
30445f2eab64SJohn Marino 				   * (tnfa->num_tags + 1));
30455f2eab64SJohn Marino 	  if (tag_directions == NULL)
30465f2eab64SJohn Marino 	    ERROR_EXIT(REG_ESPACE);
30475f2eab64SJohn Marino 	  tnfa->tag_directions = tag_directions;
30485f2eab64SJohn Marino 	  memset(tag_directions, -1,
30495f2eab64SJohn Marino 		 sizeof(*tag_directions) * (tnfa->num_tags + 1));
30505f2eab64SJohn Marino 	}
3051*d5f8dde1SJohn Marino       tnfa->minimal_tags = xcalloc((unsigned)tnfa->num_tags * 2 + 3,
30525f2eab64SJohn Marino 				   sizeof(tnfa->minimal_tags));
30535f2eab64SJohn Marino       if (tnfa->minimal_tags == NULL)
30545f2eab64SJohn Marino 	ERROR_EXIT(REG_ESPACE);
30555f2eab64SJohn Marino 
30565f2eab64SJohn Marino       submatch_data = xcalloc((unsigned)parse_ctx.submatch_id,
30575f2eab64SJohn Marino 			      sizeof(*submatch_data));
30585f2eab64SJohn Marino       if (submatch_data == NULL)
30595f2eab64SJohn Marino 	ERROR_EXIT(REG_ESPACE);
3060*d5f8dde1SJohn Marino       /* Set the eo_tag value to -1 to indicate that that corresponding
3061*d5f8dde1SJohn Marino        * submatch has not be completed yet */
3062*d5f8dde1SJohn Marino       for (i = 0; i < parse_ctx.submatch_id; i++)
3063*d5f8dde1SJohn Marino 	{
3064*d5f8dde1SJohn Marino 	  submatch_data[i].eo_tag = -1;
3065*d5f8dde1SJohn Marino 	}
30665f2eab64SJohn Marino       tnfa->submatch_data = submatch_data;
30675f2eab64SJohn Marino 
30685f2eab64SJohn Marino       errcode = tre_add_tags(mem, stack, tree, tnfa);
30695f2eab64SJohn Marino       if (errcode != REG_OK)
30705f2eab64SJohn Marino 	ERROR_EXIT(errcode);
30715f2eab64SJohn Marino 
30725f2eab64SJohn Marino #ifdef TRE_DEBUG
30735f2eab64SJohn Marino       for (i = 0; i < parse_ctx.submatch_id; i++)
30745f2eab64SJohn Marino 	DPRINT(("pmatch[%d] = {t%d, t%d}\n",
30755f2eab64SJohn Marino 		i, submatch_data[i].so_tag, submatch_data[i].eo_tag));
3076*d5f8dde1SJohn Marino       for (i = 0; i <= tnfa->num_tags; i++)
3077*d5f8dde1SJohn Marino 	DPRINT(("t%d is %s\n", i, tag_dir_str[tag_directions[i]]));
30785f2eab64SJohn Marino #endif /* TRE_DEBUG */
30795f2eab64SJohn Marino     }
30805f2eab64SJohn Marino 
30815f2eab64SJohn Marino   /* Expand iteration nodes. */
30825f2eab64SJohn Marino   errcode = tre_expand_ast(mem, stack, tree, &parse_ctx.position,
30835f2eab64SJohn Marino 			   tag_directions, &tnfa->params_depth);
30845f2eab64SJohn Marino   if (errcode != REG_OK)
30855f2eab64SJohn Marino     ERROR_EXIT(errcode);
30865f2eab64SJohn Marino 
30875f2eab64SJohn Marino   /* Add a dummy node for the final state.
30885f2eab64SJohn Marino      XXX - For certain patterns this dummy node can be optimized away,
30895f2eab64SJohn Marino 	   for example "a*" or "ab*".	Figure out a simple way to detect
30905f2eab64SJohn Marino 	   this possibility. */
30915f2eab64SJohn Marino   tmp_ast_l = tree;
30925f2eab64SJohn Marino   tmp_ast_r = tre_ast_new_literal(mem, 0, 0, parse_ctx.position++);
30935f2eab64SJohn Marino   if (tmp_ast_r == NULL)
30945f2eab64SJohn Marino     ERROR_EXIT(REG_ESPACE);
30955f2eab64SJohn Marino 
30965f2eab64SJohn Marino   tree = tre_ast_new_catenation(mem, tmp_ast_l, tmp_ast_r);
30975f2eab64SJohn Marino   if (tree == NULL)
30985f2eab64SJohn Marino     ERROR_EXIT(REG_ESPACE);
30995f2eab64SJohn Marino 
31005f2eab64SJohn Marino #ifdef TRE_DEBUG
31015f2eab64SJohn Marino   tre_ast_print(tree);
31025f2eab64SJohn Marino   DPRINT(("Number of states: %d\n", parse_ctx.position));
3103*d5f8dde1SJohn Marino   if (submatch_data)
3104*d5f8dde1SJohn Marino     for (i = 0; i < parse_ctx.submatch_id; i++)
3105*d5f8dde1SJohn Marino       DPRINT(("pmatch[%d] = {t%d, t%d}\n",
3106*d5f8dde1SJohn Marino 	      i, submatch_data[i].so_tag, submatch_data[i].eo_tag));
3107*d5f8dde1SJohn Marino   if (tag_directions)
3108*d5f8dde1SJohn Marino     for (i = 0; i <= tnfa->num_tags; i++)
3109*d5f8dde1SJohn Marino       DPRINT(("t%d is %s\n", i, tag_dir_str[tag_directions[i]]));
31105f2eab64SJohn Marino #endif /* TRE_DEBUG */
31115f2eab64SJohn Marino 
31125f2eab64SJohn Marino   errcode = tre_compute_nfl(mem, stack, tree);
31135f2eab64SJohn Marino   if (errcode != REG_OK)
31145f2eab64SJohn Marino     ERROR_EXIT(errcode);
31155f2eab64SJohn Marino 
31165f2eab64SJohn Marino   counts = xmalloc(sizeof(int) * parse_ctx.position);
31175f2eab64SJohn Marino   if (counts == NULL)
31185f2eab64SJohn Marino     ERROR_EXIT(REG_ESPACE);
31195f2eab64SJohn Marino 
31205f2eab64SJohn Marino   offs = xmalloc(sizeof(int) * parse_ctx.position);
31215f2eab64SJohn Marino   if (offs == NULL)
31225f2eab64SJohn Marino     ERROR_EXIT(REG_ESPACE);
31235f2eab64SJohn Marino 
31245f2eab64SJohn Marino   for (i = 0; i < parse_ctx.position; i++)
31255f2eab64SJohn Marino     counts[i] = 0;
31265f2eab64SJohn Marino   tre_ast_to_tnfa(tree, NULL, counts, NULL);
31275f2eab64SJohn Marino 
31285f2eab64SJohn Marino   add = 0;
31295f2eab64SJohn Marino   for (i = 0; i < parse_ctx.position; i++)
31305f2eab64SJohn Marino     {
31315f2eab64SJohn Marino       offs[i] = add;
31325f2eab64SJohn Marino       add += counts[i] + 1;
31335f2eab64SJohn Marino       counts[i] = 0;
31345f2eab64SJohn Marino     }
31355f2eab64SJohn Marino   transitions = xcalloc((unsigned)add + 1, sizeof(*transitions));
31365f2eab64SJohn Marino   if (transitions == NULL)
31375f2eab64SJohn Marino     ERROR_EXIT(REG_ESPACE);
31385f2eab64SJohn Marino   tnfa->transitions = transitions;
31395f2eab64SJohn Marino   tnfa->num_transitions = add;
31405f2eab64SJohn Marino 
31415f2eab64SJohn Marino   DPRINT(("Converting to TNFA:\n"));
31425f2eab64SJohn Marino   errcode = tre_ast_to_tnfa(tree, transitions, counts, offs);
31435f2eab64SJohn Marino   if (errcode != REG_OK)
31445f2eab64SJohn Marino     ERROR_EXIT(errcode);
31455f2eab64SJohn Marino 
3146*d5f8dde1SJohn Marino 
3147*d5f8dde1SJohn Marino   /* Set first_char only if there is only one character that can be the
3148*d5f8dde1SJohn Marino      first character of a match */
31495f2eab64SJohn Marino   tnfa->first_char = -1;
3150*d5f8dde1SJohn Marino   if (!tmp_ast_l->nullable)
31515f2eab64SJohn Marino     {
3152*d5f8dde1SJohn Marino       int scanning = 1;
3153*d5f8dde1SJohn Marino       for (p = tree->firstpos; scanning && p->position >= 0; p++)
31545f2eab64SJohn Marino 	{
31555f2eab64SJohn Marino 	  tre_tnfa_transition_t *j = transitions + offs[p->position];
31565f2eab64SJohn Marino 	  while (j->state != NULL)
31575f2eab64SJohn Marino 	    {
3158*d5f8dde1SJohn Marino 	      if (j->code_min <= j->code_max)
31595f2eab64SJohn Marino 		{
3160*d5f8dde1SJohn Marino 		  if (j->code_max != j->code_min || j->code_min == -1 || tnfa->first_char != -1)
3161*d5f8dde1SJohn Marino 		    {
3162*d5f8dde1SJohn Marino 		      tnfa->first_char = -1;
3163*d5f8dde1SJohn Marino 		      scanning = 0;
3164*d5f8dde1SJohn Marino 		      break;
3165*d5f8dde1SJohn Marino 		    }
3166*d5f8dde1SJohn Marino 		  tnfa->first_char = j->code_min;
31675f2eab64SJohn Marino 		}
31685f2eab64SJohn Marino 	      j++;
31695f2eab64SJohn Marino 	    }
31705f2eab64SJohn Marino 	}
3171*d5f8dde1SJohn Marino #ifdef TRE_DEBUG
3172*d5f8dde1SJohn Marino       if (tnfa->first_char >= 0)
3173*d5f8dde1SJohn Marino 	DPRINT(("first char must be %d\n", tnfa->first_char));
3174*d5f8dde1SJohn Marino #endif /* TRE_DEBUG */
31755f2eab64SJohn Marino     }
31765f2eab64SJohn Marino 
31775f2eab64SJohn Marino   p = tree->firstpos;
31785f2eab64SJohn Marino   i = 0;
31795f2eab64SJohn Marino   while (p->position >= 0)
31805f2eab64SJohn Marino     {
31815f2eab64SJohn Marino       i++;
31825f2eab64SJohn Marino 
31835f2eab64SJohn Marino #ifdef TRE_DEBUG
31845f2eab64SJohn Marino       {
31855f2eab64SJohn Marino 	int *tags;
31865f2eab64SJohn Marino 	DPRINT(("initial: %d", p->position));
31875f2eab64SJohn Marino 	tags = p->tags;
31885f2eab64SJohn Marino 	if (tags != NULL)
31895f2eab64SJohn Marino 	  {
31905f2eab64SJohn Marino 	    if (*tags >= 0)
31915f2eab64SJohn Marino 	      DPRINT(("/"));
31925f2eab64SJohn Marino 	    while (*tags >= 0)
31935f2eab64SJohn Marino 	      {
31945f2eab64SJohn Marino 		DPRINT(("%d", *tags));
31955f2eab64SJohn Marino 		tags++;
31965f2eab64SJohn Marino 		if (*tags >= 0)
31975f2eab64SJohn Marino 		  DPRINT((","));
31985f2eab64SJohn Marino 	      }
31995f2eab64SJohn Marino 	  }
32005f2eab64SJohn Marino 	DPRINT((", assert %d", p->assertions));
32015f2eab64SJohn Marino 	if (p->params)
32025f2eab64SJohn Marino 	  {
32035f2eab64SJohn Marino 	    DPRINT((", "));
32045f2eab64SJohn Marino 	    tre_print_params(p->params);
32055f2eab64SJohn Marino 	  }
32065f2eab64SJohn Marino 	DPRINT(("\n"));
32075f2eab64SJohn Marino       }
32085f2eab64SJohn Marino #endif /* TRE_DEBUG */
32095f2eab64SJohn Marino 
32105f2eab64SJohn Marino       p++;
32115f2eab64SJohn Marino     }
32125f2eab64SJohn Marino 
32135f2eab64SJohn Marino   initial = xcalloc((unsigned)i + 1, sizeof(tre_tnfa_transition_t));
32145f2eab64SJohn Marino   if (initial == NULL)
32155f2eab64SJohn Marino     ERROR_EXIT(REG_ESPACE);
32165f2eab64SJohn Marino   tnfa->initial = initial;
32175f2eab64SJohn Marino 
32185f2eab64SJohn Marino   i = 0;
32195f2eab64SJohn Marino   for (p = tree->firstpos; p->position >= 0; p++)
32205f2eab64SJohn Marino     {
32215f2eab64SJohn Marino       initial[i].state = transitions + offs[p->position];
32225f2eab64SJohn Marino       initial[i].state_id = p->position;
32235f2eab64SJohn Marino       initial[i].tags = NULL;
32245f2eab64SJohn Marino       /* Copy the arrays p->tags, and p->params, they are allocated
32255f2eab64SJohn Marino 	 from a tre_mem object. */
32265f2eab64SJohn Marino       if (p->tags)
32275f2eab64SJohn Marino 	{
32285f2eab64SJohn Marino 	  int j;
32295f2eab64SJohn Marino 	  for (j = 0; p->tags[j] >= 0; j++);
32305f2eab64SJohn Marino 	  initial[i].tags = xmalloc(sizeof(*p->tags) * (j + 1));
32315f2eab64SJohn Marino 	  if (!initial[i].tags)
32325f2eab64SJohn Marino 	    ERROR_EXIT(REG_ESPACE);
32335f2eab64SJohn Marino 	  memcpy(initial[i].tags, p->tags, sizeof(*p->tags) * (j + 1));
32345f2eab64SJohn Marino 	}
32355f2eab64SJohn Marino       initial[i].params = NULL;
32365f2eab64SJohn Marino       if (p->params)
32375f2eab64SJohn Marino 	{
32385f2eab64SJohn Marino 	  initial[i].params = xmalloc(sizeof(*p->params) * TRE_PARAM_LAST);
32395f2eab64SJohn Marino 	  if (!initial[i].params)
32405f2eab64SJohn Marino 	    ERROR_EXIT(REG_ESPACE);
32415f2eab64SJohn Marino 	  memcpy(initial[i].params, p->params,
32425f2eab64SJohn Marino 		 sizeof(*p->params) * TRE_PARAM_LAST);
32435f2eab64SJohn Marino 	}
32445f2eab64SJohn Marino       initial[i].assertions = p->assertions;
32455f2eab64SJohn Marino       i++;
32465f2eab64SJohn Marino     }
32475f2eab64SJohn Marino   initial[i].state = NULL;
32485f2eab64SJohn Marino 
32495f2eab64SJohn Marino   tnfa->num_transitions = add;
32505f2eab64SJohn Marino   tnfa->final = transitions + offs[tree->lastpos[0].position];
32515f2eab64SJohn Marino   tnfa->num_states = parse_ctx.position;
32525f2eab64SJohn Marino   tnfa->cflags = cflags;
32535f2eab64SJohn Marino 
3254*d5f8dde1SJohn Marino   DPRINT(("final state %d (%p)\n", tree->lastpos[0].position,
3255*d5f8dde1SJohn Marino 	  (void *)tnfa->final));
32565f2eab64SJohn Marino 
32575f2eab64SJohn Marino   tre_mem_destroy(mem);
32585f2eab64SJohn Marino   tre_stack_destroy(stack);
32595f2eab64SJohn Marino   xfree(counts);
32605f2eab64SJohn Marino   xfree(offs);
32615f2eab64SJohn Marino 
3262*d5f8dde1SJohn Marino #ifdef TRE_USE_SYSTEM_REGEX_H
3263*d5f8dde1SJohn Marino   preg->re_magic = RE_MAGIC;
3264*d5f8dde1SJohn Marino #endif /* TRE_USE_SYSTEM_REGEX_H */
32655f2eab64SJohn Marino   preg->TRE_REGEX_T_FIELD = (void *)tnfa;
3266*d5f8dde1SJohn Marino   xlocale_retain(tnfa->loc);
32675f2eab64SJohn Marino   return REG_OK;
32685f2eab64SJohn Marino 
32695f2eab64SJohn Marino  error_exit:
32705f2eab64SJohn Marino   /* Free everything that was allocated and return the error code. */
32715f2eab64SJohn Marino   tre_mem_destroy(mem);
32725f2eab64SJohn Marino   if (stack != NULL)
32735f2eab64SJohn Marino     tre_stack_destroy(stack);
32745f2eab64SJohn Marino   if (counts != NULL)
32755f2eab64SJohn Marino     xfree(counts);
32765f2eab64SJohn Marino   if (offs != NULL)
32775f2eab64SJohn Marino     xfree(offs);
3278*d5f8dde1SJohn Marino 
3279*d5f8dde1SJohn Marino   /* Set tnfa into preg, so that calling tre_free() will free the contents
3280*d5f8dde1SJohn Marino      of tnfa.  NULL out the loc field since we never retained the locale. */
32815f2eab64SJohn Marino   preg->TRE_REGEX_T_FIELD = (void *)tnfa;
3282*d5f8dde1SJohn Marino   if(tnfa) tnfa->loc = NULL;
3283*d5f8dde1SJohn Marino 
32845f2eab64SJohn Marino   tre_free(preg);
32855f2eab64SJohn Marino   return errcode;
32865f2eab64SJohn Marino }
32875f2eab64SJohn Marino 
32885f2eab64SJohn Marino 
32895f2eab64SJohn Marino 
32905f2eab64SJohn Marino 
32915f2eab64SJohn Marino void
tre_free(regex_t * preg)32925f2eab64SJohn Marino tre_free(regex_t *preg)
32935f2eab64SJohn Marino {
32945f2eab64SJohn Marino   tre_tnfa_t *tnfa;
32955f2eab64SJohn Marino   unsigned int i;
32965f2eab64SJohn Marino   tre_tnfa_transition_t *trans;
32975f2eab64SJohn Marino 
3298*d5f8dde1SJohn Marino #ifdef TRE_USE_SYSTEM_REGEX_H
3299*d5f8dde1SJohn Marino   preg->re_magic = 0;
3300*d5f8dde1SJohn Marino #endif /* TRE_USE_SYSTEM_REGEX_H */
33015f2eab64SJohn Marino   tnfa = (void *)preg->TRE_REGEX_T_FIELD;
33025f2eab64SJohn Marino   if (!tnfa)
33035f2eab64SJohn Marino     return;
3304*d5f8dde1SJohn Marino   preg->TRE_REGEX_T_FIELD = NULL;
33055f2eab64SJohn Marino 
33065f2eab64SJohn Marino   for (i = 0; i < tnfa->num_transitions; i++)
33075f2eab64SJohn Marino     if (tnfa->transitions[i].state)
33085f2eab64SJohn Marino       {
33095f2eab64SJohn Marino 	if (tnfa->transitions[i].tags)
33105f2eab64SJohn Marino 	  xfree(tnfa->transitions[i].tags);
3311*d5f8dde1SJohn Marino 	if (tnfa->transitions[i].assertions & ASSERT_BRACKET_MATCH)
3312*d5f8dde1SJohn Marino 	  xfree(tnfa->transitions[i].u.bracket_match_list);
33135f2eab64SJohn Marino 	if (tnfa->transitions[i].params)
33145f2eab64SJohn Marino 	  xfree(tnfa->transitions[i].params);
33155f2eab64SJohn Marino       }
33165f2eab64SJohn Marino   if (tnfa->transitions)
33175f2eab64SJohn Marino     xfree(tnfa->transitions);
33185f2eab64SJohn Marino 
33195f2eab64SJohn Marino   if (tnfa->initial)
33205f2eab64SJohn Marino     {
33215f2eab64SJohn Marino       for (trans = tnfa->initial; trans->state; trans++)
33225f2eab64SJohn Marino 	{
33235f2eab64SJohn Marino 	  if (trans->tags)
33245f2eab64SJohn Marino 	    xfree(trans->tags);
33255f2eab64SJohn Marino 	  if (trans->params)
33265f2eab64SJohn Marino 	    xfree(trans->params);
33275f2eab64SJohn Marino 	}
33285f2eab64SJohn Marino       xfree(tnfa->initial);
33295f2eab64SJohn Marino     }
33305f2eab64SJohn Marino 
33315f2eab64SJohn Marino   if (tnfa->submatch_data)
33325f2eab64SJohn Marino     {
33335f2eab64SJohn Marino       xfree(tnfa->submatch_data);
33345f2eab64SJohn Marino     }
33355f2eab64SJohn Marino 
33365f2eab64SJohn Marino   if (tnfa->tag_directions)
33375f2eab64SJohn Marino     xfree(tnfa->tag_directions);
33385f2eab64SJohn Marino   if (tnfa->minimal_tags)
33395f2eab64SJohn Marino     xfree(tnfa->minimal_tags);
3340*d5f8dde1SJohn Marino 
3341*d5f8dde1SJohn Marino   if (tnfa->loc)
3342*d5f8dde1SJohn Marino     xlocale_release(tnfa->loc);
3343*d5f8dde1SJohn Marino 
3344*d5f8dde1SJohn Marino   if (tnfa->last_matched_branch)
3345*d5f8dde1SJohn Marino     xfree(tnfa->last_matched_branch);
3346*d5f8dde1SJohn Marino 
33475f2eab64SJohn Marino   xfree(tnfa);
33485f2eab64SJohn Marino }
33495f2eab64SJohn Marino 
33505f2eab64SJohn Marino char *
tre_version(void)33515f2eab64SJohn Marino tre_version(void)
33525f2eab64SJohn Marino {
33535f2eab64SJohn Marino   static char str[256];
33545f2eab64SJohn Marino   char *version;
33555f2eab64SJohn Marino 
33565f2eab64SJohn Marino   if (str[0] == 0)
33575f2eab64SJohn Marino     {
33585f2eab64SJohn Marino       (void) tre_config(TRE_CONFIG_VERSION, &version);
33595f2eab64SJohn Marino       (void) snprintf(str, sizeof(str), "TRE %s (BSD)", version);
33605f2eab64SJohn Marino     }
33615f2eab64SJohn Marino   return str;
33625f2eab64SJohn Marino }
33635f2eab64SJohn Marino 
33645f2eab64SJohn Marino int
tre_config(int query,void * result)33655f2eab64SJohn Marino tre_config(int query, void *result)
33665f2eab64SJohn Marino {
33675f2eab64SJohn Marino   int *int_result = result;
33685f2eab64SJohn Marino   const char **string_result = result;
33695f2eab64SJohn Marino 
33705f2eab64SJohn Marino   switch (query)
33715f2eab64SJohn Marino     {
33725f2eab64SJohn Marino     case TRE_CONFIG_APPROX:
33735f2eab64SJohn Marino #ifdef TRE_APPROX
33745f2eab64SJohn Marino       *int_result = 1;
33755f2eab64SJohn Marino #else /* !TRE_APPROX */
33765f2eab64SJohn Marino       *int_result = 0;
33775f2eab64SJohn Marino #endif /* !TRE_APPROX */
33785f2eab64SJohn Marino       return REG_OK;
33795f2eab64SJohn Marino 
33805f2eab64SJohn Marino     case TRE_CONFIG_WCHAR:
33815f2eab64SJohn Marino #ifdef TRE_WCHAR
33825f2eab64SJohn Marino       *int_result = 1;
33835f2eab64SJohn Marino #else /* !TRE_WCHAR */
33845f2eab64SJohn Marino       *int_result = 0;
33855f2eab64SJohn Marino #endif /* !TRE_WCHAR */
33865f2eab64SJohn Marino       return REG_OK;
33875f2eab64SJohn Marino 
33885f2eab64SJohn Marino     case TRE_CONFIG_MULTIBYTE:
33895f2eab64SJohn Marino #ifdef TRE_MULTIBYTE
33905f2eab64SJohn Marino       *int_result = 1;
33915f2eab64SJohn Marino #else /* !TRE_MULTIBYTE */
33925f2eab64SJohn Marino       *int_result = 0;
33935f2eab64SJohn Marino #endif /* !TRE_MULTIBYTE */
33945f2eab64SJohn Marino       return REG_OK;
33955f2eab64SJohn Marino 
33965f2eab64SJohn Marino     case TRE_CONFIG_SYSTEM_ABI:
33975f2eab64SJohn Marino #ifdef TRE_CONFIG_SYSTEM_ABI
33985f2eab64SJohn Marino       *int_result = 1;
33995f2eab64SJohn Marino #else /* !TRE_CONFIG_SYSTEM_ABI */
34005f2eab64SJohn Marino       *int_result = 0;
34015f2eab64SJohn Marino #endif /* !TRE_CONFIG_SYSTEM_ABI */
34025f2eab64SJohn Marino       return REG_OK;
34035f2eab64SJohn Marino 
34045f2eab64SJohn Marino     case TRE_CONFIG_VERSION:
34055f2eab64SJohn Marino       *string_result = TRE_VERSION;
34065f2eab64SJohn Marino       return REG_OK;
34075f2eab64SJohn Marino     }
34085f2eab64SJohn Marino 
34095f2eab64SJohn Marino   return REG_NOMATCH;
34105f2eab64SJohn Marino }
34115f2eab64SJohn Marino 
34125f2eab64SJohn Marino 
34135f2eab64SJohn Marino /* EOF */
3414