15f2eab64SJohn Marino /*
25f2eab64SJohn Marino tre-compile.c - TRE regex compiler
35f2eab64SJohn Marino
45f2eab64SJohn Marino This software is released under a BSD-style license.
55f2eab64SJohn Marino See the file LICENSE for details and copyright.
65f2eab64SJohn Marino
75f2eab64SJohn Marino */
85f2eab64SJohn Marino
95f2eab64SJohn Marino /*
105f2eab64SJohn Marino TODO:
115f2eab64SJohn Marino - Fix tre_ast_to_tnfa() to recurse using a stack instead of recursive
125f2eab64SJohn Marino function calls.
135f2eab64SJohn Marino */
145f2eab64SJohn Marino
155f2eab64SJohn Marino
165f2eab64SJohn Marino #ifdef HAVE_CONFIG_H
175f2eab64SJohn Marino #include <config.h>
185f2eab64SJohn Marino #endif /* HAVE_CONFIG_H */
195f2eab64SJohn Marino #include <stdio.h>
205f2eab64SJohn Marino #include <assert.h>
215f2eab64SJohn Marino #include <string.h>
22*d5f8dde1SJohn Marino #include <limits.h>
235f2eab64SJohn Marino
245f2eab64SJohn Marino #include "tre-internal.h"
255f2eab64SJohn Marino #include "tre-mem.h"
265f2eab64SJohn Marino #include "tre-stack.h"
275f2eab64SJohn Marino #include "tre-ast.h"
285f2eab64SJohn Marino #include "tre-parse.h"
295f2eab64SJohn Marino #include "tre-compile.h"
305f2eab64SJohn Marino #include "tre.h"
315f2eab64SJohn Marino #include "xmalloc.h"
325f2eab64SJohn Marino
335f2eab64SJohn Marino /*
34*d5f8dde1SJohn Marino The bit_ffs() macro in bitstring.h is flawed. Replace it with a working one.
35*d5f8dde1SJohn Marino */
36*d5f8dde1SJohn Marino #undef bit_ffs
37*d5f8dde1SJohn Marino #define bit_ffs(name, nbits, value) { \
38*d5f8dde1SJohn Marino register bitstr_t *_name = name; \
39*d5f8dde1SJohn Marino register int _byte, _nbits = nbits; \
40*d5f8dde1SJohn Marino register int _stopbyte = _bit_byte(_nbits), _value = -1; \
41*d5f8dde1SJohn Marino for (_byte = 0; _byte <= _stopbyte; ++_byte) \
42*d5f8dde1SJohn Marino if (_name[_byte]) { \
43*d5f8dde1SJohn Marino _value = _byte << 3; \
44*d5f8dde1SJohn Marino for (_stopbyte = _name[_byte]; !(_stopbyte&0x1); \
45*d5f8dde1SJohn Marino ++_value, _stopbyte >>= 1); \
46*d5f8dde1SJohn Marino break; \
47*d5f8dde1SJohn Marino } \
48*d5f8dde1SJohn Marino *(value) = _value; \
49*d5f8dde1SJohn Marino }
50*d5f8dde1SJohn Marino
51*d5f8dde1SJohn Marino /*
525f2eab64SJohn Marino Algorithms to setup tags so that submatch addressing can be done.
535f2eab64SJohn Marino */
545f2eab64SJohn Marino
555f2eab64SJohn Marino
56*d5f8dde1SJohn Marino #ifdef TRE_DEBUG
57*d5f8dde1SJohn Marino static const char *tag_dir_str[] = {
58*d5f8dde1SJohn Marino "minimize",
59*d5f8dde1SJohn Marino "maximize",
60*d5f8dde1SJohn Marino "left-maximize"
61*d5f8dde1SJohn Marino };
62*d5f8dde1SJohn Marino
63*d5f8dde1SJohn Marino static const char _indent[] = " ";
64*d5f8dde1SJohn Marino
65*d5f8dde1SJohn Marino static void
print_indent(int indent)66*d5f8dde1SJohn Marino print_indent(int indent)
67*d5f8dde1SJohn Marino {
68*d5f8dde1SJohn Marino while (indent-- > 0)
69*d5f8dde1SJohn Marino DPRINT((_indent));
70*d5f8dde1SJohn Marino }
71*d5f8dde1SJohn Marino
72*d5f8dde1SJohn Marino static void print_last_matched_pre(tre_last_matched_pre_t *lm, int indent,
73*d5f8dde1SJohn Marino int num_tags);
74*d5f8dde1SJohn Marino static void
print_last_match_branch_pre(tre_last_matched_branch_pre_t * branch,int indent,int num_tags)75*d5f8dde1SJohn Marino print_last_match_branch_pre(tre_last_matched_branch_pre_t *branch, int indent,
76*d5f8dde1SJohn Marino int num_tags)
77*d5f8dde1SJohn Marino {
78*d5f8dde1SJohn Marino tre_last_matched_pre_t *u = branch->last_matched;
79*d5f8dde1SJohn Marino int n_last_matched = 0;
80*d5f8dde1SJohn Marino
81*d5f8dde1SJohn Marino while (u)
82*d5f8dde1SJohn Marino {
83*d5f8dde1SJohn Marino n_last_matched++;
84*d5f8dde1SJohn Marino u = u->next;
85*d5f8dde1SJohn Marino }
86*d5f8dde1SJohn Marino
87*d5f8dde1SJohn Marino print_indent(indent);
88*d5f8dde1SJohn Marino DPRINT(("BRANCH: tot_branches=%d tot_last_matched=%d tot_tags=%d\n",
89*d5f8dde1SJohn Marino branch->tot_branches, branch->tot_last_matched, branch->tot_tags));
90*d5f8dde1SJohn Marino print_indent(indent);
91*d5f8dde1SJohn Marino DPRINT(("..n_last_matched=%d last_matched=%d\n", branch->n_last_matched,
92*d5f8dde1SJohn Marino n_last_matched));
93*d5f8dde1SJohn Marino if (branch->n_last_matched != n_last_matched)
94*d5f8dde1SJohn Marino DPRINT(("*** mismatch between n_last_matched and unions ***\n"));
95*d5f8dde1SJohn Marino if (branch->cmp_tag > 0)
96*d5f8dde1SJohn Marino {
97*d5f8dde1SJohn Marino int i;
98*d5f8dde1SJohn Marino const char *sep = " tags=";
99*d5f8dde1SJohn Marino print_indent(indent);
100*d5f8dde1SJohn Marino DPRINT(("..cmp_tag=%d n_tags=%d", branch->cmp_tag, branch->n_tags));
101*d5f8dde1SJohn Marino for (i = 0; i < num_tags; i++)
102*d5f8dde1SJohn Marino if (bit_test(branch->tags, i))
103*d5f8dde1SJohn Marino {
104*d5f8dde1SJohn Marino DPRINT(("%s%d", sep, i));
105*d5f8dde1SJohn Marino sep = ",";
106*d5f8dde1SJohn Marino }
107*d5f8dde1SJohn Marino DPRINT(("\n"));
108*d5f8dde1SJohn Marino }
109*d5f8dde1SJohn Marino
110*d5f8dde1SJohn Marino u = branch->last_matched;
111*d5f8dde1SJohn Marino indent++;
112*d5f8dde1SJohn Marino while (u)
113*d5f8dde1SJohn Marino {
114*d5f8dde1SJohn Marino print_last_matched_pre(u, indent, num_tags);
115*d5f8dde1SJohn Marino u = u->next;
116*d5f8dde1SJohn Marino }
117*d5f8dde1SJohn Marino }
118*d5f8dde1SJohn Marino
119*d5f8dde1SJohn Marino static void
print_last_matched_pre(tre_last_matched_pre_t * lm,int indent,int num_tags)120*d5f8dde1SJohn Marino print_last_matched_pre(tre_last_matched_pre_t *lm, int indent, int num_tags)
121*d5f8dde1SJohn Marino {
122*d5f8dde1SJohn Marino tre_last_matched_branch_pre_t *b = lm->branches;
123*d5f8dde1SJohn Marino int n_branches = 0;
124*d5f8dde1SJohn Marino
125*d5f8dde1SJohn Marino while (b)
126*d5f8dde1SJohn Marino {
127*d5f8dde1SJohn Marino n_branches++;
128*d5f8dde1SJohn Marino b = b->next;
129*d5f8dde1SJohn Marino }
130*d5f8dde1SJohn Marino
131*d5f8dde1SJohn Marino print_indent(indent);
132*d5f8dde1SJohn Marino DPRINT(("LAST_MATCHED: tot_branches=%d tot_last_matched=%d tot_tags=%d\n",
133*d5f8dde1SJohn Marino lm->tot_branches, lm->tot_last_matched, lm->tot_tags));
134*d5f8dde1SJohn Marino print_indent(indent);
135*d5f8dde1SJohn Marino DPRINT(("..start_tag=%d n_branches=%d branches=%d\n", lm->start_tag,
136*d5f8dde1SJohn Marino lm->n_branches, n_branches));
137*d5f8dde1SJohn Marino if (lm->n_branches != n_branches)
138*d5f8dde1SJohn Marino DPRINT(("*** mismatch between n and branches ***\n"));
139*d5f8dde1SJohn Marino
140*d5f8dde1SJohn Marino b = lm->branches;
141*d5f8dde1SJohn Marino indent++;
142*d5f8dde1SJohn Marino while (b)
143*d5f8dde1SJohn Marino {
144*d5f8dde1SJohn Marino print_last_match_branch_pre(b, indent, num_tags);
145*d5f8dde1SJohn Marino b = b->next;
146*d5f8dde1SJohn Marino }
147*d5f8dde1SJohn Marino }
148*d5f8dde1SJohn Marino
149*d5f8dde1SJohn Marino static void print_last_matched(tre_last_matched_t *lm, int indent);
150*d5f8dde1SJohn Marino static void
print_last_match_branch(tre_last_matched_branch_t * branch,int indent)151*d5f8dde1SJohn Marino print_last_match_branch(tre_last_matched_branch_t *branch, int indent)
152*d5f8dde1SJohn Marino {
153*d5f8dde1SJohn Marino tre_last_matched_t *u;
154*d5f8dde1SJohn Marino int i;
155*d5f8dde1SJohn Marino
156*d5f8dde1SJohn Marino print_indent(indent);
157*d5f8dde1SJohn Marino DPRINT(("BRANCH: n_last_matched=%d\n", branch->n_last_matched));
158*d5f8dde1SJohn Marino if (branch->cmp_tag > 0)
159*d5f8dde1SJohn Marino {
160*d5f8dde1SJohn Marino print_indent(indent);
161*d5f8dde1SJohn Marino DPRINT(("..cmp_tag=%d n_tags=%d", branch->cmp_tag, branch->n_tags));
162*d5f8dde1SJohn Marino if (branch->n_tags > 0)
163*d5f8dde1SJohn Marino {
164*d5f8dde1SJohn Marino const char *sep = " tags=";
165*d5f8dde1SJohn Marino for (i = 0; i < branch->n_tags; i++)
166*d5f8dde1SJohn Marino {
167*d5f8dde1SJohn Marino DPRINT(("%s%d", sep, branch->tags[i]));
168*d5f8dde1SJohn Marino sep = ",";
169*d5f8dde1SJohn Marino }
170*d5f8dde1SJohn Marino }
171*d5f8dde1SJohn Marino DPRINT(("\n"));
172*d5f8dde1SJohn Marino }
173*d5f8dde1SJohn Marino
174*d5f8dde1SJohn Marino u = branch->last_matched;
175*d5f8dde1SJohn Marino indent++;
176*d5f8dde1SJohn Marino for (i = branch->n_last_matched; i > 0; i--, u++)
177*d5f8dde1SJohn Marino print_last_matched(u, indent);
178*d5f8dde1SJohn Marino }
179*d5f8dde1SJohn Marino
180*d5f8dde1SJohn Marino static void
print_last_matched(tre_last_matched_t * lm,int indent)181*d5f8dde1SJohn Marino print_last_matched(tre_last_matched_t *lm, int indent)
182*d5f8dde1SJohn Marino {
183*d5f8dde1SJohn Marino int i;
184*d5f8dde1SJohn Marino tre_last_matched_branch_t *b;
185*d5f8dde1SJohn Marino
186*d5f8dde1SJohn Marino print_indent(indent);
187*d5f8dde1SJohn Marino DPRINT(("LAST_MATCHED: n_branches=%d start_tag=%d\n", lm->n_branches,
188*d5f8dde1SJohn Marino lm->start_tag));
189*d5f8dde1SJohn Marino
190*d5f8dde1SJohn Marino b = lm->branches;
191*d5f8dde1SJohn Marino indent++;
192*d5f8dde1SJohn Marino for (i = lm->n_branches; i > 0; i--, b++)
193*d5f8dde1SJohn Marino print_last_match_branch(b, indent);
194*d5f8dde1SJohn Marino }
195*d5f8dde1SJohn Marino #endif /* TRE_DEBUG */
196*d5f8dde1SJohn Marino
197*d5f8dde1SJohn Marino
198*d5f8dde1SJohn Marino /* Merge the tre_last_matched_branch_pre_t of src into dst, creating a new
199*d5f8dde1SJohn Marino one if needed. If tag_id > 0, add that tag as well (a negative tag_id will
200*d5f8dde1SJohn Marino create an unset tre_last_matched_branch_pre_t. */
201*d5f8dde1SJohn Marino static reg_errcode_t
tre_merge_branches(tre_mem_t mem,tre_ast_node_t * dst,tre_ast_node_t * src,int tag_id,int num_tags)202*d5f8dde1SJohn Marino tre_merge_branches(tre_mem_t mem, tre_ast_node_t *dst, tre_ast_node_t *src,
203*d5f8dde1SJohn Marino int tag_id, int num_tags)
204*d5f8dde1SJohn Marino {
205*d5f8dde1SJohn Marino tre_last_matched_branch_pre_t *db = dst->last_matched_branch;
206*d5f8dde1SJohn Marino tre_last_matched_branch_pre_t *sb = (src ? src->last_matched_branch : NULL);
207*d5f8dde1SJohn Marino
208*d5f8dde1SJohn Marino if (db)
209*d5f8dde1SJohn Marino {
210*d5f8dde1SJohn Marino if (sb)
211*d5f8dde1SJohn Marino {
212*d5f8dde1SJohn Marino bitstr_t *l = db->tags;
213*d5f8dde1SJohn Marino bitstr_t *r = sb->tags;
214*d5f8dde1SJohn Marino int i = bitstr_size(num_tags);
215*d5f8dde1SJohn Marino
216*d5f8dde1SJohn Marino while(i-- > 0)
217*d5f8dde1SJohn Marino *l++ |= *r++;
218*d5f8dde1SJohn Marino /* db and sb are the info from two parallel sub-trees, so the tags
219*d5f8dde1SJohn Marino must be mutually exclusive, and we can just add their numbers */
220*d5f8dde1SJohn Marino db->n_tags += sb->n_tags;
221*d5f8dde1SJohn Marino db->tot_tags += sb->tot_tags;
222*d5f8dde1SJohn Marino if (db->last_matched)
223*d5f8dde1SJohn Marino {
224*d5f8dde1SJohn Marino if (sb->last_matched)
225*d5f8dde1SJohn Marino {
226*d5f8dde1SJohn Marino tre_last_matched_pre_t *u = db->last_matched;
227*d5f8dde1SJohn Marino
228*d5f8dde1SJohn Marino while(u->next)
229*d5f8dde1SJohn Marino u = u->next;
230*d5f8dde1SJohn Marino u->next = sb->last_matched;
231*d5f8dde1SJohn Marino db->n_last_matched += sb->n_last_matched;
232*d5f8dde1SJohn Marino db->tot_branches += sb->tot_branches;
233*d5f8dde1SJohn Marino db->tot_last_matched += sb->tot_last_matched;
234*d5f8dde1SJohn Marino }
235*d5f8dde1SJohn Marino }
236*d5f8dde1SJohn Marino else if (sb->last_matched)
237*d5f8dde1SJohn Marino {
238*d5f8dde1SJohn Marino db->last_matched = sb->last_matched;
239*d5f8dde1SJohn Marino db->n_last_matched = sb->n_last_matched;
240*d5f8dde1SJohn Marino db->tot_branches = sb->tot_branches;
241*d5f8dde1SJohn Marino db->tot_last_matched = sb->tot_last_matched;
242*d5f8dde1SJohn Marino }
243*d5f8dde1SJohn Marino }
244*d5f8dde1SJohn Marino }
245*d5f8dde1SJohn Marino else
246*d5f8dde1SJohn Marino db = sb;
247*d5f8dde1SJohn Marino
248*d5f8dde1SJohn Marino if (tag_id != 0)
249*d5f8dde1SJohn Marino {
250*d5f8dde1SJohn Marino if (!db)
251*d5f8dde1SJohn Marino {
252*d5f8dde1SJohn Marino db = tre_mem_calloc(mem, sizeof(tre_last_matched_branch_pre_t)
253*d5f8dde1SJohn Marino + bitstr_size(num_tags));
254*d5f8dde1SJohn Marino if (db == NULL)
255*d5f8dde1SJohn Marino return REG_ESPACE;
256*d5f8dde1SJohn Marino db->tot_branches = 1;
257*d5f8dde1SJohn Marino }
258*d5f8dde1SJohn Marino if (tag_id > 0)
259*d5f8dde1SJohn Marino {
260*d5f8dde1SJohn Marino /* tag_id is a new tag, and shouldn't exist in db's tags,
261*d5f8dde1SJohn Marino so we can always increment n_tags */
262*d5f8dde1SJohn Marino bit_set(db->tags, tag_id);
263*d5f8dde1SJohn Marino db->n_tags++;
264*d5f8dde1SJohn Marino db->tot_tags++;
265*d5f8dde1SJohn Marino }
266*d5f8dde1SJohn Marino }
267*d5f8dde1SJohn Marino dst->last_matched_branch = db;
268*d5f8dde1SJohn Marino return REG_OK;
269*d5f8dde1SJohn Marino }
270*d5f8dde1SJohn Marino
271*d5f8dde1SJohn Marino
2725f2eab64SJohn Marino /* Inserts a catenation node to the root of the tree given in `node'.
2735f2eab64SJohn Marino As the left child a new tag with number `tag_id' to `node' is added,
2745f2eab64SJohn Marino and the right child is the old root. */
2755f2eab64SJohn Marino static reg_errcode_t
tre_add_tag_left(tre_mem_t mem,tre_ast_node_t * node,int tag_id)2765f2eab64SJohn Marino tre_add_tag_left(tre_mem_t mem, tre_ast_node_t *node, int tag_id)
2775f2eab64SJohn Marino {
2785f2eab64SJohn Marino tre_catenation_t *c;
2795f2eab64SJohn Marino
2805f2eab64SJohn Marino DPRINT(("add_tag_left: tag %d\n", tag_id));
2815f2eab64SJohn Marino
2825f2eab64SJohn Marino c = tre_mem_alloc(mem, sizeof(*c));
2835f2eab64SJohn Marino if (c == NULL)
2845f2eab64SJohn Marino return REG_ESPACE;
2855f2eab64SJohn Marino c->left = tre_ast_new_literal(mem, TAG, tag_id, -1);
2865f2eab64SJohn Marino if (c->left == NULL)
2875f2eab64SJohn Marino return REG_ESPACE;
288*d5f8dde1SJohn Marino c->right = tre_mem_calloc(mem, sizeof(tre_ast_node_t));
2895f2eab64SJohn Marino if (c->right == NULL)
2905f2eab64SJohn Marino return REG_ESPACE;
2915f2eab64SJohn Marino
2925f2eab64SJohn Marino c->right->obj = node->obj;
2935f2eab64SJohn Marino c->right->type = node->type;
294*d5f8dde1SJohn Marino c->right->last_matched_branch = node->last_matched_branch;
2955f2eab64SJohn Marino c->right->nullable = -1;
2965f2eab64SJohn Marino c->right->submatch_id = -1;
2975f2eab64SJohn Marino node->obj = c;
2985f2eab64SJohn Marino node->type = CATENATION;
299*d5f8dde1SJohn Marino node->original = c->right;
3005f2eab64SJohn Marino return REG_OK;
3015f2eab64SJohn Marino }
3025f2eab64SJohn Marino
3035f2eab64SJohn Marino /* Inserts a catenation node to the root of the tree given in `node'.
3045f2eab64SJohn Marino As the right child a new tag with number `tag_id' to `node' is added,
3055f2eab64SJohn Marino and the left child is the old root. */
3065f2eab64SJohn Marino static reg_errcode_t
tre_add_tag_right(tre_mem_t mem,tre_ast_node_t * node,int tag_id)3075f2eab64SJohn Marino tre_add_tag_right(tre_mem_t mem, tre_ast_node_t *node, int tag_id)
3085f2eab64SJohn Marino {
3095f2eab64SJohn Marino tre_catenation_t *c;
3105f2eab64SJohn Marino
3115f2eab64SJohn Marino DPRINT(("tre_add_tag_right: tag %d\n", tag_id));
3125f2eab64SJohn Marino
3135f2eab64SJohn Marino c = tre_mem_alloc(mem, sizeof(*c));
3145f2eab64SJohn Marino if (c == NULL)
3155f2eab64SJohn Marino return REG_ESPACE;
3165f2eab64SJohn Marino c->right = tre_ast_new_literal(mem, TAG, tag_id, -1);
3175f2eab64SJohn Marino if (c->right == NULL)
3185f2eab64SJohn Marino return REG_ESPACE;
319*d5f8dde1SJohn Marino c->left = tre_mem_calloc(mem, sizeof(tre_ast_node_t));
3205f2eab64SJohn Marino if (c->left == NULL)
3215f2eab64SJohn Marino return REG_ESPACE;
3225f2eab64SJohn Marino
3235f2eab64SJohn Marino c->left->obj = node->obj;
3245f2eab64SJohn Marino c->left->type = node->type;
325*d5f8dde1SJohn Marino c->left->last_matched_branch = node->last_matched_branch;
3265f2eab64SJohn Marino c->left->nullable = -1;
3275f2eab64SJohn Marino c->left->submatch_id = -1;
3285f2eab64SJohn Marino node->obj = c;
3295f2eab64SJohn Marino node->type = CATENATION;
330*d5f8dde1SJohn Marino node->original = c->left;
3315f2eab64SJohn Marino return REG_OK;
3325f2eab64SJohn Marino }
3335f2eab64SJohn Marino
3345f2eab64SJohn Marino typedef enum {
3355f2eab64SJohn Marino ADDTAGS_RECURSE,
336*d5f8dde1SJohn Marino ADDTAGS_RECURSE_NOT_TOP_UNION,
3375f2eab64SJohn Marino ADDTAGS_AFTER_ITERATION,
3385f2eab64SJohn Marino ADDTAGS_AFTER_UNION_LEFT,
3395f2eab64SJohn Marino ADDTAGS_AFTER_UNION_RIGHT,
3405f2eab64SJohn Marino ADDTAGS_AFTER_CAT_LEFT,
3415f2eab64SJohn Marino ADDTAGS_AFTER_CAT_RIGHT,
342*d5f8dde1SJohn Marino ADDTAGS_SET_SUBMATCH_END,
343*d5f8dde1SJohn Marino ADDTAGS_UNION_RECURSE,
344*d5f8dde1SJohn Marino ADDTAGS_UNION_RIGHT_RECURSE,
345*d5f8dde1SJohn Marino ADDTAGS_AFTER_UNION_TOP,
3465f2eab64SJohn Marino } tre_addtags_symbol_t;
3475f2eab64SJohn Marino
348*d5f8dde1SJohn Marino enum {
349*d5f8dde1SJohn Marino COPY_LAST_MATCHED_BRANCH,
350*d5f8dde1SJohn Marino COPY_LAST_MATCHED_BRANCH_NEXT,
351*d5f8dde1SJohn Marino COPY_LAST_MATCHED,
352*d5f8dde1SJohn Marino COPY_LAST_MATCHED_NEXT,
353*d5f8dde1SJohn Marino };
3545f2eab64SJohn Marino
3555f2eab64SJohn Marino
356*d5f8dde1SJohn Marino #define REGSET_UNSET ((unsigned)-1)
3575f2eab64SJohn Marino
3585f2eab64SJohn Marino /* Go through `regset' and set submatch data for submatches that are
3595f2eab64SJohn Marino using this tag. */
3605f2eab64SJohn Marino static void
tre_purge_regset(unsigned * regset,tre_tnfa_t * tnfa,int tag)361*d5f8dde1SJohn Marino tre_purge_regset(unsigned *regset, tre_tnfa_t *tnfa, int tag)
3625f2eab64SJohn Marino {
3635f2eab64SJohn Marino int i;
3645f2eab64SJohn Marino
365*d5f8dde1SJohn Marino for (i = 0; regset[i] != REGSET_UNSET; i++)
3665f2eab64SJohn Marino {
3675f2eab64SJohn Marino int id = regset[i] / 2;
3685f2eab64SJohn Marino int start = !(regset[i] % 2);
369*d5f8dde1SJohn Marino if (id >= SUBMATCH_ID_INVISIBLE_START)
370*d5f8dde1SJohn Marino continue;
3715f2eab64SJohn Marino DPRINT((" Using tag %d for %s offset of "
3725f2eab64SJohn Marino "submatch %d\n", tag,
3735f2eab64SJohn Marino start ? "start" : "end", id));
3745f2eab64SJohn Marino if (start)
3755f2eab64SJohn Marino tnfa->submatch_data[id].so_tag = tag;
3765f2eab64SJohn Marino else
3775f2eab64SJohn Marino tnfa->submatch_data[id].eo_tag = tag;
3785f2eab64SJohn Marino }
3795f2eab64SJohn Marino regset[0] = -1;
3805f2eab64SJohn Marino }
3815f2eab64SJohn Marino
3825f2eab64SJohn Marino
383*d5f8dde1SJohn Marino #define REGSET_HAS_STARTS 0x1
384*d5f8dde1SJohn Marino #define REGSET_HAS_ENDS 0x2
385*d5f8dde1SJohn Marino
386*d5f8dde1SJohn Marino
3875f2eab64SJohn Marino /* Adds tags to appropriate locations in the parse tree in `tree', so that
3885f2eab64SJohn Marino subexpressions marked for submatch addressing can be traced. */
3895f2eab64SJohn Marino static reg_errcode_t
tre_add_tags(tre_mem_t mem,tre_stack_t * stack,tre_ast_node_t * tree,tre_tnfa_t * tnfa)3905f2eab64SJohn Marino tre_add_tags(tre_mem_t mem, tre_stack_t *stack, tre_ast_node_t *tree,
3915f2eab64SJohn Marino tre_tnfa_t *tnfa)
3925f2eab64SJohn Marino {
3935f2eab64SJohn Marino reg_errcode_t status = REG_OK;
3945f2eab64SJohn Marino tre_addtags_symbol_t symbol;
3955f2eab64SJohn Marino tre_ast_node_t *node = tree; /* Tree node we are currently looking at. */
3965f2eab64SJohn Marino int bottom = tre_stack_num_objects(stack);
3975f2eab64SJohn Marino /* True for first pass (counting number of needed tags) */
3985f2eab64SJohn Marino int first_pass = (mem == NULL || tnfa == NULL);
399*d5f8dde1SJohn Marino unsigned *regset, *orig_regset;
400*d5f8dde1SJohn Marino int regset_contains = 0;
4015f2eab64SJohn Marino int num_tags = 0; /* Total number of tags. */
4025f2eab64SJohn Marino int num_minimals = 0; /* Number of special minimal tags. */
4035f2eab64SJohn Marino int tag = 0; /* The tag that is to be added next. */
4045f2eab64SJohn Marino int next_tag = 1; /* Next tag to use after this one. */
4055f2eab64SJohn Marino int minimal_tag = -1; /* Tag that marks the beginning of a minimal match. */
406*d5f8dde1SJohn Marino int *reorder_tags = NULL; /* Tag reorder array: a pair for each reorder,
407*d5f8dde1SJohn Marino * the first is the tag to reorder, the second
408*d5f8dde1SJohn Marino * is the tag after which the first is reordered */
409*d5f8dde1SJohn Marino int *rtp; /* Pointer used to fill in reorder_tags and
410*d5f8dde1SJohn Marino * tag_order */
411*d5f8dde1SJohn Marino int *to_reorder; /* Transform array converting sequential order to
412*d5f8dde1SJohn Marino * that specified by reorder_tags */
413*d5f8dde1SJohn Marino int id;
4145f2eab64SJohn Marino
415*d5f8dde1SJohn Marino tre_tag_direction_t direction = TRE_TAG_LEFT_MAXIMIZE;
4165f2eab64SJohn Marino if (!first_pass)
4175f2eab64SJohn Marino {
418*d5f8dde1SJohn Marino DPRINT(("Initializing direction to %s\n", tag_dir_str[direction]));
4195f2eab64SJohn Marino tnfa->end_tag = 0;
4205f2eab64SJohn Marino tnfa->minimal_tags[0] = -1;
4215f2eab64SJohn Marino }
4225f2eab64SJohn Marino
423*d5f8dde1SJohn Marino regset = xmalloc(sizeof(*regset) * ((tnfa->num_submatches
424*d5f8dde1SJohn Marino + tnfa->num_submatches_invisible + 1) * 2));
4255f2eab64SJohn Marino if (regset == NULL)
426*d5f8dde1SJohn Marino {
427*d5f8dde1SJohn Marino status = REG_ESPACE;
428*d5f8dde1SJohn Marino goto error_regset;
429*d5f8dde1SJohn Marino }
430*d5f8dde1SJohn Marino regset[0] = REGSET_UNSET;
4315f2eab64SJohn Marino orig_regset = regset;
4325f2eab64SJohn Marino
433*d5f8dde1SJohn Marino if (!first_pass)
4345f2eab64SJohn Marino {
435*d5f8dde1SJohn Marino /* Allocate all memory for reorder_tags, tag_order, to_seq_order and
436*d5f8dde1SJohn Marino * to_reorder in one batch (assuming all are the same type) */
437*d5f8dde1SJohn Marino rtp = reorder_tags = xmalloc(sizeof(*reorder_tags) *
438*d5f8dde1SJohn Marino ((2 * tnfa->num_reorder_tags + 1) +
439*d5f8dde1SJohn Marino tnfa->num_tags));
440*d5f8dde1SJohn Marino if (reorder_tags == NULL)
441*d5f8dde1SJohn Marino {
442*d5f8dde1SJohn Marino status = REG_ESPACE;
443*d5f8dde1SJohn Marino goto error_reorder_tags;
4445f2eab64SJohn Marino }
445*d5f8dde1SJohn Marino to_reorder = reorder_tags + (2 * tnfa->num_reorder_tags + 1);
4465f2eab64SJohn Marino }
4475f2eab64SJohn Marino
4485f2eab64SJohn Marino STACK_PUSH(stack, voidptr, node);
4495f2eab64SJohn Marino STACK_PUSH(stack, int, ADDTAGS_RECURSE);
4505f2eab64SJohn Marino
4515f2eab64SJohn Marino while (tre_stack_num_objects(stack) > bottom)
4525f2eab64SJohn Marino {
4535f2eab64SJohn Marino if (status != REG_OK)
4545f2eab64SJohn Marino break;
4555f2eab64SJohn Marino
4565f2eab64SJohn Marino symbol = (tre_addtags_symbol_t)tre_stack_pop_int(stack);
4575f2eab64SJohn Marino switch (symbol)
4585f2eab64SJohn Marino {
459*d5f8dde1SJohn Marino int top_union;
4605f2eab64SJohn Marino
4615f2eab64SJohn Marino case ADDTAGS_SET_SUBMATCH_END:
4625f2eab64SJohn Marino {
4635f2eab64SJohn Marino int i;
4645f2eab64SJohn Marino
465*d5f8dde1SJohn Marino id = tre_stack_pop_int(stack);
466*d5f8dde1SJohn Marino node = tre_stack_pop_voidptr(stack);
4675f2eab64SJohn Marino /* Add end of this submatch to regset. */
468*d5f8dde1SJohn Marino for (i = 0; regset[i] != REGSET_UNSET; i++);
4695f2eab64SJohn Marino regset[i] = id * 2 + 1;
4705f2eab64SJohn Marino regset[i + 1] = -1;
471*d5f8dde1SJohn Marino regset_contains |= REGSET_HAS_ENDS;
4725f2eab64SJohn Marino
473*d5f8dde1SJohn Marino /* Always put a tag after a minimal iterator. */
474*d5f8dde1SJohn Marino if (minimal_tag >= 0)
475*d5f8dde1SJohn Marino {
476*d5f8dde1SJohn Marino if (first_pass)
477*d5f8dde1SJohn Marino {
478*d5f8dde1SJohn Marino node->num_tags++;
479*d5f8dde1SJohn Marino DPRINT((" ADDTAGS_SET_SUBMATCH_END: node->num_tags = %d\n",
480*d5f8dde1SJohn Marino node->num_tags));
481*d5f8dde1SJohn Marino }
482*d5f8dde1SJohn Marino else
483*d5f8dde1SJohn Marino {
484*d5f8dde1SJohn Marino int i;
485*d5f8dde1SJohn Marino status = tre_merge_branches(mem, node, NULL, tag,
486*d5f8dde1SJohn Marino tnfa->num_tags);
487*d5f8dde1SJohn Marino if (status != REG_OK)
488*d5f8dde1SJohn Marino break;
489*d5f8dde1SJohn Marino status = tre_add_tag_right(mem, node, tag);
490*d5f8dde1SJohn Marino if (status != REG_OK)
491*d5f8dde1SJohn Marino break;
492*d5f8dde1SJohn Marino tnfa->tag_directions[tag] = TRE_TAG_MINIMIZE;
493*d5f8dde1SJohn Marino DPRINT(("Setting t%d direction to %s\n", tag,
494*d5f8dde1SJohn Marino tag_dir_str[tnfa->tag_directions[tag]]));
495*d5f8dde1SJohn Marino DPRINT(("Minimal %d, %d\n", minimal_tag, tag));
496*d5f8dde1SJohn Marino for (i = 0; tnfa->minimal_tags[i] >= 0; i++);
497*d5f8dde1SJohn Marino tnfa->minimal_tags[i] = tag;
498*d5f8dde1SJohn Marino tnfa->minimal_tags[i + 1] = minimal_tag;
499*d5f8dde1SJohn Marino tnfa->minimal_tags[i + 2] = -1;
500*d5f8dde1SJohn Marino
501*d5f8dde1SJohn Marino DPRINT((" Minimal end: t%d reordered to "
502*d5f8dde1SJohn Marino "after t%d\n", tag, minimal_tag));
503*d5f8dde1SJohn Marino /* Append to tag_order, move "tag" after
504*d5f8dde1SJohn Marino * "minimal_tag" */
505*d5f8dde1SJohn Marino *rtp++ = tag;
506*d5f8dde1SJohn Marino *rtp++ = minimal_tag;
507*d5f8dde1SJohn Marino
508*d5f8dde1SJohn Marino num_minimals++;
509*d5f8dde1SJohn Marino tre_purge_regset(regset, tnfa, tag);
510*d5f8dde1SJohn Marino }
511*d5f8dde1SJohn Marino
512*d5f8dde1SJohn Marino minimal_tag = -1;
513*d5f8dde1SJohn Marino DPRINT((" ADDTAGS_SET_SUBMATCH_END num_tags++ tag=%d\n", tag));
514*d5f8dde1SJohn Marino regset[0] = REGSET_UNSET;
515*d5f8dde1SJohn Marino regset_contains = 0;
516*d5f8dde1SJohn Marino tag = next_tag;
517*d5f8dde1SJohn Marino num_tags++;
518*d5f8dde1SJohn Marino next_tag++;
519*d5f8dde1SJohn Marino }
5205f2eab64SJohn Marino break;
5215f2eab64SJohn Marino }
5225f2eab64SJohn Marino
523*d5f8dde1SJohn Marino case ADDTAGS_RECURSE_NOT_TOP_UNION:
524*d5f8dde1SJohn Marino /* Like ADDTAGS_RECURSE, except that top_union is set to zero,
525*d5f8dde1SJohn Marino * indicating that if a union is being processed, it is not the
526*d5f8dde1SJohn Marino * top-most of a series */
527*d5f8dde1SJohn Marino top_union = 0;
528*d5f8dde1SJohn Marino goto do_addtags_recurse;
529*d5f8dde1SJohn Marino
5305f2eab64SJohn Marino case ADDTAGS_RECURSE:
531*d5f8dde1SJohn Marino /* Setting top_union to 1 means that if a union is begin processed,
532*d5f8dde1SJohn Marino * it is the top-most of a series, and should recurse through the
533*d5f8dde1SJohn Marino * series to set the left_tag and right_tag values */
534*d5f8dde1SJohn Marino top_union = 1;
535*d5f8dde1SJohn Marino
536*d5f8dde1SJohn Marino do_addtags_recurse:
5375f2eab64SJohn Marino node = tre_stack_pop_voidptr(stack);
5385f2eab64SJohn Marino
539*d5f8dde1SJohn Marino id = node->submatch_id;
540*d5f8dde1SJohn Marino if (id >= 0)
5415f2eab64SJohn Marino {
5425f2eab64SJohn Marino int i;
5435f2eab64SJohn Marino
5445f2eab64SJohn Marino
5455f2eab64SJohn Marino /* Add start of this submatch to regset. */
546*d5f8dde1SJohn Marino for (i = 0; regset[i] != REGSET_UNSET; i++);
5475f2eab64SJohn Marino regset[i] = id * 2;
5485f2eab64SJohn Marino regset[i + 1] = -1;
549*d5f8dde1SJohn Marino regset_contains |= REGSET_HAS_STARTS;
5505f2eab64SJohn Marino
5515f2eab64SJohn Marino /* Add end of this submatch to regset after processing this
5525f2eab64SJohn Marino node. */
553*d5f8dde1SJohn Marino STACK_PUSH(stack, voidptr, node);
554*d5f8dde1SJohn Marino STACK_PUSHX(stack, int, id);
5555f2eab64SJohn Marino STACK_PUSHX(stack, int, ADDTAGS_SET_SUBMATCH_END);
5565f2eab64SJohn Marino }
5575f2eab64SJohn Marino
5585f2eab64SJohn Marino switch (node->type)
5595f2eab64SJohn Marino {
5605f2eab64SJohn Marino case LITERAL:
5615f2eab64SJohn Marino {
5625f2eab64SJohn Marino tre_literal_t *lit = node->obj;
5635f2eab64SJohn Marino
564*d5f8dde1SJohn Marino if (!IS_SPECIAL(lit) || IS_BACKREF(lit) || IS_EMPTY(lit) || IS_ASSERTION(lit))
5655f2eab64SJohn Marino {
5665f2eab64SJohn Marino DPRINT(("Literal %d-%d\n",
5675f2eab64SJohn Marino (int)lit->code_min, (int)lit->code_max));
568*d5f8dde1SJohn Marino if (regset_contains)
5695f2eab64SJohn Marino {
5705f2eab64SJohn Marino /* Regset is not empty, so add a tag before the
5715f2eab64SJohn Marino literal or backref. */
572*d5f8dde1SJohn Marino if (first_pass)
5735f2eab64SJohn Marino {
574*d5f8dde1SJohn Marino DPRINT((" ADDTAGS_RECURSE:LITERAL node->num_tags = 1\n"));
575*d5f8dde1SJohn Marino node->num_tags = 1;
5765f2eab64SJohn Marino }
5775f2eab64SJohn Marino else
5785f2eab64SJohn Marino {
579*d5f8dde1SJohn Marino status = tre_merge_branches(mem, node, NULL, tag,
580*d5f8dde1SJohn Marino tnfa->num_tags);
581*d5f8dde1SJohn Marino if (status != REG_OK)
582*d5f8dde1SJohn Marino break;
583*d5f8dde1SJohn Marino status = tre_add_tag_left(mem, node, tag);
584*d5f8dde1SJohn Marino if (status != REG_OK)
585*d5f8dde1SJohn Marino break;
586*d5f8dde1SJohn Marino if (regset_contains == REGSET_HAS_STARTS)
587*d5f8dde1SJohn Marino tnfa->tag_directions[tag] = TRE_TAG_LEFT_MAXIMIZE;
588*d5f8dde1SJohn Marino else
589*d5f8dde1SJohn Marino tnfa->tag_directions[tag] = direction;
590*d5f8dde1SJohn Marino DPRINT(("Setting t%d direction to %s\n", tag,
591*d5f8dde1SJohn Marino tag_dir_str[tnfa->tag_directions[tag]]));
592*d5f8dde1SJohn Marino tre_purge_regset(regset, tnfa, tag);
593*d5f8dde1SJohn Marino
594*d5f8dde1SJohn Marino if (IS_BACKREF(lit))
595*d5f8dde1SJohn Marino {
596*d5f8dde1SJohn Marino int b = lit->code_max;
597*d5f8dde1SJohn Marino int t = tnfa->submatch_data[b].so_tag;
598*d5f8dde1SJohn Marino /* Fail if the referenced submatch hasn't been
599*d5f8dde1SJohn Marino * completed yet */
600*d5f8dde1SJohn Marino if (tnfa->submatch_data[b].eo_tag < 0)
601*d5f8dde1SJohn Marino {
602*d5f8dde1SJohn Marino status = REG_ESUBREG;
603*d5f8dde1SJohn Marino break;
604*d5f8dde1SJohn Marino }
605*d5f8dde1SJohn Marino if (t < tag)
606*d5f8dde1SJohn Marino {
607*d5f8dde1SJohn Marino DPRINT((" Backref %d start: "
608*d5f8dde1SJohn Marino "t%d reordered to before t%d\n",
609*d5f8dde1SJohn Marino b, tag, t));
610*d5f8dde1SJohn Marino if(t > 0)
611*d5f8dde1SJohn Marino t--;
612*d5f8dde1SJohn Marino /* Append to tag_order, move "tag" after
613*d5f8dde1SJohn Marino * "t" */
614*d5f8dde1SJohn Marino *rtp++ = tag;
615*d5f8dde1SJohn Marino *rtp++ = t;
616*d5f8dde1SJohn Marino }
617*d5f8dde1SJohn Marino #if TRE_DEBUG
618*d5f8dde1SJohn Marino else
619*d5f8dde1SJohn Marino DPRINT((" Backref %d start: "
620*d5f8dde1SJohn Marino "(t%d already before t%d)\n",
621*d5f8dde1SJohn Marino b, tag, t));
622*d5f8dde1SJohn Marino #endif /* TRE_DEBUG */
623*d5f8dde1SJohn Marino }
6245f2eab64SJohn Marino }
6255f2eab64SJohn Marino
626*d5f8dde1SJohn Marino DPRINT((" ADDTAGS_RECURSE:LITERAL num_tags++ tag=%d\n",
627*d5f8dde1SJohn Marino tag));
628*d5f8dde1SJohn Marino regset[0] = REGSET_UNSET;
629*d5f8dde1SJohn Marino regset_contains = 0;
6305f2eab64SJohn Marino tag = next_tag;
6315f2eab64SJohn Marino num_tags++;
6325f2eab64SJohn Marino next_tag++;
6335f2eab64SJohn Marino }
6345f2eab64SJohn Marino }
6355f2eab64SJohn Marino else
6365f2eab64SJohn Marino {
6375f2eab64SJohn Marino assert(!IS_TAG(lit));
6385f2eab64SJohn Marino }
6395f2eab64SJohn Marino break;
6405f2eab64SJohn Marino }
6415f2eab64SJohn Marino case CATENATION:
6425f2eab64SJohn Marino {
6435f2eab64SJohn Marino tre_catenation_t *cat = node->obj;
6445f2eab64SJohn Marino tre_ast_node_t *left = cat->left;
6455f2eab64SJohn Marino tre_ast_node_t *right = cat->right;
6465f2eab64SJohn Marino int reserved_tag = -1;
6475f2eab64SJohn Marino DPRINT(("Catenation, next_tag = %d\n", next_tag));
6485f2eab64SJohn Marino
6495f2eab64SJohn Marino
6505f2eab64SJohn Marino /* After processing right child. */
6515f2eab64SJohn Marino STACK_PUSHX(stack, voidptr, node);
6525f2eab64SJohn Marino STACK_PUSHX(stack, int, ADDTAGS_AFTER_CAT_RIGHT);
6535f2eab64SJohn Marino
6545f2eab64SJohn Marino /* Process right child. */
6555f2eab64SJohn Marino STACK_PUSHX(stack, voidptr, right);
6565f2eab64SJohn Marino STACK_PUSHX(stack, int, ADDTAGS_RECURSE);
6575f2eab64SJohn Marino
6585f2eab64SJohn Marino /* After processing left child. */
6595f2eab64SJohn Marino STACK_PUSHX(stack, int, next_tag + left->num_tags);
6605f2eab64SJohn Marino DPRINT((" Pushing %d for after left\n",
6615f2eab64SJohn Marino next_tag + left->num_tags));
6625f2eab64SJohn Marino if (left->num_tags > 0 && right->num_tags > 0)
6635f2eab64SJohn Marino {
6645f2eab64SJohn Marino /* Reserve the next tag to the right child. */
665*d5f8dde1SJohn Marino DPRINT((" ADDTAGS_RECURSE:CATENATION num_tags++ "
666*d5f8dde1SJohn Marino "Reserving next_tag %d to right child\n",
6675f2eab64SJohn Marino next_tag));
6685f2eab64SJohn Marino reserved_tag = next_tag;
6695f2eab64SJohn Marino next_tag++;
6705f2eab64SJohn Marino }
6715f2eab64SJohn Marino STACK_PUSHX(stack, int, reserved_tag);
6725f2eab64SJohn Marino STACK_PUSHX(stack, int, ADDTAGS_AFTER_CAT_LEFT);
6735f2eab64SJohn Marino
6745f2eab64SJohn Marino /* Process left child. */
6755f2eab64SJohn Marino STACK_PUSHX(stack, voidptr, left);
6765f2eab64SJohn Marino STACK_PUSHX(stack, int, ADDTAGS_RECURSE);
6775f2eab64SJohn Marino
6785f2eab64SJohn Marino }
6795f2eab64SJohn Marino break;
6805f2eab64SJohn Marino case ITERATION:
6815f2eab64SJohn Marino {
6825f2eab64SJohn Marino tre_iteration_t *iter = node->obj;
6835f2eab64SJohn Marino DPRINT(("Iteration\n"));
6845f2eab64SJohn Marino
6855f2eab64SJohn Marino if (first_pass)
686*d5f8dde1SJohn Marino STACK_PUSHX(stack, int, regset_contains != 0);
6875f2eab64SJohn Marino STACK_PUSHX(stack, int, tag);
6885f2eab64SJohn Marino STACK_PUSHX(stack, voidptr, node);
6895f2eab64SJohn Marino STACK_PUSHX(stack, int, ADDTAGS_AFTER_ITERATION);
6905f2eab64SJohn Marino
6915f2eab64SJohn Marino STACK_PUSHX(stack, voidptr, iter->arg);
6925f2eab64SJohn Marino STACK_PUSHX(stack, int, ADDTAGS_RECURSE);
6935f2eab64SJohn Marino
694*d5f8dde1SJohn Marino /* Regset is not empty, so add a tag here (this always happens
695*d5f8dde1SJohn Marino because iterators always get submatch id, even if in the
696*d5f8dde1SJohn Marino invisible range) */
697*d5f8dde1SJohn Marino if (regset_contains)
6985f2eab64SJohn Marino {
6995f2eab64SJohn Marino if (!first_pass)
7005f2eab64SJohn Marino {
701*d5f8dde1SJohn Marino status = tre_merge_branches(mem, node, NULL, tag,
702*d5f8dde1SJohn Marino tnfa->num_tags);
703*d5f8dde1SJohn Marino if (status != REG_OK)
704*d5f8dde1SJohn Marino break;
7055f2eab64SJohn Marino status = tre_add_tag_left(mem, node, tag);
706*d5f8dde1SJohn Marino if (status != REG_OK)
707*d5f8dde1SJohn Marino break;
708*d5f8dde1SJohn Marino if (regset_contains == REGSET_HAS_STARTS && tag != 0)
709*d5f8dde1SJohn Marino tnfa->tag_directions[tag] = iter->minimal ?
710*d5f8dde1SJohn Marino TRE_TAG_MINIMIZE :
711*d5f8dde1SJohn Marino TRE_TAG_LEFT_MAXIMIZE;
7125f2eab64SJohn Marino else
7135f2eab64SJohn Marino tnfa->tag_directions[tag] = direction;
714*d5f8dde1SJohn Marino DPRINT(("Setting t%d direction to %s\n", tag,
715*d5f8dde1SJohn Marino tag_dir_str[tnfa->tag_directions[tag]]));
7165f2eab64SJohn Marino tre_purge_regset(regset, tnfa, tag);
7175f2eab64SJohn Marino }
7185f2eab64SJohn Marino
719*d5f8dde1SJohn Marino DPRINT((" ADDTAGS_RECURSE:ITERATION num_tags++ tag=%d\n",
720*d5f8dde1SJohn Marino tag));
721*d5f8dde1SJohn Marino regset[0] = REGSET_UNSET;
722*d5f8dde1SJohn Marino regset_contains = 0;
7235f2eab64SJohn Marino tag = next_tag;
7245f2eab64SJohn Marino num_tags++;
7255f2eab64SJohn Marino next_tag++;
7265f2eab64SJohn Marino }
727*d5f8dde1SJohn Marino direction = TRE_TAG_LEFT_MAXIMIZE;
728*d5f8dde1SJohn Marino DPRINT((" Setting direction to %s\n", tag_dir_str[direction]));
7295f2eab64SJohn Marino }
7305f2eab64SJohn Marino break;
7315f2eab64SJohn Marino case UNION:
7325f2eab64SJohn Marino {
733*d5f8dde1SJohn Marino tre_union_t *uni;
734*d5f8dde1SJohn Marino tre_ast_node_t *left;
735*d5f8dde1SJohn Marino tre_ast_node_t *right;
736*d5f8dde1SJohn Marino int front_tag = -1;
7375f2eab64SJohn Marino
7385f2eab64SJohn Marino DPRINT(("Union\n"));
7395f2eab64SJohn Marino
740*d5f8dde1SJohn Marino if (regset_contains)
741*d5f8dde1SJohn Marino {
742*d5f8dde1SJohn Marino DPRINT((" UNION num_tags++ tag=%d\n", tag));
743*d5f8dde1SJohn Marino front_tag = tag;
744*d5f8dde1SJohn Marino tag = next_tag;
745*d5f8dde1SJohn Marino num_tags++;
746*d5f8dde1SJohn Marino next_tag++;
747*d5f8dde1SJohn Marino }
748*d5f8dde1SJohn Marino
749*d5f8dde1SJohn Marino /* For the top union, walk the tree of consecutive unions,
750*d5f8dde1SJohn Marino * setting the left_tag and right_tag values in increasing
751*d5f8dde1SJohn Marino * order (left to right priority) */
752*d5f8dde1SJohn Marino if (top_union &&
753*d5f8dde1SJohn Marino (node->num_submatches -
754*d5f8dde1SJohn Marino (node->submatch_id >= 0 &&
755*d5f8dde1SJohn Marino node->submatch_id < SUBMATCH_ID_INVISIBLE_START)) > 0)
756*d5f8dde1SJohn Marino {
757*d5f8dde1SJohn Marino tre_ast_node_t *n;
758*d5f8dde1SJohn Marino int last = tre_stack_num_objects(stack);
759*d5f8dde1SJohn Marino
760*d5f8dde1SJohn Marino STACK_PUSH(stack, voidptr, node);
761*d5f8dde1SJohn Marino STACK_PUSH(stack, int, ADDTAGS_UNION_RECURSE);
762*d5f8dde1SJohn Marino
763*d5f8dde1SJohn Marino while (tre_stack_num_objects(stack) > last)
764*d5f8dde1SJohn Marino {
765*d5f8dde1SJohn Marino symbol = (tre_addtags_symbol_t)tre_stack_pop_int(stack);
766*d5f8dde1SJohn Marino switch (symbol)
767*d5f8dde1SJohn Marino {
768*d5f8dde1SJohn Marino case ADDTAGS_UNION_RECURSE:
769*d5f8dde1SJohn Marino n = tre_stack_pop_voidptr(stack);
770*d5f8dde1SJohn Marino uni = n->obj;
771*d5f8dde1SJohn Marino left = uni->left;
772*d5f8dde1SJohn Marino
773*d5f8dde1SJohn Marino /* Since the top union has num_submatches > 0,
774*d5f8dde1SJohn Marino * we set all the consecutive union's
775*d5f8dde1SJohn Marino * make_branches to 1 to force the generation
776*d5f8dde1SJohn Marino * of end tags for each union branch. */
777*d5f8dde1SJohn Marino n->make_branches = 1;
778*d5f8dde1SJohn Marino
779*d5f8dde1SJohn Marino STACK_PUSH(stack, voidptr, n);
780*d5f8dde1SJohn Marino STACK_PUSH(stack, int,
781*d5f8dde1SJohn Marino ADDTAGS_UNION_RIGHT_RECURSE);
782*d5f8dde1SJohn Marino
783*d5f8dde1SJohn Marino if (left->type == UNION)
784*d5f8dde1SJohn Marino {
785*d5f8dde1SJohn Marino STACK_PUSH(stack, voidptr, left);
786*d5f8dde1SJohn Marino STACK_PUSH(stack, int,
787*d5f8dde1SJohn Marino ADDTAGS_UNION_RECURSE);
788*d5f8dde1SJohn Marino }
789*d5f8dde1SJohn Marino else
790*d5f8dde1SJohn Marino {
791*d5f8dde1SJohn Marino DPRINT((" ADDTAGS_UNION_RECURSE "
792*d5f8dde1SJohn Marino "num_tags++ tag=%d\n", tag));
793*d5f8dde1SJohn Marino uni->left_tag = tag;
794*d5f8dde1SJohn Marino tag = next_tag;
795*d5f8dde1SJohn Marino num_tags++;
796*d5f8dde1SJohn Marino next_tag++;
797*d5f8dde1SJohn Marino }
798*d5f8dde1SJohn Marino break;
799*d5f8dde1SJohn Marino
800*d5f8dde1SJohn Marino case ADDTAGS_UNION_RIGHT_RECURSE:
801*d5f8dde1SJohn Marino n = tre_stack_pop_voidptr(stack);
802*d5f8dde1SJohn Marino uni = n->obj;
803*d5f8dde1SJohn Marino right = uni->right;
804*d5f8dde1SJohn Marino
805*d5f8dde1SJohn Marino if (right->type == UNION)
806*d5f8dde1SJohn Marino {
807*d5f8dde1SJohn Marino STACK_PUSH(stack, voidptr, right);
808*d5f8dde1SJohn Marino STACK_PUSH(stack, int,
809*d5f8dde1SJohn Marino ADDTAGS_UNION_RECURSE);
810*d5f8dde1SJohn Marino }
811*d5f8dde1SJohn Marino else
812*d5f8dde1SJohn Marino {
813*d5f8dde1SJohn Marino DPRINT((" ADDTAGS_UNION_RIGHT_RECURSE "
814*d5f8dde1SJohn Marino "num_tags++ tag=%d\n", tag));
815*d5f8dde1SJohn Marino uni->right_tag = tag;
816*d5f8dde1SJohn Marino tag = next_tag;
817*d5f8dde1SJohn Marino num_tags++;
818*d5f8dde1SJohn Marino next_tag++;
819*d5f8dde1SJohn Marino }
820*d5f8dde1SJohn Marino
821*d5f8dde1SJohn Marino break;
822*d5f8dde1SJohn Marino
823*d5f8dde1SJohn Marino default:
824*d5f8dde1SJohn Marino assert(0);
825*d5f8dde1SJohn Marino break;
826*d5f8dde1SJohn Marino
827*d5f8dde1SJohn Marino } /* end switch(symbol) */
828*d5f8dde1SJohn Marino } /* end while(tre_stack_num_objects(stack) > last */
829*d5f8dde1SJohn Marino if (!first_pass)
830*d5f8dde1SJohn Marino {
831*d5f8dde1SJohn Marino STACK_PUSHX(stack, int, front_tag);
8325f2eab64SJohn Marino STACK_PUSHX(stack, voidptr, node);
833*d5f8dde1SJohn Marino STACK_PUSHX(stack, int, ADDTAGS_AFTER_UNION_TOP);
834*d5f8dde1SJohn Marino }
835*d5f8dde1SJohn Marino } /* end if (top_union && ...) */
836*d5f8dde1SJohn Marino
837*d5f8dde1SJohn Marino uni = node->obj;
838*d5f8dde1SJohn Marino left = uni->left;
839*d5f8dde1SJohn Marino right = uni->right;
840*d5f8dde1SJohn Marino
841*d5f8dde1SJohn Marino /* After processing right child. */
842*d5f8dde1SJohn Marino STACK_PUSHX(stack, voidptr, regset);
843*d5f8dde1SJohn Marino STACK_PUSHX(stack, int, regset_contains != 0);
844*d5f8dde1SJohn Marino STACK_PUSHX(stack, voidptr, node);
8455f2eab64SJohn Marino STACK_PUSHX(stack, int, ADDTAGS_AFTER_UNION_RIGHT);
8465f2eab64SJohn Marino
8475f2eab64SJohn Marino /* Process right child. */
8485f2eab64SJohn Marino STACK_PUSHX(stack, voidptr, right);
849*d5f8dde1SJohn Marino STACK_PUSHX(stack, int, ADDTAGS_RECURSE_NOT_TOP_UNION);
8505f2eab64SJohn Marino
8515f2eab64SJohn Marino /* After processing left child. */
8525f2eab64SJohn Marino STACK_PUSHX(stack, int, ADDTAGS_AFTER_UNION_LEFT);
8535f2eab64SJohn Marino
8545f2eab64SJohn Marino /* Process left child. */
8555f2eab64SJohn Marino STACK_PUSHX(stack, voidptr, left);
856*d5f8dde1SJohn Marino STACK_PUSHX(stack, int, ADDTAGS_RECURSE_NOT_TOP_UNION);
8575f2eab64SJohn Marino
8585f2eab64SJohn Marino /* Regset is not empty, so add a tag here. */
859*d5f8dde1SJohn Marino if (regset_contains)
8605f2eab64SJohn Marino {
8615f2eab64SJohn Marino if (!first_pass)
8625f2eab64SJohn Marino {
863*d5f8dde1SJohn Marino status = tre_merge_branches(mem, node, NULL, front_tag,
864*d5f8dde1SJohn Marino tnfa->num_tags);
865*d5f8dde1SJohn Marino if (status != REG_OK)
866*d5f8dde1SJohn Marino break;
867*d5f8dde1SJohn Marino status = tre_add_tag_left(mem, node, front_tag);
868*d5f8dde1SJohn Marino if (status != REG_OK)
869*d5f8dde1SJohn Marino break;
870*d5f8dde1SJohn Marino if (regset_contains == REGSET_HAS_STARTS)
871*d5f8dde1SJohn Marino tnfa->tag_directions[front_tag] = TRE_TAG_LEFT_MAXIMIZE;
872*d5f8dde1SJohn Marino else
873*d5f8dde1SJohn Marino tnfa->tag_directions[front_tag] = direction;
874*d5f8dde1SJohn Marino DPRINT(("Setting t%d direction to %s\n", front_tag,
875*d5f8dde1SJohn Marino tag_dir_str[tnfa->tag_directions[front_tag]]));
876*d5f8dde1SJohn Marino tre_purge_regset(regset, tnfa, front_tag);
8775f2eab64SJohn Marino }
8785f2eab64SJohn Marino
879*d5f8dde1SJohn Marino regset[0] = REGSET_UNSET;
880*d5f8dde1SJohn Marino regset_contains = 0;
8815f2eab64SJohn Marino }
8825f2eab64SJohn Marino
8835f2eab64SJohn Marino break;
8845f2eab64SJohn Marino }
885*d5f8dde1SJohn Marino } /* end switch (node->type) */
8865f2eab64SJohn Marino
8875f2eab64SJohn Marino break; /* end case: ADDTAGS_RECURSE */
8885f2eab64SJohn Marino
8895f2eab64SJohn Marino case ADDTAGS_AFTER_ITERATION:
8905f2eab64SJohn Marino {
891*d5f8dde1SJohn Marino tre_iteration_t *iter;
892*d5f8dde1SJohn Marino tre_ast_node_t *orig;
8935f2eab64SJohn Marino int enter_tag;
894*d5f8dde1SJohn Marino
8955f2eab64SJohn Marino node = tre_stack_pop_voidptr(stack);
896*d5f8dde1SJohn Marino orig = node->original ? node->original : node;
897*d5f8dde1SJohn Marino iter = (tre_iteration_t *)orig->obj;
8985f2eab64SJohn Marino enter_tag = tre_stack_pop_int(stack);
899*d5f8dde1SJohn Marino if (iter->minimal)
9005f2eab64SJohn Marino minimal_tag = enter_tag;
9015f2eab64SJohn Marino
9025f2eab64SJohn Marino DPRINT(("After iteration\n"));
903*d5f8dde1SJohn Marino if (first_pass)
9045f2eab64SJohn Marino {
905*d5f8dde1SJohn Marino node->num_tags = iter->arg->num_tags + tre_stack_pop_int(stack);
906*d5f8dde1SJohn Marino DPRINT((" ADDTAGS_AFTER_ITERATION: node->num_tags = %d\n",
907*d5f8dde1SJohn Marino node->num_tags));
908*d5f8dde1SJohn Marino }
909*d5f8dde1SJohn Marino else
910*d5f8dde1SJohn Marino {
911*d5f8dde1SJohn Marino /* node->last_matched_branch will have the start tag (the tag
912*d5f8dde1SJohn Marino just *before* the iteration). iter->arg->last_matched_branch
913*d5f8dde1SJohn Marino will have the tag(s) inside the iteration, the ones that
914*d5f8dde1SJohn Marino may need to be reset if the iteration doesn't match. So
915*d5f8dde1SJohn Marino before we merge iter->arg into node, we need to set up
916*d5f8dde1SJohn Marino a new tre_last_matched_t and tre_last_matched_branch_t,
917*d5f8dde1SJohn Marino using any of the inside tags as cmp_tag (we choose the first
918*d5f8dde1SJohn Marino tag found by bit_ffs). If there are no inside tags, we
919*d5f8dde1SJohn Marino don't bother creating the extra structures. */
920*d5f8dde1SJohn Marino tre_last_matched_branch_pre_t *b =
921*d5f8dde1SJohn Marino iter->arg->last_matched_branch;
922*d5f8dde1SJohn Marino
923*d5f8dde1SJohn Marino if (b && b->n_tags > 0)
924*d5f8dde1SJohn Marino {
925*d5f8dde1SJohn Marino tre_last_matched_pre_t *u;
926*d5f8dde1SJohn Marino
927*d5f8dde1SJohn Marino bit_ffs(b->tags, num_tags, &b->cmp_tag);
928*d5f8dde1SJohn Marino DPRINT((" ADDTAGS_AFTER_ITERATION: n_tags=%d "
929*d5f8dde1SJohn Marino "cmp_tag = %d\n", b->n_tags, b->cmp_tag));
930*d5f8dde1SJohn Marino
931*d5f8dde1SJohn Marino u = tre_mem_calloc(mem, sizeof(tre_last_matched_pre_t) +
932*d5f8dde1SJohn Marino sizeof(tre_last_matched_branch_pre_t)
933*d5f8dde1SJohn Marino + bitstr_size(tnfa->num_tags));
934*d5f8dde1SJohn Marino if (!u)
935*d5f8dde1SJohn Marino {
936*d5f8dde1SJohn Marino status = REG_ESPACE;
937*d5f8dde1SJohn Marino break;
938*d5f8dde1SJohn Marino }
939*d5f8dde1SJohn Marino u->branches = b;
940*d5f8dde1SJohn Marino u->n_branches = 1;
941*d5f8dde1SJohn Marino u->start_tag = b->cmp_tag;
942*d5f8dde1SJohn Marino u->tot_branches = b->tot_branches;
943*d5f8dde1SJohn Marino u->tot_last_matched = 1 + b->tot_last_matched;
944*d5f8dde1SJohn Marino u->tot_tags = b->tot_tags;
945*d5f8dde1SJohn Marino
946*d5f8dde1SJohn Marino b = (tre_last_matched_branch_pre_t *)(u + 1);
947*d5f8dde1SJohn Marino b->last_matched = u;
948*d5f8dde1SJohn Marino b->n_last_matched = 1;
949*d5f8dde1SJohn Marino b->tot_branches = 1 + u->tot_branches;
950*d5f8dde1SJohn Marino b->tot_last_matched = u->tot_last_matched;
951*d5f8dde1SJohn Marino b->tot_tags = u->tot_tags;
952*d5f8dde1SJohn Marino
953*d5f8dde1SJohn Marino iter->arg->last_matched_branch = b;
954*d5f8dde1SJohn Marino }
955*d5f8dde1SJohn Marino status = tre_merge_branches(mem, node, iter->arg, 0,
956*d5f8dde1SJohn Marino tnfa->num_tags);
957*d5f8dde1SJohn Marino if (status != REG_OK)
958*d5f8dde1SJohn Marino break;
959*d5f8dde1SJohn Marino
960*d5f8dde1SJohn Marino if (iter->minimal)
961*d5f8dde1SJohn Marino {
962*d5f8dde1SJohn Marino /* Add a union with a left EMPTY literal and the right
963*d5f8dde1SJohn Marino being iter->arg. This should force the tags inside
964*d5f8dde1SJohn Marino the minimal iteration to prefer being unset */
965*d5f8dde1SJohn Marino if (iter->min == 0 && iter->max <= 1)
966*d5f8dde1SJohn Marino {
967*d5f8dde1SJohn Marino tre_ast_node_t *u, *e;
968*d5f8dde1SJohn Marino
969*d5f8dde1SJohn Marino e = tre_ast_new_literal(mem, EMPTY, -1, -1);
970*d5f8dde1SJohn Marino if (e == NULL)
971*d5f8dde1SJohn Marino {
972*d5f8dde1SJohn Marino status = REG_ESPACE;
973*d5f8dde1SJohn Marino break;
974*d5f8dde1SJohn Marino }
975*d5f8dde1SJohn Marino u = tre_ast_new_union(mem, e, iter->arg);
976*d5f8dde1SJohn Marino if (u == NULL)
977*d5f8dde1SJohn Marino {
978*d5f8dde1SJohn Marino status = REG_ESPACE;
979*d5f8dde1SJohn Marino break;
980*d5f8dde1SJohn Marino }
981*d5f8dde1SJohn Marino iter->arg = u;
982*d5f8dde1SJohn Marino }
983*d5f8dde1SJohn Marino
9845f2eab64SJohn Marino direction = TRE_TAG_MINIMIZE;
985*d5f8dde1SJohn Marino }
9865f2eab64SJohn Marino else
9875f2eab64SJohn Marino direction = TRE_TAG_MAXIMIZE;
988*d5f8dde1SJohn Marino DPRINT((" Setting direction to %s\n", tag_dir_str[direction]));
9895f2eab64SJohn Marino }
9905f2eab64SJohn Marino break;
9915f2eab64SJohn Marino }
9925f2eab64SJohn Marino
9935f2eab64SJohn Marino case ADDTAGS_AFTER_CAT_LEFT:
9945f2eab64SJohn Marino {
9955f2eab64SJohn Marino int new_tag = tre_stack_pop_int(stack);
9965f2eab64SJohn Marino next_tag = tre_stack_pop_int(stack);
9975f2eab64SJohn Marino DPRINT(("After cat left, tag = %d, next_tag = %d\n",
9985f2eab64SJohn Marino tag, next_tag));
9995f2eab64SJohn Marino if (new_tag >= 0)
10005f2eab64SJohn Marino {
10015f2eab64SJohn Marino DPRINT((" Setting tag to %d\n", new_tag));
10025f2eab64SJohn Marino tag = new_tag;
10035f2eab64SJohn Marino }
10045f2eab64SJohn Marino break;
10055f2eab64SJohn Marino }
10065f2eab64SJohn Marino
10075f2eab64SJohn Marino case ADDTAGS_AFTER_CAT_RIGHT:
1008*d5f8dde1SJohn Marino {
1009*d5f8dde1SJohn Marino tre_catenation_t *cat;
1010*d5f8dde1SJohn Marino
10115f2eab64SJohn Marino DPRINT(("After cat right\n"));
10125f2eab64SJohn Marino node = tre_stack_pop_voidptr(stack);
1013*d5f8dde1SJohn Marino cat = node->obj;
10145f2eab64SJohn Marino if (first_pass)
1015*d5f8dde1SJohn Marino {
1016*d5f8dde1SJohn Marino node->num_tags = cat->left->num_tags + cat->right->num_tags;
1017*d5f8dde1SJohn Marino DPRINT((" ADDTAGS_AFTER_CAT_RIGHT: node->num_tags = %d\n",
1018*d5f8dde1SJohn Marino node->num_tags));
1019*d5f8dde1SJohn Marino }
1020*d5f8dde1SJohn Marino else
1021*d5f8dde1SJohn Marino {
1022*d5f8dde1SJohn Marino status = tre_merge_branches(mem, cat->left, cat->right, 0,
1023*d5f8dde1SJohn Marino tnfa->num_tags);
1024*d5f8dde1SJohn Marino if (status != REG_OK)
10255f2eab64SJohn Marino break;
1026*d5f8dde1SJohn Marino status = tre_merge_branches(mem, node, cat->left, 0,
1027*d5f8dde1SJohn Marino tnfa->num_tags);
1028*d5f8dde1SJohn Marino }
1029*d5f8dde1SJohn Marino break;
1030*d5f8dde1SJohn Marino }
10315f2eab64SJohn Marino
10325f2eab64SJohn Marino case ADDTAGS_AFTER_UNION_LEFT:
10335f2eab64SJohn Marino DPRINT(("After union left\n"));
10345f2eab64SJohn Marino /* Lift the bottom of the `regset' array so that when processing
10355f2eab64SJohn Marino the right operand the items currently in the array are
10365f2eab64SJohn Marino invisible. The original bottom was saved at ADDTAGS_UNION and
10375f2eab64SJohn Marino will be restored at ADDTAGS_AFTER_UNION_RIGHT below. */
1038*d5f8dde1SJohn Marino while (*regset != REGSET_UNSET)
10395f2eab64SJohn Marino regset++;
1040*d5f8dde1SJohn Marino regset_contains = 0;
10415f2eab64SJohn Marino break;
10425f2eab64SJohn Marino
10435f2eab64SJohn Marino case ADDTAGS_AFTER_UNION_RIGHT:
10445f2eab64SJohn Marino {
1045*d5f8dde1SJohn Marino int added_tags;
1046*d5f8dde1SJohn Marino tre_ast_node_t *orig;
1047*d5f8dde1SJohn Marino tre_union_t *uni;
1048*d5f8dde1SJohn Marino /* Note: node may not be a UNION, but a CATENATION with a left
1049*d5f8dde1SJohn Marino * tag. So that is why we pass the original node->obj on the
1050*d5f8dde1SJohn Marino * stack, to get the union's true values. */
1051*d5f8dde1SJohn Marino
10525f2eab64SJohn Marino DPRINT(("After union right\n"));
10535f2eab64SJohn Marino node = tre_stack_pop_voidptr(stack);
1054*d5f8dde1SJohn Marino orig = node->original ? node->original : node;
1055*d5f8dde1SJohn Marino uni = (tre_union_t *)orig->obj;
10565f2eab64SJohn Marino added_tags = tre_stack_pop_int(stack);
10575f2eab64SJohn Marino if (first_pass)
10585f2eab64SJohn Marino {
1059*d5f8dde1SJohn Marino node->num_tags = uni->left->num_tags + uni->right->num_tags
1060*d5f8dde1SJohn Marino + added_tags;
1061*d5f8dde1SJohn Marino if (uni->left_tag > 0)
1062*d5f8dde1SJohn Marino node->num_tags++;
1063*d5f8dde1SJohn Marino if (uni->right_tag > 0)
1064*d5f8dde1SJohn Marino node->num_tags++;
1065*d5f8dde1SJohn Marino DPRINT((" ADDTAGS_AFTER_UNION_RIGHT: node->num_tags = %d\n",
1066*d5f8dde1SJohn Marino node->num_tags));
10675f2eab64SJohn Marino }
10685f2eab64SJohn Marino regset = tre_stack_pop_voidptr(stack);
10695f2eab64SJohn Marino
10705f2eab64SJohn Marino /* Add tags after both children, the left child gets a smaller
10715f2eab64SJohn Marino tag than the right child. This guarantees that we prefer
10725f2eab64SJohn Marino the left child over the right child. */
10735f2eab64SJohn Marino /* XXX - This is not always necessary (if the children have
10745f2eab64SJohn Marino tags which must be seen for every match of that child). */
1075*d5f8dde1SJohn Marino if (!first_pass && node->make_branches)
10765f2eab64SJohn Marino {
1077*d5f8dde1SJohn Marino tre_last_matched_branch_pre_t *lb =
1078*d5f8dde1SJohn Marino uni->left->last_matched_branch;
1079*d5f8dde1SJohn Marino tre_last_matched_branch_pre_t *rb =
1080*d5f8dde1SJohn Marino uni->right->last_matched_branch;
1081*d5f8dde1SJohn Marino tre_last_matched_pre_t *lu =
1082*d5f8dde1SJohn Marino uni->left->last_matched_in_progress;
1083*d5f8dde1SJohn Marino tre_last_matched_pre_t *ru =
1084*d5f8dde1SJohn Marino uni->right->last_matched_in_progress;
1085*d5f8dde1SJohn Marino tre_last_matched_pre_t *u;
1086*d5f8dde1SJohn Marino /* We don't need to call tre_merge_branches because these
1087*d5f8dde1SJohn Marino * tags don't participate in submatch ranges, so don't need
1088*d5f8dde1SJohn Marino * to be recorded. But we do set the cmp_tag entry of the
1089*d5f8dde1SJohn Marino * tre_last_matched_branch_pre_t, so we might call
1090*d5f8dde1SJohn Marino * tre_merge_branches if we need to create an empty
1091*d5f8dde1SJohn Marino * tre_last_matched_branch_pre_t. */
1092*d5f8dde1SJohn Marino if (uni->left_tag > 0)
10935f2eab64SJohn Marino {
1094*d5f8dde1SJohn Marino DPRINT(("Setting t%d direction to maximize\n",
1095*d5f8dde1SJohn Marino uni->left_tag));
1096*d5f8dde1SJohn Marino status = tre_add_tag_right(mem, uni->left, uni->left_tag);
1097*d5f8dde1SJohn Marino if (status != REG_OK)
1098*d5f8dde1SJohn Marino break;
1099*d5f8dde1SJohn Marino tnfa->tag_directions[uni->left_tag] = TRE_TAG_MAXIMIZE;
1100*d5f8dde1SJohn Marino if (!lb)
1101*d5f8dde1SJohn Marino {
1102*d5f8dde1SJohn Marino status = tre_merge_branches(mem, uni->left, NULL, -1,
1103*d5f8dde1SJohn Marino tnfa->num_tags);
1104*d5f8dde1SJohn Marino if (status != REG_OK)
1105*d5f8dde1SJohn Marino break;
1106*d5f8dde1SJohn Marino lb = uni->left->last_matched_branch;
11075f2eab64SJohn Marino }
1108*d5f8dde1SJohn Marino lb->cmp_tag = uni->left_tag;
1109*d5f8dde1SJohn Marino }
1110*d5f8dde1SJohn Marino if (uni->right_tag > 0)
1111*d5f8dde1SJohn Marino {
1112*d5f8dde1SJohn Marino DPRINT(("Setting t%d direction to maximize\n",
1113*d5f8dde1SJohn Marino uni->right_tag));
1114*d5f8dde1SJohn Marino status = tre_add_tag_right(mem, uni->right, uni->right_tag);
1115*d5f8dde1SJohn Marino if (status != REG_OK)
1116*d5f8dde1SJohn Marino break;
1117*d5f8dde1SJohn Marino tnfa->tag_directions[uni->right_tag] = TRE_TAG_MAXIMIZE;
1118*d5f8dde1SJohn Marino if (!rb)
1119*d5f8dde1SJohn Marino {
1120*d5f8dde1SJohn Marino status = tre_merge_branches(mem, uni->right, NULL, -1,
1121*d5f8dde1SJohn Marino tnfa->num_tags);
1122*d5f8dde1SJohn Marino if (status != REG_OK)
1123*d5f8dde1SJohn Marino break;
1124*d5f8dde1SJohn Marino rb = uni->right->last_matched_branch;
1125*d5f8dde1SJohn Marino }
1126*d5f8dde1SJohn Marino rb->cmp_tag = uni->right_tag;
1127*d5f8dde1SJohn Marino }
1128*d5f8dde1SJohn Marino /* Now merge the tre_last_matched_branch_pre_t into a
1129*d5f8dde1SJohn Marino tre_last_matched_pre_t */
1130*d5f8dde1SJohn Marino if (lu == NULL)
1131*d5f8dde1SJohn Marino {
1132*d5f8dde1SJohn Marino if (ru == NULL)
1133*d5f8dde1SJohn Marino {
1134*d5f8dde1SJohn Marino /* Create a new tre_last_matched_pre_t */
1135*d5f8dde1SJohn Marino u = tre_mem_calloc(mem, sizeof(tre_last_matched_pre_t));
1136*d5f8dde1SJohn Marino if (!u)
1137*d5f8dde1SJohn Marino {
1138*d5f8dde1SJohn Marino status = REG_ESPACE;
1139*d5f8dde1SJohn Marino break;
1140*d5f8dde1SJohn Marino }
1141*d5f8dde1SJohn Marino u->tot_last_matched = 1;
1142*d5f8dde1SJohn Marino
1143*d5f8dde1SJohn Marino if (lb)
1144*d5f8dde1SJohn Marino {
1145*d5f8dde1SJohn Marino u->branches = lb;
1146*d5f8dde1SJohn Marino u->n_branches = 1;
1147*d5f8dde1SJohn Marino u->tot_branches += lb->tot_branches;
1148*d5f8dde1SJohn Marino u->tot_last_matched += lb->tot_last_matched;
1149*d5f8dde1SJohn Marino u->tot_tags += lb->tot_tags;
1150*d5f8dde1SJohn Marino if (rb)
1151*d5f8dde1SJohn Marino {
1152*d5f8dde1SJohn Marino lb->next = rb;
1153*d5f8dde1SJohn Marino u->n_branches++;
1154*d5f8dde1SJohn Marino u->tot_branches += rb->tot_branches;
1155*d5f8dde1SJohn Marino u->tot_last_matched += rb->tot_last_matched;
1156*d5f8dde1SJohn Marino u->tot_tags += rb->tot_tags;
1157*d5f8dde1SJohn Marino }
1158*d5f8dde1SJohn Marino }
1159*d5f8dde1SJohn Marino else if (rb)
1160*d5f8dde1SJohn Marino {
1161*d5f8dde1SJohn Marino u->branches = rb;
1162*d5f8dde1SJohn Marino u->n_branches = 1;
1163*d5f8dde1SJohn Marino u->tot_branches += rb->tot_branches;
1164*d5f8dde1SJohn Marino u->tot_last_matched += rb->tot_last_matched;
1165*d5f8dde1SJohn Marino u->tot_tags += rb->tot_tags;
1166*d5f8dde1SJohn Marino }
1167*d5f8dde1SJohn Marino }
1168*d5f8dde1SJohn Marino else
1169*d5f8dde1SJohn Marino {
1170*d5f8dde1SJohn Marino /* Use ru, and add lb */
1171*d5f8dde1SJohn Marino u = ru;
1172*d5f8dde1SJohn Marino if (lb)
1173*d5f8dde1SJohn Marino {
1174*d5f8dde1SJohn Marino lb->next = u->branches;
1175*d5f8dde1SJohn Marino u->branches = lb;
1176*d5f8dde1SJohn Marino u->n_branches++;
1177*d5f8dde1SJohn Marino u->tot_branches += lb->tot_branches;
1178*d5f8dde1SJohn Marino u->tot_last_matched += lb->tot_last_matched;
1179*d5f8dde1SJohn Marino u->tot_tags += lb->tot_tags;
1180*d5f8dde1SJohn Marino }
1181*d5f8dde1SJohn Marino }
1182*d5f8dde1SJohn Marino }
1183*d5f8dde1SJohn Marino else if (ru == NULL)
1184*d5f8dde1SJohn Marino {
1185*d5f8dde1SJohn Marino /* Use lu, and add rb */
1186*d5f8dde1SJohn Marino u = lu;
1187*d5f8dde1SJohn Marino if (rb)
1188*d5f8dde1SJohn Marino {
1189*d5f8dde1SJohn Marino rb->next = u->branches;
1190*d5f8dde1SJohn Marino u->branches = rb;
1191*d5f8dde1SJohn Marino u->n_branches++;
1192*d5f8dde1SJohn Marino u->tot_branches += rb->tot_branches;
1193*d5f8dde1SJohn Marino u->tot_last_matched += rb->tot_last_matched;
1194*d5f8dde1SJohn Marino u->tot_tags += rb->tot_tags;
1195*d5f8dde1SJohn Marino }
1196*d5f8dde1SJohn Marino }
1197*d5f8dde1SJohn Marino else
1198*d5f8dde1SJohn Marino {
1199*d5f8dde1SJohn Marino /* Merge lu and ru into lu */
1200*d5f8dde1SJohn Marino if (lu->branches)
1201*d5f8dde1SJohn Marino {
1202*d5f8dde1SJohn Marino if (ru->branches)
1203*d5f8dde1SJohn Marino {
1204*d5f8dde1SJohn Marino tre_last_matched_branch_pre_t *b = lu->branches;
1205*d5f8dde1SJohn Marino while (b->next) b = b->next;
1206*d5f8dde1SJohn Marino b->next = ru->branches;
1207*d5f8dde1SJohn Marino lu->n_branches += ru->n_branches;
1208*d5f8dde1SJohn Marino }
1209*d5f8dde1SJohn Marino }
1210*d5f8dde1SJohn Marino else if (ru->branches)
1211*d5f8dde1SJohn Marino {
1212*d5f8dde1SJohn Marino lu->branches = ru->branches;
1213*d5f8dde1SJohn Marino lu->n_branches = ru->n_branches;
1214*d5f8dde1SJohn Marino }
1215*d5f8dde1SJohn Marino lu->tot_branches += ru->tot_branches;
1216*d5f8dde1SJohn Marino lu->tot_last_matched += ru->tot_last_matched - 1;
1217*d5f8dde1SJohn Marino lu->tot_tags += ru->tot_tags;
1218*d5f8dde1SJohn Marino u = lu;
1219*d5f8dde1SJohn Marino }
1220*d5f8dde1SJohn Marino node->last_matched_in_progress = u;
12215f2eab64SJohn Marino }
12225f2eab64SJohn Marino direction = TRE_TAG_MAXIMIZE;
12235f2eab64SJohn Marino break;
12245f2eab64SJohn Marino }
12255f2eab64SJohn Marino
1226*d5f8dde1SJohn Marino case ADDTAGS_AFTER_UNION_TOP: /* only called when not first_pass */
1227*d5f8dde1SJohn Marino {
1228*d5f8dde1SJohn Marino tre_last_matched_branch_pre_t *b;
1229*d5f8dde1SJohn Marino tre_last_matched_pre_t *u;
1230*d5f8dde1SJohn Marino int start_tag;
1231*d5f8dde1SJohn Marino
1232*d5f8dde1SJohn Marino DPRINT(("After union top\n"));
1233*d5f8dde1SJohn Marino node = tre_stack_pop_voidptr(stack);
1234*d5f8dde1SJohn Marino start_tag = tre_stack_pop_int(stack);
1235*d5f8dde1SJohn Marino b = tre_mem_calloc(mem, sizeof(tre_last_matched_branch_pre_t)
1236*d5f8dde1SJohn Marino + bitstr_size(tnfa->num_tags));
1237*d5f8dde1SJohn Marino if (!b)
1238*d5f8dde1SJohn Marino {
1239*d5f8dde1SJohn Marino status = REG_ESPACE;
1240*d5f8dde1SJohn Marino break;
1241*d5f8dde1SJohn Marino }
1242*d5f8dde1SJohn Marino
1243*d5f8dde1SJohn Marino u = node->last_matched_in_progress;
1244*d5f8dde1SJohn Marino u->start_tag = start_tag;
1245*d5f8dde1SJohn Marino b->tot_branches = 1 + u->tot_branches;
1246*d5f8dde1SJohn Marino b->tot_last_matched = u->tot_last_matched;
1247*d5f8dde1SJohn Marino b->tot_tags = u->tot_tags;
1248*d5f8dde1SJohn Marino b->last_matched = u;
1249*d5f8dde1SJohn Marino b->n_last_matched = 1;
1250*d5f8dde1SJohn Marino node->last_matched_branch = b;
1251*d5f8dde1SJohn Marino node->last_matched_in_progress = NULL;
1252*d5f8dde1SJohn Marino break;
1253*d5f8dde1SJohn Marino }
1254*d5f8dde1SJohn Marino
12555f2eab64SJohn Marino default:
12565f2eab64SJohn Marino assert(0);
12575f2eab64SJohn Marino break;
12585f2eab64SJohn Marino
12595f2eab64SJohn Marino } /* end switch(symbol) */
12605f2eab64SJohn Marino } /* end while(tre_stack_num_objects(stack) > bottom) */
12615f2eab64SJohn Marino
1262*d5f8dde1SJohn Marino if (status != REG_OK)
1263*d5f8dde1SJohn Marino {
1264*d5f8dde1SJohn Marino DPRINT(("Error during %s pass\n", first_pass ? "first" : "second"));
1265*d5f8dde1SJohn Marino goto error_post_compile;
1266*d5f8dde1SJohn Marino }
12675f2eab64SJohn Marino
1268*d5f8dde1SJohn Marino if (!first_pass)
12695f2eab64SJohn Marino {
12705f2eab64SJohn Marino int i;
1271*d5f8dde1SJohn Marino if (num_tags != tnfa->num_tags)
1272*d5f8dde1SJohn Marino {
1273*d5f8dde1SJohn Marino DPRINT(("num_tags(%d) != tnfa->num_tags(%d)\n", num_tags,
1274*d5f8dde1SJohn Marino tnfa->num_tags));
1275*d5f8dde1SJohn Marino status = REG_BADPAT;
1276*d5f8dde1SJohn Marino goto error_post_compile;
1277*d5f8dde1SJohn Marino }
1278*d5f8dde1SJohn Marino
1279*d5f8dde1SJohn Marino tre_purge_regset(regset, tnfa, tag);
1280*d5f8dde1SJohn Marino DPRINT(("Setting t%d to %s\n", num_tags,
1281*d5f8dde1SJohn Marino tag_dir_str[direction]));
1282*d5f8dde1SJohn Marino tnfa->tag_directions[num_tags] = direction;
1283*d5f8dde1SJohn Marino
1284*d5f8dde1SJohn Marino if (rtp > reorder_tags + 2 * tnfa->num_reorder_tags)
1285*d5f8dde1SJohn Marino {
1286*d5f8dde1SJohn Marino DPRINT(("Processed %d reorder tags instead of %d\n",
1287*d5f8dde1SJohn Marino (int)(rtp - reorder_tags) / 2, tnfa->num_reorder_tags));
1288*d5f8dde1SJohn Marino status = REG_BADPAT;
1289*d5f8dde1SJohn Marino goto error_post_compile;
1290*d5f8dde1SJohn Marino }
1291*d5f8dde1SJohn Marino *rtp = -1;
1292*d5f8dde1SJohn Marino #if TRE_DEBUG
1293*d5f8dde1SJohn Marino if (reorder_tags[0] >= 0)
1294*d5f8dde1SJohn Marino {
1295*d5f8dde1SJohn Marino DPRINT(("reorder_tags:\n"));
1296*d5f8dde1SJohn Marino for (rtp = reorder_tags; *rtp >= 0;)
1297*d5f8dde1SJohn Marino {
1298*d5f8dde1SJohn Marino DPRINT(("%d after ", *rtp++));
1299*d5f8dde1SJohn Marino DPRINT(("%d\n", *rtp++));
1300*d5f8dde1SJohn Marino }
1301*d5f8dde1SJohn Marino }
1302*d5f8dde1SJohn Marino else
1303*d5f8dde1SJohn Marino DPRINT(("No reorder_tags\n"));
1304*d5f8dde1SJohn Marino #endif /* TRE_DEBUG */
1305*d5f8dde1SJohn Marino
1306*d5f8dde1SJohn Marino /* Initialize to_reorder */
1307*d5f8dde1SJohn Marino for (i = 0; i < num_tags; i++)
1308*d5f8dde1SJohn Marino to_reorder[i] = i;
1309*d5f8dde1SJohn Marino /* Use to_seq_order to convert reorder_tags values, and use those to
1310*d5f8dde1SJohn Marino * reorder to_reorder */
1311*d5f8dde1SJohn Marino for (rtp = reorder_tags; *rtp >= 0;)
1312*d5f8dde1SJohn Marino {
1313*d5f8dde1SJohn Marino int j, high, low;
1314*d5f8dde1SJohn Marino int ti = *rtp++;
1315*d5f8dde1SJohn Marino
1316*d5f8dde1SJohn Marino /* Skip reordering the final tag */
1317*d5f8dde1SJohn Marino if (ti >= num_tags)
1318*d5f8dde1SJohn Marino {
1319*d5f8dde1SJohn Marino DPRINT(("Skipping reorder of %d\n", ti));
1320*d5f8dde1SJohn Marino rtp++;
1321*d5f8dde1SJohn Marino continue;
1322*d5f8dde1SJohn Marino }
1323*d5f8dde1SJohn Marino /* The number of the tag to reorder */
1324*d5f8dde1SJohn Marino high = to_reorder[ti];
1325*d5f8dde1SJohn Marino /* Reorder after this tag */
1326*d5f8dde1SJohn Marino low = to_reorder[*rtp++];
1327*d5f8dde1SJohn Marino
1328*d5f8dde1SJohn Marino DPRINT(("ti=%d high=%d low=%d\n", ti, high, low));
1329*d5f8dde1SJohn Marino if (low > high)
1330*d5f8dde1SJohn Marino {
1331*d5f8dde1SJohn Marino DPRINT(("Tag %d already before %d\n", high, low));
1332*d5f8dde1SJohn Marino continue;
1333*d5f8dde1SJohn Marino }
1334*d5f8dde1SJohn Marino for (j = 0; j < num_tags; j++)
1335*d5f8dde1SJohn Marino if (to_reorder[j] > low && to_reorder[j] < high)
1336*d5f8dde1SJohn Marino to_reorder[j]++;
1337*d5f8dde1SJohn Marino to_reorder[ti] = low + 1;
1338*d5f8dde1SJohn Marino #ifdef TRE_DEBUG
1339*d5f8dde1SJohn Marino DPRINT(("to_reorder=("));
1340*d5f8dde1SJohn Marino for (j = 0; j < num_tags; j++)
1341*d5f8dde1SJohn Marino {
1342*d5f8dde1SJohn Marino DPRINT(("%d", to_reorder[j]));
1343*d5f8dde1SJohn Marino if (j < num_tags - 1)
1344*d5f8dde1SJohn Marino DPRINT((","));
1345*d5f8dde1SJohn Marino }
1346*d5f8dde1SJohn Marino DPRINT((")\n"));
1347*d5f8dde1SJohn Marino #endif /* TRE_DEBUG */
1348*d5f8dde1SJohn Marino }
1349*d5f8dde1SJohn Marino /* Determine if reordering in really necessary */
1350*d5f8dde1SJohn Marino {
1351*d5f8dde1SJohn Marino int need_reorder = 0;
1352*d5f8dde1SJohn Marino for (i = 0; i < num_tags; i++)
1353*d5f8dde1SJohn Marino if(to_reorder[i] != i)
1354*d5f8dde1SJohn Marino {
1355*d5f8dde1SJohn Marino need_reorder = 1;
1356*d5f8dde1SJohn Marino break;
1357*d5f8dde1SJohn Marino }
1358*d5f8dde1SJohn Marino /* If need_reorder is not set, free reorder_tags, and set to NULL,
1359*d5f8dde1SJohn Marino * indicating no reordering is needed */
1360*d5f8dde1SJohn Marino if (!need_reorder)
1361*d5f8dde1SJohn Marino {
1362*d5f8dde1SJohn Marino DPRINT(("Don't need to reorder\n"));
1363*d5f8dde1SJohn Marino xfree(reorder_tags);
1364*d5f8dde1SJohn Marino reorder_tags = NULL;
1365*d5f8dde1SJohn Marino }
1366*d5f8dde1SJohn Marino }
1367*d5f8dde1SJohn Marino }
1368*d5f8dde1SJohn Marino
1369*d5f8dde1SJohn Marino if (reorder_tags)
1370*d5f8dde1SJohn Marino {
1371*d5f8dde1SJohn Marino int i;
1372*d5f8dde1SJohn Marino tre_tag_direction_t *new_tag_directions;
1373*d5f8dde1SJohn Marino #if TRE_DEBUG
1374*d5f8dde1SJohn Marino DPRINT(("to_reorder:"));
1375*d5f8dde1SJohn Marino for (i = 0; i < num_tags; i++)
1376*d5f8dde1SJohn Marino DPRINT((" %d->%d", i, to_reorder[i]));
1377*d5f8dde1SJohn Marino DPRINT(("\n"));
1378*d5f8dde1SJohn Marino #endif /* TRE_DEBUG */
1379*d5f8dde1SJohn Marino
1380*d5f8dde1SJohn Marino DPRINT(("Reordering submatch_data\n"));
1381*d5f8dde1SJohn Marino for (i = 0; i < tnfa->num_submatches; i++)
1382*d5f8dde1SJohn Marino {
1383*d5f8dde1SJohn Marino #if TRE_DEBUG
1384*d5f8dde1SJohn Marino int so = tnfa->submatch_data[i].so_tag;
1385*d5f8dde1SJohn Marino int eo = tnfa->submatch_data[i].eo_tag;
1386*d5f8dde1SJohn Marino #endif /* TRE_DEBUG */
1387*d5f8dde1SJohn Marino tnfa->submatch_data[i].so_tag =
1388*d5f8dde1SJohn Marino to_reorder[tnfa->submatch_data[i].so_tag];
1389*d5f8dde1SJohn Marino tnfa->submatch_data[i].eo_tag =
1390*d5f8dde1SJohn Marino tnfa->submatch_data[i].eo_tag < num_tags ?
1391*d5f8dde1SJohn Marino to_reorder[tnfa->submatch_data[i].eo_tag] :
1392*d5f8dde1SJohn Marino tnfa->submatch_data[i].eo_tag;
1393*d5f8dde1SJohn Marino DPRINT(("pmatch[%d]: {%d, %d}->{%d, %d}\n", i, so, eo,
1394*d5f8dde1SJohn Marino tnfa->submatch_data[i].so_tag,
1395*d5f8dde1SJohn Marino tnfa->submatch_data[i].eo_tag));
1396*d5f8dde1SJohn Marino }
1397*d5f8dde1SJohn Marino
1398*d5f8dde1SJohn Marino DPRINT(("Reordering tag_directions\n"));
1399*d5f8dde1SJohn Marino /* We only allocate num_tags directions and reorder them. The
1400*d5f8dde1SJohn Marino * num_tags-th direction (end tag) is left unchanged. */
1401*d5f8dde1SJohn Marino new_tag_directions = xmalloc(sizeof(*new_tag_directions) * num_tags);
1402*d5f8dde1SJohn Marino if (new_tag_directions == NULL)
1403*d5f8dde1SJohn Marino {
1404*d5f8dde1SJohn Marino status = REG_ESPACE;
1405*d5f8dde1SJohn Marino goto error_post_compile;
1406*d5f8dde1SJohn Marino }
1407*d5f8dde1SJohn Marino for (i = 0; i < num_tags; i++)
1408*d5f8dde1SJohn Marino {
1409*d5f8dde1SJohn Marino new_tag_directions[to_reorder[i]] = tnfa->tag_directions[i];
1410*d5f8dde1SJohn Marino }
1411*d5f8dde1SJohn Marino #if TRE_DEBUG
1412*d5f8dde1SJohn Marino for (i = 0; i < num_tags; i++)
1413*d5f8dde1SJohn Marino {
1414*d5f8dde1SJohn Marino DPRINT(("t%d %s->%s\n", i,
1415*d5f8dde1SJohn Marino tag_dir_str[tnfa->tag_directions[i]],
1416*d5f8dde1SJohn Marino tag_dir_str[new_tag_directions[i]]));
1417*d5f8dde1SJohn Marino }
1418*d5f8dde1SJohn Marino DPRINT(("t%d %s->%s\n", num_tags,
1419*d5f8dde1SJohn Marino tag_dir_str[tnfa->tag_directions[num_tags]],
1420*d5f8dde1SJohn Marino tag_dir_str[tnfa->tag_directions[num_tags]]));
1421*d5f8dde1SJohn Marino #endif /* TRE_DEBUG */
1422*d5f8dde1SJohn Marino memcpy(tnfa->tag_directions, new_tag_directions, sizeof(*new_tag_directions) * num_tags);
1423*d5f8dde1SJohn Marino xfree(new_tag_directions);
1424*d5f8dde1SJohn Marino
1425*d5f8dde1SJohn Marino DPRINT(("Reordering minimal_tags\n"));
1426*d5f8dde1SJohn Marino for (i = 0; tnfa->minimal_tags[i] >= 0; i++)
1427*d5f8dde1SJohn Marino tnfa->minimal_tags[i] = tnfa->minimal_tags[i] < num_tags ?
1428*d5f8dde1SJohn Marino to_reorder[tnfa->minimal_tags[i]] :
1429*d5f8dde1SJohn Marino tnfa->minimal_tags[i];
1430*d5f8dde1SJohn Marino
1431*d5f8dde1SJohn Marino DPRINT(("Reordering AST tags\n"));
1432*d5f8dde1SJohn Marino STACK_PUSH(stack, voidptr, tree);
1433*d5f8dde1SJohn Marino while (status == REG_OK && tre_stack_num_objects(stack) > bottom)
1434*d5f8dde1SJohn Marino {
1435*d5f8dde1SJohn Marino node = tre_stack_pop_voidptr(stack);
1436*d5f8dde1SJohn Marino
1437*d5f8dde1SJohn Marino switch (node->type)
1438*d5f8dde1SJohn Marino {
1439*d5f8dde1SJohn Marino case LITERAL:
1440*d5f8dde1SJohn Marino {
1441*d5f8dde1SJohn Marino tre_literal_t *lit = (tre_literal_t *)node->obj;
1442*d5f8dde1SJohn Marino if (IS_TAG(lit))
1443*d5f8dde1SJohn Marino lit->code_max = to_reorder[lit->code_max];
1444*d5f8dde1SJohn Marino break;
1445*d5f8dde1SJohn Marino }
1446*d5f8dde1SJohn Marino
1447*d5f8dde1SJohn Marino case UNION:
1448*d5f8dde1SJohn Marino {
1449*d5f8dde1SJohn Marino tre_union_t *uni = (tre_union_t *)node->obj;
1450*d5f8dde1SJohn Marino STACK_PUSHX(stack, voidptr, uni->right);
1451*d5f8dde1SJohn Marino STACK_PUSHX(stack, voidptr, uni->left);
1452*d5f8dde1SJohn Marino break;
1453*d5f8dde1SJohn Marino }
1454*d5f8dde1SJohn Marino
1455*d5f8dde1SJohn Marino case CATENATION:
1456*d5f8dde1SJohn Marino {
1457*d5f8dde1SJohn Marino tre_catenation_t *cat = (tre_catenation_t *)node->obj;
1458*d5f8dde1SJohn Marino STACK_PUSHX(stack, voidptr, cat->right);
1459*d5f8dde1SJohn Marino STACK_PUSHX(stack, voidptr, cat->left);
1460*d5f8dde1SJohn Marino break;
1461*d5f8dde1SJohn Marino }
1462*d5f8dde1SJohn Marino
1463*d5f8dde1SJohn Marino case ITERATION:
1464*d5f8dde1SJohn Marino {
1465*d5f8dde1SJohn Marino tre_iteration_t *iter = (tre_iteration_t *)node->obj;
1466*d5f8dde1SJohn Marino STACK_PUSHX(stack, voidptr, iter->arg);
1467*d5f8dde1SJohn Marino break;
1468*d5f8dde1SJohn Marino }
1469*d5f8dde1SJohn Marino
1470*d5f8dde1SJohn Marino default:
1471*d5f8dde1SJohn Marino assert(0);
1472*d5f8dde1SJohn Marino break;
1473*d5f8dde1SJohn Marino }
1474*d5f8dde1SJohn Marino }
1475*d5f8dde1SJohn Marino if (status != REG_OK)
1476*d5f8dde1SJohn Marino {
1477*d5f8dde1SJohn Marino DPRINT(("Error while reordering tags\n"));
1478*d5f8dde1SJohn Marino goto error_post_compile;
1479*d5f8dde1SJohn Marino }
1480*d5f8dde1SJohn Marino }
1481*d5f8dde1SJohn Marino
1482*d5f8dde1SJohn Marino
1483*d5f8dde1SJohn Marino if (!first_pass)
1484*d5f8dde1SJohn Marino {
1485*d5f8dde1SJohn Marino if (tree->last_matched_branch)
1486*d5f8dde1SJohn Marino {
1487*d5f8dde1SJohn Marino tre_last_matched_branch_t *buf, *b, *bb;
1488*d5f8dde1SJohn Marino tre_last_matched_branch_pre_t *bp;
1489*d5f8dde1SJohn Marino tre_last_matched_t *u, *uu;
1490*d5f8dde1SJohn Marino tre_last_matched_pre_t *up;
1491*d5f8dde1SJohn Marino int *t;
1492*d5f8dde1SJohn Marino int i;
1493*d5f8dde1SJohn Marino #ifdef TRE_DEBUG
1494*d5f8dde1SJohn Marino tre_last_matched_branch_t *_b;
1495*d5f8dde1SJohn Marino tre_last_matched_t *_u;
1496*d5f8dde1SJohn Marino int *_t;
1497*d5f8dde1SJohn Marino
1498*d5f8dde1SJohn Marino DPRINT(("last_match_branch_pre:\n"));
1499*d5f8dde1SJohn Marino print_last_match_branch_pre(tree->last_matched_branch, 0, num_tags);
1500*d5f8dde1SJohn Marino #endif /* TRE_DEBUG */
1501*d5f8dde1SJohn Marino buf = (tre_last_matched_branch_t *)xcalloc(1,
1502*d5f8dde1SJohn Marino tree->last_matched_branch->tot_branches
1503*d5f8dde1SJohn Marino * sizeof(tre_last_matched_branch_t) +
1504*d5f8dde1SJohn Marino tree->last_matched_branch->tot_last_matched
1505*d5f8dde1SJohn Marino * sizeof(tre_last_matched_t) +
1506*d5f8dde1SJohn Marino tree->last_matched_branch->tot_tags *
1507*d5f8dde1SJohn Marino sizeof(int));
1508*d5f8dde1SJohn Marino if (!buf)
1509*d5f8dde1SJohn Marino {
1510*d5f8dde1SJohn Marino status = REG_ESPACE;
1511*d5f8dde1SJohn Marino goto error_post_compile;
1512*d5f8dde1SJohn Marino }
1513*d5f8dde1SJohn Marino
1514*d5f8dde1SJohn Marino b = buf;
1515*d5f8dde1SJohn Marino u = (tre_last_matched_t *)(b +
1516*d5f8dde1SJohn Marino tree->last_matched_branch->tot_branches);
1517*d5f8dde1SJohn Marino t = (int *)(u + tree->last_matched_branch->tot_last_matched);
1518*d5f8dde1SJohn Marino #ifdef TRE_DEBUG
1519*d5f8dde1SJohn Marino _b = b;
1520*d5f8dde1SJohn Marino _u = u;
1521*d5f8dde1SJohn Marino _t = t;
1522*d5f8dde1SJohn Marino #endif /* TRE_DEBUG */
1523*d5f8dde1SJohn Marino DPRINT(("Copying info_pre to info\n"));
1524*d5f8dde1SJohn Marino STACK_PUSH(stack, voidptr, tree->last_matched_branch);
1525*d5f8dde1SJohn Marino STACK_PUSH(stack, int, 1);
1526*d5f8dde1SJohn Marino STACK_PUSH(stack, int, COPY_LAST_MATCHED_BRANCH);
1527*d5f8dde1SJohn Marino
1528*d5f8dde1SJohn Marino while (status == REG_OK && tre_stack_num_objects(stack) > bottom)
1529*d5f8dde1SJohn Marino {
1530*d5f8dde1SJohn Marino switch (tre_stack_pop_int(stack))
1531*d5f8dde1SJohn Marino {
1532*d5f8dde1SJohn Marino case COPY_LAST_MATCHED_BRANCH:
1533*d5f8dde1SJohn Marino i = tre_stack_pop_int(stack);
1534*d5f8dde1SJohn Marino /* The tre_last_matched_branch_pre_t * is still on the
1535*d5f8dde1SJohn Marino stack */
1536*d5f8dde1SJohn Marino STACK_PUSHX(stack, voidptr, b);
1537*d5f8dde1SJohn Marino STACK_PUSHX(stack, int, COPY_LAST_MATCHED_BRANCH_NEXT);
1538*d5f8dde1SJohn Marino b += i;
1539*d5f8dde1SJohn Marino break;
1540*d5f8dde1SJohn Marino
1541*d5f8dde1SJohn Marino case COPY_LAST_MATCHED_BRANCH_NEXT:
1542*d5f8dde1SJohn Marino bb = tre_stack_pop_voidptr(stack);
1543*d5f8dde1SJohn Marino bp = tre_stack_pop_voidptr(stack);
1544*d5f8dde1SJohn Marino bb->n_last_matched = bp->n_last_matched;
1545*d5f8dde1SJohn Marino bb->cmp_tag = bp->cmp_tag;
1546*d5f8dde1SJohn Marino if (bp->n_tags > 0)
1547*d5f8dde1SJohn Marino {
1548*d5f8dde1SJohn Marino int n;
1549*d5f8dde1SJohn Marino n = bb->n_tags = bp->n_tags;
1550*d5f8dde1SJohn Marino bb->tags = t;
1551*d5f8dde1SJohn Marino for (i = 0; i < num_tags; i++)
1552*d5f8dde1SJohn Marino if (bit_test(bp->tags, i))
1553*d5f8dde1SJohn Marino {
1554*d5f8dde1SJohn Marino *t++ = i;
1555*d5f8dde1SJohn Marino if (--n <= 0)
1556*d5f8dde1SJohn Marino break;
1557*d5f8dde1SJohn Marino }
1558*d5f8dde1SJohn Marino }
1559*d5f8dde1SJohn Marino if (bp->next)
1560*d5f8dde1SJohn Marino {
1561*d5f8dde1SJohn Marino STACK_PUSHX(stack, voidptr, bp->next);
1562*d5f8dde1SJohn Marino STACK_PUSHX(stack, voidptr, bb + 1);
1563*d5f8dde1SJohn Marino STACK_PUSHX(stack, int, COPY_LAST_MATCHED_BRANCH_NEXT);
1564*d5f8dde1SJohn Marino }
1565*d5f8dde1SJohn Marino if (bp->n_last_matched > 0)
1566*d5f8dde1SJohn Marino {
1567*d5f8dde1SJohn Marino bb->last_matched = u;
1568*d5f8dde1SJohn Marino STACK_PUSHX(stack, voidptr, bp->last_matched);
1569*d5f8dde1SJohn Marino STACK_PUSHX(stack, int, bp->n_last_matched);
1570*d5f8dde1SJohn Marino STACK_PUSHX(stack, int, COPY_LAST_MATCHED);
1571*d5f8dde1SJohn Marino }
1572*d5f8dde1SJohn Marino break;
1573*d5f8dde1SJohn Marino
1574*d5f8dde1SJohn Marino case COPY_LAST_MATCHED:
1575*d5f8dde1SJohn Marino i = tre_stack_pop_int(stack);
1576*d5f8dde1SJohn Marino /* The tre_last_matched_pre_t * is still on the stack */
1577*d5f8dde1SJohn Marino STACK_PUSHX(stack, voidptr, u);
1578*d5f8dde1SJohn Marino STACK_PUSHX(stack, int, COPY_LAST_MATCHED_NEXT);
1579*d5f8dde1SJohn Marino u += i;
1580*d5f8dde1SJohn Marino break;
1581*d5f8dde1SJohn Marino
1582*d5f8dde1SJohn Marino case COPY_LAST_MATCHED_NEXT:
1583*d5f8dde1SJohn Marino uu = tre_stack_pop_voidptr(stack);
1584*d5f8dde1SJohn Marino up = tre_stack_pop_voidptr(stack);
1585*d5f8dde1SJohn Marino uu->n_branches = up->n_branches;
1586*d5f8dde1SJohn Marino uu->branches = b;
1587*d5f8dde1SJohn Marino uu->start_tag = up->start_tag;
1588*d5f8dde1SJohn Marino if (up->next)
1589*d5f8dde1SJohn Marino {
1590*d5f8dde1SJohn Marino STACK_PUSHX(stack, voidptr, up->next);
1591*d5f8dde1SJohn Marino STACK_PUSHX(stack, voidptr, uu + 1);
1592*d5f8dde1SJohn Marino STACK_PUSHX(stack, int, COPY_LAST_MATCHED_NEXT);
1593*d5f8dde1SJohn Marino }
1594*d5f8dde1SJohn Marino STACK_PUSHX(stack, voidptr, up->branches);
1595*d5f8dde1SJohn Marino STACK_PUSHX(stack, int, up->n_branches);
1596*d5f8dde1SJohn Marino STACK_PUSHX(stack, int, COPY_LAST_MATCHED_BRANCH);
1597*d5f8dde1SJohn Marino break;
1598*d5f8dde1SJohn Marino }
1599*d5f8dde1SJohn Marino }
1600*d5f8dde1SJohn Marino if (status != REG_OK)
1601*d5f8dde1SJohn Marino goto error_post_compile;
1602*d5f8dde1SJohn Marino #ifdef TRE_DEBUG
1603*d5f8dde1SJohn Marino DPRINT(("last_matched_branch:\n"));
1604*d5f8dde1SJohn Marino print_last_match_branch(buf, 0);
1605*d5f8dde1SJohn Marino if (b != _b + tree->last_matched_branch->tot_branches)
1606*d5f8dde1SJohn Marino DPRINT(("b/%p != _b + tree->last_matched_branch->tot_branches/%p\n",
1607*d5f8dde1SJohn Marino b, _b + tree->last_matched_branch->tot_branches));
1608*d5f8dde1SJohn Marino if (u != _u + tree->last_matched_branch->tot_last_matched)
1609*d5f8dde1SJohn Marino DPRINT(("u/%p != _u + "
1610*d5f8dde1SJohn Marino "tree->last_matched_branch->tot_last_matched/%p\n",
1611*d5f8dde1SJohn Marino u, _u + tree->last_matched_branch->tot_last_matched));
1612*d5f8dde1SJohn Marino if (t != _t + tree->last_matched_branch->tot_tags)
1613*d5f8dde1SJohn Marino DPRINT(("t/%p != _t + tree->last_matched_branch->tot_tags/%p\n",
1614*d5f8dde1SJohn Marino t, _t + tree->last_matched_branch->tot_tags));
1615*d5f8dde1SJohn Marino #endif /* TRE_DEBUG */
1616*d5f8dde1SJohn Marino tnfa->last_matched_branch = buf;
1617*d5f8dde1SJohn Marino }
1618*d5f8dde1SJohn Marino #ifdef TRE_DEBUG
1619*d5f8dde1SJohn Marino else
1620*d5f8dde1SJohn Marino DPRINT(("No last_match_branch_pre\n"));
1621*d5f8dde1SJohn Marino #endif /* TRE_DEBUG */
16225f2eab64SJohn Marino }
16235f2eab64SJohn Marino
16245f2eab64SJohn Marino DPRINT(("tre_add_tags: %s complete. Number of tags %d.\n",
16255f2eab64SJohn Marino first_pass? "First pass" : "Second pass", num_tags));
1626*d5f8dde1SJohn Marino #ifdef TRE_DEBUG
1627*d5f8dde1SJohn Marino tre_ast_print(tree);
1628*d5f8dde1SJohn Marino #endif /* TRE_DEBUG */
1629*d5f8dde1SJohn Marino DPRINT(("tre_add_tags: tree->num_tags=%d num_tags=%d\n", tree->num_tags,
1630*d5f8dde1SJohn Marino num_tags));
16315f2eab64SJohn Marino assert(tree->num_tags == num_tags);
16325f2eab64SJohn Marino tnfa->end_tag = num_tags;
16335f2eab64SJohn Marino tnfa->num_tags = num_tags;
16345f2eab64SJohn Marino tnfa->num_minimals = num_minimals;
1635*d5f8dde1SJohn Marino error_post_compile:
1636*d5f8dde1SJohn Marino xfree(reorder_tags);
1637*d5f8dde1SJohn Marino error_reorder_tags:
16385f2eab64SJohn Marino xfree(orig_regset);
1639*d5f8dde1SJohn Marino error_regset:
16405f2eab64SJohn Marino return status;
16415f2eab64SJohn Marino }
16425f2eab64SJohn Marino
16435f2eab64SJohn Marino
16445f2eab64SJohn Marino
16455f2eab64SJohn Marino /*
16465f2eab64SJohn Marino AST to TNFA compilation routines.
16475f2eab64SJohn Marino */
16485f2eab64SJohn Marino
16495f2eab64SJohn Marino typedef enum {
16505f2eab64SJohn Marino COPY_RECURSE,
16515f2eab64SJohn Marino COPY_SET_RESULT_PTR
16525f2eab64SJohn Marino } tre_copyast_symbol_t;
16535f2eab64SJohn Marino
16545f2eab64SJohn Marino /* Flags for tre_copy_ast(). */
16555f2eab64SJohn Marino #define COPY_REMOVE_TAGS 1
16565f2eab64SJohn Marino #define COPY_MAXIMIZE_FIRST_TAG 2
16575f2eab64SJohn Marino
16585f2eab64SJohn Marino static reg_errcode_t
tre_copy_ast(tre_mem_t mem,tre_stack_t * stack,tre_ast_node_t * ast,int flags,int * pos_add,tre_tag_direction_t * tag_directions,tre_ast_node_t ** copy,int * max_pos)16595f2eab64SJohn Marino tre_copy_ast(tre_mem_t mem, tre_stack_t *stack, tre_ast_node_t *ast,
16605f2eab64SJohn Marino int flags, int *pos_add, tre_tag_direction_t *tag_directions,
16615f2eab64SJohn Marino tre_ast_node_t **copy, int *max_pos)
16625f2eab64SJohn Marino {
16635f2eab64SJohn Marino reg_errcode_t status = REG_OK;
16645f2eab64SJohn Marino int bottom = tre_stack_num_objects(stack);
16655f2eab64SJohn Marino int num_copied = 0;
16665f2eab64SJohn Marino int first_tag = 1;
16675f2eab64SJohn Marino tre_ast_node_t **result = copy;
16685f2eab64SJohn Marino tre_copyast_symbol_t symbol;
16695f2eab64SJohn Marino
16705f2eab64SJohn Marino STACK_PUSH(stack, voidptr, ast);
16715f2eab64SJohn Marino STACK_PUSH(stack, int, COPY_RECURSE);
16725f2eab64SJohn Marino
16735f2eab64SJohn Marino while (status == REG_OK && tre_stack_num_objects(stack) > bottom)
16745f2eab64SJohn Marino {
16755f2eab64SJohn Marino tre_ast_node_t *node;
16765f2eab64SJohn Marino if (status != REG_OK)
16775f2eab64SJohn Marino break;
16785f2eab64SJohn Marino
16795f2eab64SJohn Marino symbol = (tre_copyast_symbol_t)tre_stack_pop_int(stack);
16805f2eab64SJohn Marino switch (symbol)
16815f2eab64SJohn Marino {
16825f2eab64SJohn Marino case COPY_SET_RESULT_PTR:
16835f2eab64SJohn Marino result = tre_stack_pop_voidptr(stack);
16845f2eab64SJohn Marino break;
16855f2eab64SJohn Marino case COPY_RECURSE:
16865f2eab64SJohn Marino node = tre_stack_pop_voidptr(stack);
16875f2eab64SJohn Marino switch (node->type)
16885f2eab64SJohn Marino {
16895f2eab64SJohn Marino case LITERAL:
16905f2eab64SJohn Marino {
16915f2eab64SJohn Marino tre_literal_t *lit = node->obj;
16925f2eab64SJohn Marino int pos = lit->position;
16935f2eab64SJohn Marino int min = lit->code_min;
16945f2eab64SJohn Marino int max = lit->code_max;
1695*d5f8dde1SJohn Marino tre_bracket_match_list_t *list = !IS_SPECIAL(lit) ?
1696*d5f8dde1SJohn Marino lit->u.bracket_match_list :
1697*d5f8dde1SJohn Marino NULL;
16985f2eab64SJohn Marino if (!IS_SPECIAL(lit) || IS_BACKREF(lit))
16995f2eab64SJohn Marino {
17005f2eab64SJohn Marino /* XXX - e.g. [ab] has only one position but two
17015f2eab64SJohn Marino nodes, so we are creating holes in the state space
17025f2eab64SJohn Marino here. Not fatal, just wastes memory. */
17035f2eab64SJohn Marino pos += *pos_add;
17045f2eab64SJohn Marino num_copied++;
17055f2eab64SJohn Marino }
17065f2eab64SJohn Marino else if (IS_TAG(lit) && (flags & COPY_REMOVE_TAGS))
17075f2eab64SJohn Marino {
17085f2eab64SJohn Marino /* Change this tag to empty. */
17095f2eab64SJohn Marino min = EMPTY;
17105f2eab64SJohn Marino max = pos = -1;
17115f2eab64SJohn Marino }
17125f2eab64SJohn Marino else if (IS_TAG(lit) && (flags & COPY_MAXIMIZE_FIRST_TAG)
17135f2eab64SJohn Marino && first_tag)
17145f2eab64SJohn Marino {
17155f2eab64SJohn Marino /* Maximize the first tag. */
1716*d5f8dde1SJohn Marino if (tag_directions[max] == TRE_TAG_LEFT_MAXIMIZE)
17175f2eab64SJohn Marino tag_directions[max] = TRE_TAG_MAXIMIZE;
17185f2eab64SJohn Marino first_tag = 0;
17195f2eab64SJohn Marino }
17205f2eab64SJohn Marino *result = tre_ast_new_literal(mem, min, max, pos);
17215f2eab64SJohn Marino if (*result == NULL)
17225f2eab64SJohn Marino status = REG_ESPACE;
17235f2eab64SJohn Marino
17245f2eab64SJohn Marino if (pos > *max_pos)
17255f2eab64SJohn Marino *max_pos = pos;
1726*d5f8dde1SJohn Marino
1727*d5f8dde1SJohn Marino if (!IS_SPECIAL(lit))
1728*d5f8dde1SJohn Marino ((tre_literal_t *)(*result)->obj)->u.bracket_match_list
1729*d5f8dde1SJohn Marino = list;
17305f2eab64SJohn Marino break;
17315f2eab64SJohn Marino }
17325f2eab64SJohn Marino case UNION:
17335f2eab64SJohn Marino {
17345f2eab64SJohn Marino tre_union_t *uni = node->obj;
17355f2eab64SJohn Marino tre_union_t *tmp;
17365f2eab64SJohn Marino *result = tre_ast_new_union(mem, uni->left, uni->right);
17375f2eab64SJohn Marino if (*result == NULL)
17385f2eab64SJohn Marino {
17395f2eab64SJohn Marino status = REG_ESPACE;
17405f2eab64SJohn Marino break;
17415f2eab64SJohn Marino }
17425f2eab64SJohn Marino tmp = (*result)->obj;
17435f2eab64SJohn Marino result = &tmp->left;
17445f2eab64SJohn Marino STACK_PUSHX(stack, voidptr, uni->right);
17455f2eab64SJohn Marino STACK_PUSHX(stack, int, COPY_RECURSE);
17465f2eab64SJohn Marino STACK_PUSHX(stack, voidptr, &tmp->right);
17475f2eab64SJohn Marino STACK_PUSHX(stack, int, COPY_SET_RESULT_PTR);
17485f2eab64SJohn Marino STACK_PUSHX(stack, voidptr, uni->left);
17495f2eab64SJohn Marino STACK_PUSHX(stack, int, COPY_RECURSE);
17505f2eab64SJohn Marino break;
17515f2eab64SJohn Marino }
17525f2eab64SJohn Marino case CATENATION:
17535f2eab64SJohn Marino {
17545f2eab64SJohn Marino tre_catenation_t *cat = node->obj;
17555f2eab64SJohn Marino tre_catenation_t *tmp;
17565f2eab64SJohn Marino *result = tre_ast_new_catenation(mem, cat->left, cat->right);
17575f2eab64SJohn Marino if (*result == NULL)
17585f2eab64SJohn Marino {
17595f2eab64SJohn Marino status = REG_ESPACE;
17605f2eab64SJohn Marino break;
17615f2eab64SJohn Marino }
17625f2eab64SJohn Marino tmp = (*result)->obj;
17635f2eab64SJohn Marino tmp->left = NULL;
17645f2eab64SJohn Marino tmp->right = NULL;
17655f2eab64SJohn Marino result = &tmp->left;
17665f2eab64SJohn Marino
17675f2eab64SJohn Marino STACK_PUSHX(stack, voidptr, cat->right);
17685f2eab64SJohn Marino STACK_PUSHX(stack, int, COPY_RECURSE);
17695f2eab64SJohn Marino STACK_PUSHX(stack, voidptr, &tmp->right);
17705f2eab64SJohn Marino STACK_PUSHX(stack, int, COPY_SET_RESULT_PTR);
17715f2eab64SJohn Marino STACK_PUSHX(stack, voidptr, cat->left);
17725f2eab64SJohn Marino STACK_PUSHX(stack, int, COPY_RECURSE);
17735f2eab64SJohn Marino break;
17745f2eab64SJohn Marino }
17755f2eab64SJohn Marino case ITERATION:
17765f2eab64SJohn Marino {
17775f2eab64SJohn Marino tre_iteration_t *iter = node->obj;
17785f2eab64SJohn Marino STACK_PUSHX(stack, voidptr, iter->arg);
17795f2eab64SJohn Marino STACK_PUSHX(stack, int, COPY_RECURSE);
17805f2eab64SJohn Marino *result = tre_ast_new_iter(mem, iter->arg, iter->min,
17815f2eab64SJohn Marino iter->max, iter->minimal);
17825f2eab64SJohn Marino if (*result == NULL)
17835f2eab64SJohn Marino {
17845f2eab64SJohn Marino status = REG_ESPACE;
17855f2eab64SJohn Marino break;
17865f2eab64SJohn Marino }
17875f2eab64SJohn Marino iter = (*result)->obj;
17885f2eab64SJohn Marino result = &iter->arg;
17895f2eab64SJohn Marino break;
17905f2eab64SJohn Marino }
17915f2eab64SJohn Marino default:
17925f2eab64SJohn Marino assert(0);
17935f2eab64SJohn Marino break;
17945f2eab64SJohn Marino }
17955f2eab64SJohn Marino break;
17965f2eab64SJohn Marino }
17975f2eab64SJohn Marino }
17985f2eab64SJohn Marino *pos_add += num_copied;
17995f2eab64SJohn Marino return status;
18005f2eab64SJohn Marino }
18015f2eab64SJohn Marino
18025f2eab64SJohn Marino typedef enum {
18035f2eab64SJohn Marino EXPAND_RECURSE,
18045f2eab64SJohn Marino EXPAND_AFTER_ITER
18055f2eab64SJohn Marino } tre_expand_ast_symbol_t;
18065f2eab64SJohn Marino
18075f2eab64SJohn Marino /* Expands each iteration node that has a finite nonzero minimum or maximum
18085f2eab64SJohn Marino iteration count to a catenated sequence of copies of the node. */
18095f2eab64SJohn Marino static reg_errcode_t
tre_expand_ast(tre_mem_t mem,tre_stack_t * stack,tre_ast_node_t * ast,int * position,tre_tag_direction_t * tag_directions,int * max_depth)18105f2eab64SJohn Marino tre_expand_ast(tre_mem_t mem, tre_stack_t *stack, tre_ast_node_t *ast,
18115f2eab64SJohn Marino int *position, tre_tag_direction_t *tag_directions,
18125f2eab64SJohn Marino int *max_depth)
18135f2eab64SJohn Marino {
18145f2eab64SJohn Marino reg_errcode_t status = REG_OK;
18155f2eab64SJohn Marino int bottom = tre_stack_num_objects(stack);
18165f2eab64SJohn Marino int pos_add = 0;
18175f2eab64SJohn Marino int pos_add_total = 0;
18185f2eab64SJohn Marino int max_pos = 0;
1819*d5f8dde1SJohn Marino #ifdef TRE_APPROX
18205f2eab64SJohn Marino /* Current approximate matching parameters. */
18215f2eab64SJohn Marino int params[TRE_PARAM_LAST];
18225f2eab64SJohn Marino /* Approximate parameter nesting level. */
18235f2eab64SJohn Marino int params_depth = 0;
1824*d5f8dde1SJohn Marino #endif /* TRE_APPROX */
18255f2eab64SJohn Marino int iter_depth = 0;
1826*d5f8dde1SJohn Marino #ifdef TRE_APPROX
18275f2eab64SJohn Marino int i;
1828*d5f8dde1SJohn Marino #endif /* TRE_APPROX */
18295f2eab64SJohn Marino
1830*d5f8dde1SJohn Marino #ifdef TRE_APPROX
18315f2eab64SJohn Marino for (i = 0; i < TRE_PARAM_LAST; i++)
18325f2eab64SJohn Marino params[i] = TRE_PARAM_DEFAULT;
1833*d5f8dde1SJohn Marino #endif /* TRE_APPROX */
18345f2eab64SJohn Marino
18355f2eab64SJohn Marino STACK_PUSHR(stack, voidptr, ast);
18365f2eab64SJohn Marino STACK_PUSHR(stack, int, EXPAND_RECURSE);
18375f2eab64SJohn Marino while (status == REG_OK && tre_stack_num_objects(stack) > bottom)
18385f2eab64SJohn Marino {
18395f2eab64SJohn Marino tre_ast_node_t *node;
18405f2eab64SJohn Marino tre_expand_ast_symbol_t symbol;
18415f2eab64SJohn Marino
18425f2eab64SJohn Marino if (status != REG_OK)
18435f2eab64SJohn Marino break;
18445f2eab64SJohn Marino
18455f2eab64SJohn Marino DPRINT(("pos_add %d\n", pos_add));
18465f2eab64SJohn Marino
18475f2eab64SJohn Marino symbol = (tre_expand_ast_symbol_t)tre_stack_pop_int(stack);
18485f2eab64SJohn Marino node = tre_stack_pop_voidptr(stack);
18495f2eab64SJohn Marino switch (symbol)
18505f2eab64SJohn Marino {
18515f2eab64SJohn Marino case EXPAND_RECURSE:
18525f2eab64SJohn Marino switch (node->type)
18535f2eab64SJohn Marino {
18545f2eab64SJohn Marino case LITERAL:
18555f2eab64SJohn Marino {
18565f2eab64SJohn Marino tre_literal_t *lit= node->obj;
18575f2eab64SJohn Marino if (!IS_SPECIAL(lit) || IS_BACKREF(lit))
18585f2eab64SJohn Marino {
18595f2eab64SJohn Marino lit->position += pos_add;
18605f2eab64SJohn Marino if (lit->position > max_pos)
18615f2eab64SJohn Marino max_pos = lit->position;
18625f2eab64SJohn Marino }
18635f2eab64SJohn Marino break;
18645f2eab64SJohn Marino }
18655f2eab64SJohn Marino case UNION:
18665f2eab64SJohn Marino {
18675f2eab64SJohn Marino tre_union_t *uni = node->obj;
18685f2eab64SJohn Marino STACK_PUSHX(stack, voidptr, uni->right);
18695f2eab64SJohn Marino STACK_PUSHX(stack, int, EXPAND_RECURSE);
18705f2eab64SJohn Marino STACK_PUSHX(stack, voidptr, uni->left);
18715f2eab64SJohn Marino STACK_PUSHX(stack, int, EXPAND_RECURSE);
18725f2eab64SJohn Marino break;
18735f2eab64SJohn Marino }
18745f2eab64SJohn Marino case CATENATION:
18755f2eab64SJohn Marino {
18765f2eab64SJohn Marino tre_catenation_t *cat = node->obj;
18775f2eab64SJohn Marino STACK_PUSHX(stack, voidptr, cat->right);
18785f2eab64SJohn Marino STACK_PUSHX(stack, int, EXPAND_RECURSE);
18795f2eab64SJohn Marino STACK_PUSHX(stack, voidptr, cat->left);
18805f2eab64SJohn Marino STACK_PUSHX(stack, int, EXPAND_RECURSE);
18815f2eab64SJohn Marino break;
18825f2eab64SJohn Marino }
18835f2eab64SJohn Marino case ITERATION:
18845f2eab64SJohn Marino {
18855f2eab64SJohn Marino tre_iteration_t *iter = node->obj;
18865f2eab64SJohn Marino STACK_PUSHX(stack, int, pos_add);
18875f2eab64SJohn Marino STACK_PUSHX(stack, voidptr, node);
18885f2eab64SJohn Marino STACK_PUSHX(stack, int, EXPAND_AFTER_ITER);
18895f2eab64SJohn Marino STACK_PUSHX(stack, voidptr, iter->arg);
18905f2eab64SJohn Marino STACK_PUSHX(stack, int, EXPAND_RECURSE);
18915f2eab64SJohn Marino /* If we are going to expand this node at EXPAND_AFTER_ITER
18925f2eab64SJohn Marino then don't increase the `pos' fields of the nodes now, it
18935f2eab64SJohn Marino will get done when expanding. */
18945f2eab64SJohn Marino if (iter->min > 1 || iter->max > 1)
18955f2eab64SJohn Marino pos_add = 0;
18965f2eab64SJohn Marino iter_depth++;
18975f2eab64SJohn Marino DPRINT(("iter\n"));
18985f2eab64SJohn Marino break;
18995f2eab64SJohn Marino }
19005f2eab64SJohn Marino default:
19015f2eab64SJohn Marino assert(0);
19025f2eab64SJohn Marino break;
19035f2eab64SJohn Marino }
19045f2eab64SJohn Marino break;
19055f2eab64SJohn Marino case EXPAND_AFTER_ITER:
19065f2eab64SJohn Marino {
19075f2eab64SJohn Marino tre_iteration_t *iter = node->obj;
19085f2eab64SJohn Marino int pos_add_last;
19095f2eab64SJohn Marino pos_add = tre_stack_pop_int(stack);
19105f2eab64SJohn Marino pos_add_last = pos_add;
1911*d5f8dde1SJohn Marino /* Originally (in tre_parse_bound), if min == 0 && max == 0, we
1912*d5f8dde1SJohn Marino immediate replace the whole iteration with EMPTY. This
1913*d5f8dde1SJohn Marino unfortunately drops any submatches, and messes up setting the
1914*d5f8dde1SJohn Marino pmatch values (we can get tags of -1, and tag values in the
1915*d5f8dde1SJohn Marino billions). So we left it there and replace with EMPTY here. */
1916*d5f8dde1SJohn Marino if (iter->min == 0 && iter->max == 0)
1917*d5f8dde1SJohn Marino {
1918*d5f8dde1SJohn Marino tre_ast_node_t *empty = tre_ast_new_literal(mem, EMPTY, -1, -1);
1919*d5f8dde1SJohn Marino if (empty == NULL)
1920*d5f8dde1SJohn Marino return REG_ESPACE;
1921*d5f8dde1SJohn Marino node->obj = empty->obj;
1922*d5f8dde1SJohn Marino node->type = empty->type;
1923*d5f8dde1SJohn Marino }
1924*d5f8dde1SJohn Marino else if (iter->min > 1 || iter->max > 1)
19255f2eab64SJohn Marino {
19265f2eab64SJohn Marino tre_ast_node_t *seq1 = NULL, *seq2 = NULL;
19275f2eab64SJohn Marino int j;
19285f2eab64SJohn Marino int pos_add_save = pos_add;
19295f2eab64SJohn Marino
19305f2eab64SJohn Marino /* Create a catenated sequence of copies of the node. */
19315f2eab64SJohn Marino for (j = 0; j < iter->min; j++)
19325f2eab64SJohn Marino {
19335f2eab64SJohn Marino tre_ast_node_t *copy;
19345f2eab64SJohn Marino /* Remove tags from all but the last copy. */
19355f2eab64SJohn Marino int flags = ((j + 1 < iter->min)
19365f2eab64SJohn Marino ? COPY_REMOVE_TAGS
19375f2eab64SJohn Marino : COPY_MAXIMIZE_FIRST_TAG);
19385f2eab64SJohn Marino DPRINT((" pos_add %d\n", pos_add));
19395f2eab64SJohn Marino pos_add_save = pos_add;
19405f2eab64SJohn Marino status = tre_copy_ast(mem, stack, iter->arg, flags,
19415f2eab64SJohn Marino &pos_add, tag_directions, ©,
19425f2eab64SJohn Marino &max_pos);
19435f2eab64SJohn Marino if (status != REG_OK)
19445f2eab64SJohn Marino return status;
19455f2eab64SJohn Marino if (seq1 != NULL)
19465f2eab64SJohn Marino seq1 = tre_ast_new_catenation(mem, seq1, copy);
19475f2eab64SJohn Marino else
19485f2eab64SJohn Marino seq1 = copy;
19495f2eab64SJohn Marino if (seq1 == NULL)
19505f2eab64SJohn Marino return REG_ESPACE;
19515f2eab64SJohn Marino }
19525f2eab64SJohn Marino
19535f2eab64SJohn Marino if (iter->max == -1)
19545f2eab64SJohn Marino {
19555f2eab64SJohn Marino /* No upper limit. */
19565f2eab64SJohn Marino pos_add_save = pos_add;
19575f2eab64SJohn Marino status = tre_copy_ast(mem, stack, iter->arg, 0,
19585f2eab64SJohn Marino &pos_add, NULL, &seq2, &max_pos);
19595f2eab64SJohn Marino if (status != REG_OK)
19605f2eab64SJohn Marino return status;
19615f2eab64SJohn Marino seq2 = tre_ast_new_iter(mem, seq2, 0, -1, 0);
19625f2eab64SJohn Marino if (seq2 == NULL)
19635f2eab64SJohn Marino return REG_ESPACE;
19645f2eab64SJohn Marino }
19655f2eab64SJohn Marino else
19665f2eab64SJohn Marino {
19675f2eab64SJohn Marino for (j = iter->min; j < iter->max; j++)
19685f2eab64SJohn Marino {
19695f2eab64SJohn Marino tre_ast_node_t *tmp, *copy;
19705f2eab64SJohn Marino pos_add_save = pos_add;
19715f2eab64SJohn Marino status = tre_copy_ast(mem, stack, iter->arg, 0,
19725f2eab64SJohn Marino &pos_add, NULL, ©, &max_pos);
19735f2eab64SJohn Marino if (status != REG_OK)
19745f2eab64SJohn Marino return status;
19755f2eab64SJohn Marino if (seq2 != NULL)
19765f2eab64SJohn Marino seq2 = tre_ast_new_catenation(mem, copy, seq2);
19775f2eab64SJohn Marino else
19785f2eab64SJohn Marino seq2 = copy;
19795f2eab64SJohn Marino if (seq2 == NULL)
19805f2eab64SJohn Marino return REG_ESPACE;
19815f2eab64SJohn Marino tmp = tre_ast_new_literal(mem, EMPTY, -1, -1);
19825f2eab64SJohn Marino if (tmp == NULL)
19835f2eab64SJohn Marino return REG_ESPACE;
19845f2eab64SJohn Marino seq2 = tre_ast_new_union(mem, tmp, seq2);
19855f2eab64SJohn Marino if (seq2 == NULL)
19865f2eab64SJohn Marino return REG_ESPACE;
19875f2eab64SJohn Marino }
19885f2eab64SJohn Marino }
19895f2eab64SJohn Marino
19905f2eab64SJohn Marino pos_add = pos_add_save;
19915f2eab64SJohn Marino if (seq1 == NULL)
19925f2eab64SJohn Marino seq1 = seq2;
19935f2eab64SJohn Marino else if (seq2 != NULL)
19945f2eab64SJohn Marino seq1 = tre_ast_new_catenation(mem, seq1, seq2);
19955f2eab64SJohn Marino if (seq1 == NULL)
19965f2eab64SJohn Marino return REG_ESPACE;
19975f2eab64SJohn Marino node->obj = seq1->obj;
19985f2eab64SJohn Marino node->type = seq1->type;
19995f2eab64SJohn Marino }
20005f2eab64SJohn Marino
20015f2eab64SJohn Marino iter_depth--;
20025f2eab64SJohn Marino pos_add_total += pos_add - pos_add_last;
20035f2eab64SJohn Marino if (iter_depth == 0)
20045f2eab64SJohn Marino pos_add = pos_add_total;
20055f2eab64SJohn Marino
2006*d5f8dde1SJohn Marino #ifdef TRE_APPROX
20075f2eab64SJohn Marino /* If approximate parameters are specified, surround the result
20085f2eab64SJohn Marino with two parameter setting nodes. The one on the left sets
20095f2eab64SJohn Marino the specified parameters, and the one on the right restores
20105f2eab64SJohn Marino the old parameters. */
20115f2eab64SJohn Marino if (iter->params)
20125f2eab64SJohn Marino {
20135f2eab64SJohn Marino tre_ast_node_t *tmp_l, *tmp_r, *tmp_node, *node_copy;
20145f2eab64SJohn Marino int *old_params;
20155f2eab64SJohn Marino
20165f2eab64SJohn Marino tmp_l = tre_ast_new_literal(mem, PARAMETER, 0, -1);
20175f2eab64SJohn Marino if (!tmp_l)
20185f2eab64SJohn Marino return REG_ESPACE;
20195f2eab64SJohn Marino ((tre_literal_t *)tmp_l->obj)->u.params = iter->params;
20205f2eab64SJohn Marino iter->params[TRE_PARAM_DEPTH] = params_depth + 1;
20215f2eab64SJohn Marino tmp_r = tre_ast_new_literal(mem, PARAMETER, 0, -1);
20225f2eab64SJohn Marino if (!tmp_r)
20235f2eab64SJohn Marino return REG_ESPACE;
20245f2eab64SJohn Marino old_params = tre_mem_alloc(mem, sizeof(*old_params)
20255f2eab64SJohn Marino * TRE_PARAM_LAST);
20265f2eab64SJohn Marino if (!old_params)
20275f2eab64SJohn Marino return REG_ESPACE;
20285f2eab64SJohn Marino for (i = 0; i < TRE_PARAM_LAST; i++)
20295f2eab64SJohn Marino old_params[i] = params[i];
20305f2eab64SJohn Marino ((tre_literal_t *)tmp_r->obj)->u.params = old_params;
20315f2eab64SJohn Marino old_params[TRE_PARAM_DEPTH] = params_depth;
20325f2eab64SJohn Marino /* XXX - this is the only place where ast_new_node is
20335f2eab64SJohn Marino needed -- should be moved inside AST module. */
20345f2eab64SJohn Marino node_copy = tre_ast_new_node(mem, ITERATION,
20355f2eab64SJohn Marino sizeof(tre_iteration_t));
20365f2eab64SJohn Marino if (!node_copy)
20375f2eab64SJohn Marino return REG_ESPACE;
20385f2eab64SJohn Marino node_copy->obj = node->obj;
20395f2eab64SJohn Marino tmp_node = tre_ast_new_catenation(mem, tmp_l, node_copy);
20405f2eab64SJohn Marino if (!tmp_node)
20415f2eab64SJohn Marino return REG_ESPACE;
20425f2eab64SJohn Marino tmp_node = tre_ast_new_catenation(mem, tmp_node, tmp_r);
20435f2eab64SJohn Marino if (!tmp_node)
20445f2eab64SJohn Marino return REG_ESPACE;
20455f2eab64SJohn Marino /* Replace the contents of `node' with `tmp_node'. */
20465f2eab64SJohn Marino memcpy(node, tmp_node, sizeof(*node));
20475f2eab64SJohn Marino node->obj = tmp_node->obj;
20485f2eab64SJohn Marino node->type = tmp_node->type;
20495f2eab64SJohn Marino params_depth++;
20505f2eab64SJohn Marino if (params_depth > *max_depth)
20515f2eab64SJohn Marino *max_depth = params_depth;
20525f2eab64SJohn Marino }
2053*d5f8dde1SJohn Marino #endif /* TRE_APPROX */
20545f2eab64SJohn Marino break;
20555f2eab64SJohn Marino }
20565f2eab64SJohn Marino default:
20575f2eab64SJohn Marino assert(0);
20585f2eab64SJohn Marino break;
20595f2eab64SJohn Marino }
20605f2eab64SJohn Marino }
20615f2eab64SJohn Marino
20625f2eab64SJohn Marino *position += pos_add_total;
20635f2eab64SJohn Marino
20645f2eab64SJohn Marino /* `max_pos' should never be larger than `*position' if the above
20655f2eab64SJohn Marino code works, but just an extra safeguard let's make sure
20665f2eab64SJohn Marino `*position' is set large enough so enough memory will be
20675f2eab64SJohn Marino allocated for the transition table. */
20685f2eab64SJohn Marino if (max_pos > *position)
20695f2eab64SJohn Marino *position = max_pos;
20705f2eab64SJohn Marino
20715f2eab64SJohn Marino #ifdef TRE_DEBUG
20725f2eab64SJohn Marino DPRINT(("Expanded AST:\n"));
20735f2eab64SJohn Marino tre_ast_print(ast);
20745f2eab64SJohn Marino DPRINT(("*position %d, max_pos %d\n", *position, max_pos));
20755f2eab64SJohn Marino #endif
20765f2eab64SJohn Marino
20775f2eab64SJohn Marino return status;
20785f2eab64SJohn Marino }
20795f2eab64SJohn Marino
20805f2eab64SJohn Marino static tre_pos_and_tags_t *
tre_set_empty(tre_mem_t mem)20815f2eab64SJohn Marino tre_set_empty(tre_mem_t mem)
20825f2eab64SJohn Marino {
20835f2eab64SJohn Marino tre_pos_and_tags_t *new_set;
20845f2eab64SJohn Marino
20855f2eab64SJohn Marino new_set = tre_mem_calloc(mem, sizeof(*new_set));
20865f2eab64SJohn Marino if (new_set == NULL)
20875f2eab64SJohn Marino return NULL;
20885f2eab64SJohn Marino
20895f2eab64SJohn Marino new_set[0].position = -1;
20905f2eab64SJohn Marino new_set[0].code_min = -1;
20915f2eab64SJohn Marino new_set[0].code_max = -1;
20925f2eab64SJohn Marino
20935f2eab64SJohn Marino return new_set;
20945f2eab64SJohn Marino }
20955f2eab64SJohn Marino
20965f2eab64SJohn Marino static tre_pos_and_tags_t *
tre_set_one(tre_mem_t mem,int position,int code_min,int code_max,tre_bracket_match_list_t * bracket_match_list,int backref)20975f2eab64SJohn Marino tre_set_one(tre_mem_t mem, int position, int code_min, int code_max,
2098*d5f8dde1SJohn Marino tre_bracket_match_list_t *bracket_match_list, int backref)
20995f2eab64SJohn Marino {
21005f2eab64SJohn Marino tre_pos_and_tags_t *new_set;
21015f2eab64SJohn Marino
21025f2eab64SJohn Marino new_set = tre_mem_calloc(mem, sizeof(*new_set) * 2);
21035f2eab64SJohn Marino if (new_set == NULL)
21045f2eab64SJohn Marino return NULL;
21055f2eab64SJohn Marino
21065f2eab64SJohn Marino new_set[0].position = position;
21075f2eab64SJohn Marino new_set[0].code_min = code_min;
21085f2eab64SJohn Marino new_set[0].code_max = code_max;
2109*d5f8dde1SJohn Marino new_set[0].bracket_match_list = bracket_match_list;
21105f2eab64SJohn Marino new_set[0].backref = backref;
21115f2eab64SJohn Marino new_set[1].position = -1;
21125f2eab64SJohn Marino new_set[1].code_min = -1;
21135f2eab64SJohn Marino new_set[1].code_max = -1;
21145f2eab64SJohn Marino
21155f2eab64SJohn Marino return new_set;
21165f2eab64SJohn Marino }
21175f2eab64SJohn Marino
21185f2eab64SJohn Marino static tre_pos_and_tags_t *
tre_set_union(tre_mem_t mem,tre_pos_and_tags_t * set1,tre_pos_and_tags_t * set2,int * tags,int assertions,int * params)21195f2eab64SJohn Marino tre_set_union(tre_mem_t mem, tre_pos_and_tags_t *set1, tre_pos_and_tags_t *set2,
21205f2eab64SJohn Marino int *tags, int assertions, int *params)
21215f2eab64SJohn Marino {
21225f2eab64SJohn Marino int s1, s2, i, j;
21235f2eab64SJohn Marino tre_pos_and_tags_t *new_set;
21245f2eab64SJohn Marino int *new_tags;
21255f2eab64SJohn Marino int num_tags;
21265f2eab64SJohn Marino
21275f2eab64SJohn Marino for (num_tags = 0; tags != NULL && tags[num_tags] >= 0; num_tags++);
21285f2eab64SJohn Marino for (s1 = 0; set1[s1].position >= 0; s1++);
21295f2eab64SJohn Marino for (s2 = 0; set2[s2].position >= 0; s2++);
21305f2eab64SJohn Marino new_set = tre_mem_calloc(mem, sizeof(*new_set) * (s1 + s2 + 1));
21315f2eab64SJohn Marino if (!new_set )
21325f2eab64SJohn Marino return NULL;
21335f2eab64SJohn Marino
21345f2eab64SJohn Marino for (s1 = 0; set1[s1].position >= 0; s1++)
21355f2eab64SJohn Marino {
21365f2eab64SJohn Marino new_set[s1].position = set1[s1].position;
21375f2eab64SJohn Marino new_set[s1].code_min = set1[s1].code_min;
21385f2eab64SJohn Marino new_set[s1].code_max = set1[s1].code_max;
21395f2eab64SJohn Marino new_set[s1].assertions = set1[s1].assertions | assertions;
2140*d5f8dde1SJohn Marino new_set[s1].bracket_match_list = set1[s1].bracket_match_list;
21415f2eab64SJohn Marino new_set[s1].backref = set1[s1].backref;
21425f2eab64SJohn Marino if (set1[s1].tags == NULL && tags == NULL)
21435f2eab64SJohn Marino new_set[s1].tags = NULL;
21445f2eab64SJohn Marino else
21455f2eab64SJohn Marino {
21465f2eab64SJohn Marino for (i = 0; set1[s1].tags != NULL && set1[s1].tags[i] >= 0; i++);
21475f2eab64SJohn Marino new_tags = tre_mem_alloc(mem, (sizeof(*new_tags)
21485f2eab64SJohn Marino * (i + num_tags + 1)));
21495f2eab64SJohn Marino if (new_tags == NULL)
21505f2eab64SJohn Marino return NULL;
21515f2eab64SJohn Marino for (j = 0; j < i; j++)
21525f2eab64SJohn Marino new_tags[j] = set1[s1].tags[j];
21535f2eab64SJohn Marino for (i = 0; i < num_tags; i++)
21545f2eab64SJohn Marino new_tags[j + i] = tags[i];
21555f2eab64SJohn Marino new_tags[j + i] = -1;
21565f2eab64SJohn Marino new_set[s1].tags = new_tags;
21575f2eab64SJohn Marino }
21585f2eab64SJohn Marino if (set1[s1].params)
21595f2eab64SJohn Marino new_set[s1].params = set1[s1].params;
21605f2eab64SJohn Marino if (params)
21615f2eab64SJohn Marino {
21625f2eab64SJohn Marino if (!new_set[s1].params)
21635f2eab64SJohn Marino new_set[s1].params = params;
21645f2eab64SJohn Marino else
21655f2eab64SJohn Marino {
21665f2eab64SJohn Marino new_set[s1].params = tre_mem_alloc(mem, sizeof(*params) *
21675f2eab64SJohn Marino TRE_PARAM_LAST);
21685f2eab64SJohn Marino if (!new_set[s1].params)
21695f2eab64SJohn Marino return NULL;
21705f2eab64SJohn Marino for (i = 0; i < TRE_PARAM_LAST; i++)
21715f2eab64SJohn Marino if (params[i] != TRE_PARAM_UNSET)
21725f2eab64SJohn Marino new_set[s1].params[i] = params[i];
21735f2eab64SJohn Marino }
21745f2eab64SJohn Marino }
21755f2eab64SJohn Marino }
21765f2eab64SJohn Marino
21775f2eab64SJohn Marino for (s2 = 0; set2[s2].position >= 0; s2++)
21785f2eab64SJohn Marino {
21795f2eab64SJohn Marino new_set[s1 + s2].position = set2[s2].position;
21805f2eab64SJohn Marino new_set[s1 + s2].code_min = set2[s2].code_min;
21815f2eab64SJohn Marino new_set[s1 + s2].code_max = set2[s2].code_max;
21825f2eab64SJohn Marino /* XXX - why not | assertions here as well? */
21835f2eab64SJohn Marino new_set[s1 + s2].assertions = set2[s2].assertions;
2184*d5f8dde1SJohn Marino new_set[s1 + s2].bracket_match_list = set2[s2].bracket_match_list;
21855f2eab64SJohn Marino new_set[s1 + s2].backref = set2[s2].backref;
21865f2eab64SJohn Marino if (set2[s2].tags == NULL)
21875f2eab64SJohn Marino new_set[s1 + s2].tags = NULL;
21885f2eab64SJohn Marino else
21895f2eab64SJohn Marino {
21905f2eab64SJohn Marino for (i = 0; set2[s2].tags[i] >= 0; i++);
21915f2eab64SJohn Marino new_tags = tre_mem_alloc(mem, sizeof(*new_tags) * (i + 1));
21925f2eab64SJohn Marino if (new_tags == NULL)
21935f2eab64SJohn Marino return NULL;
21945f2eab64SJohn Marino for (j = 0; j < i; j++)
21955f2eab64SJohn Marino new_tags[j] = set2[s2].tags[j];
21965f2eab64SJohn Marino new_tags[j] = -1;
21975f2eab64SJohn Marino new_set[s1 + s2].tags = new_tags;
21985f2eab64SJohn Marino }
21995f2eab64SJohn Marino if (set2[s2].params)
22005f2eab64SJohn Marino new_set[s1 + s2].params = set2[s2].params;
22015f2eab64SJohn Marino if (params)
22025f2eab64SJohn Marino {
22035f2eab64SJohn Marino if (!new_set[s1 + s2].params)
22045f2eab64SJohn Marino new_set[s1 + s2].params = params;
22055f2eab64SJohn Marino else
22065f2eab64SJohn Marino {
22075f2eab64SJohn Marino new_set[s1 + s2].params = tre_mem_alloc(mem, sizeof(*params) *
22085f2eab64SJohn Marino TRE_PARAM_LAST);
22095f2eab64SJohn Marino if (!new_set[s1 + s2].params)
22105f2eab64SJohn Marino return NULL;
22115f2eab64SJohn Marino for (i = 0; i < TRE_PARAM_LAST; i++)
22125f2eab64SJohn Marino if (params[i] != TRE_PARAM_UNSET)
22135f2eab64SJohn Marino new_set[s1 + s2].params[i] = params[i];
22145f2eab64SJohn Marino }
22155f2eab64SJohn Marino }
22165f2eab64SJohn Marino }
22175f2eab64SJohn Marino new_set[s1 + s2].position = -1;
22185f2eab64SJohn Marino return new_set;
22195f2eab64SJohn Marino }
22205f2eab64SJohn Marino
22215f2eab64SJohn Marino /* Finds the empty path through `node' which is the one that should be
22225f2eab64SJohn Marino taken according to POSIX.2 rules, and adds the tags on that path to
22235f2eab64SJohn Marino `tags'. `tags' may be NULL. If `num_tags_seen' is not NULL, it is
22245f2eab64SJohn Marino set to the number of tags seen on the path. */
22255f2eab64SJohn Marino static reg_errcode_t
tre_match_empty(tre_stack_t * stack,tre_ast_node_t * node,int * tags,int * assertions,int * params,int * num_tags_seen,int * params_seen)22265f2eab64SJohn Marino tre_match_empty(tre_stack_t *stack, tre_ast_node_t *node, int *tags,
22275f2eab64SJohn Marino int *assertions, int *params, int *num_tags_seen,
22285f2eab64SJohn Marino int *params_seen)
22295f2eab64SJohn Marino {
22305f2eab64SJohn Marino tre_literal_t *lit;
22315f2eab64SJohn Marino tre_union_t *uni;
22325f2eab64SJohn Marino tre_catenation_t *cat;
22335f2eab64SJohn Marino tre_iteration_t *iter;
22345f2eab64SJohn Marino int i;
22355f2eab64SJohn Marino int bottom = tre_stack_num_objects(stack);
22365f2eab64SJohn Marino reg_errcode_t status = REG_OK;
22375f2eab64SJohn Marino if (num_tags_seen)
22385f2eab64SJohn Marino *num_tags_seen = 0;
22395f2eab64SJohn Marino if (params_seen)
22405f2eab64SJohn Marino *params_seen = 0;
22415f2eab64SJohn Marino
22425f2eab64SJohn Marino status = tre_stack_push_voidptr(stack, node);
22435f2eab64SJohn Marino
22445f2eab64SJohn Marino /* Walk through the tree recursively. */
22455f2eab64SJohn Marino while (status == REG_OK && tre_stack_num_objects(stack) > bottom)
22465f2eab64SJohn Marino {
22475f2eab64SJohn Marino node = tre_stack_pop_voidptr(stack);
22485f2eab64SJohn Marino
22495f2eab64SJohn Marino switch (node->type)
22505f2eab64SJohn Marino {
22515f2eab64SJohn Marino case LITERAL:
22525f2eab64SJohn Marino lit = (tre_literal_t *)node->obj;
22535f2eab64SJohn Marino switch (lit->code_min)
22545f2eab64SJohn Marino {
22555f2eab64SJohn Marino case TAG:
22565f2eab64SJohn Marino if (lit->code_max >= 0)
22575f2eab64SJohn Marino {
22585f2eab64SJohn Marino if (tags != NULL)
22595f2eab64SJohn Marino {
22605f2eab64SJohn Marino /* Add the tag to `tags'. */
22615f2eab64SJohn Marino for (i = 0; tags[i] >= 0; i++)
22625f2eab64SJohn Marino if (tags[i] == lit->code_max)
22635f2eab64SJohn Marino break;
22645f2eab64SJohn Marino if (tags[i] < 0)
22655f2eab64SJohn Marino {
22665f2eab64SJohn Marino tags[i] = lit->code_max;
22675f2eab64SJohn Marino tags[i + 1] = -1;
22685f2eab64SJohn Marino }
22695f2eab64SJohn Marino }
22705f2eab64SJohn Marino if (num_tags_seen)
22715f2eab64SJohn Marino (*num_tags_seen)++;
22725f2eab64SJohn Marino }
22735f2eab64SJohn Marino break;
22745f2eab64SJohn Marino case ASSERTION:
22755f2eab64SJohn Marino assert(lit->code_max >= 1
22765f2eab64SJohn Marino || lit->code_max <= ASSERT_LAST);
22775f2eab64SJohn Marino if (assertions != NULL)
22785f2eab64SJohn Marino *assertions |= lit->code_max;
22795f2eab64SJohn Marino break;
22805f2eab64SJohn Marino case PARAMETER:
22815f2eab64SJohn Marino if (params != NULL)
22825f2eab64SJohn Marino for (i = 0; i < TRE_PARAM_LAST; i++)
22835f2eab64SJohn Marino params[i] = lit->u.params[i];
22845f2eab64SJohn Marino if (params_seen != NULL)
22855f2eab64SJohn Marino *params_seen = 1;
22865f2eab64SJohn Marino break;
22875f2eab64SJohn Marino case EMPTY:
22885f2eab64SJohn Marino break;
22895f2eab64SJohn Marino default:
22905f2eab64SJohn Marino assert(0);
22915f2eab64SJohn Marino break;
22925f2eab64SJohn Marino }
22935f2eab64SJohn Marino break;
22945f2eab64SJohn Marino
22955f2eab64SJohn Marino case UNION:
22965f2eab64SJohn Marino /* Subexpressions starting earlier take priority over ones
22975f2eab64SJohn Marino starting later, so we prefer the left subexpression over the
22985f2eab64SJohn Marino right subexpression. */
22995f2eab64SJohn Marino uni = (tre_union_t *)node->obj;
23005f2eab64SJohn Marino if (uni->left->nullable)
23015f2eab64SJohn Marino STACK_PUSHX(stack, voidptr, uni->left)
23025f2eab64SJohn Marino else if (uni->right->nullable)
23035f2eab64SJohn Marino STACK_PUSHX(stack, voidptr, uni->right)
23045f2eab64SJohn Marino else
23055f2eab64SJohn Marino assert(0);
23065f2eab64SJohn Marino break;
23075f2eab64SJohn Marino
23085f2eab64SJohn Marino case CATENATION:
23095f2eab64SJohn Marino /* The path must go through both children. */
23105f2eab64SJohn Marino cat = (tre_catenation_t *)node->obj;
23115f2eab64SJohn Marino assert(cat->left->nullable);
23125f2eab64SJohn Marino assert(cat->right->nullable);
23135f2eab64SJohn Marino STACK_PUSHX(stack, voidptr, cat->left);
23145f2eab64SJohn Marino STACK_PUSHX(stack, voidptr, cat->right);
23155f2eab64SJohn Marino break;
23165f2eab64SJohn Marino
23175f2eab64SJohn Marino case ITERATION:
23185f2eab64SJohn Marino /* A match with an empty string is preferred over no match at
23195f2eab64SJohn Marino all, so we go through the argument if possible. */
23205f2eab64SJohn Marino iter = (tre_iteration_t *)node->obj;
23215f2eab64SJohn Marino if (iter->arg->nullable)
23225f2eab64SJohn Marino STACK_PUSHX(stack, voidptr, iter->arg);
23235f2eab64SJohn Marino break;
23245f2eab64SJohn Marino
23255f2eab64SJohn Marino default:
23265f2eab64SJohn Marino assert(0);
23275f2eab64SJohn Marino break;
23285f2eab64SJohn Marino }
23295f2eab64SJohn Marino }
23305f2eab64SJohn Marino
23315f2eab64SJohn Marino return status;
23325f2eab64SJohn Marino }
23335f2eab64SJohn Marino
23345f2eab64SJohn Marino
23355f2eab64SJohn Marino typedef enum {
23365f2eab64SJohn Marino NFL_RECURSE,
23375f2eab64SJohn Marino NFL_POST_UNION,
23385f2eab64SJohn Marino NFL_POST_CATENATION,
23395f2eab64SJohn Marino NFL_POST_ITERATION
23405f2eab64SJohn Marino } tre_nfl_stack_symbol_t;
23415f2eab64SJohn Marino
23425f2eab64SJohn Marino
23435f2eab64SJohn Marino /* Computes and fills in the fields `nullable', `firstpos', and `lastpos' for
23445f2eab64SJohn Marino the nodes of the AST `tree'. */
23455f2eab64SJohn Marino static reg_errcode_t
tre_compute_nfl(tre_mem_t mem,tre_stack_t * stack,tre_ast_node_t * tree)23465f2eab64SJohn Marino tre_compute_nfl(tre_mem_t mem, tre_stack_t *stack, tre_ast_node_t *tree)
23475f2eab64SJohn Marino {
23485f2eab64SJohn Marino int bottom = tre_stack_num_objects(stack);
23495f2eab64SJohn Marino
23505f2eab64SJohn Marino STACK_PUSHR(stack, voidptr, tree);
23515f2eab64SJohn Marino STACK_PUSHR(stack, int, NFL_RECURSE);
23525f2eab64SJohn Marino
23535f2eab64SJohn Marino while (tre_stack_num_objects(stack) > bottom)
23545f2eab64SJohn Marino {
23555f2eab64SJohn Marino tre_nfl_stack_symbol_t symbol;
23565f2eab64SJohn Marino tre_ast_node_t *node;
23575f2eab64SJohn Marino
23585f2eab64SJohn Marino symbol = (tre_nfl_stack_symbol_t)tre_stack_pop_int(stack);
23595f2eab64SJohn Marino node = tre_stack_pop_voidptr(stack);
23605f2eab64SJohn Marino switch (symbol)
23615f2eab64SJohn Marino {
23625f2eab64SJohn Marino case NFL_RECURSE:
23635f2eab64SJohn Marino switch (node->type)
23645f2eab64SJohn Marino {
23655f2eab64SJohn Marino case LITERAL:
23665f2eab64SJohn Marino {
23675f2eab64SJohn Marino tre_literal_t *lit = (tre_literal_t *)node->obj;
23685f2eab64SJohn Marino if (IS_BACKREF(lit))
23695f2eab64SJohn Marino {
23705f2eab64SJohn Marino /* Back references: nullable = false, firstpos = {i},
23715f2eab64SJohn Marino lastpos = {i}. */
23725f2eab64SJohn Marino node->nullable = 0;
23735f2eab64SJohn Marino node->firstpos = tre_set_one(mem, lit->position, 0,
2374*d5f8dde1SJohn Marino TRE_CHAR_MAX, NULL, -1);
23755f2eab64SJohn Marino if (!node->firstpos)
23765f2eab64SJohn Marino return REG_ESPACE;
23775f2eab64SJohn Marino node->lastpos = tre_set_one(mem, lit->position, 0,
2378*d5f8dde1SJohn Marino TRE_CHAR_MAX, NULL,
23795f2eab64SJohn Marino (int)lit->code_max);
23805f2eab64SJohn Marino if (!node->lastpos)
23815f2eab64SJohn Marino return REG_ESPACE;
23825f2eab64SJohn Marino }
23835f2eab64SJohn Marino else if (lit->code_min < 0)
23845f2eab64SJohn Marino {
23855f2eab64SJohn Marino /* Tags, empty strings, params, and zero width assertions:
23865f2eab64SJohn Marino nullable = true, firstpos = {}, and lastpos = {}. */
23875f2eab64SJohn Marino node->nullable = 1;
23885f2eab64SJohn Marino node->firstpos = tre_set_empty(mem);
23895f2eab64SJohn Marino if (!node->firstpos)
23905f2eab64SJohn Marino return REG_ESPACE;
23915f2eab64SJohn Marino node->lastpos = tre_set_empty(mem);
23925f2eab64SJohn Marino if (!node->lastpos)
23935f2eab64SJohn Marino return REG_ESPACE;
23945f2eab64SJohn Marino }
23955f2eab64SJohn Marino else
23965f2eab64SJohn Marino {
23975f2eab64SJohn Marino /* Literal at position i: nullable = false, firstpos = {i},
23985f2eab64SJohn Marino lastpos = {i}. */
23995f2eab64SJohn Marino node->nullable = 0;
24005f2eab64SJohn Marino node->firstpos =
24015f2eab64SJohn Marino tre_set_one(mem, lit->position, (int)lit->code_min,
2402*d5f8dde1SJohn Marino (int)lit->code_max, NULL, -1);
24035f2eab64SJohn Marino if (!node->firstpos)
24045f2eab64SJohn Marino return REG_ESPACE;
24055f2eab64SJohn Marino node->lastpos = tre_set_one(mem, lit->position,
24065f2eab64SJohn Marino (int)lit->code_min,
24075f2eab64SJohn Marino (int)lit->code_max,
2408*d5f8dde1SJohn Marino lit->u.bracket_match_list,
24095f2eab64SJohn Marino -1);
24105f2eab64SJohn Marino if (!node->lastpos)
24115f2eab64SJohn Marino return REG_ESPACE;
24125f2eab64SJohn Marino }
24135f2eab64SJohn Marino break;
24145f2eab64SJohn Marino }
24155f2eab64SJohn Marino
24165f2eab64SJohn Marino case UNION:
24175f2eab64SJohn Marino /* Compute the attributes for the two subtrees, and after that
24185f2eab64SJohn Marino for this node. */
24195f2eab64SJohn Marino STACK_PUSHR(stack, voidptr, node);
24205f2eab64SJohn Marino STACK_PUSHR(stack, int, NFL_POST_UNION);
24215f2eab64SJohn Marino STACK_PUSHR(stack, voidptr, ((tre_union_t *)node->obj)->right);
24225f2eab64SJohn Marino STACK_PUSHR(stack, int, NFL_RECURSE);
24235f2eab64SJohn Marino STACK_PUSHR(stack, voidptr, ((tre_union_t *)node->obj)->left);
24245f2eab64SJohn Marino STACK_PUSHR(stack, int, NFL_RECURSE);
24255f2eab64SJohn Marino break;
24265f2eab64SJohn Marino
24275f2eab64SJohn Marino case CATENATION:
24285f2eab64SJohn Marino /* Compute the attributes for the two subtrees, and after that
24295f2eab64SJohn Marino for this node. */
24305f2eab64SJohn Marino STACK_PUSHR(stack, voidptr, node);
24315f2eab64SJohn Marino STACK_PUSHR(stack, int, NFL_POST_CATENATION);
24325f2eab64SJohn Marino STACK_PUSHR(stack, voidptr, ((tre_catenation_t *)node->obj)->right);
24335f2eab64SJohn Marino STACK_PUSHR(stack, int, NFL_RECURSE);
24345f2eab64SJohn Marino STACK_PUSHR(stack, voidptr, ((tre_catenation_t *)node->obj)->left);
24355f2eab64SJohn Marino STACK_PUSHR(stack, int, NFL_RECURSE);
24365f2eab64SJohn Marino break;
24375f2eab64SJohn Marino
24385f2eab64SJohn Marino case ITERATION:
24395f2eab64SJohn Marino /* Compute the attributes for the subtree, and after that for
24405f2eab64SJohn Marino this node. */
24415f2eab64SJohn Marino STACK_PUSHR(stack, voidptr, node);
24425f2eab64SJohn Marino STACK_PUSHR(stack, int, NFL_POST_ITERATION);
24435f2eab64SJohn Marino STACK_PUSHR(stack, voidptr, ((tre_iteration_t *)node->obj)->arg);
24445f2eab64SJohn Marino STACK_PUSHR(stack, int, NFL_RECURSE);
24455f2eab64SJohn Marino break;
24465f2eab64SJohn Marino }
24475f2eab64SJohn Marino break; /* end case: NFL_RECURSE */
24485f2eab64SJohn Marino
24495f2eab64SJohn Marino case NFL_POST_UNION:
24505f2eab64SJohn Marino {
24515f2eab64SJohn Marino tre_union_t *uni = (tre_union_t *)node->obj;
24525f2eab64SJohn Marino node->nullable = uni->left->nullable || uni->right->nullable;
24535f2eab64SJohn Marino node->firstpos = tre_set_union(mem, uni->left->firstpos,
24545f2eab64SJohn Marino uni->right->firstpos, NULL, 0, NULL);
24555f2eab64SJohn Marino if (!node->firstpos)
24565f2eab64SJohn Marino return REG_ESPACE;
24575f2eab64SJohn Marino node->lastpos = tre_set_union(mem, uni->left->lastpos,
24585f2eab64SJohn Marino uni->right->lastpos, NULL, 0, NULL);
24595f2eab64SJohn Marino if (!node->lastpos)
24605f2eab64SJohn Marino return REG_ESPACE;
24615f2eab64SJohn Marino break;
24625f2eab64SJohn Marino }
24635f2eab64SJohn Marino
24645f2eab64SJohn Marino case NFL_POST_ITERATION:
24655f2eab64SJohn Marino {
2466*d5f8dde1SJohn Marino int num_tags, *tags, assertions, params_seen;
2467*d5f8dde1SJohn Marino int *params;
2468*d5f8dde1SJohn Marino reg_errcode_t status;
24695f2eab64SJohn Marino tre_iteration_t *iter = (tre_iteration_t *)node->obj;
24705f2eab64SJohn Marino
2471*d5f8dde1SJohn Marino /* From Ville Laurikari's original 2001 Master's thesis, the
2472*d5f8dde1SJohn Marino firstpos(n) and lastpos(n) of an iteration is just the
2473*d5f8dde1SJohn Marino corresponding values of the iteration's argument. Unfortunately,
2474*d5f8dde1SJohn Marino this isn't sufficient for the following BRE:
2475*d5f8dde1SJohn Marino
2476*d5f8dde1SJohn Marino \(a*\)*b\(\1\) matched against ab
2477*d5f8dde1SJohn Marino
2478*d5f8dde1SJohn Marino The backreference wants to force the first subexpression to
2479*d5f8dde1SJohn Marino be the empty string, but there is no transition for this. So
2480*d5f8dde1SJohn Marino we need to modify the lastpos(n) of an iteration to be the
2481*d5f8dde1SJohn Marino equivalent of that of catentation. Using the same notation as
2482*d5f8dde1SJohn Marino in the thesis, lastpos(n) is redefined as:
2483*d5f8dde1SJohn Marino
2484*d5f8dde1SJohn Marino if nullable(c1) then
2485*d5f8dde1SJohn Marino lastpos(c1) U
2486*d5f8dde1SJohn Marino addtags(lastpos(c1),
2487*d5f8dde1SJohn Marino emptymatch(c1))
24885f2eab64SJohn Marino else
2489*d5f8dde1SJohn Marino lastpos(c1)
2490*d5f8dde1SJohn Marino
2491*d5f8dde1SJohn Marino where c1 is the argument node. firstpos(n) remains the same. */
2492*d5f8dde1SJohn Marino
2493*d5f8dde1SJohn Marino /* Compute lastpos. */
2494*d5f8dde1SJohn Marino if (iter->min == 0 || iter->arg->nullable)
2495*d5f8dde1SJohn Marino {
2496*d5f8dde1SJohn Marino node->nullable = 1;
2497*d5f8dde1SJohn Marino if (iter->arg->nullable)
2498*d5f8dde1SJohn Marino {
2499*d5f8dde1SJohn Marino /* The arg matches the empty string. Make a first pass
2500*d5f8dde1SJohn Marino with tre_match_empty() to get the number of tags and
2501*d5f8dde1SJohn Marino parameters. */
2502*d5f8dde1SJohn Marino status = tre_match_empty(stack, iter->arg,
2503*d5f8dde1SJohn Marino NULL, NULL, NULL, &num_tags,
2504*d5f8dde1SJohn Marino ¶ms_seen);
2505*d5f8dde1SJohn Marino if (status != REG_OK)
2506*d5f8dde1SJohn Marino return status;
2507*d5f8dde1SJohn Marino /* Allocate arrays for the tags and parameters. */
2508*d5f8dde1SJohn Marino tags = xmalloc(sizeof(int) * (num_tags + 1));
2509*d5f8dde1SJohn Marino if (!tags)
2510*d5f8dde1SJohn Marino return REG_ESPACE;
2511*d5f8dde1SJohn Marino tags[0] = -1;
2512*d5f8dde1SJohn Marino assertions = 0;
2513*d5f8dde1SJohn Marino params = NULL;
2514*d5f8dde1SJohn Marino if (params_seen)
2515*d5f8dde1SJohn Marino {
2516*d5f8dde1SJohn Marino params = tre_mem_alloc(mem, sizeof(*params)
2517*d5f8dde1SJohn Marino * TRE_PARAM_LAST);
2518*d5f8dde1SJohn Marino if (!params)
2519*d5f8dde1SJohn Marino {
2520*d5f8dde1SJohn Marino xfree(tags);
2521*d5f8dde1SJohn Marino return REG_ESPACE;
2522*d5f8dde1SJohn Marino }
2523*d5f8dde1SJohn Marino }
2524*d5f8dde1SJohn Marino /* Second pass with tre_mach_empty() to get the list of
2525*d5f8dde1SJohn Marino tags and parameters. */
2526*d5f8dde1SJohn Marino status = tre_match_empty(stack, iter->arg, tags,
2527*d5f8dde1SJohn Marino &assertions, params, NULL, NULL);
2528*d5f8dde1SJohn Marino if (status != REG_OK)
2529*d5f8dde1SJohn Marino {
2530*d5f8dde1SJohn Marino xfree(tags);
2531*d5f8dde1SJohn Marino return status;
2532*d5f8dde1SJohn Marino }
2533*d5f8dde1SJohn Marino node->lastpos =
2534*d5f8dde1SJohn Marino tre_set_union(mem, iter->arg->lastpos, iter->arg->lastpos,
2535*d5f8dde1SJohn Marino tags, assertions, params);
2536*d5f8dde1SJohn Marino xfree(tags);
2537*d5f8dde1SJohn Marino if (!node->lastpos)
2538*d5f8dde1SJohn Marino return REG_ESPACE;
2539*d5f8dde1SJohn Marino }
2540*d5f8dde1SJohn Marino else
25415f2eab64SJohn Marino node->lastpos = iter->arg->lastpos;
2542*d5f8dde1SJohn Marino }
2543*d5f8dde1SJohn Marino else
2544*d5f8dde1SJohn Marino {
2545*d5f8dde1SJohn Marino node->nullable = 0;
2546*d5f8dde1SJohn Marino node->lastpos = iter->arg->lastpos;
2547*d5f8dde1SJohn Marino }
2548*d5f8dde1SJohn Marino node->firstpos = iter->arg->firstpos;
25495f2eab64SJohn Marino break;
25505f2eab64SJohn Marino }
25515f2eab64SJohn Marino
25525f2eab64SJohn Marino case NFL_POST_CATENATION:
25535f2eab64SJohn Marino {
25545f2eab64SJohn Marino int num_tags, *tags, assertions, params_seen;
25555f2eab64SJohn Marino int *params;
25565f2eab64SJohn Marino reg_errcode_t status;
25575f2eab64SJohn Marino tre_catenation_t *cat = node->obj;
25585f2eab64SJohn Marino node->nullable = cat->left->nullable && cat->right->nullable;
25595f2eab64SJohn Marino
25605f2eab64SJohn Marino /* Compute firstpos. */
25615f2eab64SJohn Marino if (cat->left->nullable)
25625f2eab64SJohn Marino {
25635f2eab64SJohn Marino /* The left side matches the empty string. Make a first pass
25645f2eab64SJohn Marino with tre_match_empty() to get the number of tags and
25655f2eab64SJohn Marino parameters. */
25665f2eab64SJohn Marino status = tre_match_empty(stack, cat->left,
25675f2eab64SJohn Marino NULL, NULL, NULL, &num_tags,
25685f2eab64SJohn Marino ¶ms_seen);
25695f2eab64SJohn Marino if (status != REG_OK)
25705f2eab64SJohn Marino return status;
25715f2eab64SJohn Marino /* Allocate arrays for the tags and parameters. */
25725f2eab64SJohn Marino tags = xmalloc(sizeof(*tags) * (num_tags + 1));
25735f2eab64SJohn Marino if (!tags)
25745f2eab64SJohn Marino return REG_ESPACE;
25755f2eab64SJohn Marino tags[0] = -1;
25765f2eab64SJohn Marino assertions = 0;
25775f2eab64SJohn Marino params = NULL;
25785f2eab64SJohn Marino if (params_seen)
25795f2eab64SJohn Marino {
25805f2eab64SJohn Marino params = tre_mem_alloc(mem, sizeof(*params)
25815f2eab64SJohn Marino * TRE_PARAM_LAST);
25825f2eab64SJohn Marino if (!params)
25835f2eab64SJohn Marino {
25845f2eab64SJohn Marino xfree(tags);
25855f2eab64SJohn Marino return REG_ESPACE;
25865f2eab64SJohn Marino }
25875f2eab64SJohn Marino }
25885f2eab64SJohn Marino /* Second pass with tre_mach_empty() to get the list of
25895f2eab64SJohn Marino tags and parameters. */
25905f2eab64SJohn Marino status = tre_match_empty(stack, cat->left, tags,
25915f2eab64SJohn Marino &assertions, params, NULL, NULL);
25925f2eab64SJohn Marino if (status != REG_OK)
25935f2eab64SJohn Marino {
25945f2eab64SJohn Marino xfree(tags);
25955f2eab64SJohn Marino return status;
25965f2eab64SJohn Marino }
25975f2eab64SJohn Marino node->firstpos =
25985f2eab64SJohn Marino tre_set_union(mem, cat->right->firstpos, cat->left->firstpos,
25995f2eab64SJohn Marino tags, assertions, params);
26005f2eab64SJohn Marino xfree(tags);
26015f2eab64SJohn Marino if (!node->firstpos)
26025f2eab64SJohn Marino return REG_ESPACE;
26035f2eab64SJohn Marino }
26045f2eab64SJohn Marino else
26055f2eab64SJohn Marino {
26065f2eab64SJohn Marino node->firstpos = cat->left->firstpos;
26075f2eab64SJohn Marino }
26085f2eab64SJohn Marino
26095f2eab64SJohn Marino /* Compute lastpos. */
26105f2eab64SJohn Marino if (cat->right->nullable)
26115f2eab64SJohn Marino {
26125f2eab64SJohn Marino /* The right side matches the empty string. Make a first pass
26135f2eab64SJohn Marino with tre_match_empty() to get the number of tags and
26145f2eab64SJohn Marino parameters. */
26155f2eab64SJohn Marino status = tre_match_empty(stack, cat->right,
26165f2eab64SJohn Marino NULL, NULL, NULL, &num_tags,
26175f2eab64SJohn Marino ¶ms_seen);
26185f2eab64SJohn Marino if (status != REG_OK)
26195f2eab64SJohn Marino return status;
26205f2eab64SJohn Marino /* Allocate arrays for the tags and parameters. */
26215f2eab64SJohn Marino tags = xmalloc(sizeof(int) * (num_tags + 1));
26225f2eab64SJohn Marino if (!tags)
26235f2eab64SJohn Marino return REG_ESPACE;
26245f2eab64SJohn Marino tags[0] = -1;
26255f2eab64SJohn Marino assertions = 0;
26265f2eab64SJohn Marino params = NULL;
26275f2eab64SJohn Marino if (params_seen)
26285f2eab64SJohn Marino {
26295f2eab64SJohn Marino params = tre_mem_alloc(mem, sizeof(*params)
26305f2eab64SJohn Marino * TRE_PARAM_LAST);
26315f2eab64SJohn Marino if (!params)
26325f2eab64SJohn Marino {
26335f2eab64SJohn Marino xfree(tags);
26345f2eab64SJohn Marino return REG_ESPACE;
26355f2eab64SJohn Marino }
26365f2eab64SJohn Marino }
26375f2eab64SJohn Marino /* Second pass with tre_mach_empty() to get the list of
26385f2eab64SJohn Marino tags and parameters. */
26395f2eab64SJohn Marino status = tre_match_empty(stack, cat->right, tags,
26405f2eab64SJohn Marino &assertions, params, NULL, NULL);
26415f2eab64SJohn Marino if (status != REG_OK)
26425f2eab64SJohn Marino {
26435f2eab64SJohn Marino xfree(tags);
26445f2eab64SJohn Marino return status;
26455f2eab64SJohn Marino }
26465f2eab64SJohn Marino node->lastpos =
26475f2eab64SJohn Marino tre_set_union(mem, cat->left->lastpos, cat->right->lastpos,
26485f2eab64SJohn Marino tags, assertions, params);
26495f2eab64SJohn Marino xfree(tags);
26505f2eab64SJohn Marino if (!node->lastpos)
26515f2eab64SJohn Marino return REG_ESPACE;
26525f2eab64SJohn Marino }
26535f2eab64SJohn Marino else
26545f2eab64SJohn Marino {
26555f2eab64SJohn Marino node->lastpos = cat->right->lastpos;
26565f2eab64SJohn Marino }
26575f2eab64SJohn Marino break;
26585f2eab64SJohn Marino }
26595f2eab64SJohn Marino
26605f2eab64SJohn Marino default:
26615f2eab64SJohn Marino assert(0);
26625f2eab64SJohn Marino break;
26635f2eab64SJohn Marino }
26645f2eab64SJohn Marino }
26655f2eab64SJohn Marino
26665f2eab64SJohn Marino return REG_OK;
26675f2eab64SJohn Marino }
26685f2eab64SJohn Marino
26695f2eab64SJohn Marino
26705f2eab64SJohn Marino /* Adds a transition from each position in `p1' to each position in `p2'. */
26715f2eab64SJohn Marino static reg_errcode_t
tre_make_trans(tre_pos_and_tags_t * p1,tre_pos_and_tags_t * p2,tre_tnfa_transition_t * transitions,int * counts,int * offs)26725f2eab64SJohn Marino tre_make_trans(tre_pos_and_tags_t *p1, tre_pos_and_tags_t *p2,
26735f2eab64SJohn Marino tre_tnfa_transition_t *transitions,
26745f2eab64SJohn Marino int *counts, int *offs)
26755f2eab64SJohn Marino {
26765f2eab64SJohn Marino tre_pos_and_tags_t *orig_p2 = p2;
26775f2eab64SJohn Marino tre_tnfa_transition_t *trans;
26785f2eab64SJohn Marino int i, j, k, l, dup, prev_p2_pos;
26795f2eab64SJohn Marino
26805f2eab64SJohn Marino if (transitions != NULL)
26815f2eab64SJohn Marino while (p1->position >= 0)
26825f2eab64SJohn Marino {
26835f2eab64SJohn Marino p2 = orig_p2;
26845f2eab64SJohn Marino prev_p2_pos = -1;
26855f2eab64SJohn Marino while (p2->position >= 0)
26865f2eab64SJohn Marino {
26875f2eab64SJohn Marino /* Optimization: if this position was already handled, skip it. */
26885f2eab64SJohn Marino if (p2->position == prev_p2_pos)
26895f2eab64SJohn Marino {
26905f2eab64SJohn Marino p2++;
26915f2eab64SJohn Marino continue;
26925f2eab64SJohn Marino }
26935f2eab64SJohn Marino prev_p2_pos = p2->position;
26945f2eab64SJohn Marino /* Set `trans' to point to the next unused transition from
26955f2eab64SJohn Marino position `p1->position'. */
26965f2eab64SJohn Marino trans = transitions + offs[p1->position];
26975f2eab64SJohn Marino while (trans->state != NULL)
26985f2eab64SJohn Marino {
26995f2eab64SJohn Marino #if 0
27005f2eab64SJohn Marino /* If we find a previous transition from `p1->position' to
27015f2eab64SJohn Marino `p2->position', it is overwritten. This can happen only
27025f2eab64SJohn Marino if there are nested loops in the regexp, like in "((a)*)*".
27035f2eab64SJohn Marino In POSIX.2 repetition using the outer loop is always
27045f2eab64SJohn Marino preferred over using the inner loop. Therefore the
27055f2eab64SJohn Marino transition for the inner loop is useless and can be thrown
27065f2eab64SJohn Marino away. */
27075f2eab64SJohn Marino /* XXX - The same position is used for all nodes in a bracket
27085f2eab64SJohn Marino expression, so this optimization cannot be used (it will
27095f2eab64SJohn Marino break bracket expressions) unless I figure out a way to
27105f2eab64SJohn Marino detect it here. */
27115f2eab64SJohn Marino if (trans->state_id == p2->position)
27125f2eab64SJohn Marino {
27135f2eab64SJohn Marino DPRINT(("*"));
27145f2eab64SJohn Marino break;
27155f2eab64SJohn Marino }
27165f2eab64SJohn Marino #endif
27175f2eab64SJohn Marino trans++;
27185f2eab64SJohn Marino }
27195f2eab64SJohn Marino
27205f2eab64SJohn Marino if (trans->state == NULL)
27215f2eab64SJohn Marino (trans + 1)->state = NULL;
27225f2eab64SJohn Marino /* Use the character ranges, assertions, etc. from `p1' for
27235f2eab64SJohn Marino the transition from `p1' to `p2'. */
27245f2eab64SJohn Marino trans->code_min = p1->code_min;
27255f2eab64SJohn Marino trans->code_max = p1->code_max;
27265f2eab64SJohn Marino trans->state = transitions + offs[p2->position];
27275f2eab64SJohn Marino trans->state_id = p2->position;
27285f2eab64SJohn Marino trans->assertions = p1->assertions | p2->assertions
2729*d5f8dde1SJohn Marino | (p1->bracket_match_list != NULL ? ASSERT_BRACKET_MATCH : 0);
27305f2eab64SJohn Marino if (p1->backref >= 0)
27315f2eab64SJohn Marino {
2732*d5f8dde1SJohn Marino assert((trans->assertions & ASSERT_BRACKET_MATCH) == 0);
27335f2eab64SJohn Marino assert(p2->backref < 0);
27345f2eab64SJohn Marino trans->u.backref = p1->backref;
27355f2eab64SJohn Marino trans->assertions |= ASSERT_BACKREF;
27365f2eab64SJohn Marino }
2737*d5f8dde1SJohn Marino if (p1->bracket_match_list != NULL)
27385f2eab64SJohn Marino {
2739*d5f8dde1SJohn Marino trans->u.bracket_match_list =
2740*d5f8dde1SJohn Marino xmalloc(SIZEOF_BRACKET_MATCH_LIST(p1->bracket_match_list));
2741*d5f8dde1SJohn Marino if (trans->u.bracket_match_list == NULL)
27425f2eab64SJohn Marino return REG_ESPACE;
2743*d5f8dde1SJohn Marino memcpy(trans->u.bracket_match_list, p1->bracket_match_list,
2744*d5f8dde1SJohn Marino SIZEOF_BRACKET_MATCH_LIST(p1->bracket_match_list));
27455f2eab64SJohn Marino }
27465f2eab64SJohn Marino
27475f2eab64SJohn Marino /* Find out how many tags this transition has. */
27485f2eab64SJohn Marino i = 0;
27495f2eab64SJohn Marino if (p1->tags != NULL)
27505f2eab64SJohn Marino while(p1->tags[i] >= 0)
27515f2eab64SJohn Marino i++;
27525f2eab64SJohn Marino j = 0;
27535f2eab64SJohn Marino if (p2->tags != NULL)
27545f2eab64SJohn Marino while(p2->tags[j] >= 0)
27555f2eab64SJohn Marino j++;
27565f2eab64SJohn Marino
27575f2eab64SJohn Marino /* If we are overwriting a transition, free the old tag array. */
27585f2eab64SJohn Marino if (trans->tags != NULL)
27595f2eab64SJohn Marino xfree(trans->tags);
27605f2eab64SJohn Marino trans->tags = NULL;
27615f2eab64SJohn Marino
27625f2eab64SJohn Marino /* If there were any tags, allocate an array and fill it. */
27635f2eab64SJohn Marino if (i + j > 0)
27645f2eab64SJohn Marino {
27655f2eab64SJohn Marino trans->tags = xmalloc(sizeof(*trans->tags) * (i + j + 1));
27665f2eab64SJohn Marino if (!trans->tags)
27675f2eab64SJohn Marino return REG_ESPACE;
27685f2eab64SJohn Marino i = 0;
27695f2eab64SJohn Marino if (p1->tags != NULL)
27705f2eab64SJohn Marino while(p1->tags[i] >= 0)
27715f2eab64SJohn Marino {
27725f2eab64SJohn Marino trans->tags[i] = p1->tags[i];
27735f2eab64SJohn Marino i++;
27745f2eab64SJohn Marino }
27755f2eab64SJohn Marino l = i;
27765f2eab64SJohn Marino j = 0;
27775f2eab64SJohn Marino if (p2->tags != NULL)
27785f2eab64SJohn Marino while (p2->tags[j] >= 0)
27795f2eab64SJohn Marino {
27805f2eab64SJohn Marino /* Don't add duplicates. */
27815f2eab64SJohn Marino dup = 0;
27825f2eab64SJohn Marino for (k = 0; k < i; k++)
27835f2eab64SJohn Marino if (trans->tags[k] == p2->tags[j])
27845f2eab64SJohn Marino {
27855f2eab64SJohn Marino dup = 1;
27865f2eab64SJohn Marino break;
27875f2eab64SJohn Marino }
27885f2eab64SJohn Marino if (!dup)
27895f2eab64SJohn Marino trans->tags[l++] = p2->tags[j];
27905f2eab64SJohn Marino j++;
27915f2eab64SJohn Marino }
27925f2eab64SJohn Marino trans->tags[l] = -1;
27935f2eab64SJohn Marino }
27945f2eab64SJohn Marino
27955f2eab64SJohn Marino /* Set the parameter array. If both `p2' and `p1' have same
27965f2eab64SJohn Marino parameters, the values in `p2' override those in `p1'. */
27975f2eab64SJohn Marino if (p1->params || p2->params)
27985f2eab64SJohn Marino {
27995f2eab64SJohn Marino if (!trans->params)
28005f2eab64SJohn Marino trans->params = xmalloc(sizeof(*trans->params)
28015f2eab64SJohn Marino * TRE_PARAM_LAST);
28025f2eab64SJohn Marino if (!trans->params)
28035f2eab64SJohn Marino return REG_ESPACE;
28045f2eab64SJohn Marino for (i = 0; i < TRE_PARAM_LAST; i++)
28055f2eab64SJohn Marino {
28065f2eab64SJohn Marino trans->params[i] = TRE_PARAM_UNSET;
28075f2eab64SJohn Marino if (p1->params && p1->params[i] != TRE_PARAM_UNSET)
28085f2eab64SJohn Marino trans->params[i] = p1->params[i];
28095f2eab64SJohn Marino if (p2->params && p2->params[i] != TRE_PARAM_UNSET)
28105f2eab64SJohn Marino trans->params[i] = p2->params[i];
28115f2eab64SJohn Marino }
28125f2eab64SJohn Marino }
28135f2eab64SJohn Marino else
28145f2eab64SJohn Marino {
28155f2eab64SJohn Marino if (trans->params)
28165f2eab64SJohn Marino xfree(trans->params);
28175f2eab64SJohn Marino trans->params = NULL;
28185f2eab64SJohn Marino }
28195f2eab64SJohn Marino
28205f2eab64SJohn Marino
28215f2eab64SJohn Marino #ifdef TRE_DEBUG
28225f2eab64SJohn Marino {
28235f2eab64SJohn Marino int *tags;
28245f2eab64SJohn Marino
28255f2eab64SJohn Marino DPRINT((" %2d -> %2d on %3d", p1->position, p2->position,
28265f2eab64SJohn Marino p1->code_min));
28275f2eab64SJohn Marino if (p1->code_max != p1->code_min)
28285f2eab64SJohn Marino DPRINT(("-%3d", p1->code_max));
28295f2eab64SJohn Marino tags = trans->tags;
28305f2eab64SJohn Marino if (tags)
28315f2eab64SJohn Marino {
28325f2eab64SJohn Marino DPRINT((", tags ["));
28335f2eab64SJohn Marino while (*tags >= 0)
28345f2eab64SJohn Marino {
28355f2eab64SJohn Marino DPRINT(("%d", *tags));
28365f2eab64SJohn Marino tags++;
28375f2eab64SJohn Marino if (*tags >= 0)
28385f2eab64SJohn Marino DPRINT((","));
28395f2eab64SJohn Marino }
28405f2eab64SJohn Marino DPRINT(("]"));
28415f2eab64SJohn Marino }
28425f2eab64SJohn Marino if (trans->assertions)
28435f2eab64SJohn Marino DPRINT((", assert %d", trans->assertions));
28445f2eab64SJohn Marino if (trans->assertions & ASSERT_BACKREF)
28455f2eab64SJohn Marino DPRINT((", backref %d", trans->u.backref));
2846*d5f8dde1SJohn Marino else if (trans->assertions & ASSERT_BRACKET_MATCH)
2847*d5f8dde1SJohn Marino DPRINT((", bracket_match_list %p",
2848*d5f8dde1SJohn Marino trans->u.bracket_match_list));
28495f2eab64SJohn Marino if (trans->params)
28505f2eab64SJohn Marino {
28515f2eab64SJohn Marino DPRINT((", "));
28525f2eab64SJohn Marino tre_print_params(trans->params);
28535f2eab64SJohn Marino }
28545f2eab64SJohn Marino DPRINT(("\n"));
28555f2eab64SJohn Marino }
28565f2eab64SJohn Marino #endif /* TRE_DEBUG */
28575f2eab64SJohn Marino p2++;
28585f2eab64SJohn Marino }
28595f2eab64SJohn Marino p1++;
28605f2eab64SJohn Marino }
28615f2eab64SJohn Marino else
28625f2eab64SJohn Marino /* Compute a maximum limit for the number of transitions leaving
28635f2eab64SJohn Marino from each state. */
28645f2eab64SJohn Marino while (p1->position >= 0)
28655f2eab64SJohn Marino {
28665f2eab64SJohn Marino p2 = orig_p2;
28675f2eab64SJohn Marino while (p2->position >= 0)
28685f2eab64SJohn Marino {
28695f2eab64SJohn Marino counts[p1->position]++;
28705f2eab64SJohn Marino p2++;
28715f2eab64SJohn Marino }
28725f2eab64SJohn Marino p1++;
28735f2eab64SJohn Marino }
28745f2eab64SJohn Marino return REG_OK;
28755f2eab64SJohn Marino }
28765f2eab64SJohn Marino
28775f2eab64SJohn Marino /* Converts the syntax tree to a TNFA. All the transitions in the TNFA are
28785f2eab64SJohn Marino labelled with one character range (there are no transitions on empty
28795f2eab64SJohn Marino strings). The TNFA takes O(n^2) space in the worst case, `n' is size of
28805f2eab64SJohn Marino the regexp. */
28815f2eab64SJohn Marino static reg_errcode_t
tre_ast_to_tnfa(tre_ast_node_t * node,tre_tnfa_transition_t * transitions,int * counts,int * offs)28825f2eab64SJohn Marino tre_ast_to_tnfa(tre_ast_node_t *node, tre_tnfa_transition_t *transitions,
28835f2eab64SJohn Marino int *counts, int *offs)
28845f2eab64SJohn Marino {
28855f2eab64SJohn Marino tre_union_t *uni;
28865f2eab64SJohn Marino tre_catenation_t *cat;
28875f2eab64SJohn Marino tre_iteration_t *iter;
28885f2eab64SJohn Marino reg_errcode_t errcode = REG_OK;
28895f2eab64SJohn Marino
28905f2eab64SJohn Marino /* XXX - recurse using a stack!. */
28915f2eab64SJohn Marino switch (node->type)
28925f2eab64SJohn Marino {
28935f2eab64SJohn Marino case LITERAL:
28945f2eab64SJohn Marino break;
28955f2eab64SJohn Marino case UNION:
28965f2eab64SJohn Marino uni = (tre_union_t *)node->obj;
28975f2eab64SJohn Marino errcode = tre_ast_to_tnfa(uni->left, transitions, counts, offs);
28985f2eab64SJohn Marino if (errcode != REG_OK)
28995f2eab64SJohn Marino return errcode;
29005f2eab64SJohn Marino errcode = tre_ast_to_tnfa(uni->right, transitions, counts, offs);
29015f2eab64SJohn Marino break;
29025f2eab64SJohn Marino
29035f2eab64SJohn Marino case CATENATION:
29045f2eab64SJohn Marino cat = (tre_catenation_t *)node->obj;
29055f2eab64SJohn Marino /* Add a transition from each position in cat->left->lastpos
29065f2eab64SJohn Marino to each position in cat->right->firstpos. */
29075f2eab64SJohn Marino errcode = tre_make_trans(cat->left->lastpos, cat->right->firstpos,
29085f2eab64SJohn Marino transitions, counts, offs);
29095f2eab64SJohn Marino if (errcode != REG_OK)
29105f2eab64SJohn Marino return errcode;
29115f2eab64SJohn Marino errcode = tre_ast_to_tnfa(cat->left, transitions, counts, offs);
29125f2eab64SJohn Marino if (errcode != REG_OK)
29135f2eab64SJohn Marino return errcode;
29145f2eab64SJohn Marino errcode = tre_ast_to_tnfa(cat->right, transitions, counts, offs);
29155f2eab64SJohn Marino break;
29165f2eab64SJohn Marino
29175f2eab64SJohn Marino case ITERATION:
29185f2eab64SJohn Marino iter = (tre_iteration_t *)node->obj;
29195f2eab64SJohn Marino assert(iter->max == -1 || iter->max == 1);
29205f2eab64SJohn Marino
29215f2eab64SJohn Marino if (iter->max == -1)
29225f2eab64SJohn Marino {
29235f2eab64SJohn Marino assert(iter->min == 0 || iter->min == 1);
29245f2eab64SJohn Marino /* Add a transition from each last position in the iterated
29255f2eab64SJohn Marino expression to each first position. */
29265f2eab64SJohn Marino errcode = tre_make_trans(iter->arg->lastpos, iter->arg->firstpos,
29275f2eab64SJohn Marino transitions, counts, offs);
29285f2eab64SJohn Marino if (errcode != REG_OK)
29295f2eab64SJohn Marino return errcode;
29305f2eab64SJohn Marino }
29315f2eab64SJohn Marino errcode = tre_ast_to_tnfa(iter->arg, transitions, counts, offs);
29325f2eab64SJohn Marino break;
29335f2eab64SJohn Marino }
29345f2eab64SJohn Marino return errcode;
29355f2eab64SJohn Marino }
29365f2eab64SJohn Marino
29375f2eab64SJohn Marino
29385f2eab64SJohn Marino #define ERROR_EXIT(err) \
29395f2eab64SJohn Marino do \
29405f2eab64SJohn Marino { \
29415f2eab64SJohn Marino errcode = err; \
29425f2eab64SJohn Marino if (/*CONSTCOND*/1) \
29435f2eab64SJohn Marino goto error_exit; \
29445f2eab64SJohn Marino } \
29455f2eab64SJohn Marino while (/*CONSTCOND*/0)
29465f2eab64SJohn Marino
29475f2eab64SJohn Marino
29485f2eab64SJohn Marino int
tre_compile(regex_t * preg,const tre_char_t * regex,size_t n,int cflags,locale_t loc)2949*d5f8dde1SJohn Marino tre_compile(regex_t *preg, const tre_char_t *regex, size_t n, int cflags,
2950*d5f8dde1SJohn Marino locale_t loc)
29515f2eab64SJohn Marino {
29525f2eab64SJohn Marino tre_stack_t *stack;
29535f2eab64SJohn Marino tre_ast_node_t *tree, *tmp_ast_l, *tmp_ast_r;
29545f2eab64SJohn Marino tre_pos_and_tags_t *p;
29555f2eab64SJohn Marino int *counts = NULL, *offs = NULL;
29565f2eab64SJohn Marino int i, add = 0;
29575f2eab64SJohn Marino tre_tnfa_transition_t *transitions, *initial;
29585f2eab64SJohn Marino tre_tnfa_t *tnfa = NULL;
2959*d5f8dde1SJohn Marino tre_submatch_data_t *submatch_data = NULL;
29605f2eab64SJohn Marino tre_tag_direction_t *tag_directions = NULL;
29615f2eab64SJohn Marino reg_errcode_t errcode;
29625f2eab64SJohn Marino tre_mem_t mem;
29635f2eab64SJohn Marino
29645f2eab64SJohn Marino /* Parse context. */
29655f2eab64SJohn Marino tre_parse_ctx_t parse_ctx;
29665f2eab64SJohn Marino
29675f2eab64SJohn Marino /* Allocate a stack used throughout the compilation process for various
29685f2eab64SJohn Marino purposes. */
29695f2eab64SJohn Marino stack = tre_stack_new(512, 10240, 128);
29705f2eab64SJohn Marino if (!stack)
29715f2eab64SJohn Marino return REG_ESPACE;
29725f2eab64SJohn Marino /* Allocate a fast memory allocator. */
29735f2eab64SJohn Marino mem = tre_mem_new();
29745f2eab64SJohn Marino if (!mem)
29755f2eab64SJohn Marino {
29765f2eab64SJohn Marino tre_stack_destroy(stack);
29775f2eab64SJohn Marino return REG_ESPACE;
29785f2eab64SJohn Marino }
29795f2eab64SJohn Marino
29805f2eab64SJohn Marino /* Parse the regexp. */
29815f2eab64SJohn Marino memset(&parse_ctx, 0, sizeof(parse_ctx));
29825f2eab64SJohn Marino parse_ctx.mem = mem;
29835f2eab64SJohn Marino parse_ctx.stack = stack;
29845f2eab64SJohn Marino parse_ctx.re = regex;
29855f2eab64SJohn Marino parse_ctx.len = n;
2986*d5f8dde1SJohn Marino /* Only allow REG_UNGREEDY to be set if both REG_ENHANCED and REG_EXTENDED
2987*d5f8dde1SJohn Marino are also set */
2988*d5f8dde1SJohn Marino if ((cflags & (REG_ENHANCED | REG_EXTENDED)) != (REG_ENHANCED | REG_EXTENDED))
2989*d5f8dde1SJohn Marino cflags &= ~REG_UNGREEDY;
29905f2eab64SJohn Marino parse_ctx.cflags = cflags;
29915f2eab64SJohn Marino parse_ctx.max_backref = -1;
2992*d5f8dde1SJohn Marino parse_ctx.loc = loc;
2993*d5f8dde1SJohn Marino parse_ctx.submatch_id_invisible = SUBMATCH_ID_INVISIBLE_START;
2994*d5f8dde1SJohn Marino
29955f2eab64SJohn Marino DPRINT(("tre_compile: parsing '%.*" STRF "'\n", (int)n, regex));
29965f2eab64SJohn Marino errcode = tre_parse(&parse_ctx);
29975f2eab64SJohn Marino if (errcode != REG_OK)
29985f2eab64SJohn Marino ERROR_EXIT(errcode);
29995f2eab64SJohn Marino preg->re_nsub = parse_ctx.submatch_id - 1;
30005f2eab64SJohn Marino tree = parse_ctx.result;
30015f2eab64SJohn Marino
30025f2eab64SJohn Marino /* Back references and approximate matching cannot currently be used
30035f2eab64SJohn Marino in the same regexp. */
30045f2eab64SJohn Marino if (parse_ctx.max_backref >= 0 && parse_ctx.have_approx)
30055f2eab64SJohn Marino ERROR_EXIT(REG_BADPAT);
30065f2eab64SJohn Marino
30075f2eab64SJohn Marino #ifdef TRE_DEBUG
30085f2eab64SJohn Marino tre_ast_print(tree);
30095f2eab64SJohn Marino #endif /* TRE_DEBUG */
30105f2eab64SJohn Marino
30115f2eab64SJohn Marino /* Referring to nonexistent subexpressions is illegal. */
30125f2eab64SJohn Marino if (parse_ctx.max_backref > (int)preg->re_nsub)
30135f2eab64SJohn Marino ERROR_EXIT(REG_ESUBREG);
30145f2eab64SJohn Marino
30155f2eab64SJohn Marino /* Allocate the TNFA struct. */
30165f2eab64SJohn Marino tnfa = xcalloc(1, sizeof(tre_tnfa_t));
30175f2eab64SJohn Marino if (tnfa == NULL)
30185f2eab64SJohn Marino ERROR_EXIT(REG_ESPACE);
30195f2eab64SJohn Marino tnfa->have_backrefs = parse_ctx.max_backref >= 0;
30205f2eab64SJohn Marino tnfa->have_approx = parse_ctx.have_approx;
30215f2eab64SJohn Marino tnfa->num_submatches = parse_ctx.submatch_id;
3022*d5f8dde1SJohn Marino tnfa->num_submatches_invisible = parse_ctx.submatch_id_invisible
3023*d5f8dde1SJohn Marino - SUBMATCH_ID_INVISIBLE_START;
3024*d5f8dde1SJohn Marino tnfa->num_reorder_tags = parse_ctx.num_reorder_tags;
3025*d5f8dde1SJohn Marino tnfa->loc = parse_ctx.loc;
30265f2eab64SJohn Marino
30275f2eab64SJohn Marino /* Set up tags for submatch addressing. If REG_NOSUB is set and the
30285f2eab64SJohn Marino regexp does not have back references, this can be skipped. */
3029*d5f8dde1SJohn Marino if (tnfa->num_reorder_tags > 0 || !(cflags & REG_NOSUB))
30305f2eab64SJohn Marino {
30315f2eab64SJohn Marino DPRINT(("tre_compile: setting up tags\n"));
30325f2eab64SJohn Marino
30335f2eab64SJohn Marino /* Figure out how many tags we will need. */
30345f2eab64SJohn Marino errcode = tre_add_tags(NULL, stack, tree, tnfa);
30355f2eab64SJohn Marino if (errcode != REG_OK)
30365f2eab64SJohn Marino ERROR_EXIT(errcode);
30375f2eab64SJohn Marino #ifdef TRE_DEBUG
30385f2eab64SJohn Marino tre_ast_print(tree);
30395f2eab64SJohn Marino #endif /* TRE_DEBUG */
30405f2eab64SJohn Marino
30415f2eab64SJohn Marino if (tnfa->num_tags > 0)
30425f2eab64SJohn Marino {
30435f2eab64SJohn Marino tag_directions = xmalloc(sizeof(*tag_directions)
30445f2eab64SJohn Marino * (tnfa->num_tags + 1));
30455f2eab64SJohn Marino if (tag_directions == NULL)
30465f2eab64SJohn Marino ERROR_EXIT(REG_ESPACE);
30475f2eab64SJohn Marino tnfa->tag_directions = tag_directions;
30485f2eab64SJohn Marino memset(tag_directions, -1,
30495f2eab64SJohn Marino sizeof(*tag_directions) * (tnfa->num_tags + 1));
30505f2eab64SJohn Marino }
3051*d5f8dde1SJohn Marino tnfa->minimal_tags = xcalloc((unsigned)tnfa->num_tags * 2 + 3,
30525f2eab64SJohn Marino sizeof(tnfa->minimal_tags));
30535f2eab64SJohn Marino if (tnfa->minimal_tags == NULL)
30545f2eab64SJohn Marino ERROR_EXIT(REG_ESPACE);
30555f2eab64SJohn Marino
30565f2eab64SJohn Marino submatch_data = xcalloc((unsigned)parse_ctx.submatch_id,
30575f2eab64SJohn Marino sizeof(*submatch_data));
30585f2eab64SJohn Marino if (submatch_data == NULL)
30595f2eab64SJohn Marino ERROR_EXIT(REG_ESPACE);
3060*d5f8dde1SJohn Marino /* Set the eo_tag value to -1 to indicate that that corresponding
3061*d5f8dde1SJohn Marino * submatch has not be completed yet */
3062*d5f8dde1SJohn Marino for (i = 0; i < parse_ctx.submatch_id; i++)
3063*d5f8dde1SJohn Marino {
3064*d5f8dde1SJohn Marino submatch_data[i].eo_tag = -1;
3065*d5f8dde1SJohn Marino }
30665f2eab64SJohn Marino tnfa->submatch_data = submatch_data;
30675f2eab64SJohn Marino
30685f2eab64SJohn Marino errcode = tre_add_tags(mem, stack, tree, tnfa);
30695f2eab64SJohn Marino if (errcode != REG_OK)
30705f2eab64SJohn Marino ERROR_EXIT(errcode);
30715f2eab64SJohn Marino
30725f2eab64SJohn Marino #ifdef TRE_DEBUG
30735f2eab64SJohn Marino for (i = 0; i < parse_ctx.submatch_id; i++)
30745f2eab64SJohn Marino DPRINT(("pmatch[%d] = {t%d, t%d}\n",
30755f2eab64SJohn Marino i, submatch_data[i].so_tag, submatch_data[i].eo_tag));
3076*d5f8dde1SJohn Marino for (i = 0; i <= tnfa->num_tags; i++)
3077*d5f8dde1SJohn Marino DPRINT(("t%d is %s\n", i, tag_dir_str[tag_directions[i]]));
30785f2eab64SJohn Marino #endif /* TRE_DEBUG */
30795f2eab64SJohn Marino }
30805f2eab64SJohn Marino
30815f2eab64SJohn Marino /* Expand iteration nodes. */
30825f2eab64SJohn Marino errcode = tre_expand_ast(mem, stack, tree, &parse_ctx.position,
30835f2eab64SJohn Marino tag_directions, &tnfa->params_depth);
30845f2eab64SJohn Marino if (errcode != REG_OK)
30855f2eab64SJohn Marino ERROR_EXIT(errcode);
30865f2eab64SJohn Marino
30875f2eab64SJohn Marino /* Add a dummy node for the final state.
30885f2eab64SJohn Marino XXX - For certain patterns this dummy node can be optimized away,
30895f2eab64SJohn Marino for example "a*" or "ab*". Figure out a simple way to detect
30905f2eab64SJohn Marino this possibility. */
30915f2eab64SJohn Marino tmp_ast_l = tree;
30925f2eab64SJohn Marino tmp_ast_r = tre_ast_new_literal(mem, 0, 0, parse_ctx.position++);
30935f2eab64SJohn Marino if (tmp_ast_r == NULL)
30945f2eab64SJohn Marino ERROR_EXIT(REG_ESPACE);
30955f2eab64SJohn Marino
30965f2eab64SJohn Marino tree = tre_ast_new_catenation(mem, tmp_ast_l, tmp_ast_r);
30975f2eab64SJohn Marino if (tree == NULL)
30985f2eab64SJohn Marino ERROR_EXIT(REG_ESPACE);
30995f2eab64SJohn Marino
31005f2eab64SJohn Marino #ifdef TRE_DEBUG
31015f2eab64SJohn Marino tre_ast_print(tree);
31025f2eab64SJohn Marino DPRINT(("Number of states: %d\n", parse_ctx.position));
3103*d5f8dde1SJohn Marino if (submatch_data)
3104*d5f8dde1SJohn Marino for (i = 0; i < parse_ctx.submatch_id; i++)
3105*d5f8dde1SJohn Marino DPRINT(("pmatch[%d] = {t%d, t%d}\n",
3106*d5f8dde1SJohn Marino i, submatch_data[i].so_tag, submatch_data[i].eo_tag));
3107*d5f8dde1SJohn Marino if (tag_directions)
3108*d5f8dde1SJohn Marino for (i = 0; i <= tnfa->num_tags; i++)
3109*d5f8dde1SJohn Marino DPRINT(("t%d is %s\n", i, tag_dir_str[tag_directions[i]]));
31105f2eab64SJohn Marino #endif /* TRE_DEBUG */
31115f2eab64SJohn Marino
31125f2eab64SJohn Marino errcode = tre_compute_nfl(mem, stack, tree);
31135f2eab64SJohn Marino if (errcode != REG_OK)
31145f2eab64SJohn Marino ERROR_EXIT(errcode);
31155f2eab64SJohn Marino
31165f2eab64SJohn Marino counts = xmalloc(sizeof(int) * parse_ctx.position);
31175f2eab64SJohn Marino if (counts == NULL)
31185f2eab64SJohn Marino ERROR_EXIT(REG_ESPACE);
31195f2eab64SJohn Marino
31205f2eab64SJohn Marino offs = xmalloc(sizeof(int) * parse_ctx.position);
31215f2eab64SJohn Marino if (offs == NULL)
31225f2eab64SJohn Marino ERROR_EXIT(REG_ESPACE);
31235f2eab64SJohn Marino
31245f2eab64SJohn Marino for (i = 0; i < parse_ctx.position; i++)
31255f2eab64SJohn Marino counts[i] = 0;
31265f2eab64SJohn Marino tre_ast_to_tnfa(tree, NULL, counts, NULL);
31275f2eab64SJohn Marino
31285f2eab64SJohn Marino add = 0;
31295f2eab64SJohn Marino for (i = 0; i < parse_ctx.position; i++)
31305f2eab64SJohn Marino {
31315f2eab64SJohn Marino offs[i] = add;
31325f2eab64SJohn Marino add += counts[i] + 1;
31335f2eab64SJohn Marino counts[i] = 0;
31345f2eab64SJohn Marino }
31355f2eab64SJohn Marino transitions = xcalloc((unsigned)add + 1, sizeof(*transitions));
31365f2eab64SJohn Marino if (transitions == NULL)
31375f2eab64SJohn Marino ERROR_EXIT(REG_ESPACE);
31385f2eab64SJohn Marino tnfa->transitions = transitions;
31395f2eab64SJohn Marino tnfa->num_transitions = add;
31405f2eab64SJohn Marino
31415f2eab64SJohn Marino DPRINT(("Converting to TNFA:\n"));
31425f2eab64SJohn Marino errcode = tre_ast_to_tnfa(tree, transitions, counts, offs);
31435f2eab64SJohn Marino if (errcode != REG_OK)
31445f2eab64SJohn Marino ERROR_EXIT(errcode);
31455f2eab64SJohn Marino
3146*d5f8dde1SJohn Marino
3147*d5f8dde1SJohn Marino /* Set first_char only if there is only one character that can be the
3148*d5f8dde1SJohn Marino first character of a match */
31495f2eab64SJohn Marino tnfa->first_char = -1;
3150*d5f8dde1SJohn Marino if (!tmp_ast_l->nullable)
31515f2eab64SJohn Marino {
3152*d5f8dde1SJohn Marino int scanning = 1;
3153*d5f8dde1SJohn Marino for (p = tree->firstpos; scanning && p->position >= 0; p++)
31545f2eab64SJohn Marino {
31555f2eab64SJohn Marino tre_tnfa_transition_t *j = transitions + offs[p->position];
31565f2eab64SJohn Marino while (j->state != NULL)
31575f2eab64SJohn Marino {
3158*d5f8dde1SJohn Marino if (j->code_min <= j->code_max)
31595f2eab64SJohn Marino {
3160*d5f8dde1SJohn Marino if (j->code_max != j->code_min || j->code_min == -1 || tnfa->first_char != -1)
3161*d5f8dde1SJohn Marino {
3162*d5f8dde1SJohn Marino tnfa->first_char = -1;
3163*d5f8dde1SJohn Marino scanning = 0;
3164*d5f8dde1SJohn Marino break;
3165*d5f8dde1SJohn Marino }
3166*d5f8dde1SJohn Marino tnfa->first_char = j->code_min;
31675f2eab64SJohn Marino }
31685f2eab64SJohn Marino j++;
31695f2eab64SJohn Marino }
31705f2eab64SJohn Marino }
3171*d5f8dde1SJohn Marino #ifdef TRE_DEBUG
3172*d5f8dde1SJohn Marino if (tnfa->first_char >= 0)
3173*d5f8dde1SJohn Marino DPRINT(("first char must be %d\n", tnfa->first_char));
3174*d5f8dde1SJohn Marino #endif /* TRE_DEBUG */
31755f2eab64SJohn Marino }
31765f2eab64SJohn Marino
31775f2eab64SJohn Marino p = tree->firstpos;
31785f2eab64SJohn Marino i = 0;
31795f2eab64SJohn Marino while (p->position >= 0)
31805f2eab64SJohn Marino {
31815f2eab64SJohn Marino i++;
31825f2eab64SJohn Marino
31835f2eab64SJohn Marino #ifdef TRE_DEBUG
31845f2eab64SJohn Marino {
31855f2eab64SJohn Marino int *tags;
31865f2eab64SJohn Marino DPRINT(("initial: %d", p->position));
31875f2eab64SJohn Marino tags = p->tags;
31885f2eab64SJohn Marino if (tags != NULL)
31895f2eab64SJohn Marino {
31905f2eab64SJohn Marino if (*tags >= 0)
31915f2eab64SJohn Marino DPRINT(("/"));
31925f2eab64SJohn Marino while (*tags >= 0)
31935f2eab64SJohn Marino {
31945f2eab64SJohn Marino DPRINT(("%d", *tags));
31955f2eab64SJohn Marino tags++;
31965f2eab64SJohn Marino if (*tags >= 0)
31975f2eab64SJohn Marino DPRINT((","));
31985f2eab64SJohn Marino }
31995f2eab64SJohn Marino }
32005f2eab64SJohn Marino DPRINT((", assert %d", p->assertions));
32015f2eab64SJohn Marino if (p->params)
32025f2eab64SJohn Marino {
32035f2eab64SJohn Marino DPRINT((", "));
32045f2eab64SJohn Marino tre_print_params(p->params);
32055f2eab64SJohn Marino }
32065f2eab64SJohn Marino DPRINT(("\n"));
32075f2eab64SJohn Marino }
32085f2eab64SJohn Marino #endif /* TRE_DEBUG */
32095f2eab64SJohn Marino
32105f2eab64SJohn Marino p++;
32115f2eab64SJohn Marino }
32125f2eab64SJohn Marino
32135f2eab64SJohn Marino initial = xcalloc((unsigned)i + 1, sizeof(tre_tnfa_transition_t));
32145f2eab64SJohn Marino if (initial == NULL)
32155f2eab64SJohn Marino ERROR_EXIT(REG_ESPACE);
32165f2eab64SJohn Marino tnfa->initial = initial;
32175f2eab64SJohn Marino
32185f2eab64SJohn Marino i = 0;
32195f2eab64SJohn Marino for (p = tree->firstpos; p->position >= 0; p++)
32205f2eab64SJohn Marino {
32215f2eab64SJohn Marino initial[i].state = transitions + offs[p->position];
32225f2eab64SJohn Marino initial[i].state_id = p->position;
32235f2eab64SJohn Marino initial[i].tags = NULL;
32245f2eab64SJohn Marino /* Copy the arrays p->tags, and p->params, they are allocated
32255f2eab64SJohn Marino from a tre_mem object. */
32265f2eab64SJohn Marino if (p->tags)
32275f2eab64SJohn Marino {
32285f2eab64SJohn Marino int j;
32295f2eab64SJohn Marino for (j = 0; p->tags[j] >= 0; j++);
32305f2eab64SJohn Marino initial[i].tags = xmalloc(sizeof(*p->tags) * (j + 1));
32315f2eab64SJohn Marino if (!initial[i].tags)
32325f2eab64SJohn Marino ERROR_EXIT(REG_ESPACE);
32335f2eab64SJohn Marino memcpy(initial[i].tags, p->tags, sizeof(*p->tags) * (j + 1));
32345f2eab64SJohn Marino }
32355f2eab64SJohn Marino initial[i].params = NULL;
32365f2eab64SJohn Marino if (p->params)
32375f2eab64SJohn Marino {
32385f2eab64SJohn Marino initial[i].params = xmalloc(sizeof(*p->params) * TRE_PARAM_LAST);
32395f2eab64SJohn Marino if (!initial[i].params)
32405f2eab64SJohn Marino ERROR_EXIT(REG_ESPACE);
32415f2eab64SJohn Marino memcpy(initial[i].params, p->params,
32425f2eab64SJohn Marino sizeof(*p->params) * TRE_PARAM_LAST);
32435f2eab64SJohn Marino }
32445f2eab64SJohn Marino initial[i].assertions = p->assertions;
32455f2eab64SJohn Marino i++;
32465f2eab64SJohn Marino }
32475f2eab64SJohn Marino initial[i].state = NULL;
32485f2eab64SJohn Marino
32495f2eab64SJohn Marino tnfa->num_transitions = add;
32505f2eab64SJohn Marino tnfa->final = transitions + offs[tree->lastpos[0].position];
32515f2eab64SJohn Marino tnfa->num_states = parse_ctx.position;
32525f2eab64SJohn Marino tnfa->cflags = cflags;
32535f2eab64SJohn Marino
3254*d5f8dde1SJohn Marino DPRINT(("final state %d (%p)\n", tree->lastpos[0].position,
3255*d5f8dde1SJohn Marino (void *)tnfa->final));
32565f2eab64SJohn Marino
32575f2eab64SJohn Marino tre_mem_destroy(mem);
32585f2eab64SJohn Marino tre_stack_destroy(stack);
32595f2eab64SJohn Marino xfree(counts);
32605f2eab64SJohn Marino xfree(offs);
32615f2eab64SJohn Marino
3262*d5f8dde1SJohn Marino #ifdef TRE_USE_SYSTEM_REGEX_H
3263*d5f8dde1SJohn Marino preg->re_magic = RE_MAGIC;
3264*d5f8dde1SJohn Marino #endif /* TRE_USE_SYSTEM_REGEX_H */
32655f2eab64SJohn Marino preg->TRE_REGEX_T_FIELD = (void *)tnfa;
3266*d5f8dde1SJohn Marino xlocale_retain(tnfa->loc);
32675f2eab64SJohn Marino return REG_OK;
32685f2eab64SJohn Marino
32695f2eab64SJohn Marino error_exit:
32705f2eab64SJohn Marino /* Free everything that was allocated and return the error code. */
32715f2eab64SJohn Marino tre_mem_destroy(mem);
32725f2eab64SJohn Marino if (stack != NULL)
32735f2eab64SJohn Marino tre_stack_destroy(stack);
32745f2eab64SJohn Marino if (counts != NULL)
32755f2eab64SJohn Marino xfree(counts);
32765f2eab64SJohn Marino if (offs != NULL)
32775f2eab64SJohn Marino xfree(offs);
3278*d5f8dde1SJohn Marino
3279*d5f8dde1SJohn Marino /* Set tnfa into preg, so that calling tre_free() will free the contents
3280*d5f8dde1SJohn Marino of tnfa. NULL out the loc field since we never retained the locale. */
32815f2eab64SJohn Marino preg->TRE_REGEX_T_FIELD = (void *)tnfa;
3282*d5f8dde1SJohn Marino if(tnfa) tnfa->loc = NULL;
3283*d5f8dde1SJohn Marino
32845f2eab64SJohn Marino tre_free(preg);
32855f2eab64SJohn Marino return errcode;
32865f2eab64SJohn Marino }
32875f2eab64SJohn Marino
32885f2eab64SJohn Marino
32895f2eab64SJohn Marino
32905f2eab64SJohn Marino
32915f2eab64SJohn Marino void
tre_free(regex_t * preg)32925f2eab64SJohn Marino tre_free(regex_t *preg)
32935f2eab64SJohn Marino {
32945f2eab64SJohn Marino tre_tnfa_t *tnfa;
32955f2eab64SJohn Marino unsigned int i;
32965f2eab64SJohn Marino tre_tnfa_transition_t *trans;
32975f2eab64SJohn Marino
3298*d5f8dde1SJohn Marino #ifdef TRE_USE_SYSTEM_REGEX_H
3299*d5f8dde1SJohn Marino preg->re_magic = 0;
3300*d5f8dde1SJohn Marino #endif /* TRE_USE_SYSTEM_REGEX_H */
33015f2eab64SJohn Marino tnfa = (void *)preg->TRE_REGEX_T_FIELD;
33025f2eab64SJohn Marino if (!tnfa)
33035f2eab64SJohn Marino return;
3304*d5f8dde1SJohn Marino preg->TRE_REGEX_T_FIELD = NULL;
33055f2eab64SJohn Marino
33065f2eab64SJohn Marino for (i = 0; i < tnfa->num_transitions; i++)
33075f2eab64SJohn Marino if (tnfa->transitions[i].state)
33085f2eab64SJohn Marino {
33095f2eab64SJohn Marino if (tnfa->transitions[i].tags)
33105f2eab64SJohn Marino xfree(tnfa->transitions[i].tags);
3311*d5f8dde1SJohn Marino if (tnfa->transitions[i].assertions & ASSERT_BRACKET_MATCH)
3312*d5f8dde1SJohn Marino xfree(tnfa->transitions[i].u.bracket_match_list);
33135f2eab64SJohn Marino if (tnfa->transitions[i].params)
33145f2eab64SJohn Marino xfree(tnfa->transitions[i].params);
33155f2eab64SJohn Marino }
33165f2eab64SJohn Marino if (tnfa->transitions)
33175f2eab64SJohn Marino xfree(tnfa->transitions);
33185f2eab64SJohn Marino
33195f2eab64SJohn Marino if (tnfa->initial)
33205f2eab64SJohn Marino {
33215f2eab64SJohn Marino for (trans = tnfa->initial; trans->state; trans++)
33225f2eab64SJohn Marino {
33235f2eab64SJohn Marino if (trans->tags)
33245f2eab64SJohn Marino xfree(trans->tags);
33255f2eab64SJohn Marino if (trans->params)
33265f2eab64SJohn Marino xfree(trans->params);
33275f2eab64SJohn Marino }
33285f2eab64SJohn Marino xfree(tnfa->initial);
33295f2eab64SJohn Marino }
33305f2eab64SJohn Marino
33315f2eab64SJohn Marino if (tnfa->submatch_data)
33325f2eab64SJohn Marino {
33335f2eab64SJohn Marino xfree(tnfa->submatch_data);
33345f2eab64SJohn Marino }
33355f2eab64SJohn Marino
33365f2eab64SJohn Marino if (tnfa->tag_directions)
33375f2eab64SJohn Marino xfree(tnfa->tag_directions);
33385f2eab64SJohn Marino if (tnfa->minimal_tags)
33395f2eab64SJohn Marino xfree(tnfa->minimal_tags);
3340*d5f8dde1SJohn Marino
3341*d5f8dde1SJohn Marino if (tnfa->loc)
3342*d5f8dde1SJohn Marino xlocale_release(tnfa->loc);
3343*d5f8dde1SJohn Marino
3344*d5f8dde1SJohn Marino if (tnfa->last_matched_branch)
3345*d5f8dde1SJohn Marino xfree(tnfa->last_matched_branch);
3346*d5f8dde1SJohn Marino
33475f2eab64SJohn Marino xfree(tnfa);
33485f2eab64SJohn Marino }
33495f2eab64SJohn Marino
33505f2eab64SJohn Marino char *
tre_version(void)33515f2eab64SJohn Marino tre_version(void)
33525f2eab64SJohn Marino {
33535f2eab64SJohn Marino static char str[256];
33545f2eab64SJohn Marino char *version;
33555f2eab64SJohn Marino
33565f2eab64SJohn Marino if (str[0] == 0)
33575f2eab64SJohn Marino {
33585f2eab64SJohn Marino (void) tre_config(TRE_CONFIG_VERSION, &version);
33595f2eab64SJohn Marino (void) snprintf(str, sizeof(str), "TRE %s (BSD)", version);
33605f2eab64SJohn Marino }
33615f2eab64SJohn Marino return str;
33625f2eab64SJohn Marino }
33635f2eab64SJohn Marino
33645f2eab64SJohn Marino int
tre_config(int query,void * result)33655f2eab64SJohn Marino tre_config(int query, void *result)
33665f2eab64SJohn Marino {
33675f2eab64SJohn Marino int *int_result = result;
33685f2eab64SJohn Marino const char **string_result = result;
33695f2eab64SJohn Marino
33705f2eab64SJohn Marino switch (query)
33715f2eab64SJohn Marino {
33725f2eab64SJohn Marino case TRE_CONFIG_APPROX:
33735f2eab64SJohn Marino #ifdef TRE_APPROX
33745f2eab64SJohn Marino *int_result = 1;
33755f2eab64SJohn Marino #else /* !TRE_APPROX */
33765f2eab64SJohn Marino *int_result = 0;
33775f2eab64SJohn Marino #endif /* !TRE_APPROX */
33785f2eab64SJohn Marino return REG_OK;
33795f2eab64SJohn Marino
33805f2eab64SJohn Marino case TRE_CONFIG_WCHAR:
33815f2eab64SJohn Marino #ifdef TRE_WCHAR
33825f2eab64SJohn Marino *int_result = 1;
33835f2eab64SJohn Marino #else /* !TRE_WCHAR */
33845f2eab64SJohn Marino *int_result = 0;
33855f2eab64SJohn Marino #endif /* !TRE_WCHAR */
33865f2eab64SJohn Marino return REG_OK;
33875f2eab64SJohn Marino
33885f2eab64SJohn Marino case TRE_CONFIG_MULTIBYTE:
33895f2eab64SJohn Marino #ifdef TRE_MULTIBYTE
33905f2eab64SJohn Marino *int_result = 1;
33915f2eab64SJohn Marino #else /* !TRE_MULTIBYTE */
33925f2eab64SJohn Marino *int_result = 0;
33935f2eab64SJohn Marino #endif /* !TRE_MULTIBYTE */
33945f2eab64SJohn Marino return REG_OK;
33955f2eab64SJohn Marino
33965f2eab64SJohn Marino case TRE_CONFIG_SYSTEM_ABI:
33975f2eab64SJohn Marino #ifdef TRE_CONFIG_SYSTEM_ABI
33985f2eab64SJohn Marino *int_result = 1;
33995f2eab64SJohn Marino #else /* !TRE_CONFIG_SYSTEM_ABI */
34005f2eab64SJohn Marino *int_result = 0;
34015f2eab64SJohn Marino #endif /* !TRE_CONFIG_SYSTEM_ABI */
34025f2eab64SJohn Marino return REG_OK;
34035f2eab64SJohn Marino
34045f2eab64SJohn Marino case TRE_CONFIG_VERSION:
34055f2eab64SJohn Marino *string_result = TRE_VERSION;
34065f2eab64SJohn Marino return REG_OK;
34075f2eab64SJohn Marino }
34085f2eab64SJohn Marino
34095f2eab64SJohn Marino return REG_NOMATCH;
34105f2eab64SJohn Marino }
34115f2eab64SJohn Marino
34125f2eab64SJohn Marino
34135f2eab64SJohn Marino /* EOF */
3414