1*63d4abf0Sagc /*
2*63d4abf0Sagc tre-ast.c - Abstract syntax tree (AST) routines
3*63d4abf0Sagc
4*63d4abf0Sagc This software is released under a BSD-style license.
5*63d4abf0Sagc See the file LICENSE for details and copyright.
6*63d4abf0Sagc
7*63d4abf0Sagc */
8*63d4abf0Sagc
9*63d4abf0Sagc #ifdef HAVE_CONFIG_H
10*63d4abf0Sagc #include <config.h>
11*63d4abf0Sagc #endif /* HAVE_CONFIG_H */
12*63d4abf0Sagc #include <assert.h>
13*63d4abf0Sagc
14*63d4abf0Sagc #include "tre-ast.h"
15*63d4abf0Sagc #include "tre-mem.h"
16*63d4abf0Sagc
17*63d4abf0Sagc tre_ast_node_t *
tre_ast_new_node(tre_mem_t mem,tre_ast_type_t type,size_t size)18*63d4abf0Sagc tre_ast_new_node(tre_mem_t mem, tre_ast_type_t type, size_t size)
19*63d4abf0Sagc {
20*63d4abf0Sagc tre_ast_node_t *node;
21*63d4abf0Sagc
22*63d4abf0Sagc node = tre_mem_calloc(mem, sizeof(*node));
23*63d4abf0Sagc if (!node)
24*63d4abf0Sagc return NULL;
25*63d4abf0Sagc node->obj = tre_mem_calloc(mem, size);
26*63d4abf0Sagc if (!node->obj)
27*63d4abf0Sagc return NULL;
28*63d4abf0Sagc node->type = type;
29*63d4abf0Sagc node->nullable = -1;
30*63d4abf0Sagc node->submatch_id = -1;
31*63d4abf0Sagc
32*63d4abf0Sagc return node;
33*63d4abf0Sagc }
34*63d4abf0Sagc
35*63d4abf0Sagc tre_ast_node_t *
tre_ast_new_literal(tre_mem_t mem,int code_min,int code_max,int position)36*63d4abf0Sagc tre_ast_new_literal(tre_mem_t mem, int code_min, int code_max, int position)
37*63d4abf0Sagc {
38*63d4abf0Sagc tre_ast_node_t *node;
39*63d4abf0Sagc tre_literal_t *lit;
40*63d4abf0Sagc
41*63d4abf0Sagc node = tre_ast_new_node(mem, LITERAL, sizeof(tre_literal_t));
42*63d4abf0Sagc if (!node)
43*63d4abf0Sagc return NULL;
44*63d4abf0Sagc lit = node->obj;
45*63d4abf0Sagc lit->code_min = code_min;
46*63d4abf0Sagc lit->code_max = code_max;
47*63d4abf0Sagc lit->position = position;
48*63d4abf0Sagc
49*63d4abf0Sagc return node;
50*63d4abf0Sagc }
51*63d4abf0Sagc
52*63d4abf0Sagc tre_ast_node_t *
tre_ast_new_iter(tre_mem_t mem,tre_ast_node_t * arg,int min,int max,int minimal)53*63d4abf0Sagc tre_ast_new_iter(tre_mem_t mem, tre_ast_node_t *arg, int min, int max,
54*63d4abf0Sagc int minimal)
55*63d4abf0Sagc {
56*63d4abf0Sagc tre_ast_node_t *node;
57*63d4abf0Sagc tre_iteration_t *iter;
58*63d4abf0Sagc
59*63d4abf0Sagc node = tre_ast_new_node(mem, ITERATION, sizeof(tre_iteration_t));
60*63d4abf0Sagc if (!node)
61*63d4abf0Sagc return NULL;
62*63d4abf0Sagc iter = node->obj;
63*63d4abf0Sagc iter->arg = arg;
64*63d4abf0Sagc iter->min = min;
65*63d4abf0Sagc iter->max = max;
66*63d4abf0Sagc iter->minimal = minimal;
67*63d4abf0Sagc node->num_submatches = arg->num_submatches;
68*63d4abf0Sagc
69*63d4abf0Sagc return node;
70*63d4abf0Sagc }
71*63d4abf0Sagc
72*63d4abf0Sagc tre_ast_node_t *
tre_ast_new_union(tre_mem_t mem,tre_ast_node_t * left,tre_ast_node_t * right)73*63d4abf0Sagc tre_ast_new_union(tre_mem_t mem, tre_ast_node_t *left, tre_ast_node_t *right)
74*63d4abf0Sagc {
75*63d4abf0Sagc tre_ast_node_t *node;
76*63d4abf0Sagc
77*63d4abf0Sagc node = tre_ast_new_node(mem, UNION, sizeof(tre_union_t));
78*63d4abf0Sagc if (node == NULL)
79*63d4abf0Sagc return NULL;
80*63d4abf0Sagc ((tre_union_t *)node->obj)->left = left;
81*63d4abf0Sagc ((tre_union_t *)node->obj)->right = right;
82*63d4abf0Sagc node->num_submatches = left->num_submatches + right->num_submatches;
83*63d4abf0Sagc
84*63d4abf0Sagc return node;
85*63d4abf0Sagc }
86*63d4abf0Sagc
87*63d4abf0Sagc tre_ast_node_t *
tre_ast_new_catenation(tre_mem_t mem,tre_ast_node_t * left,tre_ast_node_t * right)88*63d4abf0Sagc tre_ast_new_catenation(tre_mem_t mem, tre_ast_node_t *left,
89*63d4abf0Sagc tre_ast_node_t *right)
90*63d4abf0Sagc {
91*63d4abf0Sagc tre_ast_node_t *node;
92*63d4abf0Sagc
93*63d4abf0Sagc node = tre_ast_new_node(mem, CATENATION, sizeof(tre_catenation_t));
94*63d4abf0Sagc if (node == NULL)
95*63d4abf0Sagc return NULL;
96*63d4abf0Sagc ((tre_catenation_t *)node->obj)->left = left;
97*63d4abf0Sagc ((tre_catenation_t *)node->obj)->right = right;
98*63d4abf0Sagc node->num_submatches = left->num_submatches + right->num_submatches;
99*63d4abf0Sagc
100*63d4abf0Sagc return node;
101*63d4abf0Sagc }
102*63d4abf0Sagc
103*63d4abf0Sagc #ifdef TRE_DEBUG
104*63d4abf0Sagc
105*63d4abf0Sagc static void
tre_findent(FILE * stream,int i)106*63d4abf0Sagc tre_findent(FILE *stream, int i)
107*63d4abf0Sagc {
108*63d4abf0Sagc while (i-- > 0)
109*63d4abf0Sagc fputc(' ', stream);
110*63d4abf0Sagc }
111*63d4abf0Sagc
112*63d4abf0Sagc void
tre_print_params(int * params)113*63d4abf0Sagc tre_print_params(int *params)
114*63d4abf0Sagc {
115*63d4abf0Sagc int i;
116*63d4abf0Sagc if (params)
117*63d4abf0Sagc {
118*63d4abf0Sagc DPRINT(("params ["));
119*63d4abf0Sagc for (i = 0; i < TRE_PARAM_LAST; i++)
120*63d4abf0Sagc {
121*63d4abf0Sagc if (params[i] == TRE_PARAM_UNSET)
122*63d4abf0Sagc DPRINT(("unset"));
123*63d4abf0Sagc else if (params[i] == TRE_PARAM_DEFAULT)
124*63d4abf0Sagc DPRINT(("default"));
125*63d4abf0Sagc else
126*63d4abf0Sagc DPRINT(("%d", params[i]));
127*63d4abf0Sagc if (i < TRE_PARAM_LAST - 1)
128*63d4abf0Sagc DPRINT((", "));
129*63d4abf0Sagc }
130*63d4abf0Sagc DPRINT(("]"));
131*63d4abf0Sagc }
132*63d4abf0Sagc }
133*63d4abf0Sagc
134*63d4abf0Sagc static void
tre_do_print(FILE * stream,tre_ast_node_t * ast,int indent)135*63d4abf0Sagc tre_do_print(FILE *stream, tre_ast_node_t *ast, int indent)
136*63d4abf0Sagc {
137*63d4abf0Sagc int code_min, code_max, pos;
138*63d4abf0Sagc int num_tags = ast->num_tags;
139*63d4abf0Sagc tre_literal_t *lit;
140*63d4abf0Sagc tre_iteration_t *iter;
141*63d4abf0Sagc
142*63d4abf0Sagc tre_findent(stream, indent);
143*63d4abf0Sagc switch (ast->type)
144*63d4abf0Sagc {
145*63d4abf0Sagc case LITERAL:
146*63d4abf0Sagc lit = ast->obj;
147*63d4abf0Sagc code_min = lit->code_min;
148*63d4abf0Sagc code_max = lit->code_max;
149*63d4abf0Sagc pos = lit->position;
150*63d4abf0Sagc if (IS_EMPTY(lit))
151*63d4abf0Sagc {
152*63d4abf0Sagc fprintf(stream, "literal empty\n");
153*63d4abf0Sagc }
154*63d4abf0Sagc else if (IS_ASSERTION(lit))
155*63d4abf0Sagc {
156*63d4abf0Sagc int i;
157*63d4abf0Sagc char *assertions[] = { "bol", "eol", "ctype", "!ctype",
158*63d4abf0Sagc "bow", "eow", "wb", "!wb" };
159*63d4abf0Sagc if (code_max >= ASSERT_LAST << 1)
160*63d4abf0Sagc assert(0);
161*63d4abf0Sagc fprintf(stream, "assertions: ");
162*63d4abf0Sagc for (i = 0; (1 << i) <= ASSERT_LAST; i++)
163*63d4abf0Sagc if (code_max & (1 << i))
164*63d4abf0Sagc fprintf(stream, "%s ", assertions[i]);
165*63d4abf0Sagc fprintf(stream, "\n");
166*63d4abf0Sagc }
167*63d4abf0Sagc else if (IS_TAG(lit))
168*63d4abf0Sagc {
169*63d4abf0Sagc fprintf(stream, "tag %d\n", code_max);
170*63d4abf0Sagc }
171*63d4abf0Sagc else if (IS_BACKREF(lit))
172*63d4abf0Sagc {
173*63d4abf0Sagc fprintf(stream, "backref %d, pos %d\n", code_max, pos);
174*63d4abf0Sagc }
175*63d4abf0Sagc else if (IS_PARAMETER(lit))
176*63d4abf0Sagc {
177*63d4abf0Sagc tre_print_params(lit->u.params);
178*63d4abf0Sagc fprintf(stream, "\n");
179*63d4abf0Sagc }
180*63d4abf0Sagc else
181*63d4abf0Sagc {
182*63d4abf0Sagc fprintf(stream, "literal (%c, %c) (%d, %d), pos %d, sub %d, "
183*63d4abf0Sagc "%d tags\n", code_min, code_max, code_min, code_max, pos,
184*63d4abf0Sagc ast->submatch_id, num_tags);
185*63d4abf0Sagc }
186*63d4abf0Sagc break;
187*63d4abf0Sagc case ITERATION:
188*63d4abf0Sagc iter = ast->obj;
189*63d4abf0Sagc fprintf(stream, "iteration {%d, %d}, sub %d, %d tags, %s\n",
190*63d4abf0Sagc iter->min, iter->max, ast->submatch_id, num_tags,
191*63d4abf0Sagc iter->minimal ? "minimal" : "greedy");
192*63d4abf0Sagc tre_do_print(stream, iter->arg, indent + 2);
193*63d4abf0Sagc break;
194*63d4abf0Sagc case UNION:
195*63d4abf0Sagc fprintf(stream, "union, sub %d, %d tags\n", ast->submatch_id, num_tags);
196*63d4abf0Sagc tre_do_print(stream, ((tre_union_t *)ast->obj)->left, indent + 2);
197*63d4abf0Sagc tre_do_print(stream, ((tre_union_t *)ast->obj)->right, indent + 2);
198*63d4abf0Sagc break;
199*63d4abf0Sagc case CATENATION:
200*63d4abf0Sagc fprintf(stream, "catenation, sub %d, %d tags\n", ast->submatch_id,
201*63d4abf0Sagc num_tags);
202*63d4abf0Sagc tre_do_print(stream, ((tre_catenation_t *)ast->obj)->left, indent + 2);
203*63d4abf0Sagc tre_do_print(stream, ((tre_catenation_t *)ast->obj)->right, indent + 2);
204*63d4abf0Sagc break;
205*63d4abf0Sagc default:
206*63d4abf0Sagc assert(0);
207*63d4abf0Sagc break;
208*63d4abf0Sagc }
209*63d4abf0Sagc }
210*63d4abf0Sagc
211*63d4abf0Sagc static void
tre_ast_fprint(FILE * stream,tre_ast_node_t * ast)212*63d4abf0Sagc tre_ast_fprint(FILE *stream, tre_ast_node_t *ast)
213*63d4abf0Sagc {
214*63d4abf0Sagc tre_do_print(stream, ast, 0);
215*63d4abf0Sagc }
216*63d4abf0Sagc
217*63d4abf0Sagc void
tre_ast_print(tre_ast_node_t * tree)218*63d4abf0Sagc tre_ast_print(tre_ast_node_t *tree)
219*63d4abf0Sagc {
220*63d4abf0Sagc printf("AST:\n");
221*63d4abf0Sagc tre_ast_fprint(stdout, tree);
222*63d4abf0Sagc }
223*63d4abf0Sagc
224*63d4abf0Sagc #endif /* TRE_DEBUG */
225*63d4abf0Sagc
226*63d4abf0Sagc /* EOF */
227