xref: /netbsd-src/external/bsd/tre/dist/lib/tre-ast.c (revision 63d4abf06d37aace2f9e41a494102a64fe3abddb)
1*63d4abf0Sagc /*
2*63d4abf0Sagc   tre-ast.c - Abstract syntax tree (AST) routines
3*63d4abf0Sagc 
4*63d4abf0Sagc   This software is released under a BSD-style license.
5*63d4abf0Sagc   See the file LICENSE for details and copyright.
6*63d4abf0Sagc 
7*63d4abf0Sagc */
8*63d4abf0Sagc 
9*63d4abf0Sagc #ifdef HAVE_CONFIG_H
10*63d4abf0Sagc #include <config.h>
11*63d4abf0Sagc #endif /* HAVE_CONFIG_H */
12*63d4abf0Sagc #include <assert.h>
13*63d4abf0Sagc 
14*63d4abf0Sagc #include "tre-ast.h"
15*63d4abf0Sagc #include "tre-mem.h"
16*63d4abf0Sagc 
17*63d4abf0Sagc tre_ast_node_t *
tre_ast_new_node(tre_mem_t mem,tre_ast_type_t type,size_t size)18*63d4abf0Sagc tre_ast_new_node(tre_mem_t mem, tre_ast_type_t type, size_t size)
19*63d4abf0Sagc {
20*63d4abf0Sagc   tre_ast_node_t *node;
21*63d4abf0Sagc 
22*63d4abf0Sagc   node = tre_mem_calloc(mem, sizeof(*node));
23*63d4abf0Sagc   if (!node)
24*63d4abf0Sagc     return NULL;
25*63d4abf0Sagc   node->obj = tre_mem_calloc(mem, size);
26*63d4abf0Sagc   if (!node->obj)
27*63d4abf0Sagc     return NULL;
28*63d4abf0Sagc   node->type = type;
29*63d4abf0Sagc   node->nullable = -1;
30*63d4abf0Sagc   node->submatch_id = -1;
31*63d4abf0Sagc 
32*63d4abf0Sagc   return node;
33*63d4abf0Sagc }
34*63d4abf0Sagc 
35*63d4abf0Sagc tre_ast_node_t *
tre_ast_new_literal(tre_mem_t mem,int code_min,int code_max,int position)36*63d4abf0Sagc tre_ast_new_literal(tre_mem_t mem, int code_min, int code_max, int position)
37*63d4abf0Sagc {
38*63d4abf0Sagc   tre_ast_node_t *node;
39*63d4abf0Sagc   tre_literal_t *lit;
40*63d4abf0Sagc 
41*63d4abf0Sagc   node = tre_ast_new_node(mem, LITERAL, sizeof(tre_literal_t));
42*63d4abf0Sagc   if (!node)
43*63d4abf0Sagc     return NULL;
44*63d4abf0Sagc   lit = node->obj;
45*63d4abf0Sagc   lit->code_min = code_min;
46*63d4abf0Sagc   lit->code_max = code_max;
47*63d4abf0Sagc   lit->position = position;
48*63d4abf0Sagc 
49*63d4abf0Sagc   return node;
50*63d4abf0Sagc }
51*63d4abf0Sagc 
52*63d4abf0Sagc tre_ast_node_t *
tre_ast_new_iter(tre_mem_t mem,tre_ast_node_t * arg,int min,int max,int minimal)53*63d4abf0Sagc tre_ast_new_iter(tre_mem_t mem, tre_ast_node_t *arg, int min, int max,
54*63d4abf0Sagc 		 int minimal)
55*63d4abf0Sagc {
56*63d4abf0Sagc   tre_ast_node_t *node;
57*63d4abf0Sagc   tre_iteration_t *iter;
58*63d4abf0Sagc 
59*63d4abf0Sagc   node = tre_ast_new_node(mem, ITERATION, sizeof(tre_iteration_t));
60*63d4abf0Sagc   if (!node)
61*63d4abf0Sagc     return NULL;
62*63d4abf0Sagc   iter = node->obj;
63*63d4abf0Sagc   iter->arg = arg;
64*63d4abf0Sagc   iter->min = min;
65*63d4abf0Sagc   iter->max = max;
66*63d4abf0Sagc   iter->minimal = minimal;
67*63d4abf0Sagc   node->num_submatches = arg->num_submatches;
68*63d4abf0Sagc 
69*63d4abf0Sagc   return node;
70*63d4abf0Sagc }
71*63d4abf0Sagc 
72*63d4abf0Sagc tre_ast_node_t *
tre_ast_new_union(tre_mem_t mem,tre_ast_node_t * left,tre_ast_node_t * right)73*63d4abf0Sagc tre_ast_new_union(tre_mem_t mem, tre_ast_node_t *left, tre_ast_node_t *right)
74*63d4abf0Sagc {
75*63d4abf0Sagc   tre_ast_node_t *node;
76*63d4abf0Sagc 
77*63d4abf0Sagc   node = tre_ast_new_node(mem, UNION, sizeof(tre_union_t));
78*63d4abf0Sagc   if (node == NULL)
79*63d4abf0Sagc     return NULL;
80*63d4abf0Sagc   ((tre_union_t *)node->obj)->left = left;
81*63d4abf0Sagc   ((tre_union_t *)node->obj)->right = right;
82*63d4abf0Sagc   node->num_submatches = left->num_submatches + right->num_submatches;
83*63d4abf0Sagc 
84*63d4abf0Sagc   return node;
85*63d4abf0Sagc }
86*63d4abf0Sagc 
87*63d4abf0Sagc tre_ast_node_t *
tre_ast_new_catenation(tre_mem_t mem,tre_ast_node_t * left,tre_ast_node_t * right)88*63d4abf0Sagc tre_ast_new_catenation(tre_mem_t mem, tre_ast_node_t *left,
89*63d4abf0Sagc 		       tre_ast_node_t *right)
90*63d4abf0Sagc {
91*63d4abf0Sagc   tre_ast_node_t *node;
92*63d4abf0Sagc 
93*63d4abf0Sagc   node = tre_ast_new_node(mem, CATENATION, sizeof(tre_catenation_t));
94*63d4abf0Sagc   if (node == NULL)
95*63d4abf0Sagc     return NULL;
96*63d4abf0Sagc   ((tre_catenation_t *)node->obj)->left = left;
97*63d4abf0Sagc   ((tre_catenation_t *)node->obj)->right = right;
98*63d4abf0Sagc   node->num_submatches = left->num_submatches + right->num_submatches;
99*63d4abf0Sagc 
100*63d4abf0Sagc   return node;
101*63d4abf0Sagc }
102*63d4abf0Sagc 
103*63d4abf0Sagc #ifdef TRE_DEBUG
104*63d4abf0Sagc 
105*63d4abf0Sagc static void
tre_findent(FILE * stream,int i)106*63d4abf0Sagc tre_findent(FILE *stream, int i)
107*63d4abf0Sagc {
108*63d4abf0Sagc   while (i-- > 0)
109*63d4abf0Sagc     fputc(' ', stream);
110*63d4abf0Sagc }
111*63d4abf0Sagc 
112*63d4abf0Sagc void
tre_print_params(int * params)113*63d4abf0Sagc tre_print_params(int *params)
114*63d4abf0Sagc {
115*63d4abf0Sagc   int i;
116*63d4abf0Sagc   if (params)
117*63d4abf0Sagc     {
118*63d4abf0Sagc       DPRINT(("params ["));
119*63d4abf0Sagc       for (i = 0; i < TRE_PARAM_LAST; i++)
120*63d4abf0Sagc 	{
121*63d4abf0Sagc 	  if (params[i] == TRE_PARAM_UNSET)
122*63d4abf0Sagc 	    DPRINT(("unset"));
123*63d4abf0Sagc 	  else if (params[i] == TRE_PARAM_DEFAULT)
124*63d4abf0Sagc 	    DPRINT(("default"));
125*63d4abf0Sagc 	  else
126*63d4abf0Sagc 	    DPRINT(("%d", params[i]));
127*63d4abf0Sagc 	  if (i < TRE_PARAM_LAST - 1)
128*63d4abf0Sagc 	    DPRINT((", "));
129*63d4abf0Sagc 	}
130*63d4abf0Sagc       DPRINT(("]"));
131*63d4abf0Sagc     }
132*63d4abf0Sagc }
133*63d4abf0Sagc 
134*63d4abf0Sagc static void
tre_do_print(FILE * stream,tre_ast_node_t * ast,int indent)135*63d4abf0Sagc tre_do_print(FILE *stream, tre_ast_node_t *ast, int indent)
136*63d4abf0Sagc {
137*63d4abf0Sagc   int code_min, code_max, pos;
138*63d4abf0Sagc   int num_tags = ast->num_tags;
139*63d4abf0Sagc   tre_literal_t *lit;
140*63d4abf0Sagc   tre_iteration_t *iter;
141*63d4abf0Sagc 
142*63d4abf0Sagc   tre_findent(stream, indent);
143*63d4abf0Sagc   switch (ast->type)
144*63d4abf0Sagc     {
145*63d4abf0Sagc     case LITERAL:
146*63d4abf0Sagc       lit = ast->obj;
147*63d4abf0Sagc       code_min = lit->code_min;
148*63d4abf0Sagc       code_max = lit->code_max;
149*63d4abf0Sagc       pos = lit->position;
150*63d4abf0Sagc       if (IS_EMPTY(lit))
151*63d4abf0Sagc 	{
152*63d4abf0Sagc 	  fprintf(stream, "literal empty\n");
153*63d4abf0Sagc 	}
154*63d4abf0Sagc       else if (IS_ASSERTION(lit))
155*63d4abf0Sagc 	{
156*63d4abf0Sagc 	  int i;
157*63d4abf0Sagc 	  char *assertions[] = { "bol", "eol", "ctype", "!ctype",
158*63d4abf0Sagc 				 "bow", "eow", "wb", "!wb" };
159*63d4abf0Sagc 	  if (code_max >= ASSERT_LAST << 1)
160*63d4abf0Sagc 	    assert(0);
161*63d4abf0Sagc 	  fprintf(stream, "assertions: ");
162*63d4abf0Sagc 	  for (i = 0; (1 << i) <= ASSERT_LAST; i++)
163*63d4abf0Sagc 	    if (code_max & (1 << i))
164*63d4abf0Sagc 	      fprintf(stream, "%s ", assertions[i]);
165*63d4abf0Sagc 	  fprintf(stream, "\n");
166*63d4abf0Sagc 	}
167*63d4abf0Sagc       else if (IS_TAG(lit))
168*63d4abf0Sagc 	{
169*63d4abf0Sagc 	  fprintf(stream, "tag %d\n", code_max);
170*63d4abf0Sagc 	}
171*63d4abf0Sagc       else if (IS_BACKREF(lit))
172*63d4abf0Sagc 	{
173*63d4abf0Sagc 	  fprintf(stream, "backref %d, pos %d\n", code_max, pos);
174*63d4abf0Sagc 	}
175*63d4abf0Sagc       else if (IS_PARAMETER(lit))
176*63d4abf0Sagc 	{
177*63d4abf0Sagc 	  tre_print_params(lit->u.params);
178*63d4abf0Sagc 	  fprintf(stream, "\n");
179*63d4abf0Sagc 	}
180*63d4abf0Sagc       else
181*63d4abf0Sagc 	{
182*63d4abf0Sagc 	  fprintf(stream, "literal (%c, %c) (%d, %d), pos %d, sub %d, "
183*63d4abf0Sagc 		  "%d tags\n", code_min, code_max, code_min, code_max, pos,
184*63d4abf0Sagc 		  ast->submatch_id, num_tags);
185*63d4abf0Sagc 	}
186*63d4abf0Sagc       break;
187*63d4abf0Sagc     case ITERATION:
188*63d4abf0Sagc       iter = ast->obj;
189*63d4abf0Sagc       fprintf(stream, "iteration {%d, %d}, sub %d, %d tags, %s\n",
190*63d4abf0Sagc 	      iter->min, iter->max, ast->submatch_id, num_tags,
191*63d4abf0Sagc 	      iter->minimal ? "minimal" : "greedy");
192*63d4abf0Sagc       tre_do_print(stream, iter->arg, indent + 2);
193*63d4abf0Sagc       break;
194*63d4abf0Sagc     case UNION:
195*63d4abf0Sagc       fprintf(stream, "union, sub %d, %d tags\n", ast->submatch_id, num_tags);
196*63d4abf0Sagc       tre_do_print(stream, ((tre_union_t *)ast->obj)->left, indent + 2);
197*63d4abf0Sagc       tre_do_print(stream, ((tre_union_t *)ast->obj)->right, indent + 2);
198*63d4abf0Sagc       break;
199*63d4abf0Sagc     case CATENATION:
200*63d4abf0Sagc       fprintf(stream, "catenation, sub %d, %d tags\n", ast->submatch_id,
201*63d4abf0Sagc 	      num_tags);
202*63d4abf0Sagc       tre_do_print(stream, ((tre_catenation_t *)ast->obj)->left, indent + 2);
203*63d4abf0Sagc       tre_do_print(stream, ((tre_catenation_t *)ast->obj)->right, indent + 2);
204*63d4abf0Sagc       break;
205*63d4abf0Sagc     default:
206*63d4abf0Sagc       assert(0);
207*63d4abf0Sagc       break;
208*63d4abf0Sagc     }
209*63d4abf0Sagc }
210*63d4abf0Sagc 
211*63d4abf0Sagc static void
tre_ast_fprint(FILE * stream,tre_ast_node_t * ast)212*63d4abf0Sagc tre_ast_fprint(FILE *stream, tre_ast_node_t *ast)
213*63d4abf0Sagc {
214*63d4abf0Sagc   tre_do_print(stream, ast, 0);
215*63d4abf0Sagc }
216*63d4abf0Sagc 
217*63d4abf0Sagc void
tre_ast_print(tre_ast_node_t * tree)218*63d4abf0Sagc tre_ast_print(tre_ast_node_t *tree)
219*63d4abf0Sagc {
220*63d4abf0Sagc   printf("AST:\n");
221*63d4abf0Sagc   tre_ast_fprint(stdout, tree);
222*63d4abf0Sagc }
223*63d4abf0Sagc 
224*63d4abf0Sagc #endif /* TRE_DEBUG */
225*63d4abf0Sagc 
226*63d4abf0Sagc /* EOF */
227