xref: /dflybsd-src/contrib/tre/lib/tre-ast.h (revision d5f8dde1e2bae450329b3ee8c25ef109b5713f07)
15f2eab64SJohn Marino /*
25f2eab64SJohn Marino   tre-ast.h - Abstract syntax tree (AST) definitions
35f2eab64SJohn Marino 
45f2eab64SJohn Marino   This software is released under a BSD-style license.
55f2eab64SJohn Marino   See the file LICENSE for details and copyright.
65f2eab64SJohn Marino 
75f2eab64SJohn Marino */
85f2eab64SJohn Marino 
95f2eab64SJohn Marino 
105f2eab64SJohn Marino #ifndef TRE_AST_H
115f2eab64SJohn Marino #define TRE_AST_H 1
125f2eab64SJohn Marino 
13*d5f8dde1SJohn Marino #include <limits.h>
14*d5f8dde1SJohn Marino 
155f2eab64SJohn Marino #include "tre-mem.h"
165f2eab64SJohn Marino #include "tre-internal.h"
175f2eab64SJohn Marino #include "tre-compile.h"
185f2eab64SJohn Marino 
195f2eab64SJohn Marino /* The different AST node types. */
205f2eab64SJohn Marino typedef enum {
215f2eab64SJohn Marino   LITERAL,
225f2eab64SJohn Marino   CATENATION,
235f2eab64SJohn Marino   ITERATION,
245f2eab64SJohn Marino   UNION
255f2eab64SJohn Marino } tre_ast_type_t;
265f2eab64SJohn Marino 
275f2eab64SJohn Marino /* Special subtypes of TRE_LITERAL. */
285f2eab64SJohn Marino #define EMPTY	  -1   /* Empty leaf (denotes empty string). */
295f2eab64SJohn Marino #define ASSERTION -2   /* Assertion leaf. */
305f2eab64SJohn Marino #define TAG	  -3   /* Tag leaf. */
315f2eab64SJohn Marino #define BACKREF	  -4   /* Back reference leaf. */
325f2eab64SJohn Marino #define PARAMETER -5   /* Parameter. */
335f2eab64SJohn Marino 
345f2eab64SJohn Marino #define IS_SPECIAL(x)	((x)->code_min < 0)
355f2eab64SJohn Marino #define IS_EMPTY(x)	((x)->code_min == EMPTY)
365f2eab64SJohn Marino #define IS_ASSERTION(x) ((x)->code_min == ASSERTION)
375f2eab64SJohn Marino #define IS_TAG(x)	((x)->code_min == TAG)
385f2eab64SJohn Marino #define IS_BACKREF(x)	((x)->code_min == BACKREF)
395f2eab64SJohn Marino #define IS_PARAMETER(x) ((x)->code_min == PARAMETER)
405f2eab64SJohn Marino 
41*d5f8dde1SJohn Marino #define SUBMATCH_ID_INVISIBLE_START	(INT_MAX / 2 + 1)
42*d5f8dde1SJohn Marino 
435f2eab64SJohn Marino 
445f2eab64SJohn Marino /* A generic AST node.  All AST nodes consist of this node on the top
455f2eab64SJohn Marino    level with `obj' pointing to the actual content. */
46*d5f8dde1SJohn Marino typedef struct _tre_ast_node {
475f2eab64SJohn Marino   void *obj;             /* Pointer to actual node. */
48*d5f8dde1SJohn Marino   tre_last_matched_branch_pre_t *last_matched_branch;
49*d5f8dde1SJohn Marino   tre_last_matched_pre_t *last_matched_in_progress;
50*d5f8dde1SJohn Marino   tre_pos_and_tags_t *firstpos;
51*d5f8dde1SJohn Marino   tre_pos_and_tags_t *lastpos;
52*d5f8dde1SJohn Marino   /* The original pointer is used to point to the original node, when a
53*d5f8dde1SJohn Marino    * CATENATION and tag are added.  If NULL, this is node is the original */
54*d5f8dde1SJohn Marino   struct _tre_ast_node *original;
55*d5f8dde1SJohn Marino   tre_ast_type_t type;   /* Type of the node. */
565f2eab64SJohn Marino   int submatch_id;
575f2eab64SJohn Marino   int num_submatches;
585f2eab64SJohn Marino   int num_tags;
59*d5f8dde1SJohn Marino   short nullable;
60*d5f8dde1SJohn Marino   short make_branches;
615f2eab64SJohn Marino } tre_ast_node_t;
625f2eab64SJohn Marino 
635f2eab64SJohn Marino 
645f2eab64SJohn Marino /* A "literal" node.  These are created for assertions, back references,
655f2eab64SJohn Marino    tags, matching parameter settings, and all expressions that match one
665f2eab64SJohn Marino    character. */
675f2eab64SJohn Marino typedef struct {
68*d5f8dde1SJohn Marino   tre_cint_t code_min;
69*d5f8dde1SJohn Marino   tre_cint_t code_max;
705f2eab64SJohn Marino   int position;
715f2eab64SJohn Marino   union {
72*d5f8dde1SJohn Marino     tre_bracket_match_list_t *bracket_match_list;
735f2eab64SJohn Marino     int *params;
745f2eab64SJohn Marino   } u;
755f2eab64SJohn Marino } tre_literal_t;
765f2eab64SJohn Marino 
775f2eab64SJohn Marino /* A "catenation" node.	 These are created when two regexps are concatenated.
785f2eab64SJohn Marino    If there are more than one subexpressions in sequence, the `left' part
795f2eab64SJohn Marino    holds all but the last, and `right' part holds the last subexpression
805f2eab64SJohn Marino    (catenation is left associative). */
815f2eab64SJohn Marino typedef struct {
825f2eab64SJohn Marino   tre_ast_node_t *left;
835f2eab64SJohn Marino   tre_ast_node_t *right;
845f2eab64SJohn Marino } tre_catenation_t;
855f2eab64SJohn Marino 
865f2eab64SJohn Marino /* An "iteration" node.	 These are created for the "*", "+", "?", and "{m,n}"
875f2eab64SJohn Marino    operators. */
885f2eab64SJohn Marino typedef struct {
895f2eab64SJohn Marino   /* Subexpression to match. */
905f2eab64SJohn Marino   tre_ast_node_t *arg;
915f2eab64SJohn Marino   /* Minimum number of consecutive matches. */
925f2eab64SJohn Marino   int min;
935f2eab64SJohn Marino   /* Maximum number of consecutive matches. */
945f2eab64SJohn Marino   int max;
955f2eab64SJohn Marino   /* If 0, match as many characters as possible, if 1 match as few as
965f2eab64SJohn Marino      possible.	Note that this does not always mean the same thing as
975f2eab64SJohn Marino      matching as many/few repetitions as possible. */
985f2eab64SJohn Marino   unsigned int minimal:1;
995f2eab64SJohn Marino   /* Approximate matching parameters (or NULL). */
1005f2eab64SJohn Marino   int *params;
1015f2eab64SJohn Marino } tre_iteration_t;
1025f2eab64SJohn Marino 
1035f2eab64SJohn Marino /* An "union" node.  These are created for the "|" operator. */
1045f2eab64SJohn Marino typedef struct {
1055f2eab64SJohn Marino   tre_ast_node_t *left;
1065f2eab64SJohn Marino   tre_ast_node_t *right;
107*d5f8dde1SJohn Marino   /* The left end right end tags if non-zero */
108*d5f8dde1SJohn Marino   int left_tag;
109*d5f8dde1SJohn Marino   int right_tag;
1105f2eab64SJohn Marino } tre_union_t;
1115f2eab64SJohn Marino 
1125f2eab64SJohn Marino tre_ast_node_t *
1135f2eab64SJohn Marino tre_ast_new_node(tre_mem_t mem, tre_ast_type_t type, size_t size);
1145f2eab64SJohn Marino 
1155f2eab64SJohn Marino tre_ast_node_t *
1165f2eab64SJohn Marino tre_ast_new_literal(tre_mem_t mem, int code_min, int code_max, int position);
1175f2eab64SJohn Marino 
1185f2eab64SJohn Marino tre_ast_node_t *
1195f2eab64SJohn Marino tre_ast_new_iter(tre_mem_t mem, tre_ast_node_t *arg, int min, int max,
1205f2eab64SJohn Marino 		 int minimal);
1215f2eab64SJohn Marino 
1225f2eab64SJohn Marino tre_ast_node_t *
1235f2eab64SJohn Marino tre_ast_new_union(tre_mem_t mem, tre_ast_node_t *left, tre_ast_node_t *right);
1245f2eab64SJohn Marino 
1255f2eab64SJohn Marino tre_ast_node_t *
1265f2eab64SJohn Marino tre_ast_new_catenation(tre_mem_t mem, tre_ast_node_t *left,
1275f2eab64SJohn Marino 		       tre_ast_node_t *right);
1285f2eab64SJohn Marino 
1295f2eab64SJohn Marino #ifdef TRE_DEBUG
1305f2eab64SJohn Marino void
1315f2eab64SJohn Marino tre_ast_print(tre_ast_node_t *tree);
1325f2eab64SJohn Marino 
1335f2eab64SJohn Marino /* XXX - rethink AST printing API */
1345f2eab64SJohn Marino void
1355f2eab64SJohn Marino tre_print_params(int *params);
1365f2eab64SJohn Marino #endif /* TRE_DEBUG */
1375f2eab64SJohn Marino 
1385f2eab64SJohn Marino #endif /* TRE_AST_H */
1395f2eab64SJohn Marino 
1405f2eab64SJohn Marino /* EOF */
141