15f2eab64SJohn Marino /* 25f2eab64SJohn Marino tre-ast.h - Abstract syntax tree (AST) definitions 35f2eab64SJohn Marino 45f2eab64SJohn Marino This software is released under a BSD-style license. 55f2eab64SJohn Marino See the file LICENSE for details and copyright. 65f2eab64SJohn Marino 75f2eab64SJohn Marino */ 85f2eab64SJohn Marino 95f2eab64SJohn Marino 105f2eab64SJohn Marino #ifndef TRE_AST_H 115f2eab64SJohn Marino #define TRE_AST_H 1 125f2eab64SJohn Marino 13*d5f8dde1SJohn Marino #include <limits.h> 14*d5f8dde1SJohn Marino 155f2eab64SJohn Marino #include "tre-mem.h" 165f2eab64SJohn Marino #include "tre-internal.h" 175f2eab64SJohn Marino #include "tre-compile.h" 185f2eab64SJohn Marino 195f2eab64SJohn Marino /* The different AST node types. */ 205f2eab64SJohn Marino typedef enum { 215f2eab64SJohn Marino LITERAL, 225f2eab64SJohn Marino CATENATION, 235f2eab64SJohn Marino ITERATION, 245f2eab64SJohn Marino UNION 255f2eab64SJohn Marino } tre_ast_type_t; 265f2eab64SJohn Marino 275f2eab64SJohn Marino /* Special subtypes of TRE_LITERAL. */ 285f2eab64SJohn Marino #define EMPTY -1 /* Empty leaf (denotes empty string). */ 295f2eab64SJohn Marino #define ASSERTION -2 /* Assertion leaf. */ 305f2eab64SJohn Marino #define TAG -3 /* Tag leaf. */ 315f2eab64SJohn Marino #define BACKREF -4 /* Back reference leaf. */ 325f2eab64SJohn Marino #define PARAMETER -5 /* Parameter. */ 335f2eab64SJohn Marino 345f2eab64SJohn Marino #define IS_SPECIAL(x) ((x)->code_min < 0) 355f2eab64SJohn Marino #define IS_EMPTY(x) ((x)->code_min == EMPTY) 365f2eab64SJohn Marino #define IS_ASSERTION(x) ((x)->code_min == ASSERTION) 375f2eab64SJohn Marino #define IS_TAG(x) ((x)->code_min == TAG) 385f2eab64SJohn Marino #define IS_BACKREF(x) ((x)->code_min == BACKREF) 395f2eab64SJohn Marino #define IS_PARAMETER(x) ((x)->code_min == PARAMETER) 405f2eab64SJohn Marino 41*d5f8dde1SJohn Marino #define SUBMATCH_ID_INVISIBLE_START (INT_MAX / 2 + 1) 42*d5f8dde1SJohn Marino 435f2eab64SJohn Marino 445f2eab64SJohn Marino /* A generic AST node. All AST nodes consist of this node on the top 455f2eab64SJohn Marino level with `obj' pointing to the actual content. */ 46*d5f8dde1SJohn Marino typedef struct _tre_ast_node { 475f2eab64SJohn Marino void *obj; /* Pointer to actual node. */ 48*d5f8dde1SJohn Marino tre_last_matched_branch_pre_t *last_matched_branch; 49*d5f8dde1SJohn Marino tre_last_matched_pre_t *last_matched_in_progress; 50*d5f8dde1SJohn Marino tre_pos_and_tags_t *firstpos; 51*d5f8dde1SJohn Marino tre_pos_and_tags_t *lastpos; 52*d5f8dde1SJohn Marino /* The original pointer is used to point to the original node, when a 53*d5f8dde1SJohn Marino * CATENATION and tag are added. If NULL, this is node is the original */ 54*d5f8dde1SJohn Marino struct _tre_ast_node *original; 55*d5f8dde1SJohn Marino tre_ast_type_t type; /* Type of the node. */ 565f2eab64SJohn Marino int submatch_id; 575f2eab64SJohn Marino int num_submatches; 585f2eab64SJohn Marino int num_tags; 59*d5f8dde1SJohn Marino short nullable; 60*d5f8dde1SJohn Marino short make_branches; 615f2eab64SJohn Marino } tre_ast_node_t; 625f2eab64SJohn Marino 635f2eab64SJohn Marino 645f2eab64SJohn Marino /* A "literal" node. These are created for assertions, back references, 655f2eab64SJohn Marino tags, matching parameter settings, and all expressions that match one 665f2eab64SJohn Marino character. */ 675f2eab64SJohn Marino typedef struct { 68*d5f8dde1SJohn Marino tre_cint_t code_min; 69*d5f8dde1SJohn Marino tre_cint_t code_max; 705f2eab64SJohn Marino int position; 715f2eab64SJohn Marino union { 72*d5f8dde1SJohn Marino tre_bracket_match_list_t *bracket_match_list; 735f2eab64SJohn Marino int *params; 745f2eab64SJohn Marino } u; 755f2eab64SJohn Marino } tre_literal_t; 765f2eab64SJohn Marino 775f2eab64SJohn Marino /* A "catenation" node. These are created when two regexps are concatenated. 785f2eab64SJohn Marino If there are more than one subexpressions in sequence, the `left' part 795f2eab64SJohn Marino holds all but the last, and `right' part holds the last subexpression 805f2eab64SJohn Marino (catenation is left associative). */ 815f2eab64SJohn Marino typedef struct { 825f2eab64SJohn Marino tre_ast_node_t *left; 835f2eab64SJohn Marino tre_ast_node_t *right; 845f2eab64SJohn Marino } tre_catenation_t; 855f2eab64SJohn Marino 865f2eab64SJohn Marino /* An "iteration" node. These are created for the "*", "+", "?", and "{m,n}" 875f2eab64SJohn Marino operators. */ 885f2eab64SJohn Marino typedef struct { 895f2eab64SJohn Marino /* Subexpression to match. */ 905f2eab64SJohn Marino tre_ast_node_t *arg; 915f2eab64SJohn Marino /* Minimum number of consecutive matches. */ 925f2eab64SJohn Marino int min; 935f2eab64SJohn Marino /* Maximum number of consecutive matches. */ 945f2eab64SJohn Marino int max; 955f2eab64SJohn Marino /* If 0, match as many characters as possible, if 1 match as few as 965f2eab64SJohn Marino possible. Note that this does not always mean the same thing as 975f2eab64SJohn Marino matching as many/few repetitions as possible. */ 985f2eab64SJohn Marino unsigned int minimal:1; 995f2eab64SJohn Marino /* Approximate matching parameters (or NULL). */ 1005f2eab64SJohn Marino int *params; 1015f2eab64SJohn Marino } tre_iteration_t; 1025f2eab64SJohn Marino 1035f2eab64SJohn Marino /* An "union" node. These are created for the "|" operator. */ 1045f2eab64SJohn Marino typedef struct { 1055f2eab64SJohn Marino tre_ast_node_t *left; 1065f2eab64SJohn Marino tre_ast_node_t *right; 107*d5f8dde1SJohn Marino /* The left end right end tags if non-zero */ 108*d5f8dde1SJohn Marino int left_tag; 109*d5f8dde1SJohn Marino int right_tag; 1105f2eab64SJohn Marino } tre_union_t; 1115f2eab64SJohn Marino 1125f2eab64SJohn Marino tre_ast_node_t * 1135f2eab64SJohn Marino tre_ast_new_node(tre_mem_t mem, tre_ast_type_t type, size_t size); 1145f2eab64SJohn Marino 1155f2eab64SJohn Marino tre_ast_node_t * 1165f2eab64SJohn Marino tre_ast_new_literal(tre_mem_t mem, int code_min, int code_max, int position); 1175f2eab64SJohn Marino 1185f2eab64SJohn Marino tre_ast_node_t * 1195f2eab64SJohn Marino tre_ast_new_iter(tre_mem_t mem, tre_ast_node_t *arg, int min, int max, 1205f2eab64SJohn Marino int minimal); 1215f2eab64SJohn Marino 1225f2eab64SJohn Marino tre_ast_node_t * 1235f2eab64SJohn Marino tre_ast_new_union(tre_mem_t mem, tre_ast_node_t *left, tre_ast_node_t *right); 1245f2eab64SJohn Marino 1255f2eab64SJohn Marino tre_ast_node_t * 1265f2eab64SJohn Marino tre_ast_new_catenation(tre_mem_t mem, tre_ast_node_t *left, 1275f2eab64SJohn Marino tre_ast_node_t *right); 1285f2eab64SJohn Marino 1295f2eab64SJohn Marino #ifdef TRE_DEBUG 1305f2eab64SJohn Marino void 1315f2eab64SJohn Marino tre_ast_print(tre_ast_node_t *tree); 1325f2eab64SJohn Marino 1335f2eab64SJohn Marino /* XXX - rethink AST printing API */ 1345f2eab64SJohn Marino void 1355f2eab64SJohn Marino tre_print_params(int *params); 1365f2eab64SJohn Marino #endif /* TRE_DEBUG */ 1375f2eab64SJohn Marino 1385f2eab64SJohn Marino #endif /* TRE_AST_H */ 1395f2eab64SJohn Marino 1405f2eab64SJohn Marino /* EOF */ 141