186d7f5d3SJohn Marino /* $NetBSD: parse_rx.c,v 1.1.1.1 2008/12/22 00:18:37 haad Exp $ */
286d7f5d3SJohn Marino
386d7f5d3SJohn Marino /*
486d7f5d3SJohn Marino * Copyright (C) 2001-2004 Sistina Software, Inc. All rights reserved.
586d7f5d3SJohn Marino * Copyright (C) 2004-2007 Red Hat, Inc. All rights reserved.
686d7f5d3SJohn Marino *
786d7f5d3SJohn Marino * This file is part of the device-mapper userspace tools.
886d7f5d3SJohn Marino *
986d7f5d3SJohn Marino * This copyrighted material is made available to anyone wishing to use,
1086d7f5d3SJohn Marino * modify, copy, or redistribute it subject to the terms and conditions
1186d7f5d3SJohn Marino * of the GNU Lesser General Public License v.2.1.
1286d7f5d3SJohn Marino *
1386d7f5d3SJohn Marino * You should have received a copy of the GNU Lesser General Public License
1486d7f5d3SJohn Marino * along with this program; if not, write to the Free Software Foundation,
1586d7f5d3SJohn Marino * Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
1686d7f5d3SJohn Marino */
1786d7f5d3SJohn Marino
1886d7f5d3SJohn Marino #include "dmlib.h"
1986d7f5d3SJohn Marino #include "parse_rx.h"
2086d7f5d3SJohn Marino
2186d7f5d3SJohn Marino struct parse_sp { /* scratch pad for the parsing process */
2286d7f5d3SJohn Marino struct dm_pool *mem;
2386d7f5d3SJohn Marino int type; /* token type, 0 indicates a charset */
2486d7f5d3SJohn Marino dm_bitset_t charset; /* The current charset */
2586d7f5d3SJohn Marino const char *cursor; /* where we are in the regex */
2686d7f5d3SJohn Marino const char *rx_end; /* 1pte for the expression being parsed */
2786d7f5d3SJohn Marino };
2886d7f5d3SJohn Marino
2986d7f5d3SJohn Marino static struct rx_node *_or_term(struct parse_sp *ps);
3086d7f5d3SJohn Marino
_single_char(struct parse_sp * ps,unsigned int c,const char * ptr)3186d7f5d3SJohn Marino static void _single_char(struct parse_sp *ps, unsigned int c, const char *ptr)
3286d7f5d3SJohn Marino {
3386d7f5d3SJohn Marino ps->type = 0;
3486d7f5d3SJohn Marino ps->cursor = ptr + 1;
3586d7f5d3SJohn Marino dm_bit_clear_all(ps->charset);
3686d7f5d3SJohn Marino dm_bit_set(ps->charset, c);
3786d7f5d3SJohn Marino }
3886d7f5d3SJohn Marino
3986d7f5d3SJohn Marino /*
4086d7f5d3SJohn Marino * Get the next token from the regular expression.
4186d7f5d3SJohn Marino * Returns: 1 success, 0 end of input, -1 error.
4286d7f5d3SJohn Marino */
_rx_get_token(struct parse_sp * ps)4386d7f5d3SJohn Marino static int _rx_get_token(struct parse_sp *ps)
4486d7f5d3SJohn Marino {
4586d7f5d3SJohn Marino int neg = 0, range = 0;
4686d7f5d3SJohn Marino char c, lc = 0;
4786d7f5d3SJohn Marino const char *ptr = ps->cursor;
4886d7f5d3SJohn Marino if (ptr == ps->rx_end) { /* end of input ? */
4986d7f5d3SJohn Marino ps->type = -1;
5086d7f5d3SJohn Marino return 0;
5186d7f5d3SJohn Marino }
5286d7f5d3SJohn Marino
5386d7f5d3SJohn Marino switch (*ptr) {
5486d7f5d3SJohn Marino /* charsets and ncharsets */
5586d7f5d3SJohn Marino case '[':
5686d7f5d3SJohn Marino ptr++;
5786d7f5d3SJohn Marino if (*ptr == '^') {
5886d7f5d3SJohn Marino dm_bit_set_all(ps->charset);
5986d7f5d3SJohn Marino
6086d7f5d3SJohn Marino /* never transition on zero */
6186d7f5d3SJohn Marino dm_bit_clear(ps->charset, 0);
6286d7f5d3SJohn Marino neg = 1;
6386d7f5d3SJohn Marino ptr++;
6486d7f5d3SJohn Marino
6586d7f5d3SJohn Marino } else
6686d7f5d3SJohn Marino dm_bit_clear_all(ps->charset);
6786d7f5d3SJohn Marino
6886d7f5d3SJohn Marino while ((ptr < ps->rx_end) && (*ptr != ']')) {
6986d7f5d3SJohn Marino if (*ptr == '\\') {
7086d7f5d3SJohn Marino /* an escaped character */
7186d7f5d3SJohn Marino ptr++;
7286d7f5d3SJohn Marino switch (*ptr) {
7386d7f5d3SJohn Marino case 'n':
7486d7f5d3SJohn Marino c = '\n';
7586d7f5d3SJohn Marino break;
7686d7f5d3SJohn Marino case 'r':
7786d7f5d3SJohn Marino c = '\r';
7886d7f5d3SJohn Marino break;
7986d7f5d3SJohn Marino case 't':
8086d7f5d3SJohn Marino c = '\t';
8186d7f5d3SJohn Marino break;
8286d7f5d3SJohn Marino default:
8386d7f5d3SJohn Marino c = *ptr;
8486d7f5d3SJohn Marino }
8586d7f5d3SJohn Marino } else if (*ptr == '-' && lc) {
8686d7f5d3SJohn Marino /* we've got a range on our hands */
8786d7f5d3SJohn Marino range = 1;
8886d7f5d3SJohn Marino ptr++;
8986d7f5d3SJohn Marino if (ptr == ps->rx_end) {
9086d7f5d3SJohn Marino log_error("Incomplete range"
9186d7f5d3SJohn Marino "specification");
9286d7f5d3SJohn Marino return -1;
9386d7f5d3SJohn Marino }
9486d7f5d3SJohn Marino c = *ptr;
9586d7f5d3SJohn Marino } else
9686d7f5d3SJohn Marino c = *ptr;
9786d7f5d3SJohn Marino
9886d7f5d3SJohn Marino if (range) {
9986d7f5d3SJohn Marino /* add lc - c into the bitset */
10086d7f5d3SJohn Marino if (lc > c) {
10186d7f5d3SJohn Marino char tmp = c;
10286d7f5d3SJohn Marino c = lc;
10386d7f5d3SJohn Marino lc = tmp;
10486d7f5d3SJohn Marino }
10586d7f5d3SJohn Marino
10686d7f5d3SJohn Marino for (; lc <= c; lc++) {
10786d7f5d3SJohn Marino if (neg)
10886d7f5d3SJohn Marino dm_bit_clear(ps->charset, lc);
10986d7f5d3SJohn Marino else
11086d7f5d3SJohn Marino dm_bit_set(ps->charset, lc);
11186d7f5d3SJohn Marino }
11286d7f5d3SJohn Marino range = 0;
11386d7f5d3SJohn Marino } else {
11486d7f5d3SJohn Marino /* add c into the bitset */
11586d7f5d3SJohn Marino if (neg)
11686d7f5d3SJohn Marino dm_bit_clear(ps->charset, c);
11786d7f5d3SJohn Marino else
11886d7f5d3SJohn Marino dm_bit_set(ps->charset, c);
11986d7f5d3SJohn Marino }
12086d7f5d3SJohn Marino ptr++;
12186d7f5d3SJohn Marino lc = c;
12286d7f5d3SJohn Marino }
12386d7f5d3SJohn Marino
12486d7f5d3SJohn Marino if (ptr >= ps->rx_end) {
12586d7f5d3SJohn Marino ps->type = -1;
12686d7f5d3SJohn Marino return -1;
12786d7f5d3SJohn Marino }
12886d7f5d3SJohn Marino
12986d7f5d3SJohn Marino ps->type = 0;
13086d7f5d3SJohn Marino ps->cursor = ptr + 1;
13186d7f5d3SJohn Marino break;
13286d7f5d3SJohn Marino
13386d7f5d3SJohn Marino /* These characters are special, we just return their ASCII
13486d7f5d3SJohn Marino codes as the type. Sorted into ascending order to help the
13586d7f5d3SJohn Marino compiler */
13686d7f5d3SJohn Marino case '(':
13786d7f5d3SJohn Marino case ')':
13886d7f5d3SJohn Marino case '*':
13986d7f5d3SJohn Marino case '+':
14086d7f5d3SJohn Marino case '?':
14186d7f5d3SJohn Marino case '|':
14286d7f5d3SJohn Marino ps->type = (int) *ptr;
14386d7f5d3SJohn Marino ps->cursor = ptr + 1;
14486d7f5d3SJohn Marino break;
14586d7f5d3SJohn Marino
14686d7f5d3SJohn Marino case '^':
14786d7f5d3SJohn Marino _single_char(ps, HAT_CHAR, ptr);
14886d7f5d3SJohn Marino break;
14986d7f5d3SJohn Marino
15086d7f5d3SJohn Marino case '$':
15186d7f5d3SJohn Marino _single_char(ps, DOLLAR_CHAR, ptr);
15286d7f5d3SJohn Marino break;
15386d7f5d3SJohn Marino
15486d7f5d3SJohn Marino case '.':
15586d7f5d3SJohn Marino /* The 'all but newline' character set */
15686d7f5d3SJohn Marino ps->type = 0;
15786d7f5d3SJohn Marino ps->cursor = ptr + 1;
15886d7f5d3SJohn Marino dm_bit_set_all(ps->charset);
15986d7f5d3SJohn Marino dm_bit_clear(ps->charset, (int) '\n');
16086d7f5d3SJohn Marino dm_bit_clear(ps->charset, (int) '\r');
16186d7f5d3SJohn Marino dm_bit_clear(ps->charset, 0);
16286d7f5d3SJohn Marino break;
16386d7f5d3SJohn Marino
16486d7f5d3SJohn Marino case '\\':
16586d7f5d3SJohn Marino /* escaped character */
16686d7f5d3SJohn Marino ptr++;
16786d7f5d3SJohn Marino if (ptr >= ps->rx_end) {
16886d7f5d3SJohn Marino log_error("Badly quoted character at end "
16986d7f5d3SJohn Marino "of expression");
17086d7f5d3SJohn Marino ps->type = -1;
17186d7f5d3SJohn Marino return -1;
17286d7f5d3SJohn Marino }
17386d7f5d3SJohn Marino
17486d7f5d3SJohn Marino ps->type = 0;
17586d7f5d3SJohn Marino ps->cursor = ptr + 1;
17686d7f5d3SJohn Marino dm_bit_clear_all(ps->charset);
17786d7f5d3SJohn Marino switch (*ptr) {
17886d7f5d3SJohn Marino case 'n':
17986d7f5d3SJohn Marino dm_bit_set(ps->charset, (int) '\n');
18086d7f5d3SJohn Marino break;
18186d7f5d3SJohn Marino case 'r':
18286d7f5d3SJohn Marino dm_bit_set(ps->charset, (int) '\r');
18386d7f5d3SJohn Marino break;
18486d7f5d3SJohn Marino case 't':
18586d7f5d3SJohn Marino dm_bit_set(ps->charset, (int) '\t');
18686d7f5d3SJohn Marino break;
18786d7f5d3SJohn Marino default:
18886d7f5d3SJohn Marino dm_bit_set(ps->charset, (int) *ptr);
18986d7f5d3SJohn Marino }
19086d7f5d3SJohn Marino break;
19186d7f5d3SJohn Marino
19286d7f5d3SJohn Marino default:
19386d7f5d3SJohn Marino /* add a single character to the bitset */
19486d7f5d3SJohn Marino ps->type = 0;
19586d7f5d3SJohn Marino ps->cursor = ptr + 1;
19686d7f5d3SJohn Marino dm_bit_clear_all(ps->charset);
19786d7f5d3SJohn Marino dm_bit_set(ps->charset, (int) *ptr);
19886d7f5d3SJohn Marino break;
19986d7f5d3SJohn Marino }
20086d7f5d3SJohn Marino
20186d7f5d3SJohn Marino return 1;
20286d7f5d3SJohn Marino }
20386d7f5d3SJohn Marino
_node(struct dm_pool * mem,int type,struct rx_node * l,struct rx_node * r)20486d7f5d3SJohn Marino static struct rx_node *_node(struct dm_pool *mem, int type,
20586d7f5d3SJohn Marino struct rx_node *l, struct rx_node *r)
20686d7f5d3SJohn Marino {
20786d7f5d3SJohn Marino struct rx_node *n = dm_pool_zalloc(mem, sizeof(*n));
20886d7f5d3SJohn Marino
20986d7f5d3SJohn Marino if (n) {
21086d7f5d3SJohn Marino if (!(n->charset = dm_bitset_create(mem, 256))) {
21186d7f5d3SJohn Marino dm_pool_free(mem, n);
21286d7f5d3SJohn Marino return NULL;
21386d7f5d3SJohn Marino }
21486d7f5d3SJohn Marino
21586d7f5d3SJohn Marino n->type = type;
21686d7f5d3SJohn Marino n->left = l;
21786d7f5d3SJohn Marino n->right = r;
21886d7f5d3SJohn Marino }
21986d7f5d3SJohn Marino
22086d7f5d3SJohn Marino return n;
22186d7f5d3SJohn Marino }
22286d7f5d3SJohn Marino
_term(struct parse_sp * ps)22386d7f5d3SJohn Marino static struct rx_node *_term(struct parse_sp *ps)
22486d7f5d3SJohn Marino {
22586d7f5d3SJohn Marino struct rx_node *n;
22686d7f5d3SJohn Marino
22786d7f5d3SJohn Marino switch (ps->type) {
22886d7f5d3SJohn Marino case 0:
22986d7f5d3SJohn Marino if (!(n = _node(ps->mem, CHARSET, NULL, NULL))) {
23086d7f5d3SJohn Marino stack;
23186d7f5d3SJohn Marino return NULL;
23286d7f5d3SJohn Marino }
23386d7f5d3SJohn Marino
23486d7f5d3SJohn Marino dm_bit_copy(n->charset, ps->charset);
23586d7f5d3SJohn Marino _rx_get_token(ps); /* match charset */
23686d7f5d3SJohn Marino break;
23786d7f5d3SJohn Marino
23886d7f5d3SJohn Marino case '(':
23986d7f5d3SJohn Marino _rx_get_token(ps); /* match '(' */
24086d7f5d3SJohn Marino n = _or_term(ps);
24186d7f5d3SJohn Marino if (ps->type != ')') {
24286d7f5d3SJohn Marino log_error("missing ')' in regular expression");
24386d7f5d3SJohn Marino return 0;
24486d7f5d3SJohn Marino }
24586d7f5d3SJohn Marino _rx_get_token(ps); /* match ')' */
24686d7f5d3SJohn Marino break;
24786d7f5d3SJohn Marino
24886d7f5d3SJohn Marino default:
24986d7f5d3SJohn Marino n = 0;
25086d7f5d3SJohn Marino }
25186d7f5d3SJohn Marino
25286d7f5d3SJohn Marino return n;
25386d7f5d3SJohn Marino }
25486d7f5d3SJohn Marino
_closure_term(struct parse_sp * ps)25586d7f5d3SJohn Marino static struct rx_node *_closure_term(struct parse_sp *ps)
25686d7f5d3SJohn Marino {
25786d7f5d3SJohn Marino struct rx_node *l, *n;
25886d7f5d3SJohn Marino
25986d7f5d3SJohn Marino if (!(l = _term(ps)))
26086d7f5d3SJohn Marino return NULL;
26186d7f5d3SJohn Marino
26286d7f5d3SJohn Marino for (;;) {
26386d7f5d3SJohn Marino switch (ps->type) {
26486d7f5d3SJohn Marino case '*':
26586d7f5d3SJohn Marino n = _node(ps->mem, STAR, l, NULL);
26686d7f5d3SJohn Marino break;
26786d7f5d3SJohn Marino
26886d7f5d3SJohn Marino case '+':
26986d7f5d3SJohn Marino n = _node(ps->mem, PLUS, l, NULL);
27086d7f5d3SJohn Marino break;
27186d7f5d3SJohn Marino
27286d7f5d3SJohn Marino case '?':
27386d7f5d3SJohn Marino n = _node(ps->mem, QUEST, l, NULL);
27486d7f5d3SJohn Marino break;
27586d7f5d3SJohn Marino
27686d7f5d3SJohn Marino default:
27786d7f5d3SJohn Marino return l;
27886d7f5d3SJohn Marino }
27986d7f5d3SJohn Marino
28086d7f5d3SJohn Marino if (!n) {
28186d7f5d3SJohn Marino stack;
28286d7f5d3SJohn Marino return NULL;
28386d7f5d3SJohn Marino }
28486d7f5d3SJohn Marino
28586d7f5d3SJohn Marino _rx_get_token(ps);
28686d7f5d3SJohn Marino l = n;
28786d7f5d3SJohn Marino }
28886d7f5d3SJohn Marino
28986d7f5d3SJohn Marino return n;
29086d7f5d3SJohn Marino }
29186d7f5d3SJohn Marino
_cat_term(struct parse_sp * ps)29286d7f5d3SJohn Marino static struct rx_node *_cat_term(struct parse_sp *ps)
29386d7f5d3SJohn Marino {
29486d7f5d3SJohn Marino struct rx_node *l, *r, *n;
29586d7f5d3SJohn Marino
29686d7f5d3SJohn Marino if (!(l = _closure_term(ps)))
29786d7f5d3SJohn Marino return NULL;
29886d7f5d3SJohn Marino
29986d7f5d3SJohn Marino if (ps->type == '|')
30086d7f5d3SJohn Marino return l;
30186d7f5d3SJohn Marino
30286d7f5d3SJohn Marino if (!(r = _cat_term(ps)))
30386d7f5d3SJohn Marino return l;
30486d7f5d3SJohn Marino
30586d7f5d3SJohn Marino if (!(n = _node(ps->mem, CAT, l, r)))
30686d7f5d3SJohn Marino stack;
30786d7f5d3SJohn Marino
30886d7f5d3SJohn Marino return n;
30986d7f5d3SJohn Marino }
31086d7f5d3SJohn Marino
_or_term(struct parse_sp * ps)31186d7f5d3SJohn Marino static struct rx_node *_or_term(struct parse_sp *ps)
31286d7f5d3SJohn Marino {
31386d7f5d3SJohn Marino struct rx_node *l, *r, *n;
31486d7f5d3SJohn Marino
31586d7f5d3SJohn Marino if (!(l = _cat_term(ps)))
31686d7f5d3SJohn Marino return NULL;
31786d7f5d3SJohn Marino
31886d7f5d3SJohn Marino if (ps->type != '|')
31986d7f5d3SJohn Marino return l;
32086d7f5d3SJohn Marino
32186d7f5d3SJohn Marino _rx_get_token(ps); /* match '|' */
32286d7f5d3SJohn Marino
32386d7f5d3SJohn Marino if (!(r = _or_term(ps))) {
32486d7f5d3SJohn Marino log_error("Badly formed 'or' expression");
32586d7f5d3SJohn Marino return NULL;
32686d7f5d3SJohn Marino }
32786d7f5d3SJohn Marino
32886d7f5d3SJohn Marino if (!(n = _node(ps->mem, OR, l, r)))
32986d7f5d3SJohn Marino stack;
33086d7f5d3SJohn Marino
33186d7f5d3SJohn Marino return n;
33286d7f5d3SJohn Marino }
33386d7f5d3SJohn Marino
rx_parse_tok(struct dm_pool * mem,const char * begin,const char * end)33486d7f5d3SJohn Marino struct rx_node *rx_parse_tok(struct dm_pool *mem,
33586d7f5d3SJohn Marino const char *begin, const char *end)
33686d7f5d3SJohn Marino {
33786d7f5d3SJohn Marino struct rx_node *r;
33886d7f5d3SJohn Marino struct parse_sp *ps = dm_pool_zalloc(mem, sizeof(*ps));
33986d7f5d3SJohn Marino
34086d7f5d3SJohn Marino if (!ps) {
34186d7f5d3SJohn Marino stack;
34286d7f5d3SJohn Marino return NULL;
34386d7f5d3SJohn Marino }
34486d7f5d3SJohn Marino
34586d7f5d3SJohn Marino ps->mem = mem;
34686d7f5d3SJohn Marino ps->charset = dm_bitset_create(mem, 256);
34786d7f5d3SJohn Marino ps->cursor = begin;
34886d7f5d3SJohn Marino ps->rx_end = end;
34986d7f5d3SJohn Marino _rx_get_token(ps); /* load the first token */
35086d7f5d3SJohn Marino
35186d7f5d3SJohn Marino if (!(r = _or_term(ps))) {
35286d7f5d3SJohn Marino log_error("Parse error in regex");
35386d7f5d3SJohn Marino dm_pool_free(mem, ps);
35486d7f5d3SJohn Marino }
35586d7f5d3SJohn Marino
35686d7f5d3SJohn Marino return r;
35786d7f5d3SJohn Marino }
35886d7f5d3SJohn Marino
rx_parse_str(struct dm_pool * mem,const char * str)35986d7f5d3SJohn Marino struct rx_node *rx_parse_str(struct dm_pool *mem, const char *str)
36086d7f5d3SJohn Marino {
36186d7f5d3SJohn Marino return rx_parse_tok(mem, str, str + strlen(str));
36286d7f5d3SJohn Marino }
363