xref: /dflybsd-src/contrib/lvm2/dist/libdm/regex/parse_rx.c (revision 86d7f5d305c6adaa56ff4582ece9859d73106103)
186d7f5d3SJohn Marino /*	$NetBSD: parse_rx.c,v 1.1.1.1 2008/12/22 00:18:37 haad Exp $	*/
286d7f5d3SJohn Marino 
386d7f5d3SJohn Marino /*
486d7f5d3SJohn Marino  * Copyright (C) 2001-2004 Sistina Software, Inc. All rights reserved.
586d7f5d3SJohn Marino  * Copyright (C) 2004-2007 Red Hat, Inc. All rights reserved.
686d7f5d3SJohn Marino  *
786d7f5d3SJohn Marino  * This file is part of the device-mapper userspace tools.
886d7f5d3SJohn Marino  *
986d7f5d3SJohn Marino  * This copyrighted material is made available to anyone wishing to use,
1086d7f5d3SJohn Marino  * modify, copy, or redistribute it subject to the terms and conditions
1186d7f5d3SJohn Marino  * of the GNU Lesser General Public License v.2.1.
1286d7f5d3SJohn Marino  *
1386d7f5d3SJohn Marino  * You should have received a copy of the GNU Lesser General Public License
1486d7f5d3SJohn Marino  * along with this program; if not, write to the Free Software Foundation,
1586d7f5d3SJohn Marino  * Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
1686d7f5d3SJohn Marino  */
1786d7f5d3SJohn Marino 
1886d7f5d3SJohn Marino #include "dmlib.h"
1986d7f5d3SJohn Marino #include "parse_rx.h"
2086d7f5d3SJohn Marino 
2186d7f5d3SJohn Marino struct parse_sp {		/* scratch pad for the parsing process */
2286d7f5d3SJohn Marino 	struct dm_pool *mem;
2386d7f5d3SJohn Marino 	int type;		/* token type, 0 indicates a charset */
2486d7f5d3SJohn Marino 	dm_bitset_t charset;	/* The current charset */
2586d7f5d3SJohn Marino 	const char *cursor;	/* where we are in the regex */
2686d7f5d3SJohn Marino 	const char *rx_end;	/* 1pte for the expression being parsed */
2786d7f5d3SJohn Marino };
2886d7f5d3SJohn Marino 
2986d7f5d3SJohn Marino static struct rx_node *_or_term(struct parse_sp *ps);
3086d7f5d3SJohn Marino 
_single_char(struct parse_sp * ps,unsigned int c,const char * ptr)3186d7f5d3SJohn Marino static void _single_char(struct parse_sp *ps, unsigned int c, const char *ptr)
3286d7f5d3SJohn Marino {
3386d7f5d3SJohn Marino 	ps->type = 0;
3486d7f5d3SJohn Marino 	ps->cursor = ptr + 1;
3586d7f5d3SJohn Marino 	dm_bit_clear_all(ps->charset);
3686d7f5d3SJohn Marino 	dm_bit_set(ps->charset, c);
3786d7f5d3SJohn Marino }
3886d7f5d3SJohn Marino 
3986d7f5d3SJohn Marino /*
4086d7f5d3SJohn Marino  * Get the next token from the regular expression.
4186d7f5d3SJohn Marino  * Returns: 1 success, 0 end of input, -1 error.
4286d7f5d3SJohn Marino  */
_rx_get_token(struct parse_sp * ps)4386d7f5d3SJohn Marino static int _rx_get_token(struct parse_sp *ps)
4486d7f5d3SJohn Marino {
4586d7f5d3SJohn Marino 	int neg = 0, range = 0;
4686d7f5d3SJohn Marino 	char c, lc = 0;
4786d7f5d3SJohn Marino 	const char *ptr = ps->cursor;
4886d7f5d3SJohn Marino 	if (ptr == ps->rx_end) {	/* end of input ? */
4986d7f5d3SJohn Marino 		ps->type = -1;
5086d7f5d3SJohn Marino 		return 0;
5186d7f5d3SJohn Marino 	}
5286d7f5d3SJohn Marino 
5386d7f5d3SJohn Marino 	switch (*ptr) {
5486d7f5d3SJohn Marino 		/* charsets and ncharsets */
5586d7f5d3SJohn Marino 	case '[':
5686d7f5d3SJohn Marino 		ptr++;
5786d7f5d3SJohn Marino 		if (*ptr == '^') {
5886d7f5d3SJohn Marino 			dm_bit_set_all(ps->charset);
5986d7f5d3SJohn Marino 
6086d7f5d3SJohn Marino 			/* never transition on zero */
6186d7f5d3SJohn Marino 			dm_bit_clear(ps->charset, 0);
6286d7f5d3SJohn Marino 			neg = 1;
6386d7f5d3SJohn Marino 			ptr++;
6486d7f5d3SJohn Marino 
6586d7f5d3SJohn Marino 		} else
6686d7f5d3SJohn Marino 			dm_bit_clear_all(ps->charset);
6786d7f5d3SJohn Marino 
6886d7f5d3SJohn Marino 		while ((ptr < ps->rx_end) && (*ptr != ']')) {
6986d7f5d3SJohn Marino 			if (*ptr == '\\') {
7086d7f5d3SJohn Marino 				/* an escaped character */
7186d7f5d3SJohn Marino 				ptr++;
7286d7f5d3SJohn Marino 				switch (*ptr) {
7386d7f5d3SJohn Marino 				case 'n':
7486d7f5d3SJohn Marino 					c = '\n';
7586d7f5d3SJohn Marino 					break;
7686d7f5d3SJohn Marino 				case 'r':
7786d7f5d3SJohn Marino 					c = '\r';
7886d7f5d3SJohn Marino 					break;
7986d7f5d3SJohn Marino 				case 't':
8086d7f5d3SJohn Marino 					c = '\t';
8186d7f5d3SJohn Marino 					break;
8286d7f5d3SJohn Marino 				default:
8386d7f5d3SJohn Marino 					c = *ptr;
8486d7f5d3SJohn Marino 				}
8586d7f5d3SJohn Marino 			} else if (*ptr == '-' && lc) {
8686d7f5d3SJohn Marino 				/* we've got a range on our hands */
8786d7f5d3SJohn Marino 				range = 1;
8886d7f5d3SJohn Marino 				ptr++;
8986d7f5d3SJohn Marino 				if (ptr == ps->rx_end) {
9086d7f5d3SJohn Marino 					log_error("Incomplete range"
9186d7f5d3SJohn Marino 						  "specification");
9286d7f5d3SJohn Marino 					return -1;
9386d7f5d3SJohn Marino 				}
9486d7f5d3SJohn Marino 				c = *ptr;
9586d7f5d3SJohn Marino 			} else
9686d7f5d3SJohn Marino 				c = *ptr;
9786d7f5d3SJohn Marino 
9886d7f5d3SJohn Marino 			if (range) {
9986d7f5d3SJohn Marino 				/* add lc - c into the bitset */
10086d7f5d3SJohn Marino 				if (lc > c) {
10186d7f5d3SJohn Marino 					char tmp = c;
10286d7f5d3SJohn Marino 					c = lc;
10386d7f5d3SJohn Marino 					lc = tmp;
10486d7f5d3SJohn Marino 				}
10586d7f5d3SJohn Marino 
10686d7f5d3SJohn Marino 				for (; lc <= c; lc++) {
10786d7f5d3SJohn Marino 					if (neg)
10886d7f5d3SJohn Marino 						dm_bit_clear(ps->charset, lc);
10986d7f5d3SJohn Marino 					else
11086d7f5d3SJohn Marino 						dm_bit_set(ps->charset, lc);
11186d7f5d3SJohn Marino 				}
11286d7f5d3SJohn Marino 				range = 0;
11386d7f5d3SJohn Marino 			} else {
11486d7f5d3SJohn Marino 				/* add c into the bitset */
11586d7f5d3SJohn Marino 				if (neg)
11686d7f5d3SJohn Marino 					dm_bit_clear(ps->charset, c);
11786d7f5d3SJohn Marino 				else
11886d7f5d3SJohn Marino 					dm_bit_set(ps->charset, c);
11986d7f5d3SJohn Marino 			}
12086d7f5d3SJohn Marino 			ptr++;
12186d7f5d3SJohn Marino 			lc = c;
12286d7f5d3SJohn Marino 		}
12386d7f5d3SJohn Marino 
12486d7f5d3SJohn Marino 		if (ptr >= ps->rx_end) {
12586d7f5d3SJohn Marino 			ps->type = -1;
12686d7f5d3SJohn Marino 			return -1;
12786d7f5d3SJohn Marino 		}
12886d7f5d3SJohn Marino 
12986d7f5d3SJohn Marino 		ps->type = 0;
13086d7f5d3SJohn Marino 		ps->cursor = ptr + 1;
13186d7f5d3SJohn Marino 		break;
13286d7f5d3SJohn Marino 
13386d7f5d3SJohn Marino 		/* These characters are special, we just return their ASCII
13486d7f5d3SJohn Marino 		   codes as the type.  Sorted into ascending order to help the
13586d7f5d3SJohn Marino 		   compiler */
13686d7f5d3SJohn Marino 	case '(':
13786d7f5d3SJohn Marino 	case ')':
13886d7f5d3SJohn Marino 	case '*':
13986d7f5d3SJohn Marino 	case '+':
14086d7f5d3SJohn Marino 	case '?':
14186d7f5d3SJohn Marino 	case '|':
14286d7f5d3SJohn Marino 		ps->type = (int) *ptr;
14386d7f5d3SJohn Marino 		ps->cursor = ptr + 1;
14486d7f5d3SJohn Marino 		break;
14586d7f5d3SJohn Marino 
14686d7f5d3SJohn Marino 	case '^':
14786d7f5d3SJohn Marino 		_single_char(ps, HAT_CHAR, ptr);
14886d7f5d3SJohn Marino 		break;
14986d7f5d3SJohn Marino 
15086d7f5d3SJohn Marino 	case '$':
15186d7f5d3SJohn Marino 		_single_char(ps, DOLLAR_CHAR, ptr);
15286d7f5d3SJohn Marino 		break;
15386d7f5d3SJohn Marino 
15486d7f5d3SJohn Marino 	case '.':
15586d7f5d3SJohn Marino 		/* The 'all but newline' character set */
15686d7f5d3SJohn Marino 		ps->type = 0;
15786d7f5d3SJohn Marino 		ps->cursor = ptr + 1;
15886d7f5d3SJohn Marino 		dm_bit_set_all(ps->charset);
15986d7f5d3SJohn Marino 		dm_bit_clear(ps->charset, (int) '\n');
16086d7f5d3SJohn Marino 		dm_bit_clear(ps->charset, (int) '\r');
16186d7f5d3SJohn Marino 		dm_bit_clear(ps->charset, 0);
16286d7f5d3SJohn Marino 		break;
16386d7f5d3SJohn Marino 
16486d7f5d3SJohn Marino 	case '\\':
16586d7f5d3SJohn Marino 		/* escaped character */
16686d7f5d3SJohn Marino 		ptr++;
16786d7f5d3SJohn Marino 		if (ptr >= ps->rx_end) {
16886d7f5d3SJohn Marino 			log_error("Badly quoted character at end "
16986d7f5d3SJohn Marino 				  "of expression");
17086d7f5d3SJohn Marino 			ps->type = -1;
17186d7f5d3SJohn Marino 			return -1;
17286d7f5d3SJohn Marino 		}
17386d7f5d3SJohn Marino 
17486d7f5d3SJohn Marino 		ps->type = 0;
17586d7f5d3SJohn Marino 		ps->cursor = ptr + 1;
17686d7f5d3SJohn Marino 		dm_bit_clear_all(ps->charset);
17786d7f5d3SJohn Marino 		switch (*ptr) {
17886d7f5d3SJohn Marino 		case 'n':
17986d7f5d3SJohn Marino 			dm_bit_set(ps->charset, (int) '\n');
18086d7f5d3SJohn Marino 			break;
18186d7f5d3SJohn Marino 		case 'r':
18286d7f5d3SJohn Marino 			dm_bit_set(ps->charset, (int) '\r');
18386d7f5d3SJohn Marino 			break;
18486d7f5d3SJohn Marino 		case 't':
18586d7f5d3SJohn Marino 			dm_bit_set(ps->charset, (int) '\t');
18686d7f5d3SJohn Marino 			break;
18786d7f5d3SJohn Marino 		default:
18886d7f5d3SJohn Marino 			dm_bit_set(ps->charset, (int) *ptr);
18986d7f5d3SJohn Marino 		}
19086d7f5d3SJohn Marino 		break;
19186d7f5d3SJohn Marino 
19286d7f5d3SJohn Marino 	default:
19386d7f5d3SJohn Marino 		/* add a single character to the bitset */
19486d7f5d3SJohn Marino 		ps->type = 0;
19586d7f5d3SJohn Marino 		ps->cursor = ptr + 1;
19686d7f5d3SJohn Marino 		dm_bit_clear_all(ps->charset);
19786d7f5d3SJohn Marino 		dm_bit_set(ps->charset, (int) *ptr);
19886d7f5d3SJohn Marino 		break;
19986d7f5d3SJohn Marino 	}
20086d7f5d3SJohn Marino 
20186d7f5d3SJohn Marino 	return 1;
20286d7f5d3SJohn Marino }
20386d7f5d3SJohn Marino 
_node(struct dm_pool * mem,int type,struct rx_node * l,struct rx_node * r)20486d7f5d3SJohn Marino static struct rx_node *_node(struct dm_pool *mem, int type,
20586d7f5d3SJohn Marino 			     struct rx_node *l, struct rx_node *r)
20686d7f5d3SJohn Marino {
20786d7f5d3SJohn Marino 	struct rx_node *n = dm_pool_zalloc(mem, sizeof(*n));
20886d7f5d3SJohn Marino 
20986d7f5d3SJohn Marino 	if (n) {
21086d7f5d3SJohn Marino 		if (!(n->charset = dm_bitset_create(mem, 256))) {
21186d7f5d3SJohn Marino 			dm_pool_free(mem, n);
21286d7f5d3SJohn Marino 			return NULL;
21386d7f5d3SJohn Marino 		}
21486d7f5d3SJohn Marino 
21586d7f5d3SJohn Marino 		n->type = type;
21686d7f5d3SJohn Marino 		n->left = l;
21786d7f5d3SJohn Marino 		n->right = r;
21886d7f5d3SJohn Marino 	}
21986d7f5d3SJohn Marino 
22086d7f5d3SJohn Marino 	return n;
22186d7f5d3SJohn Marino }
22286d7f5d3SJohn Marino 
_term(struct parse_sp * ps)22386d7f5d3SJohn Marino static struct rx_node *_term(struct parse_sp *ps)
22486d7f5d3SJohn Marino {
22586d7f5d3SJohn Marino 	struct rx_node *n;
22686d7f5d3SJohn Marino 
22786d7f5d3SJohn Marino 	switch (ps->type) {
22886d7f5d3SJohn Marino 	case 0:
22986d7f5d3SJohn Marino 		if (!(n = _node(ps->mem, CHARSET, NULL, NULL))) {
23086d7f5d3SJohn Marino 			stack;
23186d7f5d3SJohn Marino 			return NULL;
23286d7f5d3SJohn Marino 		}
23386d7f5d3SJohn Marino 
23486d7f5d3SJohn Marino 		dm_bit_copy(n->charset, ps->charset);
23586d7f5d3SJohn Marino 		_rx_get_token(ps);	/* match charset */
23686d7f5d3SJohn Marino 		break;
23786d7f5d3SJohn Marino 
23886d7f5d3SJohn Marino 	case '(':
23986d7f5d3SJohn Marino 		_rx_get_token(ps);	/* match '(' */
24086d7f5d3SJohn Marino 		n = _or_term(ps);
24186d7f5d3SJohn Marino 		if (ps->type != ')') {
24286d7f5d3SJohn Marino 			log_error("missing ')' in regular expression");
24386d7f5d3SJohn Marino 			return 0;
24486d7f5d3SJohn Marino 		}
24586d7f5d3SJohn Marino 		_rx_get_token(ps);	/* match ')' */
24686d7f5d3SJohn Marino 		break;
24786d7f5d3SJohn Marino 
24886d7f5d3SJohn Marino 	default:
24986d7f5d3SJohn Marino 		n = 0;
25086d7f5d3SJohn Marino 	}
25186d7f5d3SJohn Marino 
25286d7f5d3SJohn Marino 	return n;
25386d7f5d3SJohn Marino }
25486d7f5d3SJohn Marino 
_closure_term(struct parse_sp * ps)25586d7f5d3SJohn Marino static struct rx_node *_closure_term(struct parse_sp *ps)
25686d7f5d3SJohn Marino {
25786d7f5d3SJohn Marino 	struct rx_node *l, *n;
25886d7f5d3SJohn Marino 
25986d7f5d3SJohn Marino 	if (!(l = _term(ps)))
26086d7f5d3SJohn Marino 		return NULL;
26186d7f5d3SJohn Marino 
26286d7f5d3SJohn Marino 	for (;;) {
26386d7f5d3SJohn Marino 		switch (ps->type) {
26486d7f5d3SJohn Marino 		case '*':
26586d7f5d3SJohn Marino 			n = _node(ps->mem, STAR, l, NULL);
26686d7f5d3SJohn Marino 			break;
26786d7f5d3SJohn Marino 
26886d7f5d3SJohn Marino 		case '+':
26986d7f5d3SJohn Marino 			n = _node(ps->mem, PLUS, l, NULL);
27086d7f5d3SJohn Marino 			break;
27186d7f5d3SJohn Marino 
27286d7f5d3SJohn Marino 		case '?':
27386d7f5d3SJohn Marino 			n = _node(ps->mem, QUEST, l, NULL);
27486d7f5d3SJohn Marino 			break;
27586d7f5d3SJohn Marino 
27686d7f5d3SJohn Marino 		default:
27786d7f5d3SJohn Marino 			return l;
27886d7f5d3SJohn Marino 		}
27986d7f5d3SJohn Marino 
28086d7f5d3SJohn Marino 		if (!n) {
28186d7f5d3SJohn Marino 			stack;
28286d7f5d3SJohn Marino 			return NULL;
28386d7f5d3SJohn Marino 		}
28486d7f5d3SJohn Marino 
28586d7f5d3SJohn Marino 		_rx_get_token(ps);
28686d7f5d3SJohn Marino 		l = n;
28786d7f5d3SJohn Marino 	}
28886d7f5d3SJohn Marino 
28986d7f5d3SJohn Marino 	return n;
29086d7f5d3SJohn Marino }
29186d7f5d3SJohn Marino 
_cat_term(struct parse_sp * ps)29286d7f5d3SJohn Marino static struct rx_node *_cat_term(struct parse_sp *ps)
29386d7f5d3SJohn Marino {
29486d7f5d3SJohn Marino 	struct rx_node *l, *r, *n;
29586d7f5d3SJohn Marino 
29686d7f5d3SJohn Marino 	if (!(l = _closure_term(ps)))
29786d7f5d3SJohn Marino 		return NULL;
29886d7f5d3SJohn Marino 
29986d7f5d3SJohn Marino 	if (ps->type == '|')
30086d7f5d3SJohn Marino 		return l;
30186d7f5d3SJohn Marino 
30286d7f5d3SJohn Marino 	if (!(r = _cat_term(ps)))
30386d7f5d3SJohn Marino 		return l;
30486d7f5d3SJohn Marino 
30586d7f5d3SJohn Marino 	if (!(n = _node(ps->mem, CAT, l, r)))
30686d7f5d3SJohn Marino 		stack;
30786d7f5d3SJohn Marino 
30886d7f5d3SJohn Marino 	return n;
30986d7f5d3SJohn Marino }
31086d7f5d3SJohn Marino 
_or_term(struct parse_sp * ps)31186d7f5d3SJohn Marino static struct rx_node *_or_term(struct parse_sp *ps)
31286d7f5d3SJohn Marino {
31386d7f5d3SJohn Marino 	struct rx_node *l, *r, *n;
31486d7f5d3SJohn Marino 
31586d7f5d3SJohn Marino 	if (!(l = _cat_term(ps)))
31686d7f5d3SJohn Marino 		return NULL;
31786d7f5d3SJohn Marino 
31886d7f5d3SJohn Marino 	if (ps->type != '|')
31986d7f5d3SJohn Marino 		return l;
32086d7f5d3SJohn Marino 
32186d7f5d3SJohn Marino 	_rx_get_token(ps);		/* match '|' */
32286d7f5d3SJohn Marino 
32386d7f5d3SJohn Marino 	if (!(r = _or_term(ps))) {
32486d7f5d3SJohn Marino 		log_error("Badly formed 'or' expression");
32586d7f5d3SJohn Marino 		return NULL;
32686d7f5d3SJohn Marino 	}
32786d7f5d3SJohn Marino 
32886d7f5d3SJohn Marino 	if (!(n = _node(ps->mem, OR, l, r)))
32986d7f5d3SJohn Marino 		stack;
33086d7f5d3SJohn Marino 
33186d7f5d3SJohn Marino 	return n;
33286d7f5d3SJohn Marino }
33386d7f5d3SJohn Marino 
rx_parse_tok(struct dm_pool * mem,const char * begin,const char * end)33486d7f5d3SJohn Marino struct rx_node *rx_parse_tok(struct dm_pool *mem,
33586d7f5d3SJohn Marino 			     const char *begin, const char *end)
33686d7f5d3SJohn Marino {
33786d7f5d3SJohn Marino 	struct rx_node *r;
33886d7f5d3SJohn Marino 	struct parse_sp *ps = dm_pool_zalloc(mem, sizeof(*ps));
33986d7f5d3SJohn Marino 
34086d7f5d3SJohn Marino 	if (!ps) {
34186d7f5d3SJohn Marino 		stack;
34286d7f5d3SJohn Marino 		return NULL;
34386d7f5d3SJohn Marino 	}
34486d7f5d3SJohn Marino 
34586d7f5d3SJohn Marino 	ps->mem = mem;
34686d7f5d3SJohn Marino 	ps->charset = dm_bitset_create(mem, 256);
34786d7f5d3SJohn Marino 	ps->cursor = begin;
34886d7f5d3SJohn Marino 	ps->rx_end = end;
34986d7f5d3SJohn Marino 	_rx_get_token(ps);		/* load the first token */
35086d7f5d3SJohn Marino 
35186d7f5d3SJohn Marino 	if (!(r = _or_term(ps))) {
35286d7f5d3SJohn Marino 		log_error("Parse error in regex");
35386d7f5d3SJohn Marino 		dm_pool_free(mem, ps);
35486d7f5d3SJohn Marino 	}
35586d7f5d3SJohn Marino 
35686d7f5d3SJohn Marino 	return r;
35786d7f5d3SJohn Marino }
35886d7f5d3SJohn Marino 
rx_parse_str(struct dm_pool * mem,const char * str)35986d7f5d3SJohn Marino struct rx_node *rx_parse_str(struct dm_pool *mem, const char *str)
36086d7f5d3SJohn Marino {
36186d7f5d3SJohn Marino 	return rx_parse_tok(mem, str, str + strlen(str));
36286d7f5d3SJohn Marino }
363