1*56a34939Shaad /* $NetBSD: matcher.c,v 1.1.1.1 2008/12/22 00:18:36 haad Exp $ */
2*56a34939Shaad
3*56a34939Shaad /*
4*56a34939Shaad * Copyright (C) 2001-2004 Sistina Software, Inc. All rights reserved.
5*56a34939Shaad * Copyright (C) 2004-2007 Red Hat, Inc. All rights reserved.
6*56a34939Shaad *
7*56a34939Shaad * This file is part of the device-mapper userspace tools.
8*56a34939Shaad *
9*56a34939Shaad * This copyrighted material is made available to anyone wishing to use,
10*56a34939Shaad * modify, copy, or redistribute it subject to the terms and conditions
11*56a34939Shaad * of the GNU Lesser General Public License v.2.1.
12*56a34939Shaad *
13*56a34939Shaad * You should have received a copy of the GNU Lesser General Public License
14*56a34939Shaad * along with this program; if not, write to the Free Software Foundation,
15*56a34939Shaad * Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
16*56a34939Shaad */
17*56a34939Shaad
18*56a34939Shaad #include "dmlib.h"
19*56a34939Shaad #include "parse_rx.h"
20*56a34939Shaad #include "ttree.h"
21*56a34939Shaad #include "assert.h"
22*56a34939Shaad
23*56a34939Shaad struct dfa_state {
24*56a34939Shaad int final;
25*56a34939Shaad struct dfa_state *lookup[256];
26*56a34939Shaad };
27*56a34939Shaad
28*56a34939Shaad struct state_queue {
29*56a34939Shaad struct dfa_state *s;
30*56a34939Shaad dm_bitset_t bits;
31*56a34939Shaad struct state_queue *next;
32*56a34939Shaad };
33*56a34939Shaad
34*56a34939Shaad struct dm_regex { /* Instance variables for the lexer */
35*56a34939Shaad struct dfa_state *start;
36*56a34939Shaad unsigned num_nodes;
37*56a34939Shaad int nodes_entered;
38*56a34939Shaad struct rx_node **nodes;
39*56a34939Shaad struct dm_pool *scratch, *mem;
40*56a34939Shaad };
41*56a34939Shaad
42*56a34939Shaad #define TARGET_TRANS '\0'
43*56a34939Shaad
_count_nodes(struct rx_node * rx)44*56a34939Shaad static int _count_nodes(struct rx_node *rx)
45*56a34939Shaad {
46*56a34939Shaad int r = 1;
47*56a34939Shaad
48*56a34939Shaad if (rx->left)
49*56a34939Shaad r += _count_nodes(rx->left);
50*56a34939Shaad
51*56a34939Shaad if (rx->right)
52*56a34939Shaad r += _count_nodes(rx->right);
53*56a34939Shaad
54*56a34939Shaad return r;
55*56a34939Shaad }
56*56a34939Shaad
_fill_table(struct dm_regex * m,struct rx_node * rx)57*56a34939Shaad static void _fill_table(struct dm_regex *m, struct rx_node *rx)
58*56a34939Shaad {
59*56a34939Shaad assert((rx->type != OR) || (rx->left && rx->right));
60*56a34939Shaad
61*56a34939Shaad if (rx->left)
62*56a34939Shaad _fill_table(m, rx->left);
63*56a34939Shaad
64*56a34939Shaad if (rx->right)
65*56a34939Shaad _fill_table(m, rx->right);
66*56a34939Shaad
67*56a34939Shaad m->nodes[m->nodes_entered++] = rx;
68*56a34939Shaad }
69*56a34939Shaad
_create_bitsets(struct dm_regex * m)70*56a34939Shaad static void _create_bitsets(struct dm_regex *m)
71*56a34939Shaad {
72*56a34939Shaad int i;
73*56a34939Shaad
74*56a34939Shaad for (i = 0; i < m->num_nodes; i++) {
75*56a34939Shaad struct rx_node *n = m->nodes[i];
76*56a34939Shaad n->firstpos = dm_bitset_create(m->scratch, m->num_nodes);
77*56a34939Shaad n->lastpos = dm_bitset_create(m->scratch, m->num_nodes);
78*56a34939Shaad n->followpos = dm_bitset_create(m->scratch, m->num_nodes);
79*56a34939Shaad }
80*56a34939Shaad }
81*56a34939Shaad
_calc_functions(struct dm_regex * m)82*56a34939Shaad static void _calc_functions(struct dm_regex *m)
83*56a34939Shaad {
84*56a34939Shaad int i, j, final = 1;
85*56a34939Shaad struct rx_node *rx, *c1, *c2;
86*56a34939Shaad
87*56a34939Shaad for (i = 0; i < m->num_nodes; i++) {
88*56a34939Shaad rx = m->nodes[i];
89*56a34939Shaad c1 = rx->left;
90*56a34939Shaad c2 = rx->right;
91*56a34939Shaad
92*56a34939Shaad if (dm_bit(rx->charset, TARGET_TRANS))
93*56a34939Shaad rx->final = final++;
94*56a34939Shaad
95*56a34939Shaad switch (rx->type) {
96*56a34939Shaad case CAT:
97*56a34939Shaad if (c1->nullable)
98*56a34939Shaad dm_bit_union(rx->firstpos,
99*56a34939Shaad c1->firstpos, c2->firstpos);
100*56a34939Shaad else
101*56a34939Shaad dm_bit_copy(rx->firstpos, c1->firstpos);
102*56a34939Shaad
103*56a34939Shaad if (c2->nullable)
104*56a34939Shaad dm_bit_union(rx->lastpos,
105*56a34939Shaad c1->lastpos, c2->lastpos);
106*56a34939Shaad else
107*56a34939Shaad dm_bit_copy(rx->lastpos, c2->lastpos);
108*56a34939Shaad
109*56a34939Shaad rx->nullable = c1->nullable && c2->nullable;
110*56a34939Shaad break;
111*56a34939Shaad
112*56a34939Shaad case PLUS:
113*56a34939Shaad dm_bit_copy(rx->firstpos, c1->firstpos);
114*56a34939Shaad dm_bit_copy(rx->lastpos, c1->lastpos);
115*56a34939Shaad rx->nullable = c1->nullable;
116*56a34939Shaad break;
117*56a34939Shaad
118*56a34939Shaad case OR:
119*56a34939Shaad dm_bit_union(rx->firstpos, c1->firstpos, c2->firstpos);
120*56a34939Shaad dm_bit_union(rx->lastpos, c1->lastpos, c2->lastpos);
121*56a34939Shaad rx->nullable = c1->nullable || c2->nullable;
122*56a34939Shaad break;
123*56a34939Shaad
124*56a34939Shaad case QUEST:
125*56a34939Shaad case STAR:
126*56a34939Shaad dm_bit_copy(rx->firstpos, c1->firstpos);
127*56a34939Shaad dm_bit_copy(rx->lastpos, c1->lastpos);
128*56a34939Shaad rx->nullable = 1;
129*56a34939Shaad break;
130*56a34939Shaad
131*56a34939Shaad case CHARSET:
132*56a34939Shaad dm_bit_set(rx->firstpos, i);
133*56a34939Shaad dm_bit_set(rx->lastpos, i);
134*56a34939Shaad rx->nullable = 0;
135*56a34939Shaad break;
136*56a34939Shaad
137*56a34939Shaad default:
138*56a34939Shaad log_error("Internal error: Unknown calc node type");
139*56a34939Shaad }
140*56a34939Shaad
141*56a34939Shaad /*
142*56a34939Shaad * followpos has it's own switch
143*56a34939Shaad * because PLUS and STAR do the
144*56a34939Shaad * same thing.
145*56a34939Shaad */
146*56a34939Shaad switch (rx->type) {
147*56a34939Shaad case CAT:
148*56a34939Shaad for (j = 0; j < m->num_nodes; j++) {
149*56a34939Shaad if (dm_bit(c1->lastpos, j)) {
150*56a34939Shaad struct rx_node *n = m->nodes[j];
151*56a34939Shaad dm_bit_union(n->followpos,
152*56a34939Shaad n->followpos, c2->firstpos);
153*56a34939Shaad }
154*56a34939Shaad }
155*56a34939Shaad break;
156*56a34939Shaad
157*56a34939Shaad case PLUS:
158*56a34939Shaad case STAR:
159*56a34939Shaad for (j = 0; j < m->num_nodes; j++) {
160*56a34939Shaad if (dm_bit(rx->lastpos, j)) {
161*56a34939Shaad struct rx_node *n = m->nodes[j];
162*56a34939Shaad dm_bit_union(n->followpos,
163*56a34939Shaad n->followpos, rx->firstpos);
164*56a34939Shaad }
165*56a34939Shaad }
166*56a34939Shaad break;
167*56a34939Shaad }
168*56a34939Shaad }
169*56a34939Shaad }
170*56a34939Shaad
_create_dfa_state(struct dm_pool * mem)171*56a34939Shaad static struct dfa_state *_create_dfa_state(struct dm_pool *mem)
172*56a34939Shaad {
173*56a34939Shaad return dm_pool_zalloc(mem, sizeof(struct dfa_state));
174*56a34939Shaad }
175*56a34939Shaad
_create_state_queue(struct dm_pool * mem,struct dfa_state * dfa,dm_bitset_t bits)176*56a34939Shaad static struct state_queue *_create_state_queue(struct dm_pool *mem,
177*56a34939Shaad struct dfa_state *dfa,
178*56a34939Shaad dm_bitset_t bits)
179*56a34939Shaad {
180*56a34939Shaad struct state_queue *r = dm_pool_alloc(mem, sizeof(*r));
181*56a34939Shaad
182*56a34939Shaad if (!r) {
183*56a34939Shaad stack;
184*56a34939Shaad return NULL;
185*56a34939Shaad }
186*56a34939Shaad
187*56a34939Shaad r->s = dfa;
188*56a34939Shaad r->bits = dm_bitset_create(mem, bits[0]); /* first element is the size */
189*56a34939Shaad dm_bit_copy(r->bits, bits);
190*56a34939Shaad r->next = 0;
191*56a34939Shaad return r;
192*56a34939Shaad }
193*56a34939Shaad
_calc_states(struct dm_regex * m,struct rx_node * rx)194*56a34939Shaad static int _calc_states(struct dm_regex *m, struct rx_node *rx)
195*56a34939Shaad {
196*56a34939Shaad unsigned iwidth = (m->num_nodes / DM_BITS_PER_INT) + 1;
197*56a34939Shaad struct ttree *tt = ttree_create(m->scratch, iwidth);
198*56a34939Shaad struct state_queue *h, *t, *tmp;
199*56a34939Shaad struct dfa_state *dfa, *ldfa;
200*56a34939Shaad int i, a, set_bits = 0, count = 0;
201*56a34939Shaad dm_bitset_t bs, dfa_bits;
202*56a34939Shaad
203*56a34939Shaad if (!tt)
204*56a34939Shaad return_0;
205*56a34939Shaad
206*56a34939Shaad if (!(bs = dm_bitset_create(m->scratch, m->num_nodes)))
207*56a34939Shaad return_0;
208*56a34939Shaad
209*56a34939Shaad /* create first state */
210*56a34939Shaad dfa = _create_dfa_state(m->mem);
211*56a34939Shaad m->start = dfa;
212*56a34939Shaad ttree_insert(tt, rx->firstpos + 1, dfa);
213*56a34939Shaad
214*56a34939Shaad /* prime the queue */
215*56a34939Shaad h = t = _create_state_queue(m->scratch, dfa, rx->firstpos);
216*56a34939Shaad while (h) {
217*56a34939Shaad /* pop state off front of the queue */
218*56a34939Shaad dfa = h->s;
219*56a34939Shaad dfa_bits = h->bits;
220*56a34939Shaad h = h->next;
221*56a34939Shaad
222*56a34939Shaad /* iterate through all the inputs for this state */
223*56a34939Shaad dm_bit_clear_all(bs);
224*56a34939Shaad for (a = 0; a < 256; a++) {
225*56a34939Shaad /* iterate through all the states in firstpos */
226*56a34939Shaad for (i = dm_bit_get_first(dfa_bits);
227*56a34939Shaad i >= 0; i = dm_bit_get_next(dfa_bits, i)) {
228*56a34939Shaad if (dm_bit(m->nodes[i]->charset, a)) {
229*56a34939Shaad if (a == TARGET_TRANS)
230*56a34939Shaad dfa->final = m->nodes[i]->final;
231*56a34939Shaad
232*56a34939Shaad dm_bit_union(bs, bs,
233*56a34939Shaad m->nodes[i]->followpos);
234*56a34939Shaad set_bits = 1;
235*56a34939Shaad }
236*56a34939Shaad }
237*56a34939Shaad
238*56a34939Shaad if (set_bits) {
239*56a34939Shaad ldfa = ttree_lookup(tt, bs + 1);
240*56a34939Shaad if (!ldfa) {
241*56a34939Shaad /* push */
242*56a34939Shaad ldfa = _create_dfa_state(m->mem);
243*56a34939Shaad ttree_insert(tt, bs + 1, ldfa);
244*56a34939Shaad tmp =
245*56a34939Shaad _create_state_queue(m->scratch,
246*56a34939Shaad ldfa, bs);
247*56a34939Shaad if (!h)
248*56a34939Shaad h = t = tmp;
249*56a34939Shaad else {
250*56a34939Shaad t->next = tmp;
251*56a34939Shaad t = tmp;
252*56a34939Shaad }
253*56a34939Shaad
254*56a34939Shaad count++;
255*56a34939Shaad }
256*56a34939Shaad
257*56a34939Shaad dfa->lookup[a] = ldfa;
258*56a34939Shaad set_bits = 0;
259*56a34939Shaad dm_bit_clear_all(bs);
260*56a34939Shaad }
261*56a34939Shaad }
262*56a34939Shaad }
263*56a34939Shaad
264*56a34939Shaad log_debug("Matcher built with %d dfa states", count);
265*56a34939Shaad return 1;
266*56a34939Shaad }
267*56a34939Shaad
dm_regex_create(struct dm_pool * mem,const char ** patterns,unsigned num_patterns)268*56a34939Shaad struct dm_regex *dm_regex_create(struct dm_pool *mem, const char **patterns,
269*56a34939Shaad unsigned num_patterns)
270*56a34939Shaad {
271*56a34939Shaad char *all, *ptr;
272*56a34939Shaad int i;
273*56a34939Shaad size_t len = 0;
274*56a34939Shaad struct rx_node *rx;
275*56a34939Shaad struct dm_pool *scratch = dm_pool_create("regex matcher", 10 * 1024);
276*56a34939Shaad struct dm_regex *m;
277*56a34939Shaad
278*56a34939Shaad if (!scratch)
279*56a34939Shaad return_NULL;
280*56a34939Shaad
281*56a34939Shaad if (!(m = dm_pool_alloc(mem, sizeof(*m)))) {
282*56a34939Shaad dm_pool_destroy(scratch);
283*56a34939Shaad return_NULL;
284*56a34939Shaad }
285*56a34939Shaad
286*56a34939Shaad memset(m, 0, sizeof(*m));
287*56a34939Shaad
288*56a34939Shaad /* join the regexps together, delimiting with zero */
289*56a34939Shaad for (i = 0; i < num_patterns; i++)
290*56a34939Shaad len += strlen(patterns[i]) + 8;
291*56a34939Shaad
292*56a34939Shaad ptr = all = dm_pool_alloc(scratch, len + 1);
293*56a34939Shaad
294*56a34939Shaad if (!all)
295*56a34939Shaad goto_bad;
296*56a34939Shaad
297*56a34939Shaad for (i = 0; i < num_patterns; i++) {
298*56a34939Shaad ptr += sprintf(ptr, "(.*(%s)%c)", patterns[i], TARGET_TRANS);
299*56a34939Shaad if (i < (num_patterns - 1))
300*56a34939Shaad *ptr++ = '|';
301*56a34939Shaad }
302*56a34939Shaad
303*56a34939Shaad /* parse this expression */
304*56a34939Shaad if (!(rx = rx_parse_tok(scratch, all, ptr))) {
305*56a34939Shaad log_error("Couldn't parse regex");
306*56a34939Shaad goto bad;
307*56a34939Shaad }
308*56a34939Shaad
309*56a34939Shaad m->mem = mem;
310*56a34939Shaad m->scratch = scratch;
311*56a34939Shaad m->num_nodes = _count_nodes(rx);
312*56a34939Shaad m->nodes = dm_pool_alloc(scratch, sizeof(*m->nodes) * m->num_nodes);
313*56a34939Shaad
314*56a34939Shaad if (!m->nodes)
315*56a34939Shaad goto_bad;
316*56a34939Shaad
317*56a34939Shaad _fill_table(m, rx);
318*56a34939Shaad _create_bitsets(m);
319*56a34939Shaad _calc_functions(m);
320*56a34939Shaad _calc_states(m, rx);
321*56a34939Shaad dm_pool_destroy(scratch);
322*56a34939Shaad m->scratch = NULL;
323*56a34939Shaad
324*56a34939Shaad return m;
325*56a34939Shaad
326*56a34939Shaad bad:
327*56a34939Shaad dm_pool_destroy(scratch);
328*56a34939Shaad dm_pool_free(mem, m);
329*56a34939Shaad return NULL;
330*56a34939Shaad }
331*56a34939Shaad
_step_matcher(int c,struct dfa_state * cs,int * r)332*56a34939Shaad static struct dfa_state *_step_matcher(int c, struct dfa_state *cs, int *r)
333*56a34939Shaad {
334*56a34939Shaad if (!(cs = cs->lookup[(unsigned char) c]))
335*56a34939Shaad return NULL;
336*56a34939Shaad
337*56a34939Shaad if (cs->final && (cs->final > *r))
338*56a34939Shaad *r = cs->final;
339*56a34939Shaad
340*56a34939Shaad return cs;
341*56a34939Shaad }
342*56a34939Shaad
dm_regex_match(struct dm_regex * regex,const char * s)343*56a34939Shaad int dm_regex_match(struct dm_regex *regex, const char *s)
344*56a34939Shaad {
345*56a34939Shaad struct dfa_state *cs = regex->start;
346*56a34939Shaad int r = 0;
347*56a34939Shaad
348*56a34939Shaad if (!(cs = _step_matcher(HAT_CHAR, cs, &r)))
349*56a34939Shaad goto out;
350*56a34939Shaad
351*56a34939Shaad for (; *s; s++)
352*56a34939Shaad if (!(cs = _step_matcher(*s, cs, &r)))
353*56a34939Shaad goto out;
354*56a34939Shaad
355*56a34939Shaad _step_matcher(DOLLAR_CHAR, cs, &r);
356*56a34939Shaad
357*56a34939Shaad out:
358*56a34939Shaad /* subtract 1 to get back to zero index */
359*56a34939Shaad return r - 1;
360*56a34939Shaad }
361