1*86d7f5d3SJohn Marino /* $NetBSD: matcher.c,v 1.1.1.1 2008/12/22 00:18:36 haad Exp $ */
2*86d7f5d3SJohn Marino
3*86d7f5d3SJohn Marino /*
4*86d7f5d3SJohn Marino * Copyright (C) 2001-2004 Sistina Software, Inc. All rights reserved.
5*86d7f5d3SJohn Marino * Copyright (C) 2004-2007 Red Hat, Inc. All rights reserved.
6*86d7f5d3SJohn Marino *
7*86d7f5d3SJohn Marino * This file is part of the device-mapper userspace tools.
8*86d7f5d3SJohn Marino *
9*86d7f5d3SJohn Marino * This copyrighted material is made available to anyone wishing to use,
10*86d7f5d3SJohn Marino * modify, copy, or redistribute it subject to the terms and conditions
11*86d7f5d3SJohn Marino * of the GNU Lesser General Public License v.2.1.
12*86d7f5d3SJohn Marino *
13*86d7f5d3SJohn Marino * You should have received a copy of the GNU Lesser General Public License
14*86d7f5d3SJohn Marino * along with this program; if not, write to the Free Software Foundation,
15*86d7f5d3SJohn Marino * Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
16*86d7f5d3SJohn Marino */
17*86d7f5d3SJohn Marino
18*86d7f5d3SJohn Marino #include "dmlib.h"
19*86d7f5d3SJohn Marino #include "parse_rx.h"
20*86d7f5d3SJohn Marino #include "ttree.h"
21*86d7f5d3SJohn Marino #include "assert.h"
22*86d7f5d3SJohn Marino
23*86d7f5d3SJohn Marino struct dfa_state {
24*86d7f5d3SJohn Marino int final;
25*86d7f5d3SJohn Marino struct dfa_state *lookup[256];
26*86d7f5d3SJohn Marino };
27*86d7f5d3SJohn Marino
28*86d7f5d3SJohn Marino struct state_queue {
29*86d7f5d3SJohn Marino struct dfa_state *s;
30*86d7f5d3SJohn Marino dm_bitset_t bits;
31*86d7f5d3SJohn Marino struct state_queue *next;
32*86d7f5d3SJohn Marino };
33*86d7f5d3SJohn Marino
34*86d7f5d3SJohn Marino struct dm_regex { /* Instance variables for the lexer */
35*86d7f5d3SJohn Marino struct dfa_state *start;
36*86d7f5d3SJohn Marino unsigned num_nodes;
37*86d7f5d3SJohn Marino int nodes_entered;
38*86d7f5d3SJohn Marino struct rx_node **nodes;
39*86d7f5d3SJohn Marino struct dm_pool *scratch, *mem;
40*86d7f5d3SJohn Marino };
41*86d7f5d3SJohn Marino
42*86d7f5d3SJohn Marino #define TARGET_TRANS '\0'
43*86d7f5d3SJohn Marino
_count_nodes(struct rx_node * rx)44*86d7f5d3SJohn Marino static int _count_nodes(struct rx_node *rx)
45*86d7f5d3SJohn Marino {
46*86d7f5d3SJohn Marino int r = 1;
47*86d7f5d3SJohn Marino
48*86d7f5d3SJohn Marino if (rx->left)
49*86d7f5d3SJohn Marino r += _count_nodes(rx->left);
50*86d7f5d3SJohn Marino
51*86d7f5d3SJohn Marino if (rx->right)
52*86d7f5d3SJohn Marino r += _count_nodes(rx->right);
53*86d7f5d3SJohn Marino
54*86d7f5d3SJohn Marino return r;
55*86d7f5d3SJohn Marino }
56*86d7f5d3SJohn Marino
_fill_table(struct dm_regex * m,struct rx_node * rx)57*86d7f5d3SJohn Marino static void _fill_table(struct dm_regex *m, struct rx_node *rx)
58*86d7f5d3SJohn Marino {
59*86d7f5d3SJohn Marino assert((rx->type != OR) || (rx->left && rx->right));
60*86d7f5d3SJohn Marino
61*86d7f5d3SJohn Marino if (rx->left)
62*86d7f5d3SJohn Marino _fill_table(m, rx->left);
63*86d7f5d3SJohn Marino
64*86d7f5d3SJohn Marino if (rx->right)
65*86d7f5d3SJohn Marino _fill_table(m, rx->right);
66*86d7f5d3SJohn Marino
67*86d7f5d3SJohn Marino m->nodes[m->nodes_entered++] = rx;
68*86d7f5d3SJohn Marino }
69*86d7f5d3SJohn Marino
_create_bitsets(struct dm_regex * m)70*86d7f5d3SJohn Marino static void _create_bitsets(struct dm_regex *m)
71*86d7f5d3SJohn Marino {
72*86d7f5d3SJohn Marino int i;
73*86d7f5d3SJohn Marino
74*86d7f5d3SJohn Marino for (i = 0; i < m->num_nodes; i++) {
75*86d7f5d3SJohn Marino struct rx_node *n = m->nodes[i];
76*86d7f5d3SJohn Marino n->firstpos = dm_bitset_create(m->scratch, m->num_nodes);
77*86d7f5d3SJohn Marino n->lastpos = dm_bitset_create(m->scratch, m->num_nodes);
78*86d7f5d3SJohn Marino n->followpos = dm_bitset_create(m->scratch, m->num_nodes);
79*86d7f5d3SJohn Marino }
80*86d7f5d3SJohn Marino }
81*86d7f5d3SJohn Marino
_calc_functions(struct dm_regex * m)82*86d7f5d3SJohn Marino static void _calc_functions(struct dm_regex *m)
83*86d7f5d3SJohn Marino {
84*86d7f5d3SJohn Marino int i, j, final = 1;
85*86d7f5d3SJohn Marino struct rx_node *rx, *c1, *c2;
86*86d7f5d3SJohn Marino
87*86d7f5d3SJohn Marino for (i = 0; i < m->num_nodes; i++) {
88*86d7f5d3SJohn Marino rx = m->nodes[i];
89*86d7f5d3SJohn Marino c1 = rx->left;
90*86d7f5d3SJohn Marino c2 = rx->right;
91*86d7f5d3SJohn Marino
92*86d7f5d3SJohn Marino if (dm_bit(rx->charset, TARGET_TRANS))
93*86d7f5d3SJohn Marino rx->final = final++;
94*86d7f5d3SJohn Marino
95*86d7f5d3SJohn Marino switch (rx->type) {
96*86d7f5d3SJohn Marino case CAT:
97*86d7f5d3SJohn Marino if (c1->nullable)
98*86d7f5d3SJohn Marino dm_bit_union(rx->firstpos,
99*86d7f5d3SJohn Marino c1->firstpos, c2->firstpos);
100*86d7f5d3SJohn Marino else
101*86d7f5d3SJohn Marino dm_bit_copy(rx->firstpos, c1->firstpos);
102*86d7f5d3SJohn Marino
103*86d7f5d3SJohn Marino if (c2->nullable)
104*86d7f5d3SJohn Marino dm_bit_union(rx->lastpos,
105*86d7f5d3SJohn Marino c1->lastpos, c2->lastpos);
106*86d7f5d3SJohn Marino else
107*86d7f5d3SJohn Marino dm_bit_copy(rx->lastpos, c2->lastpos);
108*86d7f5d3SJohn Marino
109*86d7f5d3SJohn Marino rx->nullable = c1->nullable && c2->nullable;
110*86d7f5d3SJohn Marino break;
111*86d7f5d3SJohn Marino
112*86d7f5d3SJohn Marino case PLUS:
113*86d7f5d3SJohn Marino dm_bit_copy(rx->firstpos, c1->firstpos);
114*86d7f5d3SJohn Marino dm_bit_copy(rx->lastpos, c1->lastpos);
115*86d7f5d3SJohn Marino rx->nullable = c1->nullable;
116*86d7f5d3SJohn Marino break;
117*86d7f5d3SJohn Marino
118*86d7f5d3SJohn Marino case OR:
119*86d7f5d3SJohn Marino dm_bit_union(rx->firstpos, c1->firstpos, c2->firstpos);
120*86d7f5d3SJohn Marino dm_bit_union(rx->lastpos, c1->lastpos, c2->lastpos);
121*86d7f5d3SJohn Marino rx->nullable = c1->nullable || c2->nullable;
122*86d7f5d3SJohn Marino break;
123*86d7f5d3SJohn Marino
124*86d7f5d3SJohn Marino case QUEST:
125*86d7f5d3SJohn Marino case STAR:
126*86d7f5d3SJohn Marino dm_bit_copy(rx->firstpos, c1->firstpos);
127*86d7f5d3SJohn Marino dm_bit_copy(rx->lastpos, c1->lastpos);
128*86d7f5d3SJohn Marino rx->nullable = 1;
129*86d7f5d3SJohn Marino break;
130*86d7f5d3SJohn Marino
131*86d7f5d3SJohn Marino case CHARSET:
132*86d7f5d3SJohn Marino dm_bit_set(rx->firstpos, i);
133*86d7f5d3SJohn Marino dm_bit_set(rx->lastpos, i);
134*86d7f5d3SJohn Marino rx->nullable = 0;
135*86d7f5d3SJohn Marino break;
136*86d7f5d3SJohn Marino
137*86d7f5d3SJohn Marino default:
138*86d7f5d3SJohn Marino log_error("Internal error: Unknown calc node type");
139*86d7f5d3SJohn Marino }
140*86d7f5d3SJohn Marino
141*86d7f5d3SJohn Marino /*
142*86d7f5d3SJohn Marino * followpos has it's own switch
143*86d7f5d3SJohn Marino * because PLUS and STAR do the
144*86d7f5d3SJohn Marino * same thing.
145*86d7f5d3SJohn Marino */
146*86d7f5d3SJohn Marino switch (rx->type) {
147*86d7f5d3SJohn Marino case CAT:
148*86d7f5d3SJohn Marino for (j = 0; j < m->num_nodes; j++) {
149*86d7f5d3SJohn Marino if (dm_bit(c1->lastpos, j)) {
150*86d7f5d3SJohn Marino struct rx_node *n = m->nodes[j];
151*86d7f5d3SJohn Marino dm_bit_union(n->followpos,
152*86d7f5d3SJohn Marino n->followpos, c2->firstpos);
153*86d7f5d3SJohn Marino }
154*86d7f5d3SJohn Marino }
155*86d7f5d3SJohn Marino break;
156*86d7f5d3SJohn Marino
157*86d7f5d3SJohn Marino case PLUS:
158*86d7f5d3SJohn Marino case STAR:
159*86d7f5d3SJohn Marino for (j = 0; j < m->num_nodes; j++) {
160*86d7f5d3SJohn Marino if (dm_bit(rx->lastpos, j)) {
161*86d7f5d3SJohn Marino struct rx_node *n = m->nodes[j];
162*86d7f5d3SJohn Marino dm_bit_union(n->followpos,
163*86d7f5d3SJohn Marino n->followpos, rx->firstpos);
164*86d7f5d3SJohn Marino }
165*86d7f5d3SJohn Marino }
166*86d7f5d3SJohn Marino break;
167*86d7f5d3SJohn Marino }
168*86d7f5d3SJohn Marino }
169*86d7f5d3SJohn Marino }
170*86d7f5d3SJohn Marino
_create_dfa_state(struct dm_pool * mem)171*86d7f5d3SJohn Marino static struct dfa_state *_create_dfa_state(struct dm_pool *mem)
172*86d7f5d3SJohn Marino {
173*86d7f5d3SJohn Marino return dm_pool_zalloc(mem, sizeof(struct dfa_state));
174*86d7f5d3SJohn Marino }
175*86d7f5d3SJohn Marino
_create_state_queue(struct dm_pool * mem,struct dfa_state * dfa,dm_bitset_t bits)176*86d7f5d3SJohn Marino static struct state_queue *_create_state_queue(struct dm_pool *mem,
177*86d7f5d3SJohn Marino struct dfa_state *dfa,
178*86d7f5d3SJohn Marino dm_bitset_t bits)
179*86d7f5d3SJohn Marino {
180*86d7f5d3SJohn Marino struct state_queue *r = dm_pool_alloc(mem, sizeof(*r));
181*86d7f5d3SJohn Marino
182*86d7f5d3SJohn Marino if (!r) {
183*86d7f5d3SJohn Marino stack;
184*86d7f5d3SJohn Marino return NULL;
185*86d7f5d3SJohn Marino }
186*86d7f5d3SJohn Marino
187*86d7f5d3SJohn Marino r->s = dfa;
188*86d7f5d3SJohn Marino r->bits = dm_bitset_create(mem, bits[0]); /* first element is the size */
189*86d7f5d3SJohn Marino dm_bit_copy(r->bits, bits);
190*86d7f5d3SJohn Marino r->next = 0;
191*86d7f5d3SJohn Marino return r;
192*86d7f5d3SJohn Marino }
193*86d7f5d3SJohn Marino
_calc_states(struct dm_regex * m,struct rx_node * rx)194*86d7f5d3SJohn Marino static int _calc_states(struct dm_regex *m, struct rx_node *rx)
195*86d7f5d3SJohn Marino {
196*86d7f5d3SJohn Marino unsigned iwidth = (m->num_nodes / DM_BITS_PER_INT) + 1;
197*86d7f5d3SJohn Marino struct ttree *tt = ttree_create(m->scratch, iwidth);
198*86d7f5d3SJohn Marino struct state_queue *h, *t, *tmp;
199*86d7f5d3SJohn Marino struct dfa_state *dfa, *ldfa;
200*86d7f5d3SJohn Marino int i, a, set_bits = 0, count = 0;
201*86d7f5d3SJohn Marino dm_bitset_t bs, dfa_bits;
202*86d7f5d3SJohn Marino
203*86d7f5d3SJohn Marino if (!tt)
204*86d7f5d3SJohn Marino return_0;
205*86d7f5d3SJohn Marino
206*86d7f5d3SJohn Marino if (!(bs = dm_bitset_create(m->scratch, m->num_nodes)))
207*86d7f5d3SJohn Marino return_0;
208*86d7f5d3SJohn Marino
209*86d7f5d3SJohn Marino /* create first state */
210*86d7f5d3SJohn Marino dfa = _create_dfa_state(m->mem);
211*86d7f5d3SJohn Marino m->start = dfa;
212*86d7f5d3SJohn Marino ttree_insert(tt, rx->firstpos + 1, dfa);
213*86d7f5d3SJohn Marino
214*86d7f5d3SJohn Marino /* prime the queue */
215*86d7f5d3SJohn Marino h = t = _create_state_queue(m->scratch, dfa, rx->firstpos);
216*86d7f5d3SJohn Marino while (h) {
217*86d7f5d3SJohn Marino /* pop state off front of the queue */
218*86d7f5d3SJohn Marino dfa = h->s;
219*86d7f5d3SJohn Marino dfa_bits = h->bits;
220*86d7f5d3SJohn Marino h = h->next;
221*86d7f5d3SJohn Marino
222*86d7f5d3SJohn Marino /* iterate through all the inputs for this state */
223*86d7f5d3SJohn Marino dm_bit_clear_all(bs);
224*86d7f5d3SJohn Marino for (a = 0; a < 256; a++) {
225*86d7f5d3SJohn Marino /* iterate through all the states in firstpos */
226*86d7f5d3SJohn Marino for (i = dm_bit_get_first(dfa_bits);
227*86d7f5d3SJohn Marino i >= 0; i = dm_bit_get_next(dfa_bits, i)) {
228*86d7f5d3SJohn Marino if (dm_bit(m->nodes[i]->charset, a)) {
229*86d7f5d3SJohn Marino if (a == TARGET_TRANS)
230*86d7f5d3SJohn Marino dfa->final = m->nodes[i]->final;
231*86d7f5d3SJohn Marino
232*86d7f5d3SJohn Marino dm_bit_union(bs, bs,
233*86d7f5d3SJohn Marino m->nodes[i]->followpos);
234*86d7f5d3SJohn Marino set_bits = 1;
235*86d7f5d3SJohn Marino }
236*86d7f5d3SJohn Marino }
237*86d7f5d3SJohn Marino
238*86d7f5d3SJohn Marino if (set_bits) {
239*86d7f5d3SJohn Marino ldfa = ttree_lookup(tt, bs + 1);
240*86d7f5d3SJohn Marino if (!ldfa) {
241*86d7f5d3SJohn Marino /* push */
242*86d7f5d3SJohn Marino ldfa = _create_dfa_state(m->mem);
243*86d7f5d3SJohn Marino ttree_insert(tt, bs + 1, ldfa);
244*86d7f5d3SJohn Marino tmp =
245*86d7f5d3SJohn Marino _create_state_queue(m->scratch,
246*86d7f5d3SJohn Marino ldfa, bs);
247*86d7f5d3SJohn Marino if (!h)
248*86d7f5d3SJohn Marino h = t = tmp;
249*86d7f5d3SJohn Marino else {
250*86d7f5d3SJohn Marino t->next = tmp;
251*86d7f5d3SJohn Marino t = tmp;
252*86d7f5d3SJohn Marino }
253*86d7f5d3SJohn Marino
254*86d7f5d3SJohn Marino count++;
255*86d7f5d3SJohn Marino }
256*86d7f5d3SJohn Marino
257*86d7f5d3SJohn Marino dfa->lookup[a] = ldfa;
258*86d7f5d3SJohn Marino set_bits = 0;
259*86d7f5d3SJohn Marino dm_bit_clear_all(bs);
260*86d7f5d3SJohn Marino }
261*86d7f5d3SJohn Marino }
262*86d7f5d3SJohn Marino }
263*86d7f5d3SJohn Marino
264*86d7f5d3SJohn Marino log_debug("Matcher built with %d dfa states", count);
265*86d7f5d3SJohn Marino return 1;
266*86d7f5d3SJohn Marino }
267*86d7f5d3SJohn Marino
dm_regex_create(struct dm_pool * mem,const char ** patterns,unsigned num_patterns)268*86d7f5d3SJohn Marino struct dm_regex *dm_regex_create(struct dm_pool *mem, const char **patterns,
269*86d7f5d3SJohn Marino unsigned num_patterns)
270*86d7f5d3SJohn Marino {
271*86d7f5d3SJohn Marino char *all, *ptr;
272*86d7f5d3SJohn Marino int i;
273*86d7f5d3SJohn Marino size_t len = 0;
274*86d7f5d3SJohn Marino struct rx_node *rx;
275*86d7f5d3SJohn Marino struct dm_pool *scratch = dm_pool_create("regex matcher", 10 * 1024);
276*86d7f5d3SJohn Marino struct dm_regex *m;
277*86d7f5d3SJohn Marino
278*86d7f5d3SJohn Marino if (!scratch)
279*86d7f5d3SJohn Marino return_NULL;
280*86d7f5d3SJohn Marino
281*86d7f5d3SJohn Marino if (!(m = dm_pool_alloc(mem, sizeof(*m)))) {
282*86d7f5d3SJohn Marino dm_pool_destroy(scratch);
283*86d7f5d3SJohn Marino return_NULL;
284*86d7f5d3SJohn Marino }
285*86d7f5d3SJohn Marino
286*86d7f5d3SJohn Marino memset(m, 0, sizeof(*m));
287*86d7f5d3SJohn Marino
288*86d7f5d3SJohn Marino /* join the regexps together, delimiting with zero */
289*86d7f5d3SJohn Marino for (i = 0; i < num_patterns; i++)
290*86d7f5d3SJohn Marino len += strlen(patterns[i]) + 8;
291*86d7f5d3SJohn Marino
292*86d7f5d3SJohn Marino ptr = all = dm_pool_alloc(scratch, len + 1);
293*86d7f5d3SJohn Marino
294*86d7f5d3SJohn Marino if (!all)
295*86d7f5d3SJohn Marino goto_bad;
296*86d7f5d3SJohn Marino
297*86d7f5d3SJohn Marino for (i = 0; i < num_patterns; i++) {
298*86d7f5d3SJohn Marino ptr += sprintf(ptr, "(.*(%s)%c)", patterns[i], TARGET_TRANS);
299*86d7f5d3SJohn Marino if (i < (num_patterns - 1))
300*86d7f5d3SJohn Marino *ptr++ = '|';
301*86d7f5d3SJohn Marino }
302*86d7f5d3SJohn Marino
303*86d7f5d3SJohn Marino /* parse this expression */
304*86d7f5d3SJohn Marino if (!(rx = rx_parse_tok(scratch, all, ptr))) {
305*86d7f5d3SJohn Marino log_error("Couldn't parse regex");
306*86d7f5d3SJohn Marino goto bad;
307*86d7f5d3SJohn Marino }
308*86d7f5d3SJohn Marino
309*86d7f5d3SJohn Marino m->mem = mem;
310*86d7f5d3SJohn Marino m->scratch = scratch;
311*86d7f5d3SJohn Marino m->num_nodes = _count_nodes(rx);
312*86d7f5d3SJohn Marino m->nodes = dm_pool_alloc(scratch, sizeof(*m->nodes) * m->num_nodes);
313*86d7f5d3SJohn Marino
314*86d7f5d3SJohn Marino if (!m->nodes)
315*86d7f5d3SJohn Marino goto_bad;
316*86d7f5d3SJohn Marino
317*86d7f5d3SJohn Marino _fill_table(m, rx);
318*86d7f5d3SJohn Marino _create_bitsets(m);
319*86d7f5d3SJohn Marino _calc_functions(m);
320*86d7f5d3SJohn Marino _calc_states(m, rx);
321*86d7f5d3SJohn Marino dm_pool_destroy(scratch);
322*86d7f5d3SJohn Marino m->scratch = NULL;
323*86d7f5d3SJohn Marino
324*86d7f5d3SJohn Marino return m;
325*86d7f5d3SJohn Marino
326*86d7f5d3SJohn Marino bad:
327*86d7f5d3SJohn Marino dm_pool_destroy(scratch);
328*86d7f5d3SJohn Marino dm_pool_free(mem, m);
329*86d7f5d3SJohn Marino return NULL;
330*86d7f5d3SJohn Marino }
331*86d7f5d3SJohn Marino
_step_matcher(int c,struct dfa_state * cs,int * r)332*86d7f5d3SJohn Marino static struct dfa_state *_step_matcher(int c, struct dfa_state *cs, int *r)
333*86d7f5d3SJohn Marino {
334*86d7f5d3SJohn Marino if (!(cs = cs->lookup[(unsigned char) c]))
335*86d7f5d3SJohn Marino return NULL;
336*86d7f5d3SJohn Marino
337*86d7f5d3SJohn Marino if (cs->final && (cs->final > *r))
338*86d7f5d3SJohn Marino *r = cs->final;
339*86d7f5d3SJohn Marino
340*86d7f5d3SJohn Marino return cs;
341*86d7f5d3SJohn Marino }
342*86d7f5d3SJohn Marino
dm_regex_match(struct dm_regex * regex,const char * s)343*86d7f5d3SJohn Marino int dm_regex_match(struct dm_regex *regex, const char *s)
344*86d7f5d3SJohn Marino {
345*86d7f5d3SJohn Marino struct dfa_state *cs = regex->start;
346*86d7f5d3SJohn Marino int r = 0;
347*86d7f5d3SJohn Marino
348*86d7f5d3SJohn Marino if (!(cs = _step_matcher(HAT_CHAR, cs, &r)))
349*86d7f5d3SJohn Marino goto out;
350*86d7f5d3SJohn Marino
351*86d7f5d3SJohn Marino for (; *s; s++)
352*86d7f5d3SJohn Marino if (!(cs = _step_matcher(*s, cs, &r)))
353*86d7f5d3SJohn Marino goto out;
354*86d7f5d3SJohn Marino
355*86d7f5d3SJohn Marino _step_matcher(DOLLAR_CHAR, cs, &r);
356*86d7f5d3SJohn Marino
357*86d7f5d3SJohn Marino out:
358*86d7f5d3SJohn Marino /* subtract 1 to get back to zero index */
359*86d7f5d3SJohn Marino return r - 1;
360*86d7f5d3SJohn Marino }
361