xref: /netbsd-src/lib/libc/string/bm.c (revision 11ea949f802a9ecce7dabf7e55ac219bebe04fe4)
1*11ea949fSshm /*	$NetBSD: bm.c,v 1.13 2014/06/23 10:43:25 shm Exp $	*/
23d2267c8Schristos 
37f79e48cScgd /*-
47f79e48cScgd  * Copyright (c) 1994
57f79e48cScgd  *	The Regents of the University of California.  All rights reserved.
67f79e48cScgd  *
77f79e48cScgd  * This code is derived from software contributed to Berkeley by
87f79e48cScgd  * Andrew Hume of AT&T Bell Laboratories.
97f79e48cScgd  *
107f79e48cScgd  * Redistribution and use in source and binary forms, with or without
117f79e48cScgd  * modification, are permitted provided that the following conditions
127f79e48cScgd  * are met:
137f79e48cScgd  * 1. Redistributions of source code must retain the above copyright
147f79e48cScgd  *    notice, this list of conditions and the following disclaimer.
157f79e48cScgd  * 2. Redistributions in binary form must reproduce the above copyright
167f79e48cScgd  *    notice, this list of conditions and the following disclaimer in the
177f79e48cScgd  *    documentation and/or other materials provided with the distribution.
18eb7c1594Sagc  * 3. Neither the name of the University nor the names of its contributors
197f79e48cScgd  *    may be used to endorse or promote products derived from this software
207f79e48cScgd  *    without specific prior written permission.
217f79e48cScgd  *
227f79e48cScgd  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
237f79e48cScgd  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
247f79e48cScgd  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
257f79e48cScgd  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
267f79e48cScgd  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
277f79e48cScgd  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
287f79e48cScgd  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
297f79e48cScgd  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
307f79e48cScgd  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
317f79e48cScgd  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
327f79e48cScgd  * SUCH DAMAGE.
337f79e48cScgd  */
347f79e48cScgd 
353d2267c8Schristos #include <sys/cdefs.h>
36fdde8d33Sjtc #if defined(LIBC_SCCS) && !defined(lint)
373d2267c8Schristos #if 0
383d2267c8Schristos static char sccsid[] = "@(#)bm.c	8.7 (Berkeley) 6/21/94";
393d2267c8Schristos #else
40*11ea949fSshm __RCSID("$NetBSD: bm.c,v 1.13 2014/06/23 10:43:25 shm Exp $");
413d2267c8Schristos #endif
42fdde8d33Sjtc #endif /* LIBC_SCCS && not lint */
437f79e48cScgd 
4443fa6fe3Sjtc #include "namespace.h"
457f79e48cScgd #include <sys/types.h>
467f79e48cScgd 
47b48252f3Slukem #include <assert.h>
487f79e48cScgd #include <bm.h>
497f79e48cScgd #include <errno.h>
507f79e48cScgd #include <stdlib.h>
517f79e48cScgd #include <string.h>
527f79e48cScgd 
5343fa6fe3Sjtc #ifdef __weak_alias
5460549036Smycroft __weak_alias(bm_comp,_bm_comp)
5560549036Smycroft __weak_alias(bm_exec,_bm_exec)
5660549036Smycroft __weak_alias(bm_free,_bm_free)
5743fa6fe3Sjtc #endif
5843fa6fe3Sjtc 
597f79e48cScgd /*
607f79e48cScgd  * XXX
617f79e48cScgd  * The default frequency table starts at 99 and counts down.  The default
627f79e48cScgd  * table should probably be oriented toward text, and will necessarily be
637f79e48cScgd  * locale specific.  This one is for English.  It was derived from the
647f79e48cScgd  * OSF/1 and 4.4BSD formatted and unformatted manual pages, and about 100Mb
657f79e48cScgd  * of email and random text.  Change it if you can find something better.
667f79e48cScgd  */
677f79e48cScgd static u_char const freq_def[256] = {
687f79e48cScgd 	 0,  0,  0,  0,  0,  0,  0,  0,
697f79e48cScgd 	 0, 77, 90,  0,  0,  0,  0,  0,
707f79e48cScgd 	 0,  0,  0,  0,  0,  0,  0,  0,
717f79e48cScgd 	 0,  0,  0,  0,  0,  0,  0,  0,
727f79e48cScgd 	99, 28, 42, 27, 16, 14, 20, 51,
737f79e48cScgd 	66, 65, 59, 24, 75, 76, 84, 56,
747f79e48cScgd 	72, 74, 64, 55, 54, 47, 41, 37,
757f79e48cScgd 	44, 61, 70, 43, 23, 53, 49, 22,
767f79e48cScgd 	33, 58, 40, 46, 45, 57, 60, 26,
777f79e48cScgd 	30, 63, 21, 12, 32, 50, 38, 39,
787f79e48cScgd 	34, 11, 48, 67, 62, 35, 15, 29,
797f79e48cScgd 	71, 18,  9, 17, 25, 13, 10, 52,
807f79e48cScgd 	36, 95, 78, 86, 87, 98, 82, 80,
817f79e48cScgd 	88, 94, 19, 68, 89, 83, 93, 96,
827f79e48cScgd 	81,  7, 91, 92, 97, 85, 69, 73,
837f79e48cScgd 	31, 79,  8,  5,  4,  6,  3,  0,
847f79e48cScgd 	 0,  0,  0,  0,  0,  0,  0,  0,
857f79e48cScgd 	 0,  0,  0,  0,  0,  0,  0,  0,
867f79e48cScgd 	 0,  0,  0,  0,  0,  0,  0,  0,
877f79e48cScgd 	 0,  0,  0,  0,  0,  0,  0,  0,
887f79e48cScgd 	 0,  0,  0,  0,  0,  0,  0,  0,
897f79e48cScgd 	 0,  0,  0,  0,  0,  0,  0,  0,
907f79e48cScgd 	 0,  0,  0,  0,  0,  0,  0,  0,
917f79e48cScgd 	 0,  0,  0,  0,  0,  0,  0,  0,
927f79e48cScgd 	 0,  0,  0,  0,  0,  0,  0,  0,
937f79e48cScgd 	 0,  0,  0,  0,  0,  0,  0,  0,
947f79e48cScgd 	 0,  0,  0,  0,  0,  0,  0,  0,
957f79e48cScgd 	 0,  0,  0,  0,  0,  0,  0,  0,
967f79e48cScgd 	 0,  0,  0,  0,  0,  0,  0,  0,
977f79e48cScgd 	 0,  0,  0,  0,  0,  0,  0,  0,
987f79e48cScgd 	 0,  0,  0,  0,  0,  0,  0,  0,
997f79e48cScgd 	 0,  0,  0,  0,  0,  0,  0,  0,
1007f79e48cScgd };
1017f79e48cScgd 
1027f79e48cScgd bm_pat *
bm_comp(u_char const * pb,size_t len,u_char const * freq)1039e66e6d7Sabs bm_comp(u_char const *pb, size_t len, u_char const *freq)
1047f79e48cScgd {
10574174020Sperry 	u_char const *pe, *p;
10674174020Sperry 	size_t *d, r;
10774174020Sperry 	int j;
1087f79e48cScgd 	int sv_errno;
1097f79e48cScgd 	bm_pat *pat;
1107f79e48cScgd 
111b48252f3Slukem 	_DIAGASSERT(pb != NULL);
112b48252f3Slukem 	/* freq may be NULL */
113b48252f3Slukem 
1147f79e48cScgd 	if (len == 0) {
1157f79e48cScgd 		errno = EINVAL;
1167f79e48cScgd 		return (NULL);
1177f79e48cScgd 	}
1187f79e48cScgd 	if ((pat = malloc(sizeof(*pat))) == NULL)
1197f79e48cScgd 		return (NULL);
1207f79e48cScgd 	pat->pat = NULL;
1217f79e48cScgd 	pat->delta = NULL;
1227f79e48cScgd 
1237f79e48cScgd 	pat->patlen = len;			/* copy pattern */
1247f79e48cScgd 	if ((pat->pat = malloc(pat->patlen)) == NULL)
1257f79e48cScgd 		goto mem;
1267f79e48cScgd 	memcpy(pat->pat, pb, pat->patlen);
1277f79e48cScgd 						/* get skip delta */
1287f79e48cScgd 	if ((pat->delta = malloc(256 * sizeof(*d))) == NULL)
1297f79e48cScgd 		goto mem;
1307f79e48cScgd 	for (j = 0, d = pat->delta; j < 256; j++)
1317f79e48cScgd 		d[j] = pat->patlen;
1327f79e48cScgd 	for (pe = pb + pat->patlen - 1; pb <= pe; pb++)
1337f79e48cScgd 		d[*pb] = pe - pb;
1347f79e48cScgd 
1357f79e48cScgd 	if (freq == NULL)			/* default freq table */
1367f79e48cScgd 		freq = freq_def;
1377f79e48cScgd 	r = 0;					/* get guard */
1387f79e48cScgd 	for (pb = pat->pat, pe = pb + pat->patlen - 1; pb < pe; pb++)
1397f79e48cScgd 		if (freq[*pb] < freq[pat->pat[r]])
1407f79e48cScgd 			r = pb - pat->pat;
1417f79e48cScgd 	pat->rarec = pat->pat[r];
1427f79e48cScgd 	pat->rareoff = r - (pat->patlen - 1);
1437f79e48cScgd 
1447f79e48cScgd 						/* get md2 shift */
1457f79e48cScgd 	for (pe = pat->pat + pat->patlen - 1, p = pe - 1; p >= pat->pat; p--)
1467f79e48cScgd 		if (*p == *pe)
1477f79e48cScgd 			break;
1487f79e48cScgd 
1497f79e48cScgd 	/* *p is first leftward reoccurrence of *pe */
1507f79e48cScgd 	pat->md2 = pe - p;
1517f79e48cScgd 	return (pat);
1527f79e48cScgd 
1537f79e48cScgd mem:	sv_errno = errno;
1547f79e48cScgd 	bm_free(pat);
1557f79e48cScgd 	errno = sv_errno;
1567f79e48cScgd 	return (NULL);
1577f79e48cScgd }
1587f79e48cScgd 
1597f79e48cScgd void
bm_free(bm_pat * pat)1609e66e6d7Sabs bm_free(bm_pat *pat)
1617f79e48cScgd {
162b48252f3Slukem 
163b48252f3Slukem 	_DIAGASSERT(pat != NULL);
164b48252f3Slukem 
1657f79e48cScgd 	free(pat->pat);
1667f79e48cScgd 	free(pat->delta);
1677f79e48cScgd 	free(pat);
1687f79e48cScgd }
1697f79e48cScgd 
1707f79e48cScgd u_char *
bm_exec(bm_pat * pat,u_char * base,size_t n)1719e66e6d7Sabs bm_exec(bm_pat *pat, u_char *base, size_t n)
1727f79e48cScgd {
17374174020Sperry 	u_char *e, *ep, *p, *q, *s;
17474174020Sperry 	size_t *d0, k, md2, n1, ro;
17574174020Sperry 	int rc;
1767f79e48cScgd 
177b48252f3Slukem 	_DIAGASSERT(pat != NULL);
178b48252f3Slukem 	_DIAGASSERT(base != NULL);
179b48252f3Slukem 
1807f79e48cScgd 	if (n == 0)
1817f79e48cScgd 		return (NULL);
1827f79e48cScgd 
1837f79e48cScgd 	d0 = pat->delta;
1847f79e48cScgd 	n1 = pat->patlen - 1;
1857f79e48cScgd 	md2 = pat->md2;
1867f79e48cScgd 	ro = pat->rareoff;
1877f79e48cScgd 	rc = pat->rarec;
1887f79e48cScgd 	ep = pat->pat + pat->patlen - 1;
1897f79e48cScgd 	s = base + (pat->patlen - 1);
1907f79e48cScgd 
1917f79e48cScgd 	/* fast loop up to n - 3 * patlen */
1927f79e48cScgd 	e = base + n - 3 * pat->patlen;
1937f79e48cScgd 	while (s < e) {
1947f79e48cScgd 		k = d0[*s];		/* ufast skip loop */
195*11ea949fSshm 		while (k && s < e) {
1967f79e48cScgd 			k = d0[*(s += k)];
1977f79e48cScgd 			k = d0[*(s += k)];
1987f79e48cScgd 		}
1997f79e48cScgd 		if (s >= e)
2007f79e48cScgd 			break;
2017f79e48cScgd 		if (s[ro] != rc)	/* guard test */
2027f79e48cScgd 			goto mismatch1;
2037f79e48cScgd 					/* fwd match */
2047f79e48cScgd 		for (p = pat->pat, q = s - n1; p < ep;)
2057f79e48cScgd 			if (*q++ != *p++)
2067f79e48cScgd 				goto mismatch1;
2077f79e48cScgd 		return (s - n1);
2087f79e48cScgd 
2097f79e48cScgd mismatch1:	s += md2;		/* md2 shift */
2107f79e48cScgd 	}
2117f79e48cScgd 
2127f79e48cScgd 	/* slow loop up to end */
2137f79e48cScgd 	e = base + n;
2147f79e48cScgd 	while (s < e) {
2157f79e48cScgd 		s += d0[*s];		/* step */
2167f79e48cScgd 		if (s >= e)
2177f79e48cScgd 			break;
2187f79e48cScgd 		if (s[ro] != rc)	/* guard test */
2197f79e48cScgd 			goto mismatch2;
2207f79e48cScgd 					/* fwd match */
2213c5606e4Scgd 		for (p = pat->pat, q = s - n1; p <= ep;)
2227f79e48cScgd 			if (*q++ != *p++)
2237f79e48cScgd 				goto mismatch2;
2247f79e48cScgd 		return (s - n1);
2257f79e48cScgd 
2267f79e48cScgd mismatch2:	s += md2;		/* md2 shift */
2277f79e48cScgd 	}
2287f79e48cScgd 
2297f79e48cScgd 	return (NULL);
2307f79e48cScgd }
231