1*11ea949fSshm /* $NetBSD: bm.c,v 1.13 2014/06/23 10:43:25 shm Exp $ */
23d2267c8Schristos
37f79e48cScgd /*-
47f79e48cScgd * Copyright (c) 1994
57f79e48cScgd * The Regents of the University of California. All rights reserved.
67f79e48cScgd *
77f79e48cScgd * This code is derived from software contributed to Berkeley by
87f79e48cScgd * Andrew Hume of AT&T Bell Laboratories.
97f79e48cScgd *
107f79e48cScgd * Redistribution and use in source and binary forms, with or without
117f79e48cScgd * modification, are permitted provided that the following conditions
127f79e48cScgd * are met:
137f79e48cScgd * 1. Redistributions of source code must retain the above copyright
147f79e48cScgd * notice, this list of conditions and the following disclaimer.
157f79e48cScgd * 2. Redistributions in binary form must reproduce the above copyright
167f79e48cScgd * notice, this list of conditions and the following disclaimer in the
177f79e48cScgd * documentation and/or other materials provided with the distribution.
18eb7c1594Sagc * 3. Neither the name of the University nor the names of its contributors
197f79e48cScgd * may be used to endorse or promote products derived from this software
207f79e48cScgd * without specific prior written permission.
217f79e48cScgd *
227f79e48cScgd * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
237f79e48cScgd * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
247f79e48cScgd * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
257f79e48cScgd * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
267f79e48cScgd * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
277f79e48cScgd * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
287f79e48cScgd * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
297f79e48cScgd * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
307f79e48cScgd * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
317f79e48cScgd * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
327f79e48cScgd * SUCH DAMAGE.
337f79e48cScgd */
347f79e48cScgd
353d2267c8Schristos #include <sys/cdefs.h>
36fdde8d33Sjtc #if defined(LIBC_SCCS) && !defined(lint)
373d2267c8Schristos #if 0
383d2267c8Schristos static char sccsid[] = "@(#)bm.c 8.7 (Berkeley) 6/21/94";
393d2267c8Schristos #else
40*11ea949fSshm __RCSID("$NetBSD: bm.c,v 1.13 2014/06/23 10:43:25 shm Exp $");
413d2267c8Schristos #endif
42fdde8d33Sjtc #endif /* LIBC_SCCS && not lint */
437f79e48cScgd
4443fa6fe3Sjtc #include "namespace.h"
457f79e48cScgd #include <sys/types.h>
467f79e48cScgd
47b48252f3Slukem #include <assert.h>
487f79e48cScgd #include <bm.h>
497f79e48cScgd #include <errno.h>
507f79e48cScgd #include <stdlib.h>
517f79e48cScgd #include <string.h>
527f79e48cScgd
5343fa6fe3Sjtc #ifdef __weak_alias
5460549036Smycroft __weak_alias(bm_comp,_bm_comp)
5560549036Smycroft __weak_alias(bm_exec,_bm_exec)
5660549036Smycroft __weak_alias(bm_free,_bm_free)
5743fa6fe3Sjtc #endif
5843fa6fe3Sjtc
597f79e48cScgd /*
607f79e48cScgd * XXX
617f79e48cScgd * The default frequency table starts at 99 and counts down. The default
627f79e48cScgd * table should probably be oriented toward text, and will necessarily be
637f79e48cScgd * locale specific. This one is for English. It was derived from the
647f79e48cScgd * OSF/1 and 4.4BSD formatted and unformatted manual pages, and about 100Mb
657f79e48cScgd * of email and random text. Change it if you can find something better.
667f79e48cScgd */
677f79e48cScgd static u_char const freq_def[256] = {
687f79e48cScgd 0, 0, 0, 0, 0, 0, 0, 0,
697f79e48cScgd 0, 77, 90, 0, 0, 0, 0, 0,
707f79e48cScgd 0, 0, 0, 0, 0, 0, 0, 0,
717f79e48cScgd 0, 0, 0, 0, 0, 0, 0, 0,
727f79e48cScgd 99, 28, 42, 27, 16, 14, 20, 51,
737f79e48cScgd 66, 65, 59, 24, 75, 76, 84, 56,
747f79e48cScgd 72, 74, 64, 55, 54, 47, 41, 37,
757f79e48cScgd 44, 61, 70, 43, 23, 53, 49, 22,
767f79e48cScgd 33, 58, 40, 46, 45, 57, 60, 26,
777f79e48cScgd 30, 63, 21, 12, 32, 50, 38, 39,
787f79e48cScgd 34, 11, 48, 67, 62, 35, 15, 29,
797f79e48cScgd 71, 18, 9, 17, 25, 13, 10, 52,
807f79e48cScgd 36, 95, 78, 86, 87, 98, 82, 80,
817f79e48cScgd 88, 94, 19, 68, 89, 83, 93, 96,
827f79e48cScgd 81, 7, 91, 92, 97, 85, 69, 73,
837f79e48cScgd 31, 79, 8, 5, 4, 6, 3, 0,
847f79e48cScgd 0, 0, 0, 0, 0, 0, 0, 0,
857f79e48cScgd 0, 0, 0, 0, 0, 0, 0, 0,
867f79e48cScgd 0, 0, 0, 0, 0, 0, 0, 0,
877f79e48cScgd 0, 0, 0, 0, 0, 0, 0, 0,
887f79e48cScgd 0, 0, 0, 0, 0, 0, 0, 0,
897f79e48cScgd 0, 0, 0, 0, 0, 0, 0, 0,
907f79e48cScgd 0, 0, 0, 0, 0, 0, 0, 0,
917f79e48cScgd 0, 0, 0, 0, 0, 0, 0, 0,
927f79e48cScgd 0, 0, 0, 0, 0, 0, 0, 0,
937f79e48cScgd 0, 0, 0, 0, 0, 0, 0, 0,
947f79e48cScgd 0, 0, 0, 0, 0, 0, 0, 0,
957f79e48cScgd 0, 0, 0, 0, 0, 0, 0, 0,
967f79e48cScgd 0, 0, 0, 0, 0, 0, 0, 0,
977f79e48cScgd 0, 0, 0, 0, 0, 0, 0, 0,
987f79e48cScgd 0, 0, 0, 0, 0, 0, 0, 0,
997f79e48cScgd 0, 0, 0, 0, 0, 0, 0, 0,
1007f79e48cScgd };
1017f79e48cScgd
1027f79e48cScgd bm_pat *
bm_comp(u_char const * pb,size_t len,u_char const * freq)1039e66e6d7Sabs bm_comp(u_char const *pb, size_t len, u_char const *freq)
1047f79e48cScgd {
10574174020Sperry u_char const *pe, *p;
10674174020Sperry size_t *d, r;
10774174020Sperry int j;
1087f79e48cScgd int sv_errno;
1097f79e48cScgd bm_pat *pat;
1107f79e48cScgd
111b48252f3Slukem _DIAGASSERT(pb != NULL);
112b48252f3Slukem /* freq may be NULL */
113b48252f3Slukem
1147f79e48cScgd if (len == 0) {
1157f79e48cScgd errno = EINVAL;
1167f79e48cScgd return (NULL);
1177f79e48cScgd }
1187f79e48cScgd if ((pat = malloc(sizeof(*pat))) == NULL)
1197f79e48cScgd return (NULL);
1207f79e48cScgd pat->pat = NULL;
1217f79e48cScgd pat->delta = NULL;
1227f79e48cScgd
1237f79e48cScgd pat->patlen = len; /* copy pattern */
1247f79e48cScgd if ((pat->pat = malloc(pat->patlen)) == NULL)
1257f79e48cScgd goto mem;
1267f79e48cScgd memcpy(pat->pat, pb, pat->patlen);
1277f79e48cScgd /* get skip delta */
1287f79e48cScgd if ((pat->delta = malloc(256 * sizeof(*d))) == NULL)
1297f79e48cScgd goto mem;
1307f79e48cScgd for (j = 0, d = pat->delta; j < 256; j++)
1317f79e48cScgd d[j] = pat->patlen;
1327f79e48cScgd for (pe = pb + pat->patlen - 1; pb <= pe; pb++)
1337f79e48cScgd d[*pb] = pe - pb;
1347f79e48cScgd
1357f79e48cScgd if (freq == NULL) /* default freq table */
1367f79e48cScgd freq = freq_def;
1377f79e48cScgd r = 0; /* get guard */
1387f79e48cScgd for (pb = pat->pat, pe = pb + pat->patlen - 1; pb < pe; pb++)
1397f79e48cScgd if (freq[*pb] < freq[pat->pat[r]])
1407f79e48cScgd r = pb - pat->pat;
1417f79e48cScgd pat->rarec = pat->pat[r];
1427f79e48cScgd pat->rareoff = r - (pat->patlen - 1);
1437f79e48cScgd
1447f79e48cScgd /* get md2 shift */
1457f79e48cScgd for (pe = pat->pat + pat->patlen - 1, p = pe - 1; p >= pat->pat; p--)
1467f79e48cScgd if (*p == *pe)
1477f79e48cScgd break;
1487f79e48cScgd
1497f79e48cScgd /* *p is first leftward reoccurrence of *pe */
1507f79e48cScgd pat->md2 = pe - p;
1517f79e48cScgd return (pat);
1527f79e48cScgd
1537f79e48cScgd mem: sv_errno = errno;
1547f79e48cScgd bm_free(pat);
1557f79e48cScgd errno = sv_errno;
1567f79e48cScgd return (NULL);
1577f79e48cScgd }
1587f79e48cScgd
1597f79e48cScgd void
bm_free(bm_pat * pat)1609e66e6d7Sabs bm_free(bm_pat *pat)
1617f79e48cScgd {
162b48252f3Slukem
163b48252f3Slukem _DIAGASSERT(pat != NULL);
164b48252f3Slukem
1657f79e48cScgd free(pat->pat);
1667f79e48cScgd free(pat->delta);
1677f79e48cScgd free(pat);
1687f79e48cScgd }
1697f79e48cScgd
1707f79e48cScgd u_char *
bm_exec(bm_pat * pat,u_char * base,size_t n)1719e66e6d7Sabs bm_exec(bm_pat *pat, u_char *base, size_t n)
1727f79e48cScgd {
17374174020Sperry u_char *e, *ep, *p, *q, *s;
17474174020Sperry size_t *d0, k, md2, n1, ro;
17574174020Sperry int rc;
1767f79e48cScgd
177b48252f3Slukem _DIAGASSERT(pat != NULL);
178b48252f3Slukem _DIAGASSERT(base != NULL);
179b48252f3Slukem
1807f79e48cScgd if (n == 0)
1817f79e48cScgd return (NULL);
1827f79e48cScgd
1837f79e48cScgd d0 = pat->delta;
1847f79e48cScgd n1 = pat->patlen - 1;
1857f79e48cScgd md2 = pat->md2;
1867f79e48cScgd ro = pat->rareoff;
1877f79e48cScgd rc = pat->rarec;
1887f79e48cScgd ep = pat->pat + pat->patlen - 1;
1897f79e48cScgd s = base + (pat->patlen - 1);
1907f79e48cScgd
1917f79e48cScgd /* fast loop up to n - 3 * patlen */
1927f79e48cScgd e = base + n - 3 * pat->patlen;
1937f79e48cScgd while (s < e) {
1947f79e48cScgd k = d0[*s]; /* ufast skip loop */
195*11ea949fSshm while (k && s < e) {
1967f79e48cScgd k = d0[*(s += k)];
1977f79e48cScgd k = d0[*(s += k)];
1987f79e48cScgd }
1997f79e48cScgd if (s >= e)
2007f79e48cScgd break;
2017f79e48cScgd if (s[ro] != rc) /* guard test */
2027f79e48cScgd goto mismatch1;
2037f79e48cScgd /* fwd match */
2047f79e48cScgd for (p = pat->pat, q = s - n1; p < ep;)
2057f79e48cScgd if (*q++ != *p++)
2067f79e48cScgd goto mismatch1;
2077f79e48cScgd return (s - n1);
2087f79e48cScgd
2097f79e48cScgd mismatch1: s += md2; /* md2 shift */
2107f79e48cScgd }
2117f79e48cScgd
2127f79e48cScgd /* slow loop up to end */
2137f79e48cScgd e = base + n;
2147f79e48cScgd while (s < e) {
2157f79e48cScgd s += d0[*s]; /* step */
2167f79e48cScgd if (s >= e)
2177f79e48cScgd break;
2187f79e48cScgd if (s[ro] != rc) /* guard test */
2197f79e48cScgd goto mismatch2;
2207f79e48cScgd /* fwd match */
2213c5606e4Scgd for (p = pat->pat, q = s - n1; p <= ep;)
2227f79e48cScgd if (*q++ != *p++)
2237f79e48cScgd goto mismatch2;
2247f79e48cScgd return (s - n1);
2257f79e48cScgd
2267f79e48cScgd mismatch2: s += md2; /* md2 shift */
2277f79e48cScgd }
2287f79e48cScgd
2297f79e48cScgd return (NULL);
2307f79e48cScgd }
231