15ea9e707SThomas Veerman /****************************************************************
25ea9e707SThomas Veerman Copyright (C) Lucent Technologies 1997
35ea9e707SThomas Veerman All Rights Reserved
45ea9e707SThomas Veerman
55ea9e707SThomas Veerman Permission to use, copy, modify, and distribute this software and
65ea9e707SThomas Veerman its documentation for any purpose and without fee is hereby
75ea9e707SThomas Veerman granted, provided that the above copyright notice appear in all
85ea9e707SThomas Veerman copies and that both that the copyright notice and this
95ea9e707SThomas Veerman permission notice and warranty disclaimer appear in supporting
105ea9e707SThomas Veerman documentation, and that the name Lucent Technologies or any of
115ea9e707SThomas Veerman its entities not be used in advertising or publicity pertaining
125ea9e707SThomas Veerman to distribution of the software without specific, written prior
135ea9e707SThomas Veerman permission.
145ea9e707SThomas Veerman
155ea9e707SThomas Veerman LUCENT DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
165ea9e707SThomas Veerman INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS.
175ea9e707SThomas Veerman IN NO EVENT SHALL LUCENT OR ANY OF ITS ENTITIES BE LIABLE FOR ANY
185ea9e707SThomas Veerman SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
195ea9e707SThomas Veerman WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER
205ea9e707SThomas Veerman IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
215ea9e707SThomas Veerman ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF
225ea9e707SThomas Veerman THIS SOFTWARE.
235ea9e707SThomas Veerman ****************************************************************/
245ea9e707SThomas Veerman
255ea9e707SThomas Veerman /* lasciate ogne speranza, voi ch'intrate. */
265ea9e707SThomas Veerman
275ea9e707SThomas Veerman #if HAVE_NBTOOL_CONFIG_H
285ea9e707SThomas Veerman #include "nbtool_config.h"
295ea9e707SThomas Veerman #endif
305ea9e707SThomas Veerman
315ea9e707SThomas Veerman #define DEBUG
325ea9e707SThomas Veerman
335ea9e707SThomas Veerman #include <ctype.h>
345ea9e707SThomas Veerman #include <stdio.h>
355ea9e707SThomas Veerman #include <string.h>
365ea9e707SThomas Veerman #include <stdlib.h>
375ea9e707SThomas Veerman #include <assert.h>
385ea9e707SThomas Veerman #include "awk.h"
395ea9e707SThomas Veerman #include "awkgram.h"
405ea9e707SThomas Veerman
415ea9e707SThomas Veerman #define HAT (NCHARS+2) /* matches ^ in regular expr */
425ea9e707SThomas Veerman /* NCHARS is 2**n */
435ea9e707SThomas Veerman #define MAXLIN 22
445ea9e707SThomas Veerman
455ea9e707SThomas Veerman #define type(v) (v)->nobj /* badly overloaded here */
465ea9e707SThomas Veerman #define info(v) (v)->ntype /* badly overloaded here */
475ea9e707SThomas Veerman #define left(v) (v)->narg[0]
485ea9e707SThomas Veerman #define right(v) (v)->narg[1]
495ea9e707SThomas Veerman #define parent(v) (v)->nnext
505ea9e707SThomas Veerman
515ea9e707SThomas Veerman #define LEAF case CCL: case NCCL: case CHAR: case DOT: case FINAL: case ALL:
525ea9e707SThomas Veerman #define ELEAF case EMPTYRE: /* empty string in regexp */
535ea9e707SThomas Veerman #define UNARY case STAR: case PLUS: case QUEST:
545ea9e707SThomas Veerman
555ea9e707SThomas Veerman /* encoding in tree Nodes:
565ea9e707SThomas Veerman leaf (CCL, NCCL, CHAR, DOT, FINAL, ALL, EMPTYRE):
575ea9e707SThomas Veerman left is index, right contains value or pointer to value
585ea9e707SThomas Veerman unary (STAR, PLUS, QUEST): left is child, right is null
595ea9e707SThomas Veerman binary (CAT, OR): left and right are children
605ea9e707SThomas Veerman parent contains pointer to parent
615ea9e707SThomas Veerman */
625ea9e707SThomas Veerman
635ea9e707SThomas Veerman
645ea9e707SThomas Veerman int *setvec;
655ea9e707SThomas Veerman int *tmpset;
665ea9e707SThomas Veerman int maxsetvec = 0;
675ea9e707SThomas Veerman
685ea9e707SThomas Veerman int rtok; /* next token in current re */
695ea9e707SThomas Veerman int rlxval;
705ea9e707SThomas Veerman static const uschar *rlxstr;
715ea9e707SThomas Veerman static const uschar *prestr; /* current position in current re */
725ea9e707SThomas Veerman static const uschar *lastre; /* origin of last re */
735ea9e707SThomas Veerman
745ea9e707SThomas Veerman static int setcnt;
755ea9e707SThomas Veerman static int poscnt;
765ea9e707SThomas Veerman
775ea9e707SThomas Veerman uschar *patbeg;
785ea9e707SThomas Veerman int patlen;
795ea9e707SThomas Veerman
805ea9e707SThomas Veerman #define NFA 128 /* cache this many dynamic fa's */
815ea9e707SThomas Veerman fa *fatab[NFA];
825ea9e707SThomas Veerman int nfatab = 0; /* entries in fatab */
835ea9e707SThomas Veerman
845ea9e707SThomas Veerman static void
resizesetvec(const char * msg)855ea9e707SThomas Veerman resizesetvec(const char *msg)
865ea9e707SThomas Veerman {
875ea9e707SThomas Veerman if (maxsetvec == 0)
885ea9e707SThomas Veerman maxsetvec = MAXLIN;
895ea9e707SThomas Veerman else
905ea9e707SThomas Veerman maxsetvec *= 4;
915ea9e707SThomas Veerman setvec = realloc(setvec, maxsetvec * sizeof(*setvec));
925ea9e707SThomas Veerman tmpset = realloc(tmpset, maxsetvec * sizeof(*tmpset));
935ea9e707SThomas Veerman if (setvec == 0 || tmpset == 0)
945ea9e707SThomas Veerman overflo(msg);
955ea9e707SThomas Veerman }
965ea9e707SThomas Veerman
975ea9e707SThomas Veerman static void
resize_state(fa * f,int state)985ea9e707SThomas Veerman resize_state(fa *f, int state)
995ea9e707SThomas Veerman {
1005ea9e707SThomas Veerman void *p;
1015ea9e707SThomas Veerman int i, new_count;
1025ea9e707SThomas Veerman
1035ea9e707SThomas Veerman if (++state < f->state_count)
1045ea9e707SThomas Veerman return;
1055ea9e707SThomas Veerman
1065ea9e707SThomas Veerman new_count = state + 10; /* needs to be tuned */
1075ea9e707SThomas Veerman
1085ea9e707SThomas Veerman p = realloc(f->gototab, new_count * sizeof(f->gototab[0]));
1095ea9e707SThomas Veerman if (p == NULL)
1105ea9e707SThomas Veerman goto out;
1115ea9e707SThomas Veerman f->gototab = p;
1125ea9e707SThomas Veerman
1135ea9e707SThomas Veerman p = realloc(f->out, new_count * sizeof(f->out[0]));
1145ea9e707SThomas Veerman if (p == NULL)
1155ea9e707SThomas Veerman goto out;
1165ea9e707SThomas Veerman f->out = p;
1175ea9e707SThomas Veerman
1185ea9e707SThomas Veerman p = realloc(f->posns, new_count * sizeof(f->posns[0]));
1195ea9e707SThomas Veerman if (p == NULL)
1205ea9e707SThomas Veerman goto out;
1215ea9e707SThomas Veerman f->posns = p;
1225ea9e707SThomas Veerman
1235ea9e707SThomas Veerman for (i = f->state_count; i < new_count; ++i) {
1245ea9e707SThomas Veerman f->gototab[i] = calloc(1, NCHARS * sizeof (**f->gototab));
1255ea9e707SThomas Veerman if (f->gototab[i] == NULL)
1265ea9e707SThomas Veerman goto out;
1275ea9e707SThomas Veerman f->out[i] = 0;
1285ea9e707SThomas Veerman f->posns[i] = NULL;
1295ea9e707SThomas Veerman }
1305ea9e707SThomas Veerman f->state_count = new_count;
1315ea9e707SThomas Veerman return;
1325ea9e707SThomas Veerman out:
1335ea9e707SThomas Veerman overflo("out of memory in resize_state");
1345ea9e707SThomas Veerman }
1355ea9e707SThomas Veerman
makedfa(const char * s,int anchor)1365ea9e707SThomas Veerman fa *makedfa(const char *s, int anchor) /* returns dfa for reg expr s */
1375ea9e707SThomas Veerman {
1385ea9e707SThomas Veerman int i, use, nuse;
1395ea9e707SThomas Veerman fa *pfa;
1405ea9e707SThomas Veerman static int now = 1;
1415ea9e707SThomas Veerman
1425ea9e707SThomas Veerman if (setvec == 0) /* first time through any RE */
1435ea9e707SThomas Veerman resizesetvec("out of space initializing makedfa");
1445ea9e707SThomas Veerman
1455ea9e707SThomas Veerman if (compile_time) /* a constant for sure */
1465ea9e707SThomas Veerman return mkdfa(s, anchor);
1475ea9e707SThomas Veerman for (i = 0; i < nfatab; i++) /* is it there already? */
1485ea9e707SThomas Veerman if (fatab[i]->anchor == anchor
1495ea9e707SThomas Veerman && strcmp((const char *) fatab[i]->restr, s) == 0) {
1505ea9e707SThomas Veerman fatab[i]->use = now++;
1515ea9e707SThomas Veerman return fatab[i];
1525ea9e707SThomas Veerman }
1535ea9e707SThomas Veerman pfa = mkdfa(s, anchor);
1545ea9e707SThomas Veerman if (nfatab < NFA) { /* room for another */
1555ea9e707SThomas Veerman fatab[nfatab] = pfa;
1565ea9e707SThomas Veerman fatab[nfatab]->use = now++;
1575ea9e707SThomas Veerman nfatab++;
1585ea9e707SThomas Veerman return pfa;
1595ea9e707SThomas Veerman }
1605ea9e707SThomas Veerman use = fatab[0]->use; /* replace least-recently used */
1615ea9e707SThomas Veerman nuse = 0;
1625ea9e707SThomas Veerman for (i = 1; i < nfatab; i++)
1635ea9e707SThomas Veerman if (fatab[i]->use < use) {
1645ea9e707SThomas Veerman use = fatab[i]->use;
1655ea9e707SThomas Veerman nuse = i;
1665ea9e707SThomas Veerman }
1675ea9e707SThomas Veerman freefa(fatab[nuse]);
1685ea9e707SThomas Veerman fatab[nuse] = pfa;
1695ea9e707SThomas Veerman pfa->use = now++;
1705ea9e707SThomas Veerman return pfa;
1715ea9e707SThomas Veerman }
1725ea9e707SThomas Veerman
mkdfa(const char * s,int anchor)1735ea9e707SThomas Veerman fa *mkdfa(const char *s, int anchor) /* does the real work of making a dfa */
1745ea9e707SThomas Veerman /* anchor = 1 for anchored matches, else 0 */
1755ea9e707SThomas Veerman {
1765ea9e707SThomas Veerman Node *p, *p1;
1775ea9e707SThomas Veerman fa *f;
1785ea9e707SThomas Veerman
1795ea9e707SThomas Veerman p = reparse(s);
1805ea9e707SThomas Veerman p1 = op2(CAT, op2(STAR, op2(ALL, NIL, NIL), NIL), p);
1815ea9e707SThomas Veerman /* put ALL STAR in front of reg. exp. */
1825ea9e707SThomas Veerman p1 = op2(CAT, p1, op2(FINAL, NIL, NIL));
1835ea9e707SThomas Veerman /* put FINAL after reg. exp. */
1845ea9e707SThomas Veerman
1855ea9e707SThomas Veerman poscnt = 0;
1865ea9e707SThomas Veerman penter(p1); /* enter parent pointers and leaf indices */
1875ea9e707SThomas Veerman if ((f = calloc(1, sizeof(*f) + poscnt*sizeof(rrow))) == NULL)
1885ea9e707SThomas Veerman overflo("out of space for fa");
1895ea9e707SThomas Veerman f->accept = poscnt-1; /* penter has computed number of positions in re */
1905ea9e707SThomas Veerman cfoll(f, p1); /* set up follow sets */
1915ea9e707SThomas Veerman freetr(p1);
1925ea9e707SThomas Veerman resize_state(f, 1);
1935ea9e707SThomas Veerman if ((f->posns[0] = calloc(1, *(f->re[0].lfollow)*sizeof(int))) == NULL)
1945ea9e707SThomas Veerman overflo("out of space in makedfa");
1955ea9e707SThomas Veerman if ((f->posns[1] = calloc(1, sizeof(int))) == NULL)
1965ea9e707SThomas Veerman overflo("out of space in makedfa");
1975ea9e707SThomas Veerman *f->posns[1] = 0;
1985ea9e707SThomas Veerman f->initstat = makeinit(f, anchor);
1995ea9e707SThomas Veerman f->anchor = anchor;
2005ea9e707SThomas Veerman f->restr = (uschar *) tostring(s);
2015ea9e707SThomas Veerman return f;
2025ea9e707SThomas Veerman }
2035ea9e707SThomas Veerman
makeinit(fa * f,int anchor)2045ea9e707SThomas Veerman int makeinit(fa *f, int anchor)
2055ea9e707SThomas Veerman {
2065ea9e707SThomas Veerman int i, k;
2075ea9e707SThomas Veerman
2085ea9e707SThomas Veerman resize_state(f, 2);
2095ea9e707SThomas Veerman f->curstat = 2;
2105ea9e707SThomas Veerman f->out[2] = 0;
2115ea9e707SThomas Veerman k = *(f->re[0].lfollow);
2125ea9e707SThomas Veerman xfree(f->posns[2]);
2135ea9e707SThomas Veerman if ((f->posns[2] = calloc(1, (k+1)*sizeof(int))) == NULL)
2145ea9e707SThomas Veerman overflo("out of space in makeinit");
2155ea9e707SThomas Veerman for (i=0; i <= k; i++) {
2165ea9e707SThomas Veerman (f->posns[2])[i] = (f->re[0].lfollow)[i];
2175ea9e707SThomas Veerman }
2185ea9e707SThomas Veerman if ((f->posns[2])[1] == f->accept)
2195ea9e707SThomas Veerman f->out[2] = 1;
2205ea9e707SThomas Veerman for (i=0; i < NCHARS; i++)
2215ea9e707SThomas Veerman f->gototab[2][i] = 0;
2225ea9e707SThomas Veerman f->curstat = cgoto(f, 2, HAT);
2235ea9e707SThomas Veerman if (anchor) {
2245ea9e707SThomas Veerman *f->posns[2] = k-1; /* leave out position 0 */
2255ea9e707SThomas Veerman for (i=0; i < k; i++) {
2265ea9e707SThomas Veerman (f->posns[0])[i] = (f->posns[2])[i];
2275ea9e707SThomas Veerman }
2285ea9e707SThomas Veerman
2295ea9e707SThomas Veerman f->out[0] = f->out[2];
2305ea9e707SThomas Veerman if (f->curstat != 2) {
2315ea9e707SThomas Veerman resize_state(f, f->curstat);
2325ea9e707SThomas Veerman --(*f->posns[f->curstat]);
2335ea9e707SThomas Veerman }
2345ea9e707SThomas Veerman }
2355ea9e707SThomas Veerman return f->curstat;
2365ea9e707SThomas Veerman }
2375ea9e707SThomas Veerman
penter(Node * p)2385ea9e707SThomas Veerman void penter(Node *p) /* set up parent pointers and leaf indices */
2395ea9e707SThomas Veerman {
2405ea9e707SThomas Veerman switch (type(p)) {
2415ea9e707SThomas Veerman ELEAF
2425ea9e707SThomas Veerman LEAF
2435ea9e707SThomas Veerman info(p) = poscnt;
2445ea9e707SThomas Veerman poscnt++;
2455ea9e707SThomas Veerman break;
2465ea9e707SThomas Veerman UNARY
2475ea9e707SThomas Veerman penter(left(p));
2485ea9e707SThomas Veerman parent(left(p)) = p;
2495ea9e707SThomas Veerman break;
2505ea9e707SThomas Veerman case CAT:
2515ea9e707SThomas Veerman case OR:
2525ea9e707SThomas Veerman penter(left(p));
2535ea9e707SThomas Veerman penter(right(p));
2545ea9e707SThomas Veerman parent(left(p)) = p;
2555ea9e707SThomas Veerman parent(right(p)) = p;
2565ea9e707SThomas Veerman break;
2575ea9e707SThomas Veerman default: /* can't happen */
2585ea9e707SThomas Veerman FATAL("can't happen: unknown type %d in penter", type(p));
2595ea9e707SThomas Veerman break;
2605ea9e707SThomas Veerman }
2615ea9e707SThomas Veerman }
2625ea9e707SThomas Veerman
freetr(Node * p)2635ea9e707SThomas Veerman void freetr(Node *p) /* free parse tree */
2645ea9e707SThomas Veerman {
2655ea9e707SThomas Veerman switch (type(p)) {
2665ea9e707SThomas Veerman ELEAF
2675ea9e707SThomas Veerman LEAF
2685ea9e707SThomas Veerman xfree(p);
2695ea9e707SThomas Veerman break;
2705ea9e707SThomas Veerman UNARY
2715ea9e707SThomas Veerman freetr(left(p));
2725ea9e707SThomas Veerman xfree(p);
2735ea9e707SThomas Veerman break;
2745ea9e707SThomas Veerman case CAT:
2755ea9e707SThomas Veerman case OR:
2765ea9e707SThomas Veerman freetr(left(p));
2775ea9e707SThomas Veerman freetr(right(p));
2785ea9e707SThomas Veerman xfree(p);
2795ea9e707SThomas Veerman break;
2805ea9e707SThomas Veerman default: /* can't happen */
2815ea9e707SThomas Veerman FATAL("can't happen: unknown type %d in freetr", type(p));
2825ea9e707SThomas Veerman break;
2835ea9e707SThomas Veerman }
2845ea9e707SThomas Veerman }
2855ea9e707SThomas Veerman
2865ea9e707SThomas Veerman /* in the parsing of regular expressions, metacharacters like . have */
2875ea9e707SThomas Veerman /* to be seen literally; \056 is not a metacharacter. */
2885ea9e707SThomas Veerman
hexstr(const uschar ** pp)2895ea9e707SThomas Veerman int hexstr(const uschar **pp) /* find and eval hex string at pp, return new p */
2905ea9e707SThomas Veerman { /* only pick up one 8-bit byte (2 chars) */
2915ea9e707SThomas Veerman const uschar *p;
2925ea9e707SThomas Veerman int n = 0;
2935ea9e707SThomas Veerman int i;
2945ea9e707SThomas Veerman
2955ea9e707SThomas Veerman for (i = 0, p = *pp; i < 2 && isxdigit(*p); i++, p++) {
2965ea9e707SThomas Veerman if (isdigit(*p))
2975ea9e707SThomas Veerman n = 16 * n + *p - '0';
2985ea9e707SThomas Veerman else if (*p >= 'a' && *p <= 'f')
2995ea9e707SThomas Veerman n = 16 * n + *p - 'a' + 10;
3005ea9e707SThomas Veerman else if (*p >= 'A' && *p <= 'F')
3015ea9e707SThomas Veerman n = 16 * n + *p - 'A' + 10;
3025ea9e707SThomas Veerman }
3035ea9e707SThomas Veerman *pp = p;
3045ea9e707SThomas Veerman return n;
3055ea9e707SThomas Veerman }
3065ea9e707SThomas Veerman
3075ea9e707SThomas Veerman #define isoctdigit(c) ((c) >= '0' && (c) <= '7') /* multiple use of arg */
3085ea9e707SThomas Veerman
quoted(const uschar ** pp)3095ea9e707SThomas Veerman int quoted(const uschar **pp) /* pick up next thing after a \\ */
3105ea9e707SThomas Veerman /* and increment *pp */
3115ea9e707SThomas Veerman {
3125ea9e707SThomas Veerman const uschar *p = *pp;
3135ea9e707SThomas Veerman int c;
3145ea9e707SThomas Veerman
3155ea9e707SThomas Veerman if ((c = *p++) == 't')
3165ea9e707SThomas Veerman c = '\t';
3175ea9e707SThomas Veerman else if (c == 'n')
3185ea9e707SThomas Veerman c = '\n';
3195ea9e707SThomas Veerman else if (c == 'f')
3205ea9e707SThomas Veerman c = '\f';
3215ea9e707SThomas Veerman else if (c == 'r')
3225ea9e707SThomas Veerman c = '\r';
3235ea9e707SThomas Veerman else if (c == 'b')
3245ea9e707SThomas Veerman c = '\b';
3255ea9e707SThomas Veerman else if (c == '\\')
3265ea9e707SThomas Veerman c = '\\';
3275ea9e707SThomas Veerman else if (c == 'x') { /* hexadecimal goo follows */
3285ea9e707SThomas Veerman c = hexstr(&p); /* this adds a null if number is invalid */
3295ea9e707SThomas Veerman } else if (isoctdigit(c)) { /* \d \dd \ddd */
3305ea9e707SThomas Veerman int n = c - '0';
3315ea9e707SThomas Veerman if (isoctdigit(*p)) {
3325ea9e707SThomas Veerman n = 8 * n + *p++ - '0';
3335ea9e707SThomas Veerman if (isoctdigit(*p))
3345ea9e707SThomas Veerman n = 8 * n + *p++ - '0';
3355ea9e707SThomas Veerman }
3365ea9e707SThomas Veerman c = n;
3375ea9e707SThomas Veerman } /* else */
3385ea9e707SThomas Veerman /* c = c; */
3395ea9e707SThomas Veerman *pp = p;
3405ea9e707SThomas Veerman return c;
3415ea9e707SThomas Veerman }
3425ea9e707SThomas Veerman
cclenter(const char * argp)3435ea9e707SThomas Veerman char *cclenter(const char *argp) /* add a character class */
3445ea9e707SThomas Veerman {
3455ea9e707SThomas Veerman int i, c, c2;
3465ea9e707SThomas Veerman const uschar *p = (const uschar *) argp;
3475ea9e707SThomas Veerman const uschar *op;
3485ea9e707SThomas Veerman uschar *bp;
3495ea9e707SThomas Veerman static uschar *buf = 0;
3505ea9e707SThomas Veerman static int bufsz = 100;
3515ea9e707SThomas Veerman
3525ea9e707SThomas Veerman op = p;
3535ea9e707SThomas Veerman if (buf == 0 && (buf = malloc(bufsz)) == NULL)
3545ea9e707SThomas Veerman FATAL("out of space for character class [%.10s...] 1", p);
3555ea9e707SThomas Veerman bp = buf;
3565ea9e707SThomas Veerman for (i = 0; (c = *p++) != 0; ) {
3575ea9e707SThomas Veerman if (c == '\\') {
3585ea9e707SThomas Veerman c = quoted(&p);
3595ea9e707SThomas Veerman } else if (c == '-' && i > 0 && bp[-1] != 0) {
3605ea9e707SThomas Veerman if (*p != 0) {
3615ea9e707SThomas Veerman c = bp[-1];
3625ea9e707SThomas Veerman c2 = *p++;
3635ea9e707SThomas Veerman if (c2 == '\\')
3645ea9e707SThomas Veerman c2 = quoted(&p);
3655ea9e707SThomas Veerman if (c > c2) { /* empty; ignore */
3665ea9e707SThomas Veerman bp--;
3675ea9e707SThomas Veerman i--;
3685ea9e707SThomas Veerman continue;
3695ea9e707SThomas Veerman }
3705ea9e707SThomas Veerman while (c < c2) {
3715ea9e707SThomas Veerman if (!adjbuf(&buf, &bufsz, bp-buf+2, 100, &bp, "cclenter1"))
3725ea9e707SThomas Veerman FATAL("out of space for character class [%.10s...] 2", p);
3735ea9e707SThomas Veerman *bp++ = ++c;
3745ea9e707SThomas Veerman i++;
3755ea9e707SThomas Veerman }
3765ea9e707SThomas Veerman continue;
3775ea9e707SThomas Veerman }
3785ea9e707SThomas Veerman }
3795ea9e707SThomas Veerman if (!adjbuf(&buf, &bufsz, bp-buf+2, 100, &bp, "cclenter2"))
3805ea9e707SThomas Veerman FATAL("out of space for character class [%.10s...] 3", p);
3815ea9e707SThomas Veerman *bp++ = c;
3825ea9e707SThomas Veerman i++;
3835ea9e707SThomas Veerman }
3845ea9e707SThomas Veerman *bp = 0;
3855ea9e707SThomas Veerman dprintf( ("cclenter: in = |%s|, out = |%s|\n", op, buf) );
3865ea9e707SThomas Veerman free(__UNCONST(op));
3875ea9e707SThomas Veerman return (char *) tostring((char *) buf);
3885ea9e707SThomas Veerman }
3895ea9e707SThomas Veerman
overflo(const char * s)3905ea9e707SThomas Veerman void overflo(const char *s)
3915ea9e707SThomas Veerman {
3925ea9e707SThomas Veerman FATAL("regular expression too big: %.30s...", s);
3935ea9e707SThomas Veerman }
3945ea9e707SThomas Veerman
cfoll(fa * f,Node * v)3955ea9e707SThomas Veerman void cfoll(fa *f, Node *v) /* enter follow set of each leaf of vertex v into lfollow[leaf] */
3965ea9e707SThomas Veerman {
3975ea9e707SThomas Veerman int i;
3985ea9e707SThomas Veerman int *p;
3995ea9e707SThomas Veerman
4005ea9e707SThomas Veerman switch (type(v)) {
4015ea9e707SThomas Veerman ELEAF
4025ea9e707SThomas Veerman LEAF
4035ea9e707SThomas Veerman f->re[info(v)].ltype = type(v);
4045ea9e707SThomas Veerman f->re[info(v)].lval.np = right(v);
4055ea9e707SThomas Veerman while (f->accept >= maxsetvec) /* guessing here! */
4065ea9e707SThomas Veerman resizesetvec("out of space in cfoll()");
4075ea9e707SThomas Veerman for (i = 0; i <= f->accept; i++)
4085ea9e707SThomas Veerman setvec[i] = 0;
4095ea9e707SThomas Veerman setcnt = 0;
4105ea9e707SThomas Veerman follow(v); /* computes setvec and setcnt */
4115ea9e707SThomas Veerman if ((p = calloc(1, (setcnt+1)*sizeof(int))) == NULL)
4125ea9e707SThomas Veerman overflo("out of space building follow set");
4135ea9e707SThomas Veerman f->re[info(v)].lfollow = p;
4145ea9e707SThomas Veerman *p = setcnt;
4155ea9e707SThomas Veerman for (i = f->accept; i >= 0; i--)
4165ea9e707SThomas Veerman if (setvec[i] == 1)
4175ea9e707SThomas Veerman *++p = i;
4185ea9e707SThomas Veerman break;
4195ea9e707SThomas Veerman UNARY
4205ea9e707SThomas Veerman cfoll(f,left(v));
4215ea9e707SThomas Veerman break;
4225ea9e707SThomas Veerman case CAT:
4235ea9e707SThomas Veerman case OR:
4245ea9e707SThomas Veerman cfoll(f,left(v));
4255ea9e707SThomas Veerman cfoll(f,right(v));
4265ea9e707SThomas Veerman break;
4275ea9e707SThomas Veerman default: /* can't happen */
4285ea9e707SThomas Veerman FATAL("can't happen: unknown type %d in cfoll", type(v));
4295ea9e707SThomas Veerman }
4305ea9e707SThomas Veerman }
4315ea9e707SThomas Veerman
first(Node * p)4325ea9e707SThomas Veerman int first(Node *p) /* collects initially active leaves of p into setvec */
4335ea9e707SThomas Veerman /* returns 0 if p matches empty string */
4345ea9e707SThomas Veerman {
4355ea9e707SThomas Veerman int b, lp;
4365ea9e707SThomas Veerman
4375ea9e707SThomas Veerman switch (type(p)) {
4385ea9e707SThomas Veerman ELEAF
4395ea9e707SThomas Veerman LEAF
4405ea9e707SThomas Veerman lp = info(p); /* look for high-water mark of subscripts */
4415ea9e707SThomas Veerman while (setcnt >= maxsetvec || lp >= maxsetvec) /* guessing here! */
4425ea9e707SThomas Veerman resizesetvec("out of space in first()");
4435ea9e707SThomas Veerman if (type(p) == EMPTYRE) {
4445ea9e707SThomas Veerman setvec[lp] = 0;
4455ea9e707SThomas Veerman return(0);
4465ea9e707SThomas Veerman }
4475ea9e707SThomas Veerman if (setvec[lp] != 1) {
4485ea9e707SThomas Veerman setvec[lp] = 1;
4495ea9e707SThomas Veerman setcnt++;
4505ea9e707SThomas Veerman }
4515ea9e707SThomas Veerman if (type(p) == CCL && (*(char *) right(p)) == '\0')
4525ea9e707SThomas Veerman return(0); /* empty CCL */
4535ea9e707SThomas Veerman else return(1);
4545ea9e707SThomas Veerman case PLUS:
4555ea9e707SThomas Veerman if (first(left(p)) == 0) return(0);
4565ea9e707SThomas Veerman return(1);
4575ea9e707SThomas Veerman case STAR:
4585ea9e707SThomas Veerman case QUEST:
4595ea9e707SThomas Veerman first(left(p));
4605ea9e707SThomas Veerman return(0);
4615ea9e707SThomas Veerman case CAT:
4625ea9e707SThomas Veerman if (first(left(p)) == 0 && first(right(p)) == 0) return(0);
4635ea9e707SThomas Veerman return(1);
4645ea9e707SThomas Veerman case OR:
4655ea9e707SThomas Veerman b = first(right(p));
4665ea9e707SThomas Veerman if (first(left(p)) == 0 || b == 0) return(0);
4675ea9e707SThomas Veerman return(1);
4685ea9e707SThomas Veerman }
4695ea9e707SThomas Veerman FATAL("can't happen: unknown type %d in first", type(p)); /* can't happen */
4705ea9e707SThomas Veerman return(-1);
4715ea9e707SThomas Veerman }
4725ea9e707SThomas Veerman
follow(Node * v)4735ea9e707SThomas Veerman void follow(Node *v) /* collects leaves that can follow v into setvec */
4745ea9e707SThomas Veerman {
4755ea9e707SThomas Veerman Node *p;
4765ea9e707SThomas Veerman
4775ea9e707SThomas Veerman if (type(v) == FINAL)
4785ea9e707SThomas Veerman return;
4795ea9e707SThomas Veerman p = parent(v);
4805ea9e707SThomas Veerman switch (type(p)) {
4815ea9e707SThomas Veerman case STAR:
4825ea9e707SThomas Veerman case PLUS:
4835ea9e707SThomas Veerman first(v);
4845ea9e707SThomas Veerman follow(p);
4855ea9e707SThomas Veerman return;
4865ea9e707SThomas Veerman
4875ea9e707SThomas Veerman case OR:
4885ea9e707SThomas Veerman case QUEST:
4895ea9e707SThomas Veerman follow(p);
4905ea9e707SThomas Veerman return;
4915ea9e707SThomas Veerman
4925ea9e707SThomas Veerman case CAT:
4935ea9e707SThomas Veerman if (v == left(p)) { /* v is left child of p */
4945ea9e707SThomas Veerman if (first(right(p)) == 0) {
4955ea9e707SThomas Veerman follow(p);
4965ea9e707SThomas Veerman return;
4975ea9e707SThomas Veerman }
4985ea9e707SThomas Veerman } else /* v is right child */
4995ea9e707SThomas Veerman follow(p);
5005ea9e707SThomas Veerman return;
5015ea9e707SThomas Veerman }
5025ea9e707SThomas Veerman }
5035ea9e707SThomas Veerman
member(int c,const char * sarg)5045ea9e707SThomas Veerman int member(int c, const char *sarg) /* is c in s? */
5055ea9e707SThomas Veerman {
5065ea9e707SThomas Veerman const uschar *s = (const uschar *) sarg;
5075ea9e707SThomas Veerman
5085ea9e707SThomas Veerman while (*s)
5095ea9e707SThomas Veerman if (c == *s++)
5105ea9e707SThomas Veerman return(1);
5115ea9e707SThomas Veerman return(0);
5125ea9e707SThomas Veerman }
5135ea9e707SThomas Veerman
match(fa * f,const char * p0)5145ea9e707SThomas Veerman int match(fa *f, const char *p0) /* shortest match ? */
5155ea9e707SThomas Veerman {
5165ea9e707SThomas Veerman int s, ns;
5175ea9e707SThomas Veerman const uschar *p = (const uschar *) p0;
5185ea9e707SThomas Veerman
5195ea9e707SThomas Veerman s = f->initstat;
5205ea9e707SThomas Veerman assert (s < f->state_count);
5215ea9e707SThomas Veerman
5225ea9e707SThomas Veerman if (f->out[s])
5235ea9e707SThomas Veerman return(1);
5245ea9e707SThomas Veerman do {
5255ea9e707SThomas Veerman /* assert(*p < NCHARS); */
5265ea9e707SThomas Veerman if ((ns = f->gototab[s][*p]) != 0)
5275ea9e707SThomas Veerman s = ns;
5285ea9e707SThomas Veerman else
5295ea9e707SThomas Veerman s = cgoto(f, s, *p);
5305ea9e707SThomas Veerman
5315ea9e707SThomas Veerman assert (s < f->state_count);
5325ea9e707SThomas Veerman
5335ea9e707SThomas Veerman if (f->out[s])
5345ea9e707SThomas Veerman return(1);
5355ea9e707SThomas Veerman } while (*p++ != 0);
5365ea9e707SThomas Veerman return(0);
5375ea9e707SThomas Veerman }
5385ea9e707SThomas Veerman
pmatch(fa * f,const char * p0)5395ea9e707SThomas Veerman int pmatch(fa *f, const char *p0) /* longest match, for sub */
5405ea9e707SThomas Veerman {
5415ea9e707SThomas Veerman int s, ns;
5425ea9e707SThomas Veerman uschar *p = __UNCONST(p0);
5435ea9e707SThomas Veerman uschar *q;
5445ea9e707SThomas Veerman
5455ea9e707SThomas Veerman s = f->initstat;
5465ea9e707SThomas Veerman assert(s < f->state_count);
5475ea9e707SThomas Veerman patbeg = p;
5485ea9e707SThomas Veerman patlen = -1;
5495ea9e707SThomas Veerman do {
5505ea9e707SThomas Veerman q = p;
5515ea9e707SThomas Veerman do {
5525ea9e707SThomas Veerman if (f->out[s]) /* final state */
5535ea9e707SThomas Veerman patlen = q-p;
5545ea9e707SThomas Veerman /* assert(*q < NCHARS); */
5555ea9e707SThomas Veerman if ((ns = f->gototab[s][*q]) != 0)
5565ea9e707SThomas Veerman s = ns;
5575ea9e707SThomas Veerman else
5585ea9e707SThomas Veerman s = cgoto(f, s, *q);
5595ea9e707SThomas Veerman
5605ea9e707SThomas Veerman assert(s < f->state_count);
5615ea9e707SThomas Veerman
5625ea9e707SThomas Veerman if (s == 1) { /* no transition */
5635ea9e707SThomas Veerman if (patlen >= 0) {
5645ea9e707SThomas Veerman patbeg = p;
5655ea9e707SThomas Veerman return(1);
5665ea9e707SThomas Veerman }
5675ea9e707SThomas Veerman else
5685ea9e707SThomas Veerman goto nextin; /* no match */
5695ea9e707SThomas Veerman }
5705ea9e707SThomas Veerman } while (*q++ != 0);
5715ea9e707SThomas Veerman if (f->out[s])
5725ea9e707SThomas Veerman patlen = q-p-1; /* don't count $ */
5735ea9e707SThomas Veerman if (patlen >= 0) {
5745ea9e707SThomas Veerman patbeg = p;
5755ea9e707SThomas Veerman return(1);
5765ea9e707SThomas Veerman }
5775ea9e707SThomas Veerman nextin:
5785ea9e707SThomas Veerman s = 2;
5795ea9e707SThomas Veerman } while (*p++ != 0);
5805ea9e707SThomas Veerman return (0);
5815ea9e707SThomas Veerman }
5825ea9e707SThomas Veerman
nematch(fa * f,const char * p0)5835ea9e707SThomas Veerman int nematch(fa *f, const char *p0) /* non-empty match, for sub */
5845ea9e707SThomas Veerman {
5855ea9e707SThomas Veerman int s, ns;
5865ea9e707SThomas Veerman uschar *p = __UNCONST(p0);
5875ea9e707SThomas Veerman uschar *q;
5885ea9e707SThomas Veerman
5895ea9e707SThomas Veerman s = f->initstat;
5905ea9e707SThomas Veerman assert(s < f->state_count);
5915ea9e707SThomas Veerman
5925ea9e707SThomas Veerman patlen = -1;
5935ea9e707SThomas Veerman while (*p) {
5945ea9e707SThomas Veerman q = p;
5955ea9e707SThomas Veerman do {
5965ea9e707SThomas Veerman if (f->out[s]) /* final state */
5975ea9e707SThomas Veerman patlen = q-p;
5985ea9e707SThomas Veerman /* assert(*q < NCHARS); */
5995ea9e707SThomas Veerman if ((ns = f->gototab[s][*q]) != 0)
6005ea9e707SThomas Veerman s = ns;
6015ea9e707SThomas Veerman else
6025ea9e707SThomas Veerman s = cgoto(f, s, *q);
6035ea9e707SThomas Veerman
6045ea9e707SThomas Veerman assert(s < f->state_count);
6055ea9e707SThomas Veerman
6065ea9e707SThomas Veerman if (s == 1) { /* no transition */
6075ea9e707SThomas Veerman if (patlen > 0) {
6085ea9e707SThomas Veerman patbeg = p;
6095ea9e707SThomas Veerman return(1);
6105ea9e707SThomas Veerman } else
6115ea9e707SThomas Veerman goto nnextin; /* no nonempty match */
6125ea9e707SThomas Veerman }
6135ea9e707SThomas Veerman } while (*q++ != 0);
6145ea9e707SThomas Veerman if (f->out[s])
6155ea9e707SThomas Veerman patlen = q-p-1; /* don't count $ */
6165ea9e707SThomas Veerman if (patlen > 0 ) {
6175ea9e707SThomas Veerman patbeg = p;
6185ea9e707SThomas Veerman return(1);
6195ea9e707SThomas Veerman }
6205ea9e707SThomas Veerman nnextin:
6215ea9e707SThomas Veerman s = 2;
6225ea9e707SThomas Veerman p++;
6235ea9e707SThomas Veerman }
6245ea9e707SThomas Veerman return (0);
6255ea9e707SThomas Veerman }
6265ea9e707SThomas Veerman
6275ea9e707SThomas Veerman
6285ea9e707SThomas Veerman /*
6295ea9e707SThomas Veerman * NAME
6305ea9e707SThomas Veerman * fnematch
6315ea9e707SThomas Veerman *
6325ea9e707SThomas Veerman * DESCRIPTION
6335ea9e707SThomas Veerman * A stream-fed version of nematch which transfers characters to a
6345ea9e707SThomas Veerman * null-terminated buffer. All characters up to and including the last
6355ea9e707SThomas Veerman * character of the matching text or EOF are placed in the buffer. If
6365ea9e707SThomas Veerman * a match is found, patbeg and patlen are set appropriately.
6375ea9e707SThomas Veerman *
6385ea9e707SThomas Veerman * RETURN VALUES
6395ea9e707SThomas Veerman * 0 No match found.
6405ea9e707SThomas Veerman * 1 Match found.
6415ea9e707SThomas Veerman */
6425ea9e707SThomas Veerman
fnematch(fa * pfa,FILE * f,uschar ** pbuf,int * pbufsize,int quantum)6435ea9e707SThomas Veerman int fnematch(fa *pfa, FILE *f, uschar **pbuf, int *pbufsize, int quantum)
6445ea9e707SThomas Veerman {
6455ea9e707SThomas Veerman uschar *buf = *pbuf;
6465ea9e707SThomas Veerman int bufsize = *pbufsize;
6475ea9e707SThomas Veerman int c, i, j, k, ns, s;
6485ea9e707SThomas Veerman
6495ea9e707SThomas Veerman s = pfa->initstat;
6505ea9e707SThomas Veerman assert(s < pfa->state_count);
6515ea9e707SThomas Veerman patlen = 0;
6525ea9e707SThomas Veerman
6535ea9e707SThomas Veerman /*
6545ea9e707SThomas Veerman * All indices relative to buf.
6555ea9e707SThomas Veerman * i <= j <= k <= bufsize
6565ea9e707SThomas Veerman *
6575ea9e707SThomas Veerman * i: origin of active substring
6585ea9e707SThomas Veerman * j: current character
6595ea9e707SThomas Veerman * k: destination of next getc()
6605ea9e707SThomas Veerman */
6615ea9e707SThomas Veerman i = -1, k = 0;
6625ea9e707SThomas Veerman do {
6635ea9e707SThomas Veerman j = i++;
6645ea9e707SThomas Veerman do {
6655ea9e707SThomas Veerman if (++j == k) {
6665ea9e707SThomas Veerman if (k == bufsize)
6675ea9e707SThomas Veerman if (!adjbuf(&buf, &bufsize, bufsize+1, quantum, 0, "fnematch"))
6685ea9e707SThomas Veerman FATAL("stream '%.30s...' too long", buf);
6695ea9e707SThomas Veerman buf[k++] = (c = getc(f)) != EOF ? c : 0;
6705ea9e707SThomas Veerman }
6715ea9e707SThomas Veerman c = buf[j];
6725ea9e707SThomas Veerman /* assert(c < NCHARS); */
6735ea9e707SThomas Veerman
6745ea9e707SThomas Veerman if ((ns = pfa->gototab[s][c]) != 0)
6755ea9e707SThomas Veerman s = ns;
6765ea9e707SThomas Veerman else
6775ea9e707SThomas Veerman s = cgoto(pfa, s, c);
6785ea9e707SThomas Veerman assert(s < pfa->state_count);
6795ea9e707SThomas Veerman
6805ea9e707SThomas Veerman if (pfa->out[s]) { /* final state */
6815ea9e707SThomas Veerman patlen = j - i + 1;
6825ea9e707SThomas Veerman if (c == 0) /* don't count $ */
6835ea9e707SThomas Veerman patlen--;
6845ea9e707SThomas Veerman }
6855ea9e707SThomas Veerman } while (buf[j] && s != 1);
6865ea9e707SThomas Veerman s = 2;
6875ea9e707SThomas Veerman } while (buf[i] && !patlen);
6885ea9e707SThomas Veerman
6895ea9e707SThomas Veerman /* adjbuf() may have relocated a resized buffer. Inform the world. */
6905ea9e707SThomas Veerman *pbuf = buf;
6915ea9e707SThomas Veerman *pbufsize = bufsize;
6925ea9e707SThomas Veerman
6935ea9e707SThomas Veerman if (patlen) {
6945ea9e707SThomas Veerman patbeg = buf + i;
6955ea9e707SThomas Veerman /*
6965ea9e707SThomas Veerman * Under no circumstances is the last character fed to
6975ea9e707SThomas Veerman * the automaton part of the match. It is EOF's nullbyte,
6985ea9e707SThomas Veerman * or it sent the automaton into a state with no further
6995ea9e707SThomas Veerman * transitions available (s==1), or both. Room for a
7005ea9e707SThomas Veerman * terminating nullbyte is guaranteed.
7015ea9e707SThomas Veerman *
7025ea9e707SThomas Veerman * ungetc any chars after the end of matching text
7035ea9e707SThomas Veerman * (except for EOF's nullbyte, if present) and null
7045ea9e707SThomas Veerman * terminate the buffer.
7055ea9e707SThomas Veerman */
7065ea9e707SThomas Veerman do
7075ea9e707SThomas Veerman if (buf[--k] && ungetc(buf[k], f) == EOF)
7085ea9e707SThomas Veerman FATAL("unable to ungetc '%c'", buf[k]);
7095ea9e707SThomas Veerman while (k > i + patlen);
7105ea9e707SThomas Veerman buf[k] = 0;
7115ea9e707SThomas Veerman return 1;
7125ea9e707SThomas Veerman }
7135ea9e707SThomas Veerman else
7145ea9e707SThomas Veerman return 0;
7155ea9e707SThomas Veerman }
7165ea9e707SThomas Veerman
reparse(const char * p)7175ea9e707SThomas Veerman Node *reparse(const char *p) /* parses regular expression pointed to by p */
7185ea9e707SThomas Veerman { /* uses relex() to scan regular expression */
7195ea9e707SThomas Veerman Node *np;
7205ea9e707SThomas Veerman
7215ea9e707SThomas Veerman dprintf( ("reparse <%s>\n", p) );
7225ea9e707SThomas Veerman lastre = prestr = (const uschar *) p; /* prestr points to string to be parsed */
7235ea9e707SThomas Veerman rtok = relex();
7245ea9e707SThomas Veerman /* GNU compatibility: an empty regexp matches anything */
7255ea9e707SThomas Veerman if (rtok == '\0') {
7265ea9e707SThomas Veerman /* FATAL("empty regular expression"); previous */
7275ea9e707SThomas Veerman return(op2(EMPTYRE, NIL, NIL));
7285ea9e707SThomas Veerman }
7295ea9e707SThomas Veerman np = regexp();
7305ea9e707SThomas Veerman if (rtok != '\0')
7315ea9e707SThomas Veerman FATAL("syntax error in regular expression %s at %s", lastre, prestr);
7325ea9e707SThomas Veerman return(np);
7335ea9e707SThomas Veerman }
7345ea9e707SThomas Veerman
regexp(void)7355ea9e707SThomas Veerman Node *regexp(void) /* top-level parse of reg expr */
7365ea9e707SThomas Veerman {
7375ea9e707SThomas Veerman return (alt(concat(primary())));
7385ea9e707SThomas Veerman }
7395ea9e707SThomas Veerman
primary(void)7405ea9e707SThomas Veerman Node *primary(void)
7415ea9e707SThomas Veerman {
7425ea9e707SThomas Veerman Node *np;
7435ea9e707SThomas Veerman
7445ea9e707SThomas Veerman switch (rtok) {
7455ea9e707SThomas Veerman case CHAR:
7465ea9e707SThomas Veerman np = op2(CHAR, NIL, itonp(rlxval));
7475ea9e707SThomas Veerman rtok = relex();
7485ea9e707SThomas Veerman return (unary(np));
7495ea9e707SThomas Veerman case ALL:
7505ea9e707SThomas Veerman rtok = relex();
7515ea9e707SThomas Veerman return (unary(op2(ALL, NIL, NIL)));
7525ea9e707SThomas Veerman case EMPTYRE:
7535ea9e707SThomas Veerman rtok = relex();
7545ea9e707SThomas Veerman return (unary(op2(ALL, NIL, NIL)));
7555ea9e707SThomas Veerman case DOT:
7565ea9e707SThomas Veerman rtok = relex();
7575ea9e707SThomas Veerman return (unary(op2(DOT, NIL, NIL)));
7585ea9e707SThomas Veerman case CCL:
7595ea9e707SThomas Veerman np = op2(CCL, NIL, (Node*) cclenter((const char *) rlxstr));
7605ea9e707SThomas Veerman rtok = relex();
7615ea9e707SThomas Veerman return (unary(np));
7625ea9e707SThomas Veerman case NCCL:
7635ea9e707SThomas Veerman np = op2(NCCL, NIL, (Node *) cclenter((const char *) rlxstr));
7645ea9e707SThomas Veerman rtok = relex();
7655ea9e707SThomas Veerman return (unary(np));
7665ea9e707SThomas Veerman case '^':
7675ea9e707SThomas Veerman rtok = relex();
7685ea9e707SThomas Veerman return (unary(op2(CHAR, NIL, itonp(HAT))));
7695ea9e707SThomas Veerman case '$':
7705ea9e707SThomas Veerman rtok = relex();
7715ea9e707SThomas Veerman return (unary(op2(CHAR, NIL, NIL)));
7725ea9e707SThomas Veerman case '(':
7735ea9e707SThomas Veerman rtok = relex();
7745ea9e707SThomas Veerman if (rtok == ')') { /* special pleading for () */
7755ea9e707SThomas Veerman rtok = relex();
7765ea9e707SThomas Veerman return unary(op2(CCL, NIL, (Node *) tostring("")));
7775ea9e707SThomas Veerman }
7785ea9e707SThomas Veerman np = regexp();
7795ea9e707SThomas Veerman if (rtok == ')') {
7805ea9e707SThomas Veerman rtok = relex();
7815ea9e707SThomas Veerman return (unary(np));
7825ea9e707SThomas Veerman }
7835ea9e707SThomas Veerman else
7845ea9e707SThomas Veerman FATAL("syntax error in regular expression %s at %s", lastre, prestr);
7855ea9e707SThomas Veerman default:
7865ea9e707SThomas Veerman FATAL("illegal primary in regular expression %s at %s", lastre, prestr);
7875ea9e707SThomas Veerman }
7885ea9e707SThomas Veerman return 0; /*NOTREACHED*/
7895ea9e707SThomas Veerman }
7905ea9e707SThomas Veerman
concat(Node * np)7915ea9e707SThomas Veerman Node *concat(Node *np)
7925ea9e707SThomas Veerman {
7935ea9e707SThomas Veerman switch (rtok) {
7945ea9e707SThomas Veerman case CHAR: case DOT: case ALL: case EMPTYRE: case CCL: case NCCL: case '$': case '(':
7955ea9e707SThomas Veerman return (concat(op2(CAT, np, primary())));
7965ea9e707SThomas Veerman }
7975ea9e707SThomas Veerman return (np);
7985ea9e707SThomas Veerman }
7995ea9e707SThomas Veerman
alt(Node * np)8005ea9e707SThomas Veerman Node *alt(Node *np)
8015ea9e707SThomas Veerman {
8025ea9e707SThomas Veerman if (rtok == OR) {
8035ea9e707SThomas Veerman rtok = relex();
8045ea9e707SThomas Veerman return (alt(op2(OR, np, concat(primary()))));
8055ea9e707SThomas Veerman }
8065ea9e707SThomas Veerman return (np);
8075ea9e707SThomas Veerman }
8085ea9e707SThomas Veerman
unary(Node * np)8095ea9e707SThomas Veerman Node *unary(Node *np)
8105ea9e707SThomas Veerman {
8115ea9e707SThomas Veerman switch (rtok) {
8125ea9e707SThomas Veerman case STAR:
8135ea9e707SThomas Veerman rtok = relex();
8145ea9e707SThomas Veerman return (unary(op2(STAR, np, NIL)));
8155ea9e707SThomas Veerman case PLUS:
8165ea9e707SThomas Veerman rtok = relex();
8175ea9e707SThomas Veerman return (unary(op2(PLUS, np, NIL)));
8185ea9e707SThomas Veerman case QUEST:
8195ea9e707SThomas Veerman rtok = relex();
8205ea9e707SThomas Veerman return (unary(op2(QUEST, np, NIL)));
8215ea9e707SThomas Veerman default:
8225ea9e707SThomas Veerman return (np);
8235ea9e707SThomas Veerman }
8245ea9e707SThomas Veerman }
8255ea9e707SThomas Veerman
8265ea9e707SThomas Veerman /*
8275ea9e707SThomas Veerman * Character class definitions conformant to the POSIX locale as
8285ea9e707SThomas Veerman * defined in IEEE P1003.1 draft 7 of June 2001, assuming the source
8295ea9e707SThomas Veerman * and operating character sets are both ASCII (ISO646) or supersets
8305ea9e707SThomas Veerman * thereof.
8315ea9e707SThomas Veerman *
8325ea9e707SThomas Veerman * Note that to avoid overflowing the temporary buffer used in
8335ea9e707SThomas Veerman * relex(), the expanded character class (prior to range expansion)
8345ea9e707SThomas Veerman * must be less than twice the size of their full name.
8355ea9e707SThomas Veerman */
8365ea9e707SThomas Veerman
8375ea9e707SThomas Veerman /* Because isblank doesn't show up in any of the header files on any
8385ea9e707SThomas Veerman * system i use, it's defined here. if some other locale has a richer
8395ea9e707SThomas Veerman * definition of "blank", define HAS_ISBLANK and provide your own
8405ea9e707SThomas Veerman * version.
8415ea9e707SThomas Veerman * the parentheses here are an attempt to find a path through the maze
8425ea9e707SThomas Veerman * of macro definition and/or function and/or version provided. thanks
8435ea9e707SThomas Veerman * to nelson beebe for the suggestion; let's see if it works everywhere.
8445ea9e707SThomas Veerman */
8455ea9e707SThomas Veerman
8465ea9e707SThomas Veerman /* #define HAS_ISBLANK */
8475ea9e707SThomas Veerman
8485ea9e707SThomas Veerman static const struct charclass {
8495ea9e707SThomas Veerman const char *cc_name;
8505ea9e707SThomas Veerman int cc_namelen;
8515ea9e707SThomas Veerman int (*cc_func)(int);
8525ea9e707SThomas Veerman } charclasses[] = {
8535ea9e707SThomas Veerman { "alnum", 5, isalnum },
8545ea9e707SThomas Veerman { "alpha", 5, isalpha },
855*84d9c625SLionel Sambuc #ifndef HAS_ISBLANK
856*84d9c625SLionel Sambuc { "blank", 5, isspace }, /* was isblank */
857*84d9c625SLionel Sambuc #else
8585ea9e707SThomas Veerman { "blank", 5, isblank },
859*84d9c625SLionel Sambuc #endif
8605ea9e707SThomas Veerman { "cntrl", 5, iscntrl },
8615ea9e707SThomas Veerman { "digit", 5, isdigit },
8625ea9e707SThomas Veerman { "graph", 5, isgraph },
8635ea9e707SThomas Veerman { "lower", 5, islower },
8645ea9e707SThomas Veerman { "print", 5, isprint },
8655ea9e707SThomas Veerman { "punct", 5, ispunct },
8665ea9e707SThomas Veerman { "space", 5, isspace },
8675ea9e707SThomas Veerman { "upper", 5, isupper },
8685ea9e707SThomas Veerman { "xdigit", 6, isxdigit },
8695ea9e707SThomas Veerman { NULL, 0, NULL },
8705ea9e707SThomas Veerman };
8715ea9e707SThomas Veerman
8725ea9e707SThomas Veerman
relex(void)8735ea9e707SThomas Veerman int relex(void) /* lexical analyzer for reparse */
8745ea9e707SThomas Veerman {
8755ea9e707SThomas Veerman int c, n;
8765ea9e707SThomas Veerman int cflag;
8775ea9e707SThomas Veerman static uschar *buf = 0;
8785ea9e707SThomas Veerman static int bufsz = 100;
8795ea9e707SThomas Veerman uschar *bp;
8805ea9e707SThomas Veerman const struct charclass *cc;
8815ea9e707SThomas Veerman int i;
8825ea9e707SThomas Veerman
8835ea9e707SThomas Veerman switch (c = *prestr++) {
8845ea9e707SThomas Veerman case '|': return OR;
8855ea9e707SThomas Veerman case '*': return STAR;
8865ea9e707SThomas Veerman case '+': return PLUS;
8875ea9e707SThomas Veerman case '?': return QUEST;
8885ea9e707SThomas Veerman case '.': return DOT;
8895ea9e707SThomas Veerman case '\0': prestr--; return '\0';
8905ea9e707SThomas Veerman case '^':
8915ea9e707SThomas Veerman case '$':
8925ea9e707SThomas Veerman case '(':
8935ea9e707SThomas Veerman case ')':
8945ea9e707SThomas Veerman return c;
8955ea9e707SThomas Veerman case '\\':
8965ea9e707SThomas Veerman rlxval = quoted(&prestr);
8975ea9e707SThomas Veerman return CHAR;
8985ea9e707SThomas Veerman default:
8995ea9e707SThomas Veerman rlxval = c;
9005ea9e707SThomas Veerman return CHAR;
9015ea9e707SThomas Veerman case '[':
9025ea9e707SThomas Veerman if (buf == 0 && (buf = malloc(bufsz)) == NULL)
9035ea9e707SThomas Veerman FATAL("out of space in reg expr %.10s..", lastre);
9045ea9e707SThomas Veerman bp = buf;
9055ea9e707SThomas Veerman if (*prestr == '^') {
9065ea9e707SThomas Veerman cflag = 1;
9075ea9e707SThomas Veerman prestr++;
9085ea9e707SThomas Veerman }
9095ea9e707SThomas Veerman else
9105ea9e707SThomas Veerman cflag = 0;
9115ea9e707SThomas Veerman n = 2 * strlen((const char *) prestr)+1;
9125ea9e707SThomas Veerman if (!adjbuf(&buf, &bufsz, n, n, &bp, "relex1"))
9135ea9e707SThomas Veerman FATAL("out of space for reg expr %.10s...", lastre);
9145ea9e707SThomas Veerman for (; ; ) {
9155ea9e707SThomas Veerman if ((c = *prestr++) == '\\') {
9165ea9e707SThomas Veerman *bp++ = '\\';
9175ea9e707SThomas Veerman if ((c = *prestr++) == '\0')
9185ea9e707SThomas Veerman FATAL("nonterminated character class %.20s...", lastre);
9195ea9e707SThomas Veerman *bp++ = c;
9205ea9e707SThomas Veerman /* } else if (c == '\n') { */
9215ea9e707SThomas Veerman /* FATAL("newline in character class %.20s...", lastre); */
9225ea9e707SThomas Veerman } else if (c == '[' && *prestr == ':') {
9235ea9e707SThomas Veerman /* POSIX char class names, Dag-Erling Smorgrav, des@ofug.org */
9245ea9e707SThomas Veerman for (cc = charclasses; cc->cc_name; cc++)
9255ea9e707SThomas Veerman if (strncmp((const char *) prestr + 1, (const char *) cc->cc_name, cc->cc_namelen) == 0)
9265ea9e707SThomas Veerman break;
9275ea9e707SThomas Veerman if (cc->cc_name != NULL && prestr[1 + cc->cc_namelen] == ':' &&
9285ea9e707SThomas Veerman prestr[2 + cc->cc_namelen] == ']') {
9295ea9e707SThomas Veerman prestr += cc->cc_namelen + 3;
9305ea9e707SThomas Veerman for (i = 1; i < NCHARS; i++) {
9315ea9e707SThomas Veerman if (!adjbuf(&buf, &bufsz, bp-buf+1, 100, &bp, "relex2"))
9325ea9e707SThomas Veerman FATAL("out of space for reg expr %.10s...", lastre);
9335ea9e707SThomas Veerman if (cc->cc_func(i)) {
9345ea9e707SThomas Veerman *bp++ = i;
9355ea9e707SThomas Veerman n++;
9365ea9e707SThomas Veerman }
9375ea9e707SThomas Veerman }
9385ea9e707SThomas Veerman } else
9395ea9e707SThomas Veerman *bp++ = c;
9405ea9e707SThomas Veerman } else if (c == '\0') {
9415ea9e707SThomas Veerman FATAL("nonterminated character class %.20s", lastre);
9425ea9e707SThomas Veerman } else if (bp == buf) { /* 1st char is special */
9435ea9e707SThomas Veerman *bp++ = c;
9445ea9e707SThomas Veerman } else if (c == ']') {
9455ea9e707SThomas Veerman *bp++ = 0;
9465ea9e707SThomas Veerman rlxstr = (uschar *) tostring((char *) buf);
9475ea9e707SThomas Veerman if (cflag == 0)
9485ea9e707SThomas Veerman return CCL;
9495ea9e707SThomas Veerman else
9505ea9e707SThomas Veerman return NCCL;
9515ea9e707SThomas Veerman } else
9525ea9e707SThomas Veerman *bp++ = c;
9535ea9e707SThomas Veerman }
9545ea9e707SThomas Veerman }
9555ea9e707SThomas Veerman }
9565ea9e707SThomas Veerman
cgoto(fa * f,int s,int c)9575ea9e707SThomas Veerman int cgoto(fa *f, int s, int c)
9585ea9e707SThomas Veerman {
9595ea9e707SThomas Veerman int i, j, k;
9605ea9e707SThomas Veerman int *p, *q;
9615ea9e707SThomas Veerman
9625ea9e707SThomas Veerman assert(c == HAT || c < NCHARS);
9635ea9e707SThomas Veerman while (f->accept >= maxsetvec) /* guessing here! */
9645ea9e707SThomas Veerman resizesetvec("out of space in cgoto()");
9655ea9e707SThomas Veerman for (i = 0; i <= f->accept; i++)
9665ea9e707SThomas Veerman setvec[i] = 0;
9675ea9e707SThomas Veerman setcnt = 0;
9685ea9e707SThomas Veerman resize_state(f, s);
9695ea9e707SThomas Veerman /* compute positions of gototab[s,c] into setvec */
9705ea9e707SThomas Veerman p = f->posns[s];
9715ea9e707SThomas Veerman for (i = 1; i <= *p; i++) {
9725ea9e707SThomas Veerman if ((k = f->re[p[i]].ltype) != FINAL) {
9735ea9e707SThomas Veerman if ((k == CHAR && c == ptoi(f->re[p[i]].lval.np))
9745ea9e707SThomas Veerman || (k == DOT && c != 0 && c != HAT)
9755ea9e707SThomas Veerman || (k == ALL && c != 0)
9765ea9e707SThomas Veerman || (k == EMPTYRE && c != 0)
9775ea9e707SThomas Veerman || (k == CCL && member(c, (char *) f->re[p[i]].lval.up))
9785ea9e707SThomas Veerman || (k == NCCL && !member(c, (char *) f->re[p[i]].lval.up) && c != 0 && c != HAT)) {
9795ea9e707SThomas Veerman q = f->re[p[i]].lfollow;
9805ea9e707SThomas Veerman for (j = 1; j <= *q; j++) {
9815ea9e707SThomas Veerman if (q[j] >= maxsetvec)
9825ea9e707SThomas Veerman resizesetvec("cgoto overflow");
9835ea9e707SThomas Veerman if (setvec[q[j]] == 0) {
9845ea9e707SThomas Veerman setcnt++;
9855ea9e707SThomas Veerman setvec[q[j]] = 1;
9865ea9e707SThomas Veerman }
9875ea9e707SThomas Veerman }
9885ea9e707SThomas Veerman }
9895ea9e707SThomas Veerman }
9905ea9e707SThomas Veerman }
9915ea9e707SThomas Veerman /* determine if setvec is a previous state */
9925ea9e707SThomas Veerman tmpset[0] = setcnt;
9935ea9e707SThomas Veerman j = 1;
9945ea9e707SThomas Veerman for (i = f->accept; i >= 0; i--)
9955ea9e707SThomas Veerman if (setvec[i]) {
9965ea9e707SThomas Veerman tmpset[j++] = i;
9975ea9e707SThomas Veerman }
9985ea9e707SThomas Veerman
9995ea9e707SThomas Veerman resize_state(f, f->curstat > s ? f->curstat : s);
10005ea9e707SThomas Veerman /* tmpset == previous state? */
10015ea9e707SThomas Veerman for (i = 1; i <= f->curstat; i++) {
10025ea9e707SThomas Veerman p = f->posns[i];
10035ea9e707SThomas Veerman if ((k = tmpset[0]) != p[0])
10045ea9e707SThomas Veerman goto different;
10055ea9e707SThomas Veerman for (j = 1; j <= k; j++)
10065ea9e707SThomas Veerman if (tmpset[j] != p[j])
10075ea9e707SThomas Veerman goto different;
10085ea9e707SThomas Veerman /* setvec is state i */
10095ea9e707SThomas Veerman if (c != HAT)
10105ea9e707SThomas Veerman f->gototab[s][c] = i;
10115ea9e707SThomas Veerman return i;
10125ea9e707SThomas Veerman different:;
10135ea9e707SThomas Veerman }
10145ea9e707SThomas Veerman
10155ea9e707SThomas Veerman /* add tmpset to current set of states */
10165ea9e707SThomas Veerman ++(f->curstat);
10175ea9e707SThomas Veerman resize_state(f, f->curstat);
10185ea9e707SThomas Veerman for (i = 0; i < NCHARS; i++)
10195ea9e707SThomas Veerman f->gototab[f->curstat][i] = 0;
10205ea9e707SThomas Veerman xfree(f->posns[f->curstat]);
10215ea9e707SThomas Veerman if ((p = calloc(1, (setcnt+1)*sizeof(int))) == NULL)
10225ea9e707SThomas Veerman overflo("out of space in cgoto");
10235ea9e707SThomas Veerman
10245ea9e707SThomas Veerman f->posns[f->curstat] = p;
10255ea9e707SThomas Veerman if (c != HAT)
10265ea9e707SThomas Veerman f->gototab[s][c] = f->curstat;
10275ea9e707SThomas Veerman for (i = 0; i <= setcnt; i++)
10285ea9e707SThomas Veerman p[i] = tmpset[i];
10295ea9e707SThomas Veerman if (setvec[f->accept])
10305ea9e707SThomas Veerman f->out[f->curstat] = 1;
10315ea9e707SThomas Veerman else
10325ea9e707SThomas Veerman f->out[f->curstat] = 0;
10335ea9e707SThomas Veerman return f->curstat;
10345ea9e707SThomas Veerman }
10355ea9e707SThomas Veerman
10365ea9e707SThomas Veerman
freefa(fa * f)10375ea9e707SThomas Veerman void freefa(fa *f) /* free a finite automaton */
10385ea9e707SThomas Veerman {
10395ea9e707SThomas Veerman int i;
10405ea9e707SThomas Veerman
10415ea9e707SThomas Veerman if (f == NULL)
10425ea9e707SThomas Veerman return;
10435ea9e707SThomas Veerman for (i = 0; i < f->state_count; i++) {
10445ea9e707SThomas Veerman xfree(f->gototab[i])
10455ea9e707SThomas Veerman xfree(f->posns[i]);
10465ea9e707SThomas Veerman }
10475ea9e707SThomas Veerman for (i = 0; i <= f->accept; i++) {
10485ea9e707SThomas Veerman xfree(f->re[i].lfollow);
10495ea9e707SThomas Veerman if (f->re[i].ltype == CCL || f->re[i].ltype == NCCL)
10505ea9e707SThomas Veerman xfree((f->re[i].lval.np));
10515ea9e707SThomas Veerman }
10525ea9e707SThomas Veerman xfree(f->restr);
10535ea9e707SThomas Veerman xfree(f->out);
10545ea9e707SThomas Veerman xfree(f->posns);
10555ea9e707SThomas Veerman xfree(f->gototab);
10565ea9e707SThomas Veerman xfree(f);
10575ea9e707SThomas Veerman }
1058