10Sstevel@tonic-gate /*
20Sstevel@tonic-gate * CDDL HEADER START
30Sstevel@tonic-gate *
40Sstevel@tonic-gate * The contents of this file are subject to the terms of the
50Sstevel@tonic-gate * Common Development and Distribution License, Version 1.0 only
60Sstevel@tonic-gate * (the "License"). You may not use this file except in compliance
70Sstevel@tonic-gate * with the License.
80Sstevel@tonic-gate *
90Sstevel@tonic-gate * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
100Sstevel@tonic-gate * or http://www.opensolaris.org/os/licensing.
110Sstevel@tonic-gate * See the License for the specific language governing permissions
120Sstevel@tonic-gate * and limitations under the License.
130Sstevel@tonic-gate *
140Sstevel@tonic-gate * When distributing Covered Code, include this CDDL HEADER in each
150Sstevel@tonic-gate * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
160Sstevel@tonic-gate * If applicable, add the following below this CDDL HEADER, with the
170Sstevel@tonic-gate * fields enclosed by brackets "[]" replaced with your own identifying
180Sstevel@tonic-gate * information: Portions Copyright [yyyy] [name of copyright owner]
190Sstevel@tonic-gate *
200Sstevel@tonic-gate * CDDL HEADER END
210Sstevel@tonic-gate */
22*289Snakanon
23*289Snakanon /*
24*289Snakanon * Copyright 2005 Sun Microsystems, Inc. All rights reserved.
25*289Snakanon * Use is subject to license terms.
26*289Snakanon */
27*289Snakanon
280Sstevel@tonic-gate /* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */
290Sstevel@tonic-gate /* All Rights Reserved */
300Sstevel@tonic-gate
31*289Snakanon #pragma ident "%Z%%M% %I% %E% SMI"
320Sstevel@tonic-gate
330Sstevel@tonic-gate #define DEBUG
340Sstevel@tonic-gate
350Sstevel@tonic-gate #include "awk.h"
360Sstevel@tonic-gate #include "y.tab.h"
370Sstevel@tonic-gate
380Sstevel@tonic-gate #define HAT (NCHARS-1) /* matches ^ in regular expr */
390Sstevel@tonic-gate /* NCHARS is 2**n */
40*289Snakanon #define MAXLIN (3 * LINE_MAX)
410Sstevel@tonic-gate
42*289Snakanon #define type(v) (v)->nobj
43*289Snakanon #define left(v) (v)->narg[0]
44*289Snakanon #define right(v) (v)->narg[1]
45*289Snakanon #define parent(v) (v)->nnext
460Sstevel@tonic-gate
47*289Snakanon #define LEAF case CCL: case NCCL: case CHAR: case DOT: case FINAL: case ALL:
48*289Snakanon #define UNARY case STAR: case PLUS: case QUEST:
490Sstevel@tonic-gate
50*289Snakanon /*
51*289Snakanon * encoding in tree Nodes:
52*289Snakanon * leaf (CCL, NCCL, CHAR, DOT, FINAL, ALL):
53*289Snakanon * left is index, right contains value or pointer to value
54*289Snakanon * unary (STAR, PLUS, QUEST): left is child, right is null
55*289Snakanon * binary (CAT, OR): left and right are children
56*289Snakanon * parent contains pointer to parent
57*289Snakanon */
580Sstevel@tonic-gate
59*289Snakanon int setvec[MAXLIN];
60*289Snakanon int tmpset[MAXLIN];
61*289Snakanon Node *point[MAXLIN];
620Sstevel@tonic-gate
630Sstevel@tonic-gate int rtok; /* next token in current re */
640Sstevel@tonic-gate int rlxval;
650Sstevel@tonic-gate uchar *rlxstr;
660Sstevel@tonic-gate uchar *prestr; /* current position in current re */
670Sstevel@tonic-gate uchar *lastre; /* origin of last re */
680Sstevel@tonic-gate
690Sstevel@tonic-gate static int setcnt;
700Sstevel@tonic-gate static int poscnt;
710Sstevel@tonic-gate
720Sstevel@tonic-gate uchar *patbeg;
730Sstevel@tonic-gate int patlen;
740Sstevel@tonic-gate
750Sstevel@tonic-gate #define NFA 20 /* cache this many dynamic fa's */
760Sstevel@tonic-gate fa *fatab[NFA];
770Sstevel@tonic-gate int nfatab = 0; /* entries in fatab */
780Sstevel@tonic-gate
79*289Snakanon static fa *mkdfa(uchar *, int);
80*289Snakanon static int makeinit(fa *, int);
81*289Snakanon static void penter(Node *);
82*289Snakanon static void freetr(Node *);
83*289Snakanon static void overflo(char *);
84*289Snakanon static void cfoll(fa *, Node *);
85*289Snakanon static void follow(Node *);
86*289Snakanon static Node *reparse(uchar *);
87*289Snakanon static int relex(void);
88*289Snakanon static void freefa(fa *);
89*289Snakanon static int cgoto(fa *, int, int);
90*289Snakanon
91*289Snakanon fa *
makedfa(uchar * s,int anchor)92*289Snakanon makedfa(uchar *s, int anchor) /* returns dfa for reg expr s */
930Sstevel@tonic-gate {
940Sstevel@tonic-gate int i, use, nuse;
950Sstevel@tonic-gate fa *pfa;
960Sstevel@tonic-gate
970Sstevel@tonic-gate if (compile_time) /* a constant for sure */
98*289Snakanon return (mkdfa(s, anchor));
99*289Snakanon for (i = 0; i < nfatab; i++) { /* is it there already? */
100*289Snakanon if (fatab[i]->anchor == anchor &&
101*289Snakanon strcmp((char *)fatab[i]->restr, (char *)s) == 0) {
1020Sstevel@tonic-gate fatab[i]->use++;
103*289Snakanon return (fatab[i]);
104*289Snakanon }
1050Sstevel@tonic-gate }
1060Sstevel@tonic-gate pfa = mkdfa(s, anchor);
1070Sstevel@tonic-gate if (nfatab < NFA) { /* room for another */
1080Sstevel@tonic-gate fatab[nfatab] = pfa;
1090Sstevel@tonic-gate fatab[nfatab]->use = 1;
1100Sstevel@tonic-gate nfatab++;
111*289Snakanon return (pfa);
1120Sstevel@tonic-gate }
1130Sstevel@tonic-gate use = fatab[0]->use; /* replace least-recently used */
1140Sstevel@tonic-gate nuse = 0;
1150Sstevel@tonic-gate for (i = 1; i < nfatab; i++)
1160Sstevel@tonic-gate if (fatab[i]->use < use) {
1170Sstevel@tonic-gate use = fatab[i]->use;
1180Sstevel@tonic-gate nuse = i;
1190Sstevel@tonic-gate }
1200Sstevel@tonic-gate freefa(fatab[nuse]);
1210Sstevel@tonic-gate fatab[nuse] = pfa;
1220Sstevel@tonic-gate pfa->use = 1;
123*289Snakanon return (pfa);
1240Sstevel@tonic-gate }
1250Sstevel@tonic-gate
126*289Snakanon fa *
mkdfa(uchar * s,int anchor)127*289Snakanon mkdfa(uchar *s, int anchor) /* does the real work of making a dfa */
128*289Snakanon /* anchor = 1 for anchored matches, else 0 */
1290Sstevel@tonic-gate {
130*289Snakanon Node *p, *p1;
1310Sstevel@tonic-gate fa *f;
1320Sstevel@tonic-gate
1330Sstevel@tonic-gate p = reparse(s);
1340Sstevel@tonic-gate p1 = op2(CAT, op2(STAR, op2(ALL, NIL, NIL), NIL), p);
1350Sstevel@tonic-gate /* put ALL STAR in front of reg. exp. */
1360Sstevel@tonic-gate p1 = op2(CAT, p1, op2(FINAL, NIL, NIL));
1370Sstevel@tonic-gate /* put FINAL after reg. exp. */
1380Sstevel@tonic-gate
1390Sstevel@tonic-gate poscnt = 0;
1400Sstevel@tonic-gate penter(p1); /* enter parent pointers and leaf indices */
141*289Snakanon if ((f = (fa *)calloc(1, sizeof (fa) + poscnt * sizeof (rrow))) == NULL)
1420Sstevel@tonic-gate overflo("no room for fa");
143*289Snakanon /* penter has computed number of positions in re */
144*289Snakanon f->accept = poscnt-1;
1450Sstevel@tonic-gate cfoll(f, p1); /* set up follow sets */
1460Sstevel@tonic-gate freetr(p1);
147*289Snakanon if ((f->posns[0] =
148*289Snakanon (int *)calloc(1, *(f->re[0].lfollow) * sizeof (int))) == NULL) {
1490Sstevel@tonic-gate overflo("out of space in makedfa");
150*289Snakanon }
151*289Snakanon if ((f->posns[1] = (int *)calloc(1, sizeof (int))) == NULL)
1520Sstevel@tonic-gate overflo("out of space in makedfa");
1530Sstevel@tonic-gate *f->posns[1] = 0;
1540Sstevel@tonic-gate f->initstat = makeinit(f, anchor);
1550Sstevel@tonic-gate f->anchor = anchor;
1560Sstevel@tonic-gate f->restr = tostring(s);
157*289Snakanon return (f);
1580Sstevel@tonic-gate }
1590Sstevel@tonic-gate
160*289Snakanon static int
makeinit(fa * f,int anchor)161*289Snakanon makeinit(fa *f, int anchor)
1620Sstevel@tonic-gate {
1630Sstevel@tonic-gate register int i, k;
1640Sstevel@tonic-gate
1650Sstevel@tonic-gate f->curstat = 2;
1660Sstevel@tonic-gate f->out[2] = 0;
1670Sstevel@tonic-gate f->reset = 0;
1680Sstevel@tonic-gate k = *(f->re[0].lfollow);
169*289Snakanon xfree(f->posns[2]);
170*289Snakanon if ((f->posns[2] = (int *)calloc(1, (k+1) * sizeof (int))) == NULL)
1710Sstevel@tonic-gate overflo("out of space in makeinit");
172*289Snakanon for (i = 0; i <= k; i++) {
1730Sstevel@tonic-gate (f->posns[2])[i] = (f->re[0].lfollow)[i];
1740Sstevel@tonic-gate }
1750Sstevel@tonic-gate if ((f->posns[2])[1] == f->accept)
1760Sstevel@tonic-gate f->out[2] = 1;
177*289Snakanon for (i = 0; i < NCHARS; i++)
1780Sstevel@tonic-gate f->gototab[2][i] = 0;
1790Sstevel@tonic-gate f->curstat = cgoto(f, 2, HAT);
1800Sstevel@tonic-gate if (anchor) {
1810Sstevel@tonic-gate *f->posns[2] = k-1; /* leave out position 0 */
182*289Snakanon for (i = 0; i < k; i++) {
1830Sstevel@tonic-gate (f->posns[0])[i] = (f->posns[2])[i];
1840Sstevel@tonic-gate }
1850Sstevel@tonic-gate
1860Sstevel@tonic-gate f->out[0] = f->out[2];
1870Sstevel@tonic-gate if (f->curstat != 2)
1880Sstevel@tonic-gate --(*f->posns[f->curstat]);
1890Sstevel@tonic-gate }
190*289Snakanon return (f->curstat);
1910Sstevel@tonic-gate }
1920Sstevel@tonic-gate
193*289Snakanon void
penter(Node * p)194*289Snakanon penter(Node *p) /* set up parent pointers and leaf indices */
1950Sstevel@tonic-gate {
196*289Snakanon switch (type(p)) {
1970Sstevel@tonic-gate LEAF
1980Sstevel@tonic-gate left(p) = (Node *) poscnt;
1990Sstevel@tonic-gate point[poscnt++] = p;
2000Sstevel@tonic-gate break;
2010Sstevel@tonic-gate UNARY
2020Sstevel@tonic-gate penter(left(p));
2030Sstevel@tonic-gate parent(left(p)) = p;
2040Sstevel@tonic-gate break;
2050Sstevel@tonic-gate case CAT:
2060Sstevel@tonic-gate case OR:
2070Sstevel@tonic-gate penter(left(p));
2080Sstevel@tonic-gate penter(right(p));
2090Sstevel@tonic-gate parent(left(p)) = p;
2100Sstevel@tonic-gate parent(right(p)) = p;
2110Sstevel@tonic-gate break;
2120Sstevel@tonic-gate default:
2130Sstevel@tonic-gate ERROR "unknown type %d in penter", type(p) FATAL;
2140Sstevel@tonic-gate break;
2150Sstevel@tonic-gate }
2160Sstevel@tonic-gate }
2170Sstevel@tonic-gate
218*289Snakanon static void
freetr(Node * p)219*289Snakanon freetr(Node *p) /* free parse tree */
2200Sstevel@tonic-gate {
2210Sstevel@tonic-gate switch (type(p)) {
2220Sstevel@tonic-gate LEAF
2230Sstevel@tonic-gate xfree(p);
2240Sstevel@tonic-gate break;
2250Sstevel@tonic-gate UNARY
2260Sstevel@tonic-gate freetr(left(p));
2270Sstevel@tonic-gate xfree(p);
2280Sstevel@tonic-gate break;
2290Sstevel@tonic-gate case CAT:
2300Sstevel@tonic-gate case OR:
2310Sstevel@tonic-gate freetr(left(p));
2320Sstevel@tonic-gate freetr(right(p));
2330Sstevel@tonic-gate xfree(p);
2340Sstevel@tonic-gate break;
2350Sstevel@tonic-gate default:
2360Sstevel@tonic-gate ERROR "unknown type %d in freetr", type(p) FATAL;
2370Sstevel@tonic-gate break;
2380Sstevel@tonic-gate }
2390Sstevel@tonic-gate }
2400Sstevel@tonic-gate
241*289Snakanon uchar *
cclenter(uchar * p)242*289Snakanon cclenter(uchar *p)
2430Sstevel@tonic-gate {
2440Sstevel@tonic-gate register int i, c;
245*289Snakanon uchar *op, *chars, *ret;
246*289Snakanon size_t bsize;
2470Sstevel@tonic-gate
248*289Snakanon init_buf(&chars, &bsize, LINE_INCR);
2490Sstevel@tonic-gate op = p;
2500Sstevel@tonic-gate i = 0;
2510Sstevel@tonic-gate while ((c = *p++) != 0) {
2520Sstevel@tonic-gate if (c == '\\') {
2530Sstevel@tonic-gate if ((c = *p++) == 't')
2540Sstevel@tonic-gate c = '\t';
2550Sstevel@tonic-gate else if (c == 'n')
2560Sstevel@tonic-gate c = '\n';
2570Sstevel@tonic-gate else if (c == 'f')
2580Sstevel@tonic-gate c = '\f';
2590Sstevel@tonic-gate else if (c == 'r')
2600Sstevel@tonic-gate c = '\r';
2610Sstevel@tonic-gate else if (c == 'b')
2620Sstevel@tonic-gate c = '\b';
2630Sstevel@tonic-gate else if (c == '\\')
2640Sstevel@tonic-gate c = '\\';
2650Sstevel@tonic-gate else if (isdigit(c)) {
2660Sstevel@tonic-gate int n = c - '0';
2670Sstevel@tonic-gate if (isdigit(*p)) {
2680Sstevel@tonic-gate n = 8 * n + *p++ - '0';
2690Sstevel@tonic-gate if (isdigit(*p))
2700Sstevel@tonic-gate n = 8 * n + *p++ - '0';
2710Sstevel@tonic-gate }
2720Sstevel@tonic-gate c = n;
2730Sstevel@tonic-gate } /* else */
2740Sstevel@tonic-gate /* c = c; */
2750Sstevel@tonic-gate } else if (c == '-' && i > 0 && chars[i-1] != 0) {
2760Sstevel@tonic-gate if (*p != 0) {
2770Sstevel@tonic-gate c = chars[i-1];
278*289Snakanon while ((uchar)c < *p) { /* fails if *p is \\ */
279*289Snakanon expand_buf(&chars, &bsize, i);
2800Sstevel@tonic-gate chars[i++] = ++c;
2810Sstevel@tonic-gate }
2820Sstevel@tonic-gate p++;
2830Sstevel@tonic-gate continue;
2840Sstevel@tonic-gate }
2850Sstevel@tonic-gate }
286*289Snakanon expand_buf(&chars, &bsize, i);
2870Sstevel@tonic-gate chars[i++] = c;
2880Sstevel@tonic-gate }
2890Sstevel@tonic-gate chars[i++] = '\0';
290*289Snakanon dprintf(("cclenter: in = |%s|, out = |%s|\n", op, chars));
2910Sstevel@tonic-gate xfree(op);
292*289Snakanon ret = tostring(chars);
293*289Snakanon free(chars);
294*289Snakanon return (ret);
2950Sstevel@tonic-gate }
2960Sstevel@tonic-gate
297*289Snakanon static void
overflo(char * s)298*289Snakanon overflo(char *s)
2990Sstevel@tonic-gate {
300*289Snakanon ERROR "regular expression too big: %s", gettext((char *)s) FATAL;
3010Sstevel@tonic-gate }
3020Sstevel@tonic-gate
303*289Snakanon /* enter follow set of each leaf of vertex v into lfollow[leaf] */
304*289Snakanon static void
cfoll(fa * f,Node * v)305*289Snakanon cfoll(fa *f, Node *v)
3060Sstevel@tonic-gate {
3070Sstevel@tonic-gate register int i;
3080Sstevel@tonic-gate register int *p;
3090Sstevel@tonic-gate
310*289Snakanon switch (type(v)) {
3110Sstevel@tonic-gate LEAF
312*289Snakanon f->re[(int)left(v)].ltype = type(v);
313*289Snakanon f->re[(int)left(v)].lval = (int)right(v);
314*289Snakanon for (i = 0; i <= f->accept; i++)
3150Sstevel@tonic-gate setvec[i] = 0;
3160Sstevel@tonic-gate setcnt = 0;
3170Sstevel@tonic-gate follow(v); /* computes setvec and setcnt */
318*289Snakanon if ((p = (int *)calloc(1, (setcnt+1) * sizeof (int))) == NULL)
3190Sstevel@tonic-gate overflo("follow set overflow");
320*289Snakanon f->re[(int)left(v)].lfollow = p;
3210Sstevel@tonic-gate *p = setcnt;
322*289Snakanon for (i = f->accept; i >= 0; i--) {
323*289Snakanon if (setvec[i] == 1)
324*289Snakanon *++p = i;
325*289Snakanon }
3260Sstevel@tonic-gate break;
3270Sstevel@tonic-gate UNARY
328*289Snakanon cfoll(f, left(v));
3290Sstevel@tonic-gate break;
3300Sstevel@tonic-gate case CAT:
3310Sstevel@tonic-gate case OR:
332*289Snakanon cfoll(f, left(v));
333*289Snakanon cfoll(f, right(v));
3340Sstevel@tonic-gate break;
3350Sstevel@tonic-gate default:
3360Sstevel@tonic-gate ERROR "unknown type %d in cfoll", type(v) FATAL;
3370Sstevel@tonic-gate }
3380Sstevel@tonic-gate }
3390Sstevel@tonic-gate
340*289Snakanon /*
341*289Snakanon * collects initially active leaves of p into setvec
342*289Snakanon * returns 0 or 1 depending on whether p matches empty string
343*289Snakanon */
344*289Snakanon static int
first(Node * p)345*289Snakanon first(Node *p)
3460Sstevel@tonic-gate {
3470Sstevel@tonic-gate register int b;
3480Sstevel@tonic-gate
349*289Snakanon switch (type(p)) {
3500Sstevel@tonic-gate LEAF
351*289Snakanon if (setvec[(int)left(p)] != 1) {
352*289Snakanon setvec[(int)left(p)] = 1;
3530Sstevel@tonic-gate setcnt++;
3540Sstevel@tonic-gate }
355*289Snakanon if (type(p) == CCL && (*(uchar *)right(p)) == '\0')
356*289Snakanon return (0); /* empty CCL */
357*289Snakanon else
358*289Snakanon return (1);
3590Sstevel@tonic-gate case PLUS:
360*289Snakanon if (first(left(p)) == 0)
361*289Snakanon return (0);
362*289Snakanon return (1);
3630Sstevel@tonic-gate case STAR:
3640Sstevel@tonic-gate case QUEST:
365*289Snakanon (void) first(left(p));
366*289Snakanon return (0);
3670Sstevel@tonic-gate case CAT:
368*289Snakanon if (first(left(p)) == 0 && first(right(p)) == 0)
369*289Snakanon return (0);
370*289Snakanon return (1);
3710Sstevel@tonic-gate case OR:
3720Sstevel@tonic-gate b = first(right(p));
373*289Snakanon if (first(left(p)) == 0 || b == 0)
374*289Snakanon return (0);
375*289Snakanon return (1);
3760Sstevel@tonic-gate }
3770Sstevel@tonic-gate ERROR "unknown type %d in first", type(p) FATAL;
378*289Snakanon return (-1);
3790Sstevel@tonic-gate }
3800Sstevel@tonic-gate
381*289Snakanon /* collects leaves that can follow v into setvec */
382*289Snakanon static void
follow(Node * v)383*289Snakanon follow(Node *v)
3840Sstevel@tonic-gate {
3850Sstevel@tonic-gate Node *p;
3860Sstevel@tonic-gate
3870Sstevel@tonic-gate if (type(v) == FINAL)
3880Sstevel@tonic-gate return;
3890Sstevel@tonic-gate p = parent(v);
3900Sstevel@tonic-gate switch (type(p)) {
3910Sstevel@tonic-gate case STAR:
3920Sstevel@tonic-gate case PLUS:
393*289Snakanon (void) first(v);
3940Sstevel@tonic-gate follow(p);
3950Sstevel@tonic-gate return;
3960Sstevel@tonic-gate
3970Sstevel@tonic-gate case OR:
3980Sstevel@tonic-gate case QUEST:
3990Sstevel@tonic-gate follow(p);
4000Sstevel@tonic-gate return;
4010Sstevel@tonic-gate
4020Sstevel@tonic-gate case CAT:
4030Sstevel@tonic-gate if (v == left(p)) { /* v is left child of p */
4040Sstevel@tonic-gate if (first(right(p)) == 0) {
4050Sstevel@tonic-gate follow(p);
4060Sstevel@tonic-gate return;
4070Sstevel@tonic-gate }
408*289Snakanon } else /* v is right child */
4090Sstevel@tonic-gate follow(p);
4100Sstevel@tonic-gate return;
4110Sstevel@tonic-gate default:
4120Sstevel@tonic-gate ERROR "unknown type %d in follow", type(p) FATAL;
4130Sstevel@tonic-gate break;
4140Sstevel@tonic-gate }
4150Sstevel@tonic-gate }
4160Sstevel@tonic-gate
417*289Snakanon static int
member(uchar c,uchar * s)418*289Snakanon member(uchar c, uchar *s) /* is c in s? */
4190Sstevel@tonic-gate {
4200Sstevel@tonic-gate while (*s)
4210Sstevel@tonic-gate if (c == *s++)
422*289Snakanon return (1);
423*289Snakanon return (0);
4240Sstevel@tonic-gate }
4250Sstevel@tonic-gate
4260Sstevel@tonic-gate
427*289Snakanon int
match(fa * f,uchar * p)428*289Snakanon match(fa *f, uchar *p)
4290Sstevel@tonic-gate {
4300Sstevel@tonic-gate register int s, ns;
4310Sstevel@tonic-gate
432*289Snakanon s = f->reset ? makeinit(f, 0) : f->initstat;
4330Sstevel@tonic-gate if (f->out[s])
434*289Snakanon return (1);
4350Sstevel@tonic-gate do {
436*289Snakanon if ((ns = f->gototab[s][*p]) != 0)
437*289Snakanon s = ns;
4380Sstevel@tonic-gate else
439*289Snakanon s = cgoto(f, s, *p);
4400Sstevel@tonic-gate if (f->out[s])
441*289Snakanon return (1);
4420Sstevel@tonic-gate } while (*p++ != 0);
443*289Snakanon return (0);
4440Sstevel@tonic-gate }
4450Sstevel@tonic-gate
446*289Snakanon int
pmatch(fa * f,uchar * p)447*289Snakanon pmatch(fa *f, uchar *p)
4480Sstevel@tonic-gate {
4490Sstevel@tonic-gate register int s, ns;
4500Sstevel@tonic-gate register uchar *q;
4510Sstevel@tonic-gate int i, k;
4520Sstevel@tonic-gate
453*289Snakanon if (f->reset) {
454*289Snakanon f->initstat = s = makeinit(f, 1);
455*289Snakanon } else {
456*289Snakanon s = f->initstat;
457*289Snakanon }
4580Sstevel@tonic-gate patbeg = p;
4590Sstevel@tonic-gate patlen = -1;
4600Sstevel@tonic-gate do {
4610Sstevel@tonic-gate q = p;
4620Sstevel@tonic-gate do {
4630Sstevel@tonic-gate if (f->out[s]) /* final state */
4640Sstevel@tonic-gate patlen = q-p;
465*289Snakanon if ((ns = f->gototab[s][*q]) != 0)
466*289Snakanon s = ns;
4670Sstevel@tonic-gate else
468*289Snakanon s = cgoto(f, s, *q);
469*289Snakanon if (s == 1) { /* no transition */
4700Sstevel@tonic-gate if (patlen >= 0) {
4710Sstevel@tonic-gate patbeg = p;
472*289Snakanon return (1);
473*289Snakanon } else
4740Sstevel@tonic-gate goto nextin; /* no match */
475*289Snakanon }
4760Sstevel@tonic-gate } while (*q++ != 0);
4770Sstevel@tonic-gate if (f->out[s])
478*289Snakanon patlen = q - p - 1; /* don't count $ */
4790Sstevel@tonic-gate if (patlen >= 0) {
4800Sstevel@tonic-gate patbeg = p;
481*289Snakanon return (1);
4820Sstevel@tonic-gate }
4830Sstevel@tonic-gate nextin:
4840Sstevel@tonic-gate s = 2;
4850Sstevel@tonic-gate if (f->reset) {
486*289Snakanon for (i = 2; i <= f->curstat; i++)
4870Sstevel@tonic-gate xfree(f->posns[i]);
488*289Snakanon k = *f->posns[0];
489*289Snakanon if ((f->posns[2] =
490*289Snakanon (int *)calloc(1, (k + 1) * sizeof (int))) == NULL) {
4910Sstevel@tonic-gate overflo("out of space in pmatch");
492*289Snakanon }
493*289Snakanon for (i = 0; i <= k; i++)
4940Sstevel@tonic-gate (f->posns[2])[i] = (f->posns[0])[i];
4950Sstevel@tonic-gate f->initstat = f->curstat = 2;
4960Sstevel@tonic-gate f->out[2] = f->out[0];
497*289Snakanon for (i = 0; i < NCHARS; i++)
4980Sstevel@tonic-gate f->gototab[2][i] = 0;
4990Sstevel@tonic-gate }
5000Sstevel@tonic-gate } while (*p++ != 0);
5010Sstevel@tonic-gate return (0);
5020Sstevel@tonic-gate }
5030Sstevel@tonic-gate
504*289Snakanon int
nematch(fa * f,uchar * p)505*289Snakanon nematch(fa *f, uchar *p)
5060Sstevel@tonic-gate {
5070Sstevel@tonic-gate register int s, ns;
5080Sstevel@tonic-gate register uchar *q;
5090Sstevel@tonic-gate int i, k;
5100Sstevel@tonic-gate
5110Sstevel@tonic-gate if (f->reset) {
512*289Snakanon f->initstat = s = makeinit(f, 1);
5130Sstevel@tonic-gate } else {
5140Sstevel@tonic-gate s = f->initstat;
5150Sstevel@tonic-gate }
5160Sstevel@tonic-gate patlen = -1;
5170Sstevel@tonic-gate while (*p) {
5180Sstevel@tonic-gate q = p;
5190Sstevel@tonic-gate do {
5200Sstevel@tonic-gate if (f->out[s]) /* final state */
5210Sstevel@tonic-gate patlen = q-p;
522*289Snakanon if ((ns = f->gototab[s][*q]) != 0)
523*289Snakanon s = ns;
5240Sstevel@tonic-gate else
525*289Snakanon s = cgoto(f, s, *q);
526*289Snakanon if (s == 1) { /* no transition */
5270Sstevel@tonic-gate if (patlen > 0) {
5280Sstevel@tonic-gate patbeg = p;
529*289Snakanon return (1);
530*289Snakanon } else
5310Sstevel@tonic-gate goto nnextin; /* no nonempty match */
532*289Snakanon }
5330Sstevel@tonic-gate } while (*q++ != 0);
5340Sstevel@tonic-gate if (f->out[s])
535*289Snakanon patlen = q - p - 1; /* don't count $ */
536*289Snakanon if (patlen > 0) {
5370Sstevel@tonic-gate patbeg = p;
538*289Snakanon return (1);
5390Sstevel@tonic-gate }
5400Sstevel@tonic-gate nnextin:
5410Sstevel@tonic-gate s = 2;
5420Sstevel@tonic-gate if (f->reset) {
543*289Snakanon for (i = 2; i <= f->curstat; i++)
5440Sstevel@tonic-gate xfree(f->posns[i]);
545*289Snakanon k = *f->posns[0];
546*289Snakanon if ((f->posns[2] =
547*289Snakanon (int *)calloc(1, (k + 1) * sizeof (int))) == NULL) {
5480Sstevel@tonic-gate overflo("out of state space");
549*289Snakanon }
550*289Snakanon for (i = 0; i <= k; i++)
5510Sstevel@tonic-gate (f->posns[2])[i] = (f->posns[0])[i];
5520Sstevel@tonic-gate f->initstat = f->curstat = 2;
5530Sstevel@tonic-gate f->out[2] = f->out[0];
554*289Snakanon for (i = 0; i < NCHARS; i++)
5550Sstevel@tonic-gate f->gototab[2][i] = 0;
5560Sstevel@tonic-gate }
557*289Snakanon p++;
5580Sstevel@tonic-gate }
5590Sstevel@tonic-gate return (0);
5600Sstevel@tonic-gate }
5610Sstevel@tonic-gate
562*289Snakanon static Node *regexp(void), *primary(void), *concat(Node *);
563*289Snakanon static Node *alt(Node *), *unary(Node *);
5640Sstevel@tonic-gate
565*289Snakanon static Node *
reparse(uchar * p)566*289Snakanon reparse(uchar *p)
5670Sstevel@tonic-gate {
5680Sstevel@tonic-gate /* parses regular expression pointed to by p */
5690Sstevel@tonic-gate /* uses relex() to scan regular expression */
5700Sstevel@tonic-gate Node *np;
5710Sstevel@tonic-gate
572*289Snakanon dprintf(("reparse <%s>\n", p));
5730Sstevel@tonic-gate lastre = prestr = p; /* prestr points to string to be parsed */
5740Sstevel@tonic-gate rtok = relex();
5750Sstevel@tonic-gate if (rtok == '\0')
5760Sstevel@tonic-gate ERROR "empty regular expression" FATAL;
5770Sstevel@tonic-gate np = regexp();
578*289Snakanon if (rtok == '\0') {
579*289Snakanon return (np);
580*289Snakanon } else {
581*289Snakanon ERROR "syntax error in regular expression %s at %s",
582*289Snakanon lastre, prestr FATAL;
583*289Snakanon }
5840Sstevel@tonic-gate /*NOTREACHED*/
585*289Snakanon return (NULL);
5860Sstevel@tonic-gate }
5870Sstevel@tonic-gate
588*289Snakanon static Node *
regexp(void)589*289Snakanon regexp(void)
5900Sstevel@tonic-gate {
5910Sstevel@tonic-gate return (alt(concat(primary())));
5920Sstevel@tonic-gate }
5930Sstevel@tonic-gate
594*289Snakanon static Node *
primary(void)595*289Snakanon primary(void)
5960Sstevel@tonic-gate {
5970Sstevel@tonic-gate Node *np;
5980Sstevel@tonic-gate
5990Sstevel@tonic-gate switch (rtok) {
6000Sstevel@tonic-gate case CHAR:
601*289Snakanon np = op2(CHAR, NIL, (Node *)rlxval);
6020Sstevel@tonic-gate rtok = relex();
6030Sstevel@tonic-gate return (unary(np));
6040Sstevel@tonic-gate case ALL:
6050Sstevel@tonic-gate rtok = relex();
6060Sstevel@tonic-gate return (unary(op2(ALL, NIL, NIL)));
6070Sstevel@tonic-gate case DOT:
6080Sstevel@tonic-gate rtok = relex();
6090Sstevel@tonic-gate return (unary(op2(DOT, NIL, NIL)));
6100Sstevel@tonic-gate case CCL:
611*289Snakanon /*LINTED align*/
612*289Snakanon np = op2(CCL, NIL, (Node *)cclenter(rlxstr));
6130Sstevel@tonic-gate rtok = relex();
6140Sstevel@tonic-gate return (unary(np));
6150Sstevel@tonic-gate case NCCL:
616*289Snakanon /*LINTED align*/
617*289Snakanon np = op2(NCCL, NIL, (Node *)cclenter(rlxstr));
6180Sstevel@tonic-gate rtok = relex();
6190Sstevel@tonic-gate return (unary(np));
6200Sstevel@tonic-gate case '^':
6210Sstevel@tonic-gate rtok = relex();
622*289Snakanon return (unary(op2(CHAR, NIL, (Node *)HAT)));
6230Sstevel@tonic-gate case '$':
6240Sstevel@tonic-gate rtok = relex();
6250Sstevel@tonic-gate return (unary(op2(CHAR, NIL, NIL)));
6260Sstevel@tonic-gate case '(':
6270Sstevel@tonic-gate rtok = relex();
6280Sstevel@tonic-gate if (rtok == ')') { /* special pleading for () */
6290Sstevel@tonic-gate rtok = relex();
630*289Snakanon return (unary(op2(CCL, NIL,
631*289Snakanon /*LINTED align*/
632*289Snakanon (Node *)tostring((uchar *)""))));
6330Sstevel@tonic-gate }
6340Sstevel@tonic-gate np = regexp();
6350Sstevel@tonic-gate if (rtok == ')') {
6360Sstevel@tonic-gate rtok = relex();
6370Sstevel@tonic-gate return (unary(np));
638*289Snakanon } else {
639*289Snakanon ERROR "syntax error in regular expression %s at %s",
640*289Snakanon lastre, prestr FATAL;
6410Sstevel@tonic-gate }
6420Sstevel@tonic-gate default:
643*289Snakanon ERROR "illegal primary in regular expression %s at %s",
644*289Snakanon lastre, prestr FATAL;
6450Sstevel@tonic-gate }
6460Sstevel@tonic-gate /*NOTREACHED*/
647*289Snakanon return (NULL);
6480Sstevel@tonic-gate }
6490Sstevel@tonic-gate
650*289Snakanon static Node *
concat(Node * np)651*289Snakanon concat(Node *np)
6520Sstevel@tonic-gate {
6530Sstevel@tonic-gate switch (rtok) {
6540Sstevel@tonic-gate case CHAR: case DOT: case ALL: case CCL: case NCCL: case '$': case '(':
6550Sstevel@tonic-gate return (concat(op2(CAT, np, primary())));
6560Sstevel@tonic-gate default:
6570Sstevel@tonic-gate return (np);
6580Sstevel@tonic-gate }
6590Sstevel@tonic-gate }
6600Sstevel@tonic-gate
661*289Snakanon static Node *
alt(Node * np)662*289Snakanon alt(Node *np)
6630Sstevel@tonic-gate {
6640Sstevel@tonic-gate if (rtok == OR) {
6650Sstevel@tonic-gate rtok = relex();
6660Sstevel@tonic-gate return (alt(op2(OR, np, concat(primary()))));
6670Sstevel@tonic-gate }
6680Sstevel@tonic-gate return (np);
6690Sstevel@tonic-gate }
6700Sstevel@tonic-gate
671*289Snakanon static Node *
unary(Node * np)672*289Snakanon unary(Node *np)
6730Sstevel@tonic-gate {
6740Sstevel@tonic-gate switch (rtok) {
6750Sstevel@tonic-gate case STAR:
6760Sstevel@tonic-gate rtok = relex();
6770Sstevel@tonic-gate return (unary(op2(STAR, np, NIL)));
6780Sstevel@tonic-gate case PLUS:
6790Sstevel@tonic-gate rtok = relex();
6800Sstevel@tonic-gate return (unary(op2(PLUS, np, NIL)));
6810Sstevel@tonic-gate case QUEST:
6820Sstevel@tonic-gate rtok = relex();
6830Sstevel@tonic-gate return (unary(op2(QUEST, np, NIL)));
6840Sstevel@tonic-gate default:
6850Sstevel@tonic-gate return (np);
6860Sstevel@tonic-gate }
6870Sstevel@tonic-gate }
6880Sstevel@tonic-gate
689*289Snakanon static int
relex(void)690*289Snakanon relex(void) /* lexical analyzer for reparse */
6910Sstevel@tonic-gate {
6920Sstevel@tonic-gate register int c;
693*289Snakanon uchar *cbuf;
6940Sstevel@tonic-gate int clen, cflag;
6950Sstevel@tonic-gate
6960Sstevel@tonic-gate switch (c = *prestr++) {
6970Sstevel@tonic-gate case '|': return OR;
6980Sstevel@tonic-gate case '*': return STAR;
6990Sstevel@tonic-gate case '+': return PLUS;
7000Sstevel@tonic-gate case '?': return QUEST;
7010Sstevel@tonic-gate case '.': return DOT;
7020Sstevel@tonic-gate case '\0': prestr--; return '\0';
7030Sstevel@tonic-gate case '^':
7040Sstevel@tonic-gate case '$':
7050Sstevel@tonic-gate case '(':
7060Sstevel@tonic-gate case ')':
707*289Snakanon return (c);
7080Sstevel@tonic-gate case '\\':
7090Sstevel@tonic-gate if ((c = *prestr++) == 't')
7100Sstevel@tonic-gate c = '\t';
7110Sstevel@tonic-gate else if (c == 'n')
7120Sstevel@tonic-gate c = '\n';
7130Sstevel@tonic-gate else if (c == 'f')
7140Sstevel@tonic-gate c = '\f';
7150Sstevel@tonic-gate else if (c == 'r')
7160Sstevel@tonic-gate c = '\r';
7170Sstevel@tonic-gate else if (c == 'b')
7180Sstevel@tonic-gate c = '\b';
7190Sstevel@tonic-gate else if (c == '\\')
7200Sstevel@tonic-gate c = '\\';
7210Sstevel@tonic-gate else if (isdigit(c)) {
7220Sstevel@tonic-gate int n = c - '0';
7230Sstevel@tonic-gate if (isdigit(*prestr)) {
7240Sstevel@tonic-gate n = 8 * n + *prestr++ - '0';
7250Sstevel@tonic-gate if (isdigit(*prestr))
7260Sstevel@tonic-gate n = 8 * n + *prestr++ - '0';
7270Sstevel@tonic-gate }
7280Sstevel@tonic-gate c = n;
7290Sstevel@tonic-gate } /* else it's now in c */
7300Sstevel@tonic-gate rlxval = c;
731*289Snakanon return (CHAR);
7320Sstevel@tonic-gate default:
7330Sstevel@tonic-gate rlxval = c;
734*289Snakanon return (CHAR);
735*289Snakanon case '[':
7360Sstevel@tonic-gate clen = 0;
7370Sstevel@tonic-gate if (*prestr == '^') {
7380Sstevel@tonic-gate cflag = 1;
7390Sstevel@tonic-gate prestr++;
740*289Snakanon } else
7410Sstevel@tonic-gate cflag = 0;
742*289Snakanon init_buf(&cbuf, NULL, strlen((char *)prestr) * 2 + 1);
7430Sstevel@tonic-gate for (;;) {
7440Sstevel@tonic-gate if ((c = *prestr++) == '\\') {
7450Sstevel@tonic-gate cbuf[clen++] = '\\';
746*289Snakanon if ((c = *prestr++) == '\0') {
747*289Snakanon ERROR
748*289Snakanon "nonterminated character class %s", lastre FATAL;
749*289Snakanon }
7500Sstevel@tonic-gate cbuf[clen++] = c;
7510Sstevel@tonic-gate } else if (c == ']') {
7520Sstevel@tonic-gate cbuf[clen] = 0;
7530Sstevel@tonic-gate rlxstr = tostring(cbuf);
754*289Snakanon free(cbuf);
7550Sstevel@tonic-gate if (cflag == 0)
756*289Snakanon return (CCL);
7570Sstevel@tonic-gate else
758*289Snakanon return (NCCL);
7590Sstevel@tonic-gate } else if (c == '\n') {
760*289Snakanon ERROR "newline in character class %s...",
761*289Snakanon lastre FATAL;
7620Sstevel@tonic-gate } else if (c == '\0') {
763*289Snakanon ERROR "nonterminated character class %s",
764*289Snakanon lastre FATAL;
7650Sstevel@tonic-gate } else
7660Sstevel@tonic-gate cbuf[clen++] = c;
7670Sstevel@tonic-gate }
768*289Snakanon /*NOTREACHED*/
7690Sstevel@tonic-gate }
770*289Snakanon return (0);
7710Sstevel@tonic-gate }
7720Sstevel@tonic-gate
773*289Snakanon static int
cgoto(fa * f,int s,int c)774*289Snakanon cgoto(fa *f, int s, int c)
7750Sstevel@tonic-gate {
7760Sstevel@tonic-gate register int i, j, k;
7770Sstevel@tonic-gate register int *p, *q;
7780Sstevel@tonic-gate
779*289Snakanon for (i = 0; i <= f->accept; i++)
7800Sstevel@tonic-gate setvec[i] = 0;
7810Sstevel@tonic-gate setcnt = 0;
7820Sstevel@tonic-gate /* compute positions of gototab[s,c] into setvec */
7830Sstevel@tonic-gate p = f->posns[s];
784*289Snakanon for (i = 1; i <= *p; i++) {
7850Sstevel@tonic-gate if ((k = f->re[p[i]].ltype) != FINAL) {
786*289Snakanon if (k == CHAR && c == f->re[p[i]].lval ||
787*289Snakanon k == DOT && c != 0 && c != HAT ||
788*289Snakanon k == ALL && c != 0 ||
789*289Snakanon k == CCL &&
790*289Snakanon member(c, (uchar *)f->re[p[i]].lval) ||
791*289Snakanon k == NCCL &&
792*289Snakanon !member(c, (uchar *)f->re[p[i]].lval) &&
793*289Snakanon c != 0 && c != HAT) {
794*289Snakanon q = f->re[p[i]].lfollow;
795*289Snakanon for (j = 1; j <= *q; j++) {
796*289Snakanon if (setvec[q[j]] == 0) {
797*289Snakanon setcnt++;
798*289Snakanon setvec[q[j]] = 1;
7990Sstevel@tonic-gate }
8000Sstevel@tonic-gate }
801*289Snakanon }
8020Sstevel@tonic-gate }
8030Sstevel@tonic-gate }
8040Sstevel@tonic-gate /* determine if setvec is a previous state */
8050Sstevel@tonic-gate tmpset[0] = setcnt;
8060Sstevel@tonic-gate j = 1;
8070Sstevel@tonic-gate for (i = f->accept; i >= 0; i--)
8080Sstevel@tonic-gate if (setvec[i]) {
8090Sstevel@tonic-gate tmpset[j++] = i;
8100Sstevel@tonic-gate }
8110Sstevel@tonic-gate /* tmpset == previous state? */
812*289Snakanon for (i = 1; i <= f->curstat; i++) {
8130Sstevel@tonic-gate p = f->posns[i];
8140Sstevel@tonic-gate if ((k = tmpset[0]) != p[0])
8150Sstevel@tonic-gate goto different;
8160Sstevel@tonic-gate for (j = 1; j <= k; j++)
8170Sstevel@tonic-gate if (tmpset[j] != p[j])
8180Sstevel@tonic-gate goto different;
8190Sstevel@tonic-gate /* setvec is state i */
8200Sstevel@tonic-gate f->gototab[s][c] = i;
821*289Snakanon return (i);
8220Sstevel@tonic-gate different:;
8230Sstevel@tonic-gate }
8240Sstevel@tonic-gate
8250Sstevel@tonic-gate /* add tmpset to current set of states */
8260Sstevel@tonic-gate if (f->curstat >= NSTATES-1) {
8270Sstevel@tonic-gate f->curstat = 2;
8280Sstevel@tonic-gate f->reset = 1;
829*289Snakanon for (i = 2; i < NSTATES; i++)
8300Sstevel@tonic-gate xfree(f->posns[i]);
831*289Snakanon } else
8320Sstevel@tonic-gate ++(f->curstat);
833*289Snakanon for (i = 0; i < NCHARS; i++)
8340Sstevel@tonic-gate f->gototab[f->curstat][i] = 0;
8350Sstevel@tonic-gate xfree(f->posns[f->curstat]);
836*289Snakanon if ((p = (int *)calloc(1, (setcnt + 1) * sizeof (int))) == NULL)
8370Sstevel@tonic-gate overflo("out of space in cgoto");
8380Sstevel@tonic-gate
8390Sstevel@tonic-gate f->posns[f->curstat] = p;
8400Sstevel@tonic-gate f->gototab[s][c] = f->curstat;
8410Sstevel@tonic-gate for (i = 0; i <= setcnt; i++)
8420Sstevel@tonic-gate p[i] = tmpset[i];
8430Sstevel@tonic-gate if (setvec[f->accept])
8440Sstevel@tonic-gate f->out[f->curstat] = 1;
8450Sstevel@tonic-gate else
8460Sstevel@tonic-gate f->out[f->curstat] = 0;
847*289Snakanon return (f->curstat);
8480Sstevel@tonic-gate }
8490Sstevel@tonic-gate
850*289Snakanon static void
freefa(fa * f)851*289Snakanon freefa(fa *f)
8520Sstevel@tonic-gate {
8530Sstevel@tonic-gate
8540Sstevel@tonic-gate register int i;
8550Sstevel@tonic-gate
8560Sstevel@tonic-gate if (f == NULL)
8570Sstevel@tonic-gate return;
858*289Snakanon for (i = 0; i <= f->curstat; i++)
8590Sstevel@tonic-gate xfree(f->posns[i]);
860*289Snakanon for (i = 0; i <= f->accept; i++)
8610Sstevel@tonic-gate xfree(f->re[i].lfollow);
8620Sstevel@tonic-gate xfree(f->restr);
8630Sstevel@tonic-gate xfree(f);
8640Sstevel@tonic-gate }
865