1*3e12c5d1SDavid du Colombier #include <stdlib.h>
2*3e12c5d1SDavid du Colombier #include <stdio.h>
3*3e12c5d1SDavid du Colombier #include "regexp.h"
4*3e12c5d1SDavid du Colombier #include "regcomp.h"
5*3e12c5d1SDavid du Colombier
6*3e12c5d1SDavid du Colombier static Resublist sempty; /* empty set of matches */
7*3e12c5d1SDavid du Colombier
8*3e12c5d1SDavid du Colombier /*
9*3e12c5d1SDavid du Colombier * return 0 if no match
10*3e12c5d1SDavid du Colombier * >0 if a match
11*3e12c5d1SDavid du Colombier * <0 if we ran out of _relist space
12*3e12c5d1SDavid du Colombier */
13*3e12c5d1SDavid du Colombier static int
regexec1(Reprog * progp,char * bol,Resub * mp,int ms,char * starts,char * eol,wchar_t startchar)14*3e12c5d1SDavid du Colombier regexec1(Reprog *progp, /* program to run */
15*3e12c5d1SDavid du Colombier char *bol, /* string to run machine on */
16*3e12c5d1SDavid du Colombier Resub *mp, /* subexpression elements */
17*3e12c5d1SDavid du Colombier int ms, /* number of elements at mp */
18*3e12c5d1SDavid du Colombier char *starts,
19*3e12c5d1SDavid du Colombier char *eol,
20*3e12c5d1SDavid du Colombier wchar_t startchar)
21*3e12c5d1SDavid du Colombier {
22*3e12c5d1SDavid du Colombier int flag=0;
23*3e12c5d1SDavid du Colombier Reinst *inst;
24*3e12c5d1SDavid du Colombier Relist *tlp;
25*3e12c5d1SDavid du Colombier char *s;
26*3e12c5d1SDavid du Colombier int i, checkstart;
27*3e12c5d1SDavid du Colombier wchar_t r, *rp, *ep;
28*3e12c5d1SDavid du Colombier int n;
29*3e12c5d1SDavid du Colombier Relist* tl; /* This list, next list */
30*3e12c5d1SDavid du Colombier Relist* nl;
31*3e12c5d1SDavid du Colombier Relist* tle; /* ends of this and next list */
32*3e12c5d1SDavid du Colombier Relist* nle;
33*3e12c5d1SDavid du Colombier int match;
34*3e12c5d1SDavid du Colombier
35*3e12c5d1SDavid du Colombier match = 0;
36*3e12c5d1SDavid du Colombier checkstart = startchar;
37*3e12c5d1SDavid du Colombier sempty.m[0].s.sp = 0;
38*3e12c5d1SDavid du Colombier if(mp!=0)
39*3e12c5d1SDavid du Colombier for(i=0; i<ms; i++)
40*3e12c5d1SDavid du Colombier mp[i].s.sp = mp[i].e.ep = 0;
41*3e12c5d1SDavid du Colombier _relist[0][0].inst = _relist[1][0].inst = 0;
42*3e12c5d1SDavid du Colombier
43*3e12c5d1SDavid du Colombier /* Execute machine once for each character, including terminal NUL */
44*3e12c5d1SDavid du Colombier s = starts;
45*3e12c5d1SDavid du Colombier do{
46*3e12c5d1SDavid du Colombier /* fast check for first char */
47*3e12c5d1SDavid du Colombier r = *(unsigned char*)s;
48*3e12c5d1SDavid du Colombier if(checkstart && r != startchar){
49*3e12c5d1SDavid du Colombier s++;
50*3e12c5d1SDavid du Colombier continue;
51*3e12c5d1SDavid du Colombier }
52*3e12c5d1SDavid du Colombier
53*3e12c5d1SDavid du Colombier if(r < Runeself)
54*3e12c5d1SDavid du Colombier n = 1;
55*3e12c5d1SDavid du Colombier else {
56*3e12c5d1SDavid du Colombier n = mbtowc(&r, s, MB_CUR_MAX);
57*3e12c5d1SDavid du Colombier if (n <= 0)
58*3e12c5d1SDavid du Colombier n = 1;
59*3e12c5d1SDavid du Colombier }
60*3e12c5d1SDavid du Colombier
61*3e12c5d1SDavid du Colombier /* switch run lists */
62*3e12c5d1SDavid du Colombier tl = _relist[flag];
63*3e12c5d1SDavid du Colombier tle = _reliste[flag];
64*3e12c5d1SDavid du Colombier nl = _relist[flag^=1];
65*3e12c5d1SDavid du Colombier nle = _reliste[flag];
66*3e12c5d1SDavid du Colombier nl->inst = 0;
67*3e12c5d1SDavid du Colombier
68*3e12c5d1SDavid du Colombier /* Add first instruction to current list */
69*3e12c5d1SDavid du Colombier if(match == 0){
70*3e12c5d1SDavid du Colombier sempty.m[0].s.sp = s;
71*3e12c5d1SDavid du Colombier _renewthread(tl, progp->startinst, &sempty);
72*3e12c5d1SDavid du Colombier }
73*3e12c5d1SDavid du Colombier
74*3e12c5d1SDavid du Colombier /* Execute machine until current list is empty */
75*3e12c5d1SDavid du Colombier for(tlp=tl; tlp->inst; tlp++){ /* assignment = */
76*3e12c5d1SDavid du Colombier if(s == eol)
77*3e12c5d1SDavid du Colombier break;
78*3e12c5d1SDavid du Colombier
79*3e12c5d1SDavid du Colombier for(inst = tlp->inst; ; inst = inst->l.next){
80*3e12c5d1SDavid du Colombier switch(inst->type){
81*3e12c5d1SDavid du Colombier case RUNE: /* regular character */
82*3e12c5d1SDavid du Colombier if(inst->r.r == r)
83*3e12c5d1SDavid du Colombier if(_renewthread(nl, inst->l.next, &tlp->se)==nle)
84*3e12c5d1SDavid du Colombier return -1;
85*3e12c5d1SDavid du Colombier break;
86*3e12c5d1SDavid du Colombier case LBRA:
87*3e12c5d1SDavid du Colombier tlp->se.m[inst->r.subid].s.sp = s;
88*3e12c5d1SDavid du Colombier continue;
89*3e12c5d1SDavid du Colombier case RBRA:
90*3e12c5d1SDavid du Colombier tlp->se.m[inst->r.subid].e.ep = s;
91*3e12c5d1SDavid du Colombier continue;
92*3e12c5d1SDavid du Colombier case ANY:
93*3e12c5d1SDavid du Colombier if(r != '\n')
94*3e12c5d1SDavid du Colombier if(_renewthread(nl, inst->l.next, &tlp->se)==nle)
95*3e12c5d1SDavid du Colombier return -1;
96*3e12c5d1SDavid du Colombier break;
97*3e12c5d1SDavid du Colombier case ANYNL:
98*3e12c5d1SDavid du Colombier if(_renewthread(nl, inst->l.next, &tlp->se)==nle)
99*3e12c5d1SDavid du Colombier return -1;
100*3e12c5d1SDavid du Colombier break;
101*3e12c5d1SDavid du Colombier case BOL:
102*3e12c5d1SDavid du Colombier if(s == bol || *(s-1) == '\n')
103*3e12c5d1SDavid du Colombier continue;
104*3e12c5d1SDavid du Colombier break;
105*3e12c5d1SDavid du Colombier case EOL:
106*3e12c5d1SDavid du Colombier if(r == 0 || r == '\n')
107*3e12c5d1SDavid du Colombier continue;
108*3e12c5d1SDavid du Colombier break;
109*3e12c5d1SDavid du Colombier case CCLASS:
110*3e12c5d1SDavid du Colombier ep = inst->r.cp->end;
111*3e12c5d1SDavid du Colombier for(rp = inst->r.cp->spans; rp < ep; rp += 2)
112*3e12c5d1SDavid du Colombier if(r >= rp[0] && r <= rp[1]){
113*3e12c5d1SDavid du Colombier if(_renewthread(nl, inst->l.next, &tlp->se)==nle)
114*3e12c5d1SDavid du Colombier return -1;
115*3e12c5d1SDavid du Colombier break;
116*3e12c5d1SDavid du Colombier }
117*3e12c5d1SDavid du Colombier break;
118*3e12c5d1SDavid du Colombier case NCCLASS:
119*3e12c5d1SDavid du Colombier ep = inst->r.cp->end;
120*3e12c5d1SDavid du Colombier for(rp = inst->r.cp->spans; rp < ep; rp += 2)
121*3e12c5d1SDavid du Colombier if(r >= rp[0] && r <= rp[1])
122*3e12c5d1SDavid du Colombier break;
123*3e12c5d1SDavid du Colombier if(rp == ep)
124*3e12c5d1SDavid du Colombier if(_renewthread(nl, inst->l.next, &tlp->se)==nle)
125*3e12c5d1SDavid du Colombier return -1;
126*3e12c5d1SDavid du Colombier break;
127*3e12c5d1SDavid du Colombier case OR:
128*3e12c5d1SDavid du Colombier /* evaluate right choice later */
129*3e12c5d1SDavid du Colombier if(_renewthread(tlp, inst->r.right, &tlp->se) == tle)
130*3e12c5d1SDavid du Colombier return -1;
131*3e12c5d1SDavid du Colombier /* efficiency: advance and re-evaluate */
132*3e12c5d1SDavid du Colombier continue;
133*3e12c5d1SDavid du Colombier case END: /* Match! */
134*3e12c5d1SDavid du Colombier match = 1;
135*3e12c5d1SDavid du Colombier tlp->se.m[0].e.ep = s;
136*3e12c5d1SDavid du Colombier if(mp != 0)
137*3e12c5d1SDavid du Colombier _renewmatch(mp, ms, &tlp->se);
138*3e12c5d1SDavid du Colombier break;
139*3e12c5d1SDavid du Colombier }
140*3e12c5d1SDavid du Colombier break;
141*3e12c5d1SDavid du Colombier }
142*3e12c5d1SDavid du Colombier }
143*3e12c5d1SDavid du Colombier checkstart = startchar && nl->inst==0;
144*3e12c5d1SDavid du Colombier s += n;
145*3e12c5d1SDavid du Colombier }while(r);
146*3e12c5d1SDavid du Colombier return match;
147*3e12c5d1SDavid du Colombier }
148*3e12c5d1SDavid du Colombier
149*3e12c5d1SDavid du Colombier extern int
regexec(Reprog * progp,char * bol,Resub * mp,int ms)150*3e12c5d1SDavid du Colombier regexec(Reprog *progp, /* program to run */
151*3e12c5d1SDavid du Colombier char *bol, /* string to run machine on */
152*3e12c5d1SDavid du Colombier Resub *mp, /* subexpression elements */
153*3e12c5d1SDavid du Colombier int ms) /* number of elements at mp */
154*3e12c5d1SDavid du Colombier {
155*3e12c5d1SDavid du Colombier char *starts; /* where to start match */
156*3e12c5d1SDavid du Colombier char *eol; /* where to end match */
157*3e12c5d1SDavid du Colombier wchar_t startchar;
158*3e12c5d1SDavid du Colombier int rv;
159*3e12c5d1SDavid du Colombier
160*3e12c5d1SDavid du Colombier /*
161*3e12c5d1SDavid du Colombier * use user-specified starting/ending location if specified
162*3e12c5d1SDavid du Colombier */
163*3e12c5d1SDavid du Colombier starts = bol;
164*3e12c5d1SDavid du Colombier eol = 0;
165*3e12c5d1SDavid du Colombier if(mp && ms>0){
166*3e12c5d1SDavid du Colombier if(mp->s.sp)
167*3e12c5d1SDavid du Colombier starts = mp->s.sp;
168*3e12c5d1SDavid du Colombier if(mp->e.ep)
169*3e12c5d1SDavid du Colombier eol = mp->e.ep;
170*3e12c5d1SDavid du Colombier }
171*3e12c5d1SDavid du Colombier startchar = (progp->startinst->type == RUNE && progp->startinst->r.r < Runeself)
172*3e12c5d1SDavid du Colombier ? progp->startinst->r.r : 0;
173*3e12c5d1SDavid du Colombier
174*3e12c5d1SDavid du Colombier /* keep trying till we have enough list space to terminate */
175*3e12c5d1SDavid du Colombier for(;;){
176*3e12c5d1SDavid du Colombier if(_relist[0] == 0){
177*3e12c5d1SDavid du Colombier _relist[0] = malloc(2*_relistsize*sizeof(Relist));
178*3e12c5d1SDavid du Colombier _relist[1] = _relist[0] + _relistsize;
179*3e12c5d1SDavid du Colombier _reliste[0] = _relist[0] + _relistsize - 1;
180*3e12c5d1SDavid du Colombier _reliste[1] = _relist[1] + _relistsize - 1;
181*3e12c5d1SDavid du Colombier if(_relist[0] == 0)
182*3e12c5d1SDavid du Colombier regerror("_relist overflow");
183*3e12c5d1SDavid du Colombier }
184*3e12c5d1SDavid du Colombier rv = regexec1(progp, bol, mp, ms, starts, eol, startchar);
185*3e12c5d1SDavid du Colombier if(rv >= 0)
186*3e12c5d1SDavid du Colombier return rv;
187*3e12c5d1SDavid du Colombier free(_relist[0]);
188*3e12c5d1SDavid du Colombier _relist[0] = 0;
189*3e12c5d1SDavid du Colombier _relistsize += LISTINCREMENT;
190*3e12c5d1SDavid du Colombier }
191*3e12c5d1SDavid du Colombier }
192