xref: /plan9/sys/src/libregexp/rregexec.c (revision e7f22645def4e2dcbe46c9676efd757ebbfcef69)
1 #include <u.h>
2 #include <libc.h>
3 #include "regexp.h"
4 #include "regcomp.h"
5 
6 /*
7  *  return	0 if no match
8  *		>0 if a match
9  *		<0 if we ran out of _relist space
10  */
11 static int
rregexec1(Reprog * progp,Rune * bol,Resub * mp,int ms,Reljunk * j)12 rregexec1(Reprog *progp,	/* program to run */
13 	Rune *bol,		/* string to run machine on */
14 	Resub *mp,		/* subexpression elements */
15 	int ms,			/* number of elements at mp */
16 	Reljunk *j)
17 {
18 	int flag=0;
19 	Reinst *inst;
20 	Relist *tlp;
21 	Rune *s;
22 	int i, checkstart;
23 	Rune r, *rp, *ep;
24 	Relist* tl;		/* This list, next list */
25 	Relist* nl;
26 	Relist* tle;		/* ends of this and next list */
27 	Relist* nle;
28 	int match;
29 	Rune *p;
30 
31 	match = 0;
32 	checkstart = j->startchar;
33 	if(mp)
34 		for(i=0; i<ms; i++) {
35 			mp[i].rsp = 0;
36 			mp[i].rep = 0;
37 		}
38 	j->relist[0][0].inst = 0;
39 	j->relist[1][0].inst = 0;
40 
41 	/* Execute machine once for each character, including terminal NUL */
42 	s = j->rstarts;
43 	do{
44 		/* fast check for first char */
45 		if(checkstart) {
46 			switch(j->starttype) {
47 			case RUNE:
48 				p = runestrchr(s, j->startchar);
49 				if(p == 0 || s == j->reol)
50 					return match;
51 				s = p;
52 				break;
53 			case BOL:
54 				if(s == bol)
55 					break;
56 				p = runestrchr(s, '\n');
57 				if(p == 0 || s == j->reol)
58 					return match;
59 				s = p+1;
60 				break;
61 			}
62 		}
63 
64 		r = *s;
65 
66 		/* switch run lists */
67 		tl = j->relist[flag];
68 		tle = j->reliste[flag];
69 		nl = j->relist[flag^=1];
70 		nle = j->reliste[flag];
71 		nl->inst = 0;
72 
73 		/* Add first instruction to current list */
74 		_rrenewemptythread(tl, progp->startinst, ms, s);
75 
76 		/* Execute machine until current list is empty */
77 		for(tlp=tl; tlp->inst; tlp++){
78 			for(inst=tlp->inst; ; inst = inst->next){
79 				switch(inst->type){
80 				case RUNE:	/* regular character */
81 					if(inst->r == r)
82 						if(_renewthread(nl, inst->next, ms, &tlp->se)==nle)
83 							return -1;
84 					break;
85 				case LBRA:
86 					tlp->se.m[inst->subid].rsp = s;
87 					continue;
88 				case RBRA:
89 					tlp->se.m[inst->subid].rep = s;
90 					continue;
91 				case ANY:
92 					if(r != '\n')
93 						if(_renewthread(nl, inst->next, ms, &tlp->se)==nle)
94 							return -1;
95 					break;
96 				case ANYNL:
97 					if(_renewthread(nl, inst->next, ms, &tlp->se)==nle)
98 							return -1;
99 					break;
100 				case BOL:
101 					if(s == bol || *(s-1) == '\n')
102 						continue;
103 					break;
104 				case EOL:
105 					if(s == j->reol || r == 0 || r == '\n')
106 						continue;
107 					break;
108 				case CCLASS:
109 					ep = inst->cp->end;
110 					for(rp = inst->cp->spans; rp < ep; rp += 2)
111 						if(r >= rp[0] && r <= rp[1]){
112 							if(_renewthread(nl, inst->next, ms, &tlp->se)==nle)
113 								return -1;
114 							break;
115 						}
116 					break;
117 				case NCCLASS:
118 					ep = inst->cp->end;
119 					for(rp = inst->cp->spans; rp < ep; rp += 2)
120 						if(r >= rp[0] && r <= rp[1])
121 							break;
122 					if(rp == ep)
123 						if(_renewthread(nl, inst->next, ms, &tlp->se)==nle)
124 							return -1;
125 					break;
126 				case OR:
127 					/* evaluate right choice later */
128 					if(_renewthread(tlp, inst->right, ms, &tlp->se) == tle)
129 						return -1;
130 					/* efficiency: advance and re-evaluate */
131 					continue;
132 				case END:	/* Match! */
133 					match = 1;
134 					tlp->se.m[0].rep = s;
135 					if(mp != 0)
136 						_renewmatch(mp, ms, &tlp->se);
137 					break;
138 				}
139 				break;
140 			}
141 		}
142 		if(s == j->reol)
143 			break;
144 		checkstart = j->startchar && nl->inst==0;
145 		s++;
146 	}while(r);
147 	return match;
148 }
149 
150 static int
rregexec2(Reprog * progp,Rune * bol,Resub * mp,int ms,Reljunk * j)151 rregexec2(Reprog *progp,	/* program to run */
152 	Rune *bol,	/* string to run machine on */
153 	Resub *mp,	/* subexpression elements */
154 	int ms,		/* number of elements at mp */
155 	Reljunk *j
156 )
157 {
158 	Relist relist0[5*LISTSIZE], relist1[5*LISTSIZE];
159 
160 	/* mark space */
161 	j->relist[0] = relist0;
162 	j->relist[1] = relist1;
163 	j->reliste[0] = relist0 + nelem(relist0) - 2;
164 	j->reliste[1] = relist1 + nelem(relist1) - 2;
165 
166 	return rregexec1(progp, bol, mp, ms, j);
167 }
168 
169 extern int
rregexec(Reprog * progp,Rune * bol,Resub * mp,int ms)170 rregexec(Reprog *progp,	/* program to run */
171 	Rune *bol,	/* string to run machine on */
172 	Resub *mp,	/* subexpression elements */
173 	int ms)		/* number of elements at mp */
174 {
175 	Reljunk j;
176 	Relist relist0[LISTSIZE], relist1[LISTSIZE];
177 	int rv;
178 
179 	/*
180  	 *  use user-specified starting/ending location if specified
181 	 */
182 	j.rstarts = bol;
183 	j.reol = 0;
184 	if(mp && ms>0){
185 		if(mp->sp)
186 			j.rstarts = mp->rsp;
187 		if(mp->ep)
188 			j.reol = mp->rep;
189 	}
190 	j.starttype = 0;
191 	j.startchar = 0;
192 	if(progp->startinst->type == RUNE && progp->startinst->r < Runeself) {
193 		j.starttype = RUNE;
194 		j.startchar = progp->startinst->r;
195 	}
196 	if(progp->startinst->type == BOL)
197 		j.starttype = BOL;
198 
199 	/* mark space */
200 	j.relist[0] = relist0;
201 	j.relist[1] = relist1;
202 	j.reliste[0] = relist0 + nelem(relist0) - 2;
203 	j.reliste[1] = relist1 + nelem(relist1) - 2;
204 
205 	rv = rregexec1(progp, bol, mp, ms, &j);
206 	if(rv >= 0)
207 		return rv;
208 	rv = rregexec2(progp, bol, mp, ms, &j);
209 	if(rv >= 0)
210 		return rv;
211 	return -1;
212 }
213