xref: /plan9-contrib/sys/src/libregexp/rregexec.c (revision 219b2ee8daee37f4aad58d63f21287faa8e4ffdc)
1 #include <u.h>
2 #include <libc.h>
3 #include "regexp.h"
4 #include "regcomp.h"
5 
6 /*
7  *  return	0 if no match
8  *		>0 if a match
9  *		<0 if we ran out of _relist space
10  */
11 static int
12 rregexec1(Reprog *progp,	/* program to run */
13 	Rune *bol,		/* string to run machine on */
14 	Resub *mp,		/* subexpression elements */
15 	int ms,			/* number of elements at mp */
16 	Reljunk *j)
17 {
18 	int flag=0;
19 	Reinst *inst;
20 	Relist *tlp;
21 	Rune *s;
22 	int i, checkstart;
23 	Rune r, *rp, *ep;
24 	Relist* tl;		/* This list, next list */
25 	Relist* nl;
26 	Relist* tle;		/* ends of this and next list */
27 	Relist* nle;
28 	int match;
29 
30 	match = 0;
31 	checkstart = j->startchar;
32 	if(mp)
33 		for(i=0; i<ms; i++) {
34 			mp[i].rsp = 0;
35 			mp[i].rep = 0;
36 		}
37 	j->relist[0][0].inst = 0;
38 	j->relist[1][0].inst = 0;
39 
40 	/* Execute machine once for each character, including terminal NUL */
41 	s = j->rstarts;
42 	do{
43 
44 		/* fast check for first char */
45 		if(checkstart) {
46 			switch(j->starttype) {
47 			case RUNE:
48 				while(*s != j->startchar) {
49 					if(*s == 0)
50 						return match;
51 					s++;
52 				}
53 				break;
54 			case BOL:
55 				if(s == bol)
56 					break;
57 				while(*s != '\n') {
58 					if(*s == 0)
59 						return match;
60 					s++;
61 				}
62 				break;
63 			}
64 		}
65 
66 		r = *s;
67 
68 		/* switch run lists */
69 		tl = j->relist[flag];
70 		tle = j->reliste[flag];
71 		nl = j->relist[flag^=1];
72 		nle = j->reliste[flag];
73 		nl->inst = 0;
74 
75 		/* Add first instruction to current list */
76 		_rrenewemptythread(tl, progp->startinst, s);
77 
78 		/* Execute machine until current list is empty */
79 		for(tlp=tl; tlp->inst; tlp++){
80 			if(s == j->reol)
81 				break;
82 
83 			for(inst=tlp->inst; ; inst = inst->next){
84 				switch(inst->type){
85 				case RUNE:	/* regular character */
86 					if(inst->r == r)
87 						if(_renewthread(nl, inst->next, &tlp->se)==nle)
88 							return -1;
89 					break;
90 				case LBRA:
91 					tlp->se.m[inst->subid].rsp = s;
92 					continue;
93 				case RBRA:
94 					tlp->se.m[inst->subid].rep = s;
95 					continue;
96 				case ANY:
97 					if(r != '\n')
98 						if(_renewthread(nl, inst->next, &tlp->se)==nle)
99 							return -1;
100 					break;
101 				case ANYNL:
102 					if(_renewthread(nl, inst->next, &tlp->se)==nle)
103 							return -1;
104 					break;
105 				case BOL:
106 					if(s == bol || *(s-1) == '\n')
107 						continue;
108 					break;
109 				case EOL:
110 					if(r == 0 || r == '\n')
111 						continue;
112 					break;
113 				case CCLASS:
114 					ep = inst->cp->end;
115 					for(rp = inst->cp->spans; rp < ep; rp += 2)
116 						if(r >= rp[0] && r <= rp[1]){
117 							if(_renewthread(nl, inst->next, &tlp->se)==nle)
118 								return -1;
119 							break;
120 						}
121 					break;
122 				case NCCLASS:
123 					ep = inst->cp->end;
124 					for(rp = inst->cp->spans; rp < ep; rp += 2)
125 						if(r >= rp[0] && r <= rp[1])
126 							break;
127 					if(rp == ep)
128 						if(_renewthread(nl, inst->next, &tlp->se)==nle)
129 							return -1;
130 					break;
131 				case OR:
132 					/* evaluate right choice later */
133 					if(_renewthread(tlp, inst->right, &tlp->se) == tle)
134 						return -1;
135 					/* efficiency: advance and re-evaluate */
136 					continue;
137 				case END:	/* Match! */
138 					match = 1;
139 					tlp->se.m[0].rep = s;
140 					if(mp != 0)
141 						_renewmatch(mp, ms, &tlp->se);
142 					break;
143 				}
144 				break;
145 			}
146 		}
147 		checkstart = j->startchar && nl->inst==0;
148 		s++;
149 	}while(r);
150 	return match;
151 }
152 
153 static int
154 rregexec2(Reprog *progp,	/* program to run */
155 	Rune *bol,	/* string to run machine on */
156 	Resub *mp,	/* subexpression elements */
157 	int ms,		/* number of elements at mp */
158 	Reljunk *j
159 )
160 {
161 	Relist relist0[5*LISTSIZE], relist1[5*LISTSIZE];
162 
163 	/* mark space */
164 	j->relist[0] = relist0;
165 	j->relist[1] = relist1;
166 	j->reliste[0] = relist0 + nelem(relist0) - 2;
167 	j->reliste[1] = relist1 + nelem(relist1) - 2;
168 
169 	return rregexec1(progp, bol, mp, ms, j);
170 }
171 
172 extern int
173 rregexec(Reprog *progp,	/* program to run */
174 	Rune *bol,	/* string to run machine on */
175 	Resub *mp,	/* subexpression elements */
176 	int ms)		/* number of elements at mp */
177 {
178 	Reljunk j;
179 	Relist relist0[LISTSIZE], relist1[LISTSIZE];
180 	int rv;
181 
182 	/*
183  	 *  use user-specified starting/ending location if specified
184 	 */
185 	j.rstarts = bol;
186 	j.reol = 0;
187 	if(mp && ms>0){
188 		if(mp->sp)
189 			j.rstarts = mp->rsp;
190 		if(mp->ep)
191 			j.reol = mp->rep;
192 	}
193 	j.starttype = 0;
194 	j.startchar = 0;
195 	if(progp->startinst->type == RUNE && progp->startinst->r < Runeself) {
196 		j.starttype = RUNE;
197 		j.startchar = progp->startinst->r;
198 	}
199 	if(progp->startinst->type == BOL)
200 		j.starttype = BOL;
201 
202 	/* mark space */
203 	j.relist[0] = relist0;
204 	j.relist[1] = relist1;
205 	j.reliste[0] = relist0 + nelem(relist0) - 2;
206 	j.reliste[1] = relist1 + nelem(relist1) - 2;
207 
208 	rv = rregexec1(progp, bol, mp, ms, &j);
209 	if(rv >= 0)
210 		return rv;
211 	rv = rregexec2(progp, bol, mp, ms, &j);
212 	if(rv >= 0)
213 		return rv;
214 	return -1;
215 }
216