xref: /plan9/sys/src/libregexp/rregexec.c (revision e34dbc32159cf6b992e3c875383330eae3abc0be)
1 #include <u.h>
2 #include <libc.h>
3 #include "regexp.h"
4 #include "regcomp.h"
5 
6 /*
7  *  return	0 if no match
8  *		>0 if a match
9  *		<0 if we ran out of _relist space
10  */
11 static int
12 rregexec1(Reprog *progp,	/* program to run */
13 	Rune *bol,		/* string to run machine on */
14 	Resub *mp,		/* subexpression elements */
15 	int ms,			/* number of elements at mp */
16 	Reljunk *j)
17 {
18 	int flag=0;
19 	Reinst *inst;
20 	Relist *tlp;
21 	Rune *s;
22 	int i, checkstart;
23 	Rune r, *rp, *ep;
24 	Relist* tl;		/* This list, next list */
25 	Relist* nl;
26 	Relist* tle;		/* ends of this and next list */
27 	Relist* nle;
28 	int match;
29 
30 	match = 0;
31 	checkstart = j->startchar;
32 	if(mp)
33 		for(i=0; i<ms; i++) {
34 			mp[i].rsp = 0;
35 			mp[i].rep = 0;
36 		}
37 	j->relist[0][0].inst = 0;
38 	j->relist[1][0].inst = 0;
39 
40 	/* Execute machine once for each character, including terminal NUL */
41 	s = j->rstarts;
42 	do{
43 
44 		/* fast check for first char */
45 		if(checkstart) {
46 			switch(j->starttype) {
47 			case RUNE:
48 				while(*s != j->startchar) {
49 					if(*s == 0 || s == j->reol)
50 						return match;
51 					s++;
52 				}
53 				break;
54 			case BOL:
55 				if(s == bol)
56 					break;
57 				while(*s != '\n') {
58 					if(*s == 0 || s == j->reol)
59 						return match;
60 					s++;
61 				}
62 				break;
63 			}
64 		}
65 
66 		r = *s;
67 
68 		/* switch run lists */
69 		tl = j->relist[flag];
70 		tle = j->reliste[flag];
71 		nl = j->relist[flag^=1];
72 		nle = j->reliste[flag];
73 		nl->inst = 0;
74 
75 		/* Add first instruction to current list */
76 		_rrenewemptythread(tl, progp->startinst, ms, s);
77 
78 		/* Execute machine until current list is empty */
79 		for(tlp=tl; tlp->inst; tlp++){
80 			for(inst=tlp->inst; ; inst = inst->next){
81 				switch(inst->type){
82 				case RUNE:	/* regular character */
83 					if(inst->r == r)
84 						if(_renewthread(nl, inst->next, ms, &tlp->se)==nle)
85 							return -1;
86 					break;
87 				case LBRA:
88 					tlp->se.m[inst->subid].rsp = s;
89 					continue;
90 				case RBRA:
91 					tlp->se.m[inst->subid].rep = s;
92 					continue;
93 				case ANY:
94 					if(r != '\n')
95 						if(_renewthread(nl, inst->next, ms, &tlp->se)==nle)
96 							return -1;
97 					break;
98 				case ANYNL:
99 					if(_renewthread(nl, inst->next, ms, &tlp->se)==nle)
100 							return -1;
101 					break;
102 				case BOL:
103 					if(s == bol || *(s-1) == '\n')
104 						continue;
105 					break;
106 				case EOL:
107 					if(s == j->reol || r == 0 || r == '\n')
108 						continue;
109 					break;
110 				case CCLASS:
111 					ep = inst->cp->end;
112 					for(rp = inst->cp->spans; rp < ep; rp += 2)
113 						if(r >= rp[0] && r <= rp[1]){
114 							if(_renewthread(nl, inst->next, ms, &tlp->se)==nle)
115 								return -1;
116 							break;
117 						}
118 					break;
119 				case NCCLASS:
120 					ep = inst->cp->end;
121 					for(rp = inst->cp->spans; rp < ep; rp += 2)
122 						if(r >= rp[0] && r <= rp[1])
123 							break;
124 					if(rp == ep)
125 						if(_renewthread(nl, inst->next, ms, &tlp->se)==nle)
126 							return -1;
127 					break;
128 				case OR:
129 					/* evaluate right choice later */
130 					if(_renewthread(tlp, inst->right, ms, &tlp->se) == tle)
131 						return -1;
132 					/* efficiency: advance and re-evaluate */
133 					continue;
134 				case END:	/* Match! */
135 					match = 1;
136 					tlp->se.m[0].rep = s;
137 					if(mp != 0)
138 						_renewmatch(mp, ms, &tlp->se);
139 					break;
140 				}
141 				break;
142 			}
143 		}
144 		if(s == j->reol)
145 			break;
146 		checkstart = j->startchar && nl->inst==0;
147 		s++;
148 	}while(r);
149 	return match;
150 }
151 
152 static int
153 rregexec2(Reprog *progp,	/* program to run */
154 	Rune *bol,	/* string to run machine on */
155 	Resub *mp,	/* subexpression elements */
156 	int ms,		/* number of elements at mp */
157 	Reljunk *j
158 )
159 {
160 	Relist relist0[5*LISTSIZE], relist1[5*LISTSIZE];
161 
162 	/* mark space */
163 	j->relist[0] = relist0;
164 	j->relist[1] = relist1;
165 	j->reliste[0] = relist0 + nelem(relist0) - 2;
166 	j->reliste[1] = relist1 + nelem(relist1) - 2;
167 
168 	return rregexec1(progp, bol, mp, ms, j);
169 }
170 
171 extern int
172 rregexec(Reprog *progp,	/* program to run */
173 	Rune *bol,	/* string to run machine on */
174 	Resub *mp,	/* subexpression elements */
175 	int ms)		/* number of elements at mp */
176 {
177 	Reljunk j;
178 	Relist relist0[LISTSIZE], relist1[LISTSIZE];
179 	int rv;
180 
181 	/*
182  	 *  use user-specified starting/ending location if specified
183 	 */
184 	j.rstarts = bol;
185 	j.reol = 0;
186 	if(mp && ms>0){
187 		if(mp->sp)
188 			j.rstarts = mp->rsp;
189 		if(mp->ep)
190 			j.reol = mp->rep;
191 	}
192 	j.starttype = 0;
193 	j.startchar = 0;
194 	if(progp->startinst->type == RUNE && progp->startinst->r < Runeself) {
195 		j.starttype = RUNE;
196 		j.startchar = progp->startinst->r;
197 	}
198 	if(progp->startinst->type == BOL)
199 		j.starttype = BOL;
200 
201 	/* mark space */
202 	j.relist[0] = relist0;
203 	j.relist[1] = relist1;
204 	j.reliste[0] = relist0 + nelem(relist0) - 2;
205 	j.reliste[1] = relist1 + nelem(relist1) - 2;
206 
207 	rv = rregexec1(progp, bol, mp, ms, &j);
208 	if(rv >= 0)
209 		return rv;
210 	rv = rregexec2(progp, bol, mp, ms, &j);
211 	if(rv >= 0)
212 		return rv;
213 	return -1;
214 }
215