xref: /plan9/sys/src/libregexp/regexec.c (revision 3e12c5d1bb89fc02707907988834ef147769ddaf)
1 #include <u.h>
2 #include <libc.h>
3 #include "regexp.h"
4 #include "regcomp.h"
5 
6 static Resublist sempty;		/* empty set of matches */
7 
8 /*
9  *  return	0 if no match
10  *		>0 if a match
11  *		<0 if we ran out of _relist space
12  */
13 static int
14 regexec1(Reprog *progp,	/* program to run */
15 	char *bol,	/* string to run machine on */
16 	Resub *mp,	/* subexpression elements */
17 	int ms,		/* number of elements at mp */
18 	char *starts,
19 	char *eol,
20 	Rune startchar)
21 {
22 	int flag=0;
23 	Reinst *inst;
24 	Relist *tlp;
25 	char *s;
26 	int i, checkstart;
27 	Rune r, *rp, *ep;
28 	int n;
29 	Relist* tl;		/* This list, next list */
30 	Relist* nl;
31 	Relist* tle;		/* ends of this and next list */
32 	Relist* nle;
33 	int match;
34 	char *p;
35 
36 	match = 0;
37 	checkstart = startchar;
38 	sempty.m[0].sp = 0;
39 	if(mp!=0)
40 		for(i=0; i<ms; i++)
41 			mp[i].sp = mp[i].ep = 0;
42 	_relist[0][0].inst = _relist[1][0].inst = 0;
43 
44 	/* Execute machine once for each character, including terminal NUL */
45 	s = starts;
46 	do{
47 		/* fast check for first char */
48 		if(checkstart){
49 			p = utfrune(s, startchar);
50 			if(p == 0)
51 				return match;
52 			s = p;
53 		}
54 		r = *(uchar*)s;
55 		if(r < Runeself)
56 			n = 1;
57 		else
58 			n = chartorune(&r, s);
59 
60 		/* switch run lists */
61 		tl = _relist[flag];
62 		tle = _reliste[flag];
63 		nl = _relist[flag^=1];
64 		nle = _reliste[flag];
65 		nl->inst = 0;
66 
67 		/* Add first instruction to current list */
68 		if(match == 0){
69 			sempty.m[0].sp = s;
70 			_renewthread(tl, progp->startinst, &sempty);
71 		}
72 
73 		/* Execute machine until current list is empty */
74 		for(tlp=tl; tlp->inst; tlp++){	/* assignment = */
75 			if(s == eol)
76 				break;
77 
78 			for(inst = tlp->inst; ; inst = inst->next){
79 				switch(inst->type){
80 				case RUNE:	/* regular character */
81 					if(inst->r == r)
82 						if(_renewthread(nl, inst->next, &tlp->se)==nle)
83 							return -1;
84 					break;
85 				case LBRA:
86 					tlp->se.m[inst->subid].sp = s;
87 					continue;
88 				case RBRA:
89 					tlp->se.m[inst->subid].ep = s;
90 					continue;
91 				case ANY:
92 					if(r != '\n')
93 						if(_renewthread(nl, inst->next, &tlp->se)==nle)
94 							return -1;
95 					break;
96 				case ANYNL:
97 					if(_renewthread(nl, inst->next, &tlp->se)==nle)
98 							return -1;
99 					break;
100 				case BOL:
101 					if(s == bol || *(s-1) == '\n')
102 						continue;
103 					break;
104 				case EOL:
105 					if(r == 0 || r == '\n')
106 						continue;
107 					break;
108 				case CCLASS:
109 					ep = inst->cp->end;
110 					for(rp = inst->cp->spans; rp < ep; rp += 2)
111 						if(r >= rp[0] && r <= rp[1]){
112 							if(_renewthread(nl, inst->next, &tlp->se)==nle)
113 								return -1;
114 							break;
115 						}
116 					break;
117 				case NCCLASS:
118 					ep = inst->cp->end;
119 					for(rp = inst->cp->spans; rp < ep; rp += 2)
120 						if(r >= rp[0] && r <= rp[1])
121 							break;
122 					if(rp == ep)
123 						if(_renewthread(nl, inst->next, &tlp->se)==nle)
124 							return -1;
125 					break;
126 				case OR:
127 					/* evaluate right choice later */
128 					if(_renewthread(tlp, inst->right, &tlp->se) == tle)
129 						return -1;
130 					/* efficiency: advance and re-evaluate */
131 					continue;
132 				case END:	/* Match! */
133 					match = 1;
134 					tlp->se.m[0].ep = s;
135 					if(mp != 0)
136 						_renewmatch(mp, ms, &tlp->se);
137 					break;
138 				}
139 				break;
140 			}
141 		}
142 		checkstart = startchar && nl->inst==0;
143 		s += n;
144 	}while(r);
145 	return match;
146 }
147 
148 extern int
149 regexec(Reprog *progp,	/* program to run */
150 	char *bol,	/* string to run machine on */
151 	Resub *mp,	/* subexpression elements */
152 	int ms)		/* number of elements at mp */
153 {
154 	char *starts;	/* where to start match */
155 	char *eol;	/* where to end match */
156 	Rune startchar;
157 	int rv;
158 
159 	/*
160  	 *  use user-specified starting/ending location if specified
161 	 */
162 	starts = bol;
163 	eol = 0;
164 	if(mp && ms>0){
165 		if(mp->sp)
166 			starts = mp->sp;
167 		if(mp->ep)
168 			eol = mp->ep;
169 	}
170 	startchar = (progp->startinst->type == RUNE && progp->startinst->r < Runeself)
171 		? progp->startinst->r : 0;
172 
173 	/* keep trying till we have enough list space to terminate */
174 	for(;;){
175 		if(_relist[0] == 0){
176 			_relist[0] = malloc(2*_relistsize*sizeof(Relist));
177 			_relist[1] = _relist[0] + _relistsize;
178 			_reliste[0] = _relist[0] + _relistsize - 1;
179 			_reliste[1] = _relist[1] + _relistsize - 1;
180 			if(_relist[0] == 0)
181 				regerror("_relist overflow");
182 		}
183 		rv = regexec1(progp, bol, mp, ms, starts, eol, startchar);
184 		if(rv >= 0)
185 			return rv;
186 		free(_relist[0]);
187 		_relist[0] = 0;
188 		_relistsize += LISTINCREMENT;
189 	}
190 	return -1;
191 }
192