xref: /plan9/sys/src/ape/lib/regexp/regexec.c (revision 3e12c5d1bb89fc02707907988834ef147769ddaf)
1 #include <stdlib.h>
2 #include <stdio.h>
3 #include "regexp.h"
4 #include "regcomp.h"
5 
6 static Resublist sempty;		/* empty set of matches */
7 
8 /*
9  *  return	0 if no match
10  *		>0 if a match
11  *		<0 if we ran out of _relist space
12  */
13 static int
regexec1(Reprog * progp,char * bol,Resub * mp,int ms,char * starts,char * eol,wchar_t startchar)14 regexec1(Reprog *progp,	/* program to run */
15 	char *bol,	/* string to run machine on */
16 	Resub *mp,	/* subexpression elements */
17 	int ms,		/* number of elements at mp */
18 	char *starts,
19 	char *eol,
20 	wchar_t startchar)
21 {
22 	int flag=0;
23 	Reinst *inst;
24 	Relist *tlp;
25 	char *s;
26 	int i, checkstart;
27 	wchar_t r, *rp, *ep;
28 	int n;
29 	Relist* tl;		/* This list, next list */
30 	Relist* nl;
31 	Relist* tle;		/* ends of this and next list */
32 	Relist* nle;
33 	int match;
34 
35 	match = 0;
36 	checkstart = startchar;
37 	sempty.m[0].s.sp = 0;
38 	if(mp!=0)
39 		for(i=0; i<ms; i++)
40 			mp[i].s.sp = mp[i].e.ep = 0;
41 	_relist[0][0].inst = _relist[1][0].inst = 0;
42 
43 	/* Execute machine once for each character, including terminal NUL */
44 	s = starts;
45 	do{
46 		/* fast check for first char */
47 		r = *(unsigned char*)s;
48 		if(checkstart && r != startchar){
49 			s++;
50 			continue;
51 		}
52 
53 		if(r < Runeself)
54 			n = 1;
55 		else {
56 			n = mbtowc(&r, s, MB_CUR_MAX);
57 			if (n <= 0)
58 				n = 1;
59 		}
60 
61 		/* switch run lists */
62 		tl = _relist[flag];
63 		tle = _reliste[flag];
64 		nl = _relist[flag^=1];
65 		nle = _reliste[flag];
66 		nl->inst = 0;
67 
68 		/* Add first instruction to current list */
69 		if(match == 0){
70 			sempty.m[0].s.sp = s;
71 			_renewthread(tl, progp->startinst, &sempty);
72 		}
73 
74 		/* Execute machine until current list is empty */
75 		for(tlp=tl; tlp->inst; tlp++){	/* assignment = */
76 			if(s == eol)
77 				break;
78 
79 			for(inst = tlp->inst; ; inst = inst->l.next){
80 				switch(inst->type){
81 				case RUNE:	/* regular character */
82 					if(inst->r.r == r)
83 						if(_renewthread(nl, inst->l.next, &tlp->se)==nle)
84 							return -1;
85 					break;
86 				case LBRA:
87 					tlp->se.m[inst->r.subid].s.sp = s;
88 					continue;
89 				case RBRA:
90 					tlp->se.m[inst->r.subid].e.ep = s;
91 					continue;
92 				case ANY:
93 					if(r != '\n')
94 						if(_renewthread(nl, inst->l.next, &tlp->se)==nle)
95 							return -1;
96 					break;
97 				case ANYNL:
98 					if(_renewthread(nl, inst->l.next, &tlp->se)==nle)
99 						return -1;
100 					break;
101 				case BOL:
102 					if(s == bol || *(s-1) == '\n')
103 						continue;
104 					break;
105 				case EOL:
106 					if(r == 0 || r == '\n')
107 						continue;
108 					break;
109 				case CCLASS:
110 					ep = inst->r.cp->end;
111 					for(rp = inst->r.cp->spans; rp < ep; rp += 2)
112 						if(r >= rp[0] && r <= rp[1]){
113 							if(_renewthread(nl, inst->l.next, &tlp->se)==nle)
114 								return -1;
115 							break;
116 						}
117 					break;
118 				case NCCLASS:
119 					ep = inst->r.cp->end;
120 					for(rp = inst->r.cp->spans; rp < ep; rp += 2)
121 						if(r >= rp[0] && r <= rp[1])
122 							break;
123 					if(rp == ep)
124 						if(_renewthread(nl, inst->l.next, &tlp->se)==nle)
125 							return -1;
126 					break;
127 				case OR:
128 					/* evaluate right choice later */
129 					if(_renewthread(tlp, inst->r.right, &tlp->se) == tle)
130 						return -1;
131 					/* efficiency: advance and re-evaluate */
132 					continue;
133 				case END:	/* Match! */
134 					match = 1;
135 					tlp->se.m[0].e.ep = s;
136 					if(mp != 0)
137 						_renewmatch(mp, ms, &tlp->se);
138 					break;
139 				}
140 				break;
141 			}
142 		}
143 		checkstart = startchar && nl->inst==0;
144 		s += n;
145 	}while(r);
146 	return match;
147 }
148 
149 extern int
regexec(Reprog * progp,char * bol,Resub * mp,int ms)150 regexec(Reprog *progp,	/* program to run */
151 	char *bol,	/* string to run machine on */
152 	Resub *mp,	/* subexpression elements */
153 	int ms)		/* number of elements at mp */
154 {
155 	char *starts;	/* where to start match */
156 	char *eol;	/* where to end match */
157 	wchar_t startchar;
158 	int rv;
159 
160 	/*
161  	 *  use user-specified starting/ending location if specified
162 	 */
163 	starts = bol;
164 	eol = 0;
165 	if(mp && ms>0){
166 		if(mp->s.sp)
167 			starts = mp->s.sp;
168 		if(mp->e.ep)
169 			eol = mp->e.ep;
170 	}
171 	startchar = (progp->startinst->type == RUNE && progp->startinst->r.r < Runeself)
172 		? progp->startinst->r.r : 0;
173 
174 	/* keep trying till we have enough list space to terminate */
175 	for(;;){
176 		if(_relist[0] == 0){
177 			_relist[0] = malloc(2*_relistsize*sizeof(Relist));
178 			_relist[1] = _relist[0] + _relistsize;
179 			_reliste[0] = _relist[0] + _relistsize - 1;
180 			_reliste[1] = _relist[1] + _relistsize - 1;
181 			if(_relist[0] == 0)
182 				regerror("_relist overflow");
183 		}
184 		rv = regexec1(progp, bol, mp, ms, starts, eol, startchar);
185 		if(rv >= 0)
186 			return rv;
187 		free(_relist[0]);
188 		_relist[0] = 0;
189 		_relistsize += LISTINCREMENT;
190 	}
191 }
192