xref: /plan9/sys/src/ape/lib/regexp/rregexec.c (revision 781103c4074deb8af160e8a0da2742ba6b29dc2b)
1 #include <stdlib.h>
2 #include <stdio.h>
3 #include "regexp.h"
4 #include "regcomp.h"
5 
6 static Resublist sempty;		/* empty set of matches */
7 
8 /*
9  *  return	0 if no match
10  *		>0 if a match
11  *		<0 if we ran out of _relist space
12  */
13 static int
rregexec1(Reprog * progp,wchar_t * bol,Resub * mp,int ms,wchar_t * starts,wchar_t * eol,wchar_t startchar)14 rregexec1(Reprog *progp,	/* program to run */
15 	wchar_t *bol,		/* string to run machine on */
16 	Resub *mp,		/* subexpression elements */
17 	int ms,			/* number of elements at mp */
18 	wchar_t *starts,
19 	wchar_t *eol,
20 	wchar_t startchar)
21 {
22 	int flag=0;
23 	Reinst *inst;
24 	Relist *tlp;
25 	wchar_t *s;
26 	int i, checkstart;
27 	wchar_t r, *rp, *ep;
28 	Relist* tl;		/* This list, next list */
29 	Relist* nl;
30 	Relist* tle;		/* ends of this and next list */
31 	Relist* nle;
32 	int match;
33 
34 	match = 0;
35 	checkstart = startchar;
36 	sempty.m[0].s.rsp = 0;
37 	if(mp!=0)
38 		for(i=0; i<ms; i++)
39 			mp[i].s.rsp = mp[i].e.rep = 0;
40 	_relist[0][0].inst = _relist[1][0].inst = 0;
41 
42 	/* Execute machine once for each character, including terminal NUL */
43 	s = starts;
44 	do{
45 		r = *s;
46 
47 		/* fast check for first char */
48 		if(checkstart && r!=startchar){
49 			s++;
50 			continue;
51 		}
52 
53 		/* switch run lists */
54 		tl = _relist[flag];
55 		tle = _reliste[flag];
56 		nl = _relist[flag^=1];
57 		nle = _reliste[flag];
58 		nl->inst = 0;
59 
60 		/* Add first instruction to current list */
61 		sempty.m[0].s.rsp = s;
62 		_renewthread(tl, progp->startinst, &sempty);
63 
64 		/* Execute machine until current list is empty */
65 		for(tlp=tl; tlp->inst; tlp++){	/* assignment = */
66 			if(s == eol)
67 				break;
68 
69 			for(inst=tlp->inst; ; inst = inst->l.next){
70 				switch(inst->type){
71 				case RUNE:	/* regular character */
72 					if(inst->r.r == r)
73 						if(_renewthread(nl, inst->l.next, &tlp->se)==nle)
74 							return -1;
75 					break;
76 				case LBRA:
77 					tlp->se.m[inst->r.subid].s.rsp = s;
78 					continue;
79 				case RBRA:
80 					tlp->se.m[inst->r.subid].e.rep = s;
81 					continue;
82 				case ANY:
83 					if(r != '\n')
84 						if(_renewthread(nl, inst->l.next, &tlp->se)==nle)
85 							return -1;
86 					break;
87 				case ANYNL:
88 					if(_renewthread(nl, inst->l.next, &tlp->se)==nle)
89 							return -1;
90 					break;
91 				case BOL:
92 					if(s == bol || *(s-1) == '\n')
93 						continue;
94 					break;
95 				case EOL:
96 					if(r == 0 || r == '\n')
97 						continue;
98 					break;
99 				case CCLASS:
100 					ep = inst->r.cp->end;
101 					for(rp = inst->r.cp->spans; rp < ep; rp += 2)
102 						if(r >= rp[0] && r <= rp[1]){
103 							if(_renewthread(nl, inst->l.next, &tlp->se)==nle)
104 								return -1;
105 							break;
106 						}
107 					break;
108 				case NCCLASS:
109 					ep = inst->r.cp->end;
110 					for(rp = inst->r.cp->spans; rp < ep; rp += 2)
111 						if(r >= rp[0] && r <= rp[1])
112 							break;
113 					if(rp == ep)
114 						if(_renewthread(nl, inst->l.next, &tlp->se)==nle)
115 							return -1;
116 					break;
117 				case OR:
118 					/* evaluate right choice later */
119 					if(_renewthread(tlp, inst->r.right, &tlp->se) == tle)
120 						return -1;
121 					/* efficiency: advance and re-evaluate */
122 					continue;
123 				case END:	/* Match! */
124 					match = 1;
125 					tlp->se.m[0].e.rep = s;
126 					if(mp != 0)
127 						_renewmatch(mp, ms, &tlp->se);
128 					break;
129 				}
130 				break;
131 			}
132 		}
133 		checkstart = startchar && nl->inst==0;
134 		s++;
135 	}while(r);
136 	return match;
137 }
138 
139 extern int
rregexec(Reprog * progp,wchar_t * bol,Resub * mp,int ms)140 rregexec(Reprog *progp,	/* program to run */
141 	wchar_t *bol,	/* string to run machine on */
142 	Resub *mp,	/* subexpression elements */
143 	int ms)		/* number of elements at mp */
144 {
145 	wchar_t *starts;	/* where to start match */
146 	wchar_t *eol;	/* where to end match */
147 	wchar_t startchar;
148 	int rv;
149 
150 	/*
151  	 *  use user-specified starting/ending location if specified
152 	 */
153 	starts = bol;
154 	eol = 0;
155 	if(mp && ms>0){
156 		if(mp->s.rsp)
157 			starts = mp->s.rsp;
158 		if(mp->e.rep)
159 			eol = mp->e.rep;
160 	}
161 	startchar = progp->startinst->type == RUNE ? progp->startinst->r.r : 0;
162 
163 	/* keep trying till we have enough list space to terminate */
164 	for(;;){
165 		if(_relist[0] == 0){
166 			_relist[0] = malloc(2*_relistsize*sizeof(Relist));
167 			_relist[1] = _relist[0] + _relistsize;
168 			_reliste[0] = _relist[0] + _relistsize - 1;
169 			_reliste[1] = _relist[1] + _relistsize - 1;
170 			if(_relist[0] == 0)
171 				regerror("_relist overflow");
172 		}
173 		rv = rregexec1(progp, bol, mp, ms, starts, eol, startchar);
174 		if(rv >= 0)
175 			return rv;
176 		free(_relist[0]);
177 		_relist[0] = 0;
178 		_relistsize += LISTINCREMENT;
179 	}
180 }
181