xref: /plan9-contrib/sys/src/ape/lib/regexp/rregexec.c (revision 7dd7cddf99dd7472612f1413b4da293630e6b1bc)
1 #include <stdlib.h>
2 #include <stdio.h>
3 #include "regexp.h"
4 #include "regcomp.h"
5 
6 static Resublist sempty;		/* empty set of matches */
7 
8 /*
9  *  return	0 if no match
10  *		>0 if a match
11  *		<0 if we ran out of _relist space
12  */
13 static int
14 rregexec1(Reprog *progp,	/* program to run */
15 	wchar_t *bol,		/* string to run machine on */
16 	Resub *mp,		/* subexpression elements */
17 	int ms,			/* number of elements at mp */
18 	wchar_t *starts,
19 	wchar_t *eol,
20 	wchar_t startchar)
21 {
22 	int flag=0;
23 	Reinst *inst;
24 	Relist *tlp;
25 	wchar_t *s;
26 	int i, checkstart;
27 	wchar_t r, *rp, *ep;
28 	int n;
29 	Relist* tl;		/* This list, next list */
30 	Relist* nl;
31 	Relist* tle;		/* ends of this and next list */
32 	Relist* nle;
33 	int match;
34 
35 	match = 0;
36 	checkstart = startchar;
37 	sempty.m[0].s.rsp = 0;
38 	if(mp!=0)
39 		for(i=0; i<ms; i++)
40 			mp[i].s.rsp = mp[i].e.rep = 0;
41 	_relist[0][0].inst = _relist[1][0].inst = 0;
42 
43 	/* Execute machine once for each character, including terminal NUL */
44 	s = starts;
45 	do{
46 		r = *s;
47 
48 		/* fast check for first char */
49 		if(checkstart && r!=startchar){
50 			s++;
51 			continue;
52 		}
53 
54 		/* switch run lists */
55 		tl = _relist[flag];
56 		tle = _reliste[flag];
57 		nl = _relist[flag^=1];
58 		nle = _reliste[flag];
59 		nl->inst = 0;
60 
61 		/* Add first instruction to current list */
62 		sempty.m[0].s.rsp = s;
63 		_renewthread(tl, progp->startinst, &sempty);
64 
65 		/* Execute machine until current list is empty */
66 		for(tlp=tl; tlp->inst; tlp++){	/* assignment = */
67 			if(s == eol)
68 				break;
69 
70 			for(inst=tlp->inst; ; inst = inst->l.next){
71 				switch(inst->type){
72 				case RUNE:	/* regular character */
73 					if(inst->r.r == r)
74 						if(_renewthread(nl, inst->l.next, &tlp->se)==nle)
75 							return -1;
76 					break;
77 				case LBRA:
78 					tlp->se.m[inst->r.subid].s.rsp = s;
79 					continue;
80 				case RBRA:
81 					tlp->se.m[inst->r.subid].e.rep = s;
82 					continue;
83 				case ANY:
84 					if(r != '\n')
85 						if(_renewthread(nl, inst->l.next, &tlp->se)==nle)
86 							return -1;
87 					break;
88 				case ANYNL:
89 					if(_renewthread(nl, inst->l.next, &tlp->se)==nle)
90 							return -1;
91 					break;
92 				case BOL:
93 					if(s == bol || *(s-1) == '\n')
94 						continue;
95 					break;
96 				case EOL:
97 					if(r == 0 || r == '\n')
98 						continue;
99 					break;
100 				case CCLASS:
101 					ep = inst->r.cp->end;
102 					for(rp = inst->r.cp->spans; rp < ep; rp += 2)
103 						if(r >= rp[0] && r <= rp[1]){
104 							if(_renewthread(nl, inst->l.next, &tlp->se)==nle)
105 								return -1;
106 							break;
107 						}
108 					break;
109 				case NCCLASS:
110 					ep = inst->r.cp->end;
111 					for(rp = inst->r.cp->spans; rp < ep; rp += 2)
112 						if(r >= rp[0] && r <= rp[1])
113 							break;
114 					if(rp == ep)
115 						if(_renewthread(nl, inst->l.next, &tlp->se)==nle)
116 							return -1;
117 					break;
118 				case OR:
119 					/* evaluate right choice later */
120 					if(_renewthread(tlp, inst->r.right, &tlp->se) == tle)
121 						return -1;
122 					/* efficiency: advance and re-evaluate */
123 					continue;
124 				case END:	/* Match! */
125 					match = 1;
126 					tlp->se.m[0].e.rep = s;
127 					if(mp != 0)
128 						_renewmatch(mp, ms, &tlp->se);
129 					break;
130 				}
131 				break;
132 			}
133 		}
134 		checkstart = startchar && nl->inst==0;
135 		s++;
136 	}while(r);
137 	return match;
138 }
139 
140 extern int
141 rregexec(Reprog *progp,	/* program to run */
142 	wchar_t *bol,	/* string to run machine on */
143 	Resub *mp,	/* subexpression elements */
144 	int ms)		/* number of elements at mp */
145 {
146 	wchar_t *starts;	/* where to start match */
147 	wchar_t *eol;	/* where to end match */
148 	wchar_t startchar;
149 	int rv;
150 
151 	/*
152  	 *  use user-specified starting/ending location if specified
153 	 */
154 	starts = bol;
155 	eol = 0;
156 	if(mp && ms>0){
157 		if(mp->s.rsp)
158 			starts = mp->s.rsp;
159 		if(mp->e.rep)
160 			eol = mp->e.rep;
161 	}
162 	startchar = progp->startinst->type == RUNE ? progp->startinst->r.r : 0;
163 
164 	/* keep trying till we have enough list space to terminate */
165 	for(;;){
166 		if(_relist[0] == 0){
167 			_relist[0] = malloc(2*_relistsize*sizeof(Relist));
168 			_relist[1] = _relist[0] + _relistsize;
169 			_reliste[0] = _relist[0] + _relistsize - 1;
170 			_reliste[1] = _relist[1] + _relistsize - 1;
171 			if(_relist[0] == 0)
172 				regerror("_relist overflow");
173 		}
174 		rv = rregexec1(progp, bol, mp, ms, starts, eol, startchar);
175 		if(rv >= 0)
176 			return rv;
177 		free(_relist[0]);
178 		_relist[0] = 0;
179 		_relistsize += LISTINCREMENT;
180 	}
181 }
182