xref: /inferno-os/utils/libregexp/rregexec.c (revision 7e00430948d8af545f880e82bb30cd3ee50deb04)
1 #include <lib9.h>
2 #include "regexp.h"
3 #include "regcomp.h"
4 
5 /*
6  *  return	0 if no match
7  *		>0 if a match
8  *		<0 if we ran out of _relist space
9  */
10 static int
11 rregexec1(Reprog *progp,	/* program to run */
12 	Rune *bol,		/* string to run machine on */
13 	Resub *mp,		/* subexpression elements */
14 	int ms,			/* number of elements at mp */
15 	Reljunk *j)
16 {
17 	int flag=0;
18 	Reinst *inst;
19 	Relist *tlp;
20 	Rune *s;
21 	int i, checkstart;
22 	Rune r, *rp, *ep;
23 	Relist* tl;		/* This list, next list */
24 	Relist* nl;
25 	Relist* tle;		/* ends of this and next list */
26 	Relist* nle;
27 	int match;
28 
29 	match = 0;
30 	checkstart = j->startchar;
31 	if(mp)
32 		for(i=0; i<ms; i++) {
33 			mp[i].s.rsp = 0;
34 			mp[i].e.rep = 0;
35 		}
36 	j->relist[0][0].inst = 0;
37 	j->relist[1][0].inst = 0;
38 
39 	/* Execute machine once for each character, including terminal NUL */
40 	s = j->rstarts;
41 	do{
42 
43 		/* fast check for first char */
44 		if(checkstart) {
45 			switch(j->starttype) {
46 			case RUNE:
47 				while(*s != j->startchar) {
48 					if(*s == 0 || s == j->reol)
49 						return match;
50 					s++;
51 				}
52 				break;
53 			case BOL:
54 				if(s == bol)
55 					break;
56 				while(*s != '\n') {
57 					if(*s == 0 || s == j->reol)
58 						return match;
59 					s++;
60 				}
61 				break;
62 			}
63 		}
64 
65 		r = *s;
66 
67 		/* switch run lists */
68 		tl = j->relist[flag];
69 		tle = j->reliste[flag];
70 		nl = j->relist[flag^=1];
71 		nle = j->reliste[flag];
72 		nl->inst = 0;
73 
74 		/* Add first instruction to current list */
75 		_rrenewemptythread(tl, progp->startinst, s);
76 
77 		/* Execute machine until current list is empty */
78 		for(tlp=tl; tlp->inst; tlp++){
79 			for(inst=tlp->inst; ; inst = inst->u2.next){
80 				switch(inst->type){
81 				case RUNE:	/* regular character */
82 					if(inst->u1.r == r)
83 						if(_renewthread(nl, inst->u2.next, &tlp->se)==nle)
84 							return -1;
85 					break;
86 				case LBRA:
87 					tlp->se.m[inst->u1.subid].s.rsp = s;
88 					continue;
89 				case RBRA:
90 					tlp->se.m[inst->u1.subid].e.rep = s;
91 					continue;
92 				case ANY:
93 					if(r != '\n')
94 						if(_renewthread(nl, inst->u2.next, &tlp->se)==nle)
95 							return -1;
96 					break;
97 				case ANYNL:
98 					if(_renewthread(nl, inst->u2.next, &tlp->se)==nle)
99 							return -1;
100 					break;
101 				case BOL:
102 					if(s == bol || *(s-1) == '\n')
103 						continue;
104 					break;
105 				case EOL:
106 					if(s == j->reol || r == 0 || r == '\n')
107 						continue;
108 					break;
109 				case CCLASS:
110 					ep = inst->u1.cp->end;
111 					for(rp = inst->u1.cp->spans; rp < ep; rp += 2)
112 						if(r >= rp[0] && r <= rp[1]){
113 							if(_renewthread(nl, inst->u2.next, &tlp->se)==nle)
114 								return -1;
115 							break;
116 						}
117 					break;
118 				case NCCLASS:
119 					ep = inst->u1.cp->end;
120 					for(rp = inst->u1.cp->spans; rp < ep; rp += 2)
121 						if(r >= rp[0] && r <= rp[1])
122 							break;
123 					if(rp == ep)
124 						if(_renewthread(nl, inst->u2.next, &tlp->se)==nle)
125 							return -1;
126 					break;
127 				case OR:
128 					/* evaluate right choice later */
129 					if(_renewthread(tlp, inst->u1.right, &tlp->se) == tle)
130 						return -1;
131 					/* efficiency: advance and re-evaluate */
132 					continue;
133 				case END:	/* Match! */
134 					match = 1;
135 					tlp->se.m[0].e.rep = s;
136 					if(mp != 0)
137 						_renewmatch(mp, ms, &tlp->se);
138 					break;
139 				}
140 				break;
141 			}
142 		}
143 		if(s == j->reol)
144 			break;
145 		checkstart = j->startchar && nl->inst==0;
146 		s++;
147 	}while(r);
148 	return match;
149 }
150 
151 static int
152 rregexec2(Reprog *progp,	/* program to run */
153 	Rune *bol,	/* string to run machine on */
154 	Resub *mp,	/* subexpression elements */
155 	int ms,		/* number of elements at mp */
156 	Reljunk *j
157 )
158 {
159 	Relist relist0[5*LISTSIZE], relist1[5*LISTSIZE];
160 
161 	/* mark space */
162 	j->relist[0] = relist0;
163 	j->relist[1] = relist1;
164 	j->reliste[0] = relist0 + nelem(relist0) - 2;
165 	j->reliste[1] = relist1 + nelem(relist1) - 2;
166 
167 	return rregexec1(progp, bol, mp, ms, j);
168 }
169 
170 extern int
171 rregexec(Reprog *progp,	/* program to run */
172 	Rune *bol,	/* string to run machine on */
173 	Resub *mp,	/* subexpression elements */
174 	int ms)		/* number of elements at mp */
175 {
176 	Reljunk j;
177 	Relist relist0[LISTSIZE], relist1[LISTSIZE];
178 	int rv;
179 
180 	/*
181  	 *  use user-specified starting/ending location if specified
182 	 */
183 	j.rstarts = bol;
184 	j.reol = 0;
185 	if(mp && ms>0){
186 		if(mp->s.sp)
187 			j.rstarts = mp->s.rsp;
188 		if(mp->e.ep)
189 			j.reol = mp->e.rep;
190 	}
191 	j.starttype = 0;
192 	j.startchar = 0;
193 	if(progp->startinst->type == RUNE && progp->startinst->u1.r < (Rune)Runeself) {
194 		j.starttype = RUNE;
195 		j.startchar = progp->startinst->u1.r;
196 	}
197 	if(progp->startinst->type == BOL)
198 		j.starttype = BOL;
199 
200 	/* mark space */
201 	j.relist[0] = relist0;
202 	j.relist[1] = relist1;
203 	j.reliste[0] = relist0 + nelem(relist0) - 2;
204 	j.reliste[1] = relist1 + nelem(relist1) - 2;
205 
206 	rv = rregexec1(progp, bol, mp, ms, &j);
207 	if(rv >= 0)
208 		return rv;
209 	rv = rregexec2(progp, bol, mp, ms, &j);
210 	if(rv >= 0)
211 		return rv;
212 	return -1;
213 }
214