xref: /inferno-os/utils/libregexp/regexec.c (revision d0e1d143ef6f03c75c008c7ec648859dd260cbab)
1  #include <lib9.h>
2  #include "regexp.h"
3  #include "regcomp.h"
4  
5  
6  /*
7   *  return	0 if no match
8   *		>0 if a match
9   *		<0 if we ran out of _relist space
10   */
11  static int
12  regexec1(Reprog *progp,	/* program to run */
13  	char *bol,	/* string to run machine on */
14  	Resub *mp,	/* subexpression elements */
15  	int ms,		/* number of elements at mp */
16  	Reljunk *j
17  )
18  {
19  	int flag=0;
20  	Reinst *inst;
21  	Relist *tlp;
22  	char *s;
23  	int i, checkstart;
24  	Rune r, *rp, *ep;
25  	int n;
26  	Relist* tl;		/* This list, next list */
27  	Relist* nl;
28  	Relist* tle;		/* ends of this and next list */
29  	Relist* nle;
30  	int match;
31  	char *p;
32  
33  	match = 0;
34  	checkstart = j->starttype;
35  	if(mp)
36  		for(i=0; i<ms; i++) {
37  			mp[i].s.sp = 0;
38  			mp[i].e.ep = 0;
39  		}
40  	j->relist[0][0].inst = 0;
41  	j->relist[1][0].inst = 0;
42  
43  	/* Execute machine once for each character, including terminal NUL */
44  	s = j->starts;
45  	do{
46  		/* fast check for first char */
47  		if(checkstart) {
48  			switch(j->starttype) {
49  			case RUNE:
50  				p = utfrune(s, j->startchar);
51  				if(p == 0 || s == j->eol)
52  					return match;
53  				s = p;
54  				break;
55  			case BOL:
56  				if(s == bol)
57  					break;
58  				p = utfrune(s, '\n');
59  				if(p == 0 || s == j->eol)
60  					return match;
61  				s = p;
62  				break;
63  			}
64  		}
65  		r = *(uchar*)s;
66  		if(r < (Rune)Runeself)
67  			n = 1;
68  		else
69  			n = chartorune(&r, s);
70  
71  		/* switch run lists */
72  		tl = j->relist[flag];
73  		tle = j->reliste[flag];
74  		nl = j->relist[flag^=1];
75  		nle = j->reliste[flag];
76  		nl->inst = 0;
77  
78  		/* Add first instruction to current list */
79  		if(match == 0)
80  			_renewemptythread(tl, progp->startinst, s);
81  
82  		/* Execute machine until current list is empty */
83  		for(tlp=tl; tlp->inst; tlp++){	/* assignment = */
84  			for(inst = tlp->inst; ; inst = inst->u2.next){
85  				switch(inst->type){
86  				case RUNE:	/* regular character */
87  					if(inst->u1.r == r){
88  						if(_renewthread(nl, inst->u2.next, &tlp->se)==nle)
89  							return -1;
90  					}
91  					break;
92  				case LBRA:
93  					tlp->se.m[inst->u1.subid].s.sp = s;
94  					continue;
95  				case RBRA:
96  					tlp->se.m[inst->u1.subid].e.ep = s;
97  					continue;
98  				case ANY:
99  					if(r != '\n')
100  						if(_renewthread(nl, inst->u2.next, &tlp->se)==nle)
101  							return -1;
102  					break;
103  				case ANYNL:
104  					if(_renewthread(nl, inst->u2.next, &tlp->se)==nle)
105  							return -1;
106  					break;
107  				case BOL:
108  					if(s == bol || *(s-1) == '\n')
109  						continue;
110  					break;
111  				case EOL:
112  					if(s == j->eol || r == 0 || r == '\n')
113  						continue;
114  					break;
115  				case CCLASS:
116  					ep = inst->u1.cp->end;
117  					for(rp = inst->u1.cp->spans; rp < ep; rp += 2)
118  						if(r >= rp[0] && r <= rp[1]){
119  							if(_renewthread(nl, inst->u2.next, &tlp->se)==nle)
120  								return -1;
121  							break;
122  						}
123  					break;
124  				case NCCLASS:
125  					ep = inst->u1.cp->end;
126  					for(rp = inst->u1.cp->spans; rp < ep; rp += 2)
127  						if(r >= rp[0] && r <= rp[1])
128  							break;
129  					if(rp == ep)
130  						if(_renewthread(nl, inst->u2.next, &tlp->se)==nle)
131  							return -1;
132  					break;
133  				case OR:
134  					/* evaluate right choice later */
135  					if(_renewthread(tlp, inst->u1.right, &tlp->se) == tle)
136  						return -1;
137  					/* efficiency: advance and re-evaluate */
138  					continue;
139  				case END:	/* Match! */
140  					match = 1;
141  					tlp->se.m[0].e.ep = s;
142  					if(mp != 0)
143  						_renewmatch(mp, ms, &tlp->se);
144  					break;
145  				}
146  				break;
147  			}
148  		}
149  		if(s == j->eol)
150  			break;
151  		checkstart = j->starttype && nl->inst==0;
152  		s += n;
153  	}while(r);
154  	return match;
155  }
156  
157  static int
158  regexec2(Reprog *progp,	/* program to run */
159  	char *bol,	/* string to run machine on */
160  	Resub *mp,	/* subexpression elements */
161  	int ms,		/* number of elements at mp */
162  	Reljunk *j
163  )
164  {
165  	Relist relist0[BIGLISTSIZE], relist1[BIGLISTSIZE];
166  
167  	/* mark space */
168  	j->relist[0] = relist0;
169  	j->relist[1] = relist1;
170  	j->reliste[0] = relist0 + nelem(relist0) - 2;
171  	j->reliste[1] = relist1 + nelem(relist1) - 2;
172  
173  	return regexec1(progp, bol, mp, ms, j);
174  }
175  
176  extern int
177  regexec(Reprog *progp,	/* program to run */
178  	char *bol,	/* string to run machine on */
179  	Resub *mp,	/* subexpression elements */
180  	int ms)		/* number of elements at mp */
181  {
182  	Reljunk j;
183  	Relist relist0[LISTSIZE], relist1[LISTSIZE];
184  	int rv;
185  
186  	/*
187   	 *  use user-specified starting/ending location if specified
188  	 */
189  	j.starts = bol;
190  	j.eol = 0;
191  	if(mp && ms>0){
192  		if(mp->s.sp)
193  			j.starts = mp->s.sp;
194  		if(mp->e.ep)
195  			j.eol = mp->e.ep;
196  	}
197  	j.starttype = 0;
198  	j.startchar = 0;
199  	if(progp->startinst->type == RUNE && progp->startinst->u1.r < (Rune)Runeself) {
200  		j.starttype = RUNE;
201  		j.startchar = progp->startinst->u1.r;
202  	}
203  	if(progp->startinst->type == BOL)
204  		j.starttype = BOL;
205  
206  	/* mark space */
207  	j.relist[0] = relist0;
208  	j.relist[1] = relist1;
209  	j.reliste[0] = relist0 + nelem(relist0) - 2;
210  	j.reliste[1] = relist1 + nelem(relist1) - 2;
211  
212  	rv = regexec1(progp, bol, mp, ms, &j);
213  	if(rv >= 0)
214  		return rv;
215  	rv = regexec2(progp, bol, mp, ms, &j);
216  	if(rv >= 0)
217  		return rv;
218  	return -1;
219  }
220