xref: /inferno-os/appl/lib/ecmascript/regexp.b (revision 37da2899f40661e3e9631e497da8dc59b971cbd0)
1*37da2899SCharles.Forsythstrhas(s: string, c: int): ref Val
2*37da2899SCharles.Forsyth{
3*37da2899SCharles.Forsyth	for(i := 0; i < len s; i++)
4*37da2899SCharles.Forsyth		if(s[i] == c)
5*37da2899SCharles.Forsyth			return true;
6*37da2899SCharles.Forsyth	return false;
7*37da2899SCharles.Forsyth}
8*37da2899SCharles.Forsyth
9*37da2899SCharles.Forsythrsplit(r: string): (string, string)
10*37da2899SCharles.Forsyth{
11*37da2899SCharles.Forsyth	esc := 0;
12*37da2899SCharles.Forsyth	i := 1;	# skip '/'
13*37da2899SCharles.Forsyth	for(;;){
14*37da2899SCharles.Forsyth		c := r[i++];
15*37da2899SCharles.Forsyth		if(!esc && c == '/')
16*37da2899SCharles.Forsyth			break;
17*37da2899SCharles.Forsyth		esc = !esc && c == '\\';
18*37da2899SCharles.Forsyth	}
19*37da2899SCharles.Forsyth	return (r[1: i-1], r[i: ]);
20*37da2899SCharles.Forsyth}
21*37da2899SCharles.Forsyth
22*37da2899SCharles.Forsythbadflags(f: string): int
23*37da2899SCharles.Forsyth{
24*37da2899SCharles.Forsyth	g := i := m := 0;
25*37da2899SCharles.Forsyth	for(j := 0; j < len f; j++){
26*37da2899SCharles.Forsyth		case(f[j]){
27*37da2899SCharles.Forsyth			'g' =>
28*37da2899SCharles.Forsyth				g++;
29*37da2899SCharles.Forsyth			'i' =>
30*37da2899SCharles.Forsyth				i++;
31*37da2899SCharles.Forsyth			'm' =>
32*37da2899SCharles.Forsyth				m++;
33*37da2899SCharles.Forsyth			* =>
34*37da2899SCharles.Forsyth				return 1;
35*37da2899SCharles.Forsyth		}
36*37da2899SCharles.Forsyth	}
37*37da2899SCharles.Forsyth	return g > 1 || i > 1 || m > 1;
38*37da2899SCharles.Forsyth}
39*37da2899SCharles.Forsyth
40*37da2899SCharles.Forsythregexpvals(ex: ref Exec, v: ref Val, o: ref Ecmascript->Obj): (string, string, int)
41*37da2899SCharles.Forsyth{
42*37da2899SCharles.Forsyth	if(v != nil){
43*37da2899SCharles.Forsyth		if(v.ty == TRegExp)
44*37da2899SCharles.Forsyth			return (v.rev.p, v.rev.f, v.rev.i);
45*37da2899SCharles.Forsyth		o = v.obj;
46*37da2899SCharles.Forsyth	}
47*37da2899SCharles.Forsyth	p := toString(ex, esget(ex, o, "source", 0));
48*37da2899SCharles.Forsyth	f := "";
49*37da2899SCharles.Forsyth	if(toBoolean(ex, esget(ex, o, "global", 0)) == true)
50*37da2899SCharles.Forsyth		f += "g";
51*37da2899SCharles.Forsyth	if(toBoolean(ex, esget(ex, o, "ignoreCase", 0)) == true)
52*37da2899SCharles.Forsyth		f += "i";
53*37da2899SCharles.Forsyth	if(toBoolean(ex, esget(ex, o, "multiline", 0)) == true)
54*37da2899SCharles.Forsyth		f += "m";
55*37da2899SCharles.Forsyth	i := toInt32(ex, esget(ex, o, "lastIndex", 0));
56*37da2899SCharles.Forsyth	return (p, f, i);
57*37da2899SCharles.Forsyth}
58*37da2899SCharles.Forsyth
59*37da2899SCharles.Forsythnregexp(ex: ref Exec, nil: ref Ecmascript->Obj, args: array of ref Val): ref Ecmascript->Obj
60*37da2899SCharles.Forsyth{
61*37da2899SCharles.Forsyth	pat := biarg(args, 0);
62*37da2899SCharles.Forsyth	flags := biarg(args, 1);
63*37da2899SCharles.Forsyth	(p, f) := ("", "");
64*37da2899SCharles.Forsyth	if(isregexp(pat)){
65*37da2899SCharles.Forsyth		if(flags == undefined)
66*37da2899SCharles.Forsyth			(p, f, nil) = regexpvals(ex, pat, nil);
67*37da2899SCharles.Forsyth		else
68*37da2899SCharles.Forsyth			runtime(ex, TypeError, "flags defined");
69*37da2899SCharles.Forsyth	}
70*37da2899SCharles.Forsyth	else{
71*37da2899SCharles.Forsyth		if(pat == undefined)
72*37da2899SCharles.Forsyth			p = "";
73*37da2899SCharles.Forsyth		else
74*37da2899SCharles.Forsyth			p = toString(ex, pat);
75*37da2899SCharles.Forsyth		if(flags == undefined)
76*37da2899SCharles.Forsyth			f = "";
77*37da2899SCharles.Forsyth		else
78*37da2899SCharles.Forsyth			f = toString(ex, flags);
79*37da2899SCharles.Forsyth	}
80*37da2899SCharles.Forsyth	o := nobj(ex, nil, array[] of { regexpval(p, f, 0) });
81*37da2899SCharles.Forsyth	if(badflags(f))
82*37da2899SCharles.Forsyth		runtime(ex, SyntaxError, "bad regexp flags");
83*37da2899SCharles.Forsyth	regex = ex;
84*37da2899SCharles.Forsyth	(re, err) := compile(p, 1);
85*37da2899SCharles.Forsyth	if(re == nil || err != nil)
86*37da2899SCharles.Forsyth		runtime(ex, SyntaxError, "bad regexp pattern");
87*37da2899SCharles.Forsyth	o.re = re;
88*37da2899SCharles.Forsyth	return o;
89*37da2899SCharles.Forsyth}
90*37da2899SCharles.Forsyth
91*37da2899SCharles.Forsythcregexp(ex: ref Exec, f, nil: ref Ecmascript->Obj, args: array of ref Val): ref Val
92*37da2899SCharles.Forsyth{
93*37da2899SCharles.Forsyth	pat := biarg(args, 0);
94*37da2899SCharles.Forsyth	flags := biarg(args, 1);
95*37da2899SCharles.Forsyth	if(isregexp(pat) && flags == undefined)
96*37da2899SCharles.Forsyth		return pat;
97*37da2899SCharles.Forsyth	return objval(nregexp(ex, f, args));
98*37da2899SCharles.Forsyth}
99*37da2899SCharles.Forsyth
100*37da2899SCharles.Forsythcregexpprotoexec(ex: ref Exec, f, this: ref Ecmascript->Obj, args: array of ref Val): ref Val
101*37da2899SCharles.Forsyth{
102*37da2899SCharles.Forsyth	m: array of (int, int);
103*37da2899SCharles.Forsyth
104*37da2899SCharles.Forsyth	regexpcheck(ex, this, f);
105*37da2899SCharles.Forsyth	s := toString(ex, biarg(args, 0));
106*37da2899SCharles.Forsyth	l := len s;
107*37da2899SCharles.Forsyth	i := toInt32(ex, esget(ex, this, "lastIndex", 0));
108*37da2899SCharles.Forsyth	e := 0;
109*37da2899SCharles.Forsyth	glob := esget(ex, this, "global", 0);
110*37da2899SCharles.Forsyth	multiline := esget(ex, this, "multiline", 0);
111*37da2899SCharles.Forsyth	ignorecase := esget(ex, this, "ignoreCase", 0);
112*37da2899SCharles.Forsyth	if(glob == false)
113*37da2899SCharles.Forsyth		i = 0;
114*37da2899SCharles.Forsyth	for(;;){
115*37da2899SCharles.Forsyth		if(i < 0 || i >= l){
116*37da2899SCharles.Forsyth			esput(ex, this, "lastIndex", numval(real 0), 0);
117*37da2899SCharles.Forsyth			return null;
118*37da2899SCharles.Forsyth		}
119*37da2899SCharles.Forsyth		regex = ex;
120*37da2899SCharles.Forsyth		m = executese(this.re, s, (i, len s), i == 0, 1, multiline == true, ignorecase == true);
121*37da2899SCharles.Forsyth		if(m != nil)
122*37da2899SCharles.Forsyth			break;
123*37da2899SCharles.Forsyth		i++;
124*37da2899SCharles.Forsyth		i = -1;	# no need to loop with executese
125*37da2899SCharles.Forsyth	}
126*37da2899SCharles.Forsyth	(i, e) = m[0];
127*37da2899SCharles.Forsyth	if(glob == true)
128*37da2899SCharles.Forsyth		esput(ex, this, "lastIndex", numval(real e), 0);
129*37da2899SCharles.Forsyth	n := len m;
130*37da2899SCharles.Forsyth	av := array[n] of ref Val;
131*37da2899SCharles.Forsyth	for(j := 0; j < n; j++){
132*37da2899SCharles.Forsyth		(a, b) := m[j];
133*37da2899SCharles.Forsyth		if(a < 0)
134*37da2899SCharles.Forsyth			av[j] = undefined;
135*37da2899SCharles.Forsyth		else
136*37da2899SCharles.Forsyth			av[j] = strval(s[a: b]);
137*37da2899SCharles.Forsyth	}
138*37da2899SCharles.Forsyth	a := narray(ex, nil, av);
139*37da2899SCharles.Forsyth	esput(ex, a, "index", numval(real i), 0);
140*37da2899SCharles.Forsyth	esput(ex, a, "input", strval(s), 0);
141*37da2899SCharles.Forsyth	return objval(a);
142*37da2899SCharles.Forsyth}
143*37da2899SCharles.Forsyth
144*37da2899SCharles.Forsythcregexpprototest(ex: ref Exec, f, this: ref Ecmascript->Obj, args: array of ref Val): ref Val
145*37da2899SCharles.Forsyth{
146*37da2899SCharles.Forsyth	regexpcheck(ex, this, f);
147*37da2899SCharles.Forsyth	v := cregexpprotoexec(ex, f, this, args);
148*37da2899SCharles.Forsyth	if(!isnull(v))
149*37da2899SCharles.Forsyth		return true;
150*37da2899SCharles.Forsyth	return false;
151*37da2899SCharles.Forsyth}
152*37da2899SCharles.Forsyth
153*37da2899SCharles.ForsythcregexpprototoString(ex: ref Exec, f, this: ref Ecmascript->Obj, nil: array of ref Val): ref Val
154*37da2899SCharles.Forsyth{
155*37da2899SCharles.Forsyth	regexpcheck(ex, this, f);
156*37da2899SCharles.Forsyth	(p, fl, nil) := regexpvals(ex, nil, this);
157*37da2899SCharles.Forsyth	return strval("/" + p + "/" + fl);
158*37da2899SCharles.Forsyth}
159*37da2899SCharles.Forsyth
160*37da2899SCharles.Forsythregexpcheck(ex: ref Exec, o: ref Ecmascript->Obj, f: ref Obj)
161*37da2899SCharles.Forsyth{
162*37da2899SCharles.Forsyth	if(f == nil)
163*37da2899SCharles.Forsyth		s := "exec";
164*37da2899SCharles.Forsyth	else
165*37da2899SCharles.Forsyth		s = f.val.str;
166*37da2899SCharles.Forsyth	if(!isregexpobj(o))
167*37da2899SCharles.Forsyth		runtime(ex, TypeError, "RegExp.prototype." + s + " called on non-RegExp object");
168*37da2899SCharles.Forsyth}
169*37da2899SCharles.Forsyth
170*37da2899SCharles.Forsythcstrprotomatch(ex: ref Exec, nil, this: ref Ecmascript->Obj, args: array of ref Val): ref Val
171*37da2899SCharles.Forsyth{
172*37da2899SCharles.Forsyth	v := biarg(args, 0);
173*37da2899SCharles.Forsyth	if(!isregexp(v))
174*37da2899SCharles.Forsyth		re := nregexp(ex, nil, args);
175*37da2899SCharles.Forsyth	else if(v.ty == TObj)
176*37da2899SCharles.Forsyth		re = v.obj;
177*37da2899SCharles.Forsyth	else
178*37da2899SCharles.Forsyth		re = nobj(ex, nil, args);
179*37da2899SCharles.Forsyth	s := toString(ex, this.val);
180*37da2899SCharles.Forsyth	glob := esget(ex, re, "global", 0);
181*37da2899SCharles.Forsyth	av := array[1] of ref Val;
182*37da2899SCharles.Forsyth	av[0] = strval(s);
183*37da2899SCharles.Forsyth	if(glob == false)
184*37da2899SCharles.Forsyth		return cregexpprotoexec(ex, nil, re, av);
185*37da2899SCharles.Forsyth	li := 0;
186*37da2899SCharles.Forsyth	esput(ex, re, "lastIndex", numval(real li), 0);
187*37da2899SCharles.Forsyth	ms: list of ref Val;
188*37da2899SCharles.Forsyth	for(;;){
189*37da2899SCharles.Forsyth		v = cregexpprotoexec(ex, nil, re, av);
190*37da2899SCharles.Forsyth		if(isnull(v))
191*37da2899SCharles.Forsyth			break;
192*37da2899SCharles.Forsyth		ms = esget(ex, v.obj, "0", 0) :: ms;
193*37da2899SCharles.Forsyth		ni := int toUint32(ex, esget(ex, re, "lastIndex", 0));
194*37da2899SCharles.Forsyth		if(ni == li)
195*37da2899SCharles.Forsyth			esput(ex, re, "lastIndex", numval(real ++li), 0);
196*37da2899SCharles.Forsyth		else
197*37da2899SCharles.Forsyth			li = ni;
198*37da2899SCharles.Forsyth	}
199*37da2899SCharles.Forsyth	n := len ms;
200*37da2899SCharles.Forsyth	av = array[n] of ref Val;
201*37da2899SCharles.Forsyth	for(j := n-1; j >= 0; j--){
202*37da2899SCharles.Forsyth		av[j] = hd ms;
203*37da2899SCharles.Forsyth		ms = tl ms;
204*37da2899SCharles.Forsyth	}
205*37da2899SCharles.Forsyth	return objval(narray(ex, nil, av));
206*37da2899SCharles.Forsyth}
207*37da2899SCharles.Forsyth
208*37da2899SCharles.Forsythcstrprotoreplace(ex: ref Exec, nil, this: ref Ecmascript->Obj, args: array of ref Val): ref Val
209*37da2899SCharles.Forsyth{
210*37da2899SCharles.Forsyth	re: ref Ecmascript->Obj;
211*37da2899SCharles.Forsyth
212*37da2899SCharles.Forsyth	v := biarg(args, 0);
213*37da2899SCharles.Forsyth	rege := isregexp(v);
214*37da2899SCharles.Forsyth	if(!rege){
215*37da2899SCharles.Forsyth		if(args == nil)
216*37da2899SCharles.Forsyth			re = nregexp(ex, nil, args);
217*37da2899SCharles.Forsyth		else
218*37da2899SCharles.Forsyth			re = nregexp(ex, nil, args[0:1]);
219*37da2899SCharles.Forsyth	}
220*37da2899SCharles.Forsyth	else if(v.ty == TObj)
221*37da2899SCharles.Forsyth		re = v.obj;
222*37da2899SCharles.Forsyth	else
223*37da2899SCharles.Forsyth		re = nobj(ex, nil, args);
224*37da2899SCharles.Forsyth	s := toString(ex, this.val);
225*37da2899SCharles.Forsyth	if(rege)
226*37da2899SCharles.Forsyth		glob := esget(ex, re, "global", 0);
227*37da2899SCharles.Forsyth	else
228*37da2899SCharles.Forsyth		glob = false;
229*37da2899SCharles.Forsyth	av := array[1] of ref Val;
230*37da2899SCharles.Forsyth	av[0] = strval(s);
231*37da2899SCharles.Forsyth	ms: list of ref Val;
232*37da2899SCharles.Forsyth	li := 0;
233*37da2899SCharles.Forsyth	if(glob == true)
234*37da2899SCharles.Forsyth		esput(ex, re, "lastIndex", numval(real li), 0);
235*37da2899SCharles.Forsyth	for(;;){
236*37da2899SCharles.Forsyth		v = cregexpprotoexec(ex, nil, re, av);
237*37da2899SCharles.Forsyth		if(!isnull(v))
238*37da2899SCharles.Forsyth			ms = v :: ms;
239*37da2899SCharles.Forsyth		if(isnull(v) || glob == false)
240*37da2899SCharles.Forsyth			break;
241*37da2899SCharles.Forsyth		ni := int toUint32(ex, esget(ex, re, "lastIndex", 0));
242*37da2899SCharles.Forsyth		if(ni == li)
243*37da2899SCharles.Forsyth			esput(ex, re, "lastIndex", numval(real ++li), 0);
244*37da2899SCharles.Forsyth		else
245*37da2899SCharles.Forsyth			li = ni;
246*37da2899SCharles.Forsyth	}
247*37da2899SCharles.Forsyth	if(ms == nil)
248*37da2899SCharles.Forsyth		return strval(s);
249*37da2899SCharles.Forsyth	ms = rev(ms);
250*37da2899SCharles.Forsyth	if(rege)
251*37da2899SCharles.Forsyth		lcp := int toUint32(ex, esget(ex, (hd ms).obj, "length", 0))-1;
252*37da2899SCharles.Forsyth	else
253*37da2899SCharles.Forsyth		lcp = 0;
254*37da2899SCharles.Forsyth	v = biarg(args, 1);
255*37da2899SCharles.Forsyth	if(isobj(v) && isfuncobj(v.obj)){
256*37da2899SCharles.Forsyth		ns := s;
257*37da2899SCharles.Forsyth		n := len ms;
258*37da2899SCharles.Forsyth		args = array[lcp+3] of ref Val;
259*37da2899SCharles.Forsyth		o := inc := 0;
260*37da2899SCharles.Forsyth		for(i := 0; i < n; i++){
261*37da2899SCharles.Forsyth			a := (hd ms).obj;
262*37da2899SCharles.Forsyth			ms = tl ms;
263*37da2899SCharles.Forsyth			for(j := 0; j <= lcp; j++)
264*37da2899SCharles.Forsyth				args[j] = esget(ex, a, string j, 0);
265*37da2899SCharles.Forsyth			ss := toString(ex, args[0]);
266*37da2899SCharles.Forsyth			o = offset(ss, s, o);
267*37da2899SCharles.Forsyth			args[lcp+1] = numval(real o);
268*37da2899SCharles.Forsyth			args[lcp+2] = strval(s);
269*37da2899SCharles.Forsyth			rs := toString(ex, getValue(ex, escall(ex, v.obj, nil, args, 0)));
270*37da2899SCharles.Forsyth			ns = repl(ns, o+inc, o+inc+len ss, rs);
271*37da2899SCharles.Forsyth			o += len ss;
272*37da2899SCharles.Forsyth			inc += len rs - len ss;
273*37da2899SCharles.Forsyth		}
274*37da2899SCharles.Forsyth		return strval(ns);
275*37da2899SCharles.Forsyth	}
276*37da2899SCharles.Forsyth	else{
277*37da2899SCharles.Forsyth		ps := toString(ex, v);
278*37da2899SCharles.Forsyth		lps := len ps;
279*37da2899SCharles.Forsyth		ns := s;
280*37da2899SCharles.Forsyth		n := len ms;
281*37da2899SCharles.Forsyth		o := inc := 0;
282*37da2899SCharles.Forsyth		for(i := 0; i < n; i++){
283*37da2899SCharles.Forsyth			a := (hd ms).obj;
284*37da2899SCharles.Forsyth			ms = tl ms;
285*37da2899SCharles.Forsyth			ss := toString(ex, esget(ex, a, "0", 0));
286*37da2899SCharles.Forsyth			o = offset(ss, s, o);
287*37da2899SCharles.Forsyth			rs := "";
288*37da2899SCharles.Forsyth			for(j := 0; j < lps; j++){
289*37da2899SCharles.Forsyth				if(ps[j] == '$' && j < lps-1){
290*37da2899SCharles.Forsyth					j++;
291*37da2899SCharles.Forsyth					case(c := ps[j]){
292*37da2899SCharles.Forsyth						'$' =>
293*37da2899SCharles.Forsyth							rs += "$";
294*37da2899SCharles.Forsyth						'&' =>
295*37da2899SCharles.Forsyth							rs += ss;
296*37da2899SCharles.Forsyth						'`' =>
297*37da2899SCharles.Forsyth							rs += s[0: o];
298*37da2899SCharles.Forsyth						''' =>
299*37da2899SCharles.Forsyth							rs += s[o+len ss: ];
300*37da2899SCharles.Forsyth						'0' to '9' =>
301*37da2899SCharles.Forsyth							if(j < lps-1 && isdigit(ps[j+1]))
302*37da2899SCharles.Forsyth								c = 10*(c-'0')+ps[++j]-'0';
303*37da2899SCharles.Forsyth							else
304*37da2899SCharles.Forsyth								c = c-'0';
305*37da2899SCharles.Forsyth							if(c >= 1 && c <= lcp)
306*37da2899SCharles.Forsyth								rs += toString(ex, esget(ex, a, string c, 0));
307*37da2899SCharles.Forsyth					}
308*37da2899SCharles.Forsyth				}
309*37da2899SCharles.Forsyth				else
310*37da2899SCharles.Forsyth					rs += ps[j: j+1];
311*37da2899SCharles.Forsyth			}
312*37da2899SCharles.Forsyth			ns = repl(ns, o+inc, o+inc+len ss, rs);
313*37da2899SCharles.Forsyth			o += len ss;
314*37da2899SCharles.Forsyth			inc += len rs - len ss;
315*37da2899SCharles.Forsyth		}
316*37da2899SCharles.Forsyth		return strval(ns);
317*37da2899SCharles.Forsyth	}
318*37da2899SCharles.Forsyth}
319*37da2899SCharles.Forsyth
320*37da2899SCharles.Forsythcstrprotosearch(ex: ref Exec, nil, this: ref Ecmascript->Obj, args: array of ref Val): ref Val
321*37da2899SCharles.Forsyth{
322*37da2899SCharles.Forsyth	v := biarg(args, 0);
323*37da2899SCharles.Forsyth	if(!isregexp(v))
324*37da2899SCharles.Forsyth		re := nregexp(ex, nil, args);
325*37da2899SCharles.Forsyth	else if(v.ty == TObj)
326*37da2899SCharles.Forsyth		re = v.obj;
327*37da2899SCharles.Forsyth	else
328*37da2899SCharles.Forsyth		re = nobj(ex, nil, args);
329*37da2899SCharles.Forsyth	s := toString(ex, this.val);
330*37da2899SCharles.Forsyth	glob := esget(ex, re, "global", 0);
331*37da2899SCharles.Forsyth	esput(ex, re, "global", false, 0);
332*37da2899SCharles.Forsyth	av := array[1] of ref Val;
333*37da2899SCharles.Forsyth	av[0] = strval(s);
334*37da2899SCharles.Forsyth	v = cregexpprotoexec(ex, nil, re, av);
335*37da2899SCharles.Forsyth	if(isnull(v))
336*37da2899SCharles.Forsyth		r := -1;
337*37da2899SCharles.Forsyth	else{
338*37da2899SCharles.Forsyth		ss := toString(ex, esget(ex, v.obj, "0", 0));
339*37da2899SCharles.Forsyth		r = offset(ss, s, 0);
340*37da2899SCharles.Forsyth	}
341*37da2899SCharles.Forsyth	esput(ex, re, "global", glob, 0);
342*37da2899SCharles.Forsyth	return numval(real r);
343*37da2899SCharles.Forsyth}
344*37da2899SCharles.Forsyth
345*37da2899SCharles.Forsythoffset(ss: string, s: string, m: int): int
346*37da2899SCharles.Forsyth{
347*37da2899SCharles.Forsyth	nn := len ss;
348*37da2899SCharles.Forsyth	n := len s;
349*37da2899SCharles.Forsyth	for(i := m; i <= n-nn; i++){
350*37da2899SCharles.Forsyth		if(s[i: i+nn] == ss)
351*37da2899SCharles.Forsyth			return i;
352*37da2899SCharles.Forsyth	}
353*37da2899SCharles.Forsyth	return -1;
354*37da2899SCharles.Forsyth}
355*37da2899SCharles.Forsyth
356*37da2899SCharles.Forsythrepl(s: string, a: int, b: int, ns: string): string
357*37da2899SCharles.Forsyth{
358*37da2899SCharles.Forsyth	return s[0: a] + ns + s[b: ];
359*37da2899SCharles.Forsyth}
360*37da2899SCharles.Forsyth
361*37da2899SCharles.Forsythrev(ls: list of ref Val): list of ref Val
362*37da2899SCharles.Forsyth{
363*37da2899SCharles.Forsyth	ns: list of ref Val;
364*37da2899SCharles.Forsyth
365*37da2899SCharles.Forsyth	for( ; ls != nil; ls = tl ls)
366*37da2899SCharles.Forsyth		ns = hd ls :: ns;
367*37da2899SCharles.Forsyth	return ns;
368*37da2899SCharles.Forsyth}
369*37da2899SCharles.Forsyth
370*37da2899SCharles.Forsyth#########################################################################
371*37da2899SCharles.Forsyth# regex.b originally
372*37da2899SCharles.Forsyth
373*37da2899SCharles.Forsyth# normally imported identifiers
374*37da2899SCharles.Forsyth
375*37da2899SCharles.Forsyth# internal identifiers, not normally imported
376*37da2899SCharles.Forsyth
377*37da2899SCharles.ForsythALT, CAT, DOT, SET, HAT, DOL, NUL, PCLO, CLO, OPT, LPN, RPN, LPN0, RPN0, LPN1, RPN1, LPN2, RPN2, BEET, BEEF, MNCLO, LCP, IDLE: con (1<<16)+iota;
378*37da2899SCharles.Forsyth
379*37da2899SCharles.Forsyth# syntax
380*37da2899SCharles.Forsyth
381*37da2899SCharles.Forsyth# RE	ALT		regular expression
382*37da2899SCharles.Forsyth#	NUL
383*37da2899SCharles.Forsyth# ALT	CAT		alternation
384*37da2899SCharles.Forsyth# 	CAT | ALT
385*37da2899SCharles.Forsyth#
386*37da2899SCharles.Forsyth# CAT	DUP		catenation
387*37da2899SCharles.Forsyth# 	DUP CAT
388*37da2899SCharles.Forsyth#
389*37da2899SCharles.Forsyth# DUP	PRIM		possibly duplicated primary
390*37da2899SCharles.Forsyth# 	PCLO
391*37da2899SCharles.Forsyth# 	CLO
392*37da2899SCharles.Forsyth# 	OPT
393*37da2899SCharles.Forsyth#
394*37da2899SCharles.Forsyth# PCLO	PRIM +		1 or more
395*37da2899SCharles.Forsyth# CLO	PRIM *		0 or more
396*37da2899SCharles.Forsyth# OPT	PRIM ?		0 or 1
397*37da2899SCharles.Forsyth#
398*37da2899SCharles.Forsyth# PRIM	( RE )
399*37da2899SCharles.Forsyth#	()
400*37da2899SCharles.Forsyth# 	DOT		any character
401*37da2899SCharles.Forsyth# 	CHAR		a single character
402*37da2899SCharles.Forsyth#	ESC		escape sequence
403*37da2899SCharles.Forsyth# 	[ SET ]		character set
404*37da2899SCharles.Forsyth# 	NUL		null string
405*37da2899SCharles.Forsyth# 	HAT		beginning of string
406*37da2899SCharles.Forsyth# 	DOL		end of string
407*37da2899SCharles.Forsyth#
408*37da2899SCharles.Forsyth
409*37da2899SCharles.Forsythregex: ref Exec;
410*37da2899SCharles.Forsyth
411*37da2899SCharles.ForsythNIL : con -1;		# a refRex constant
412*37da2899SCharles.ForsythNONE: con -2;		# ditto, for an un-set value
413*37da2899SCharles.ForsythBAD: con 1<<16;		# a non-character
414*37da2899SCharles.ForsythHUGE: con (1<<31) - 1;
415*37da2899SCharles.Forsyth
416*37da2899SCharles.Forsyth# the data structures of re.m would like to be ref-linked, but are
417*37da2899SCharles.Forsyth# circular (see fn walk), thus instead of pointers we use indexes
418*37da2899SCharles.Forsyth# into an array (arena) of nodes of the syntax tree of a regular expression.
419*37da2899SCharles.Forsyth# from a storage-allocation standpoint, this replaces many small
420*37da2899SCharles.Forsyth# allocations of one size with one big one of variable size.
421*37da2899SCharles.Forsyth
422*37da2899SCharles.ForsythReStr: adt {
423*37da2899SCharles.Forsyth	s : string;
424*37da2899SCharles.Forsyth	i : int;	# cursor postion
425*37da2899SCharles.Forsyth	n : int;	# number of chars left; -1 on error
426*37da2899SCharles.Forsyth	peek : fn(s: self ref ReStr): int;
427*37da2899SCharles.Forsyth	next : fn(s: self ref ReStr): int;
428*37da2899SCharles.Forsyth	unput: fn(s: self ref ReStr);
429*37da2899SCharles.Forsyth};
430*37da2899SCharles.Forsyth
431*37da2899SCharles.ForsythReStr.peek(s: self ref ReStr): int
432*37da2899SCharles.Forsyth{
433*37da2899SCharles.Forsyth	if(s.n <= 0)
434*37da2899SCharles.Forsyth		return BAD;
435*37da2899SCharles.Forsyth	return s.s[s.i];
436*37da2899SCharles.Forsyth}
437*37da2899SCharles.Forsyth
438*37da2899SCharles.ForsythReStr.next(s: self ref ReStr): int
439*37da2899SCharles.Forsyth{
440*37da2899SCharles.Forsyth	if(s.n <= 0)
441*37da2899SCharles.Forsyth		syntax("bad regular expression");
442*37da2899SCharles.Forsyth	s.n--;
443*37da2899SCharles.Forsyth	return s.s[s.i++];
444*37da2899SCharles.Forsyth}
445*37da2899SCharles.Forsyth
446*37da2899SCharles.ForsythReStr.unput(s: self ref ReStr)
447*37da2899SCharles.Forsyth{
448*37da2899SCharles.Forsyth	s.n++;
449*37da2899SCharles.Forsyth	s.i--;
450*37da2899SCharles.Forsyth}
451*37da2899SCharles.Forsyth
452*37da2899SCharles.ForsythnewRe(kind: int, left, right: refRex, set: ref Set, ar: ref Arena, pno: int, greedy: int): refRex
453*37da2899SCharles.Forsyth{
454*37da2899SCharles.Forsyth	ar.rex[ar.ptr] = Rex(kind, left, right, set, pno, greedy, nil);
455*37da2899SCharles.Forsyth	return ar.ptr++;
456*37da2899SCharles.Forsyth}
457*37da2899SCharles.Forsyth
458*37da2899SCharles.Forsyth# parse a regex by recursive descent to get a syntax tree
459*37da2899SCharles.Forsyth
460*37da2899SCharles.Forsythre(s: ref ReStr, ar: ref Arena): refRex
461*37da2899SCharles.Forsyth{
462*37da2899SCharles.Forsyth	left := cat(s, ar);
463*37da2899SCharles.Forsyth	if(left==NIL || s.peek()!='|')
464*37da2899SCharles.Forsyth		return left;
465*37da2899SCharles.Forsyth	s.next();
466*37da2899SCharles.Forsyth	right := re(s, ar);
467*37da2899SCharles.Forsyth	if(right == NIL)
468*37da2899SCharles.Forsyth		return NIL;
469*37da2899SCharles.Forsyth	return newRe(ALT, left, right, nil, ar, 0, 0);
470*37da2899SCharles.Forsyth}
471*37da2899SCharles.Forsyth
472*37da2899SCharles.Forsythcat(s: ref ReStr, ar: ref Arena): refRex
473*37da2899SCharles.Forsyth{
474*37da2899SCharles.Forsyth	left := dup(s, ar);
475*37da2899SCharles.Forsyth	if(left == NIL)
476*37da2899SCharles.Forsyth		return left;
477*37da2899SCharles.Forsyth	right := cat(s, ar);
478*37da2899SCharles.Forsyth	if(right == NIL)
479*37da2899SCharles.Forsyth		return left;
480*37da2899SCharles.Forsyth	return newRe(CAT, left, right, nil, ar, 0, 0);
481*37da2899SCharles.Forsyth}
482*37da2899SCharles.Forsyth
483*37da2899SCharles.Forsythdup(s: ref ReStr, ar: ref Arena): refRex
484*37da2899SCharles.Forsyth{
485*37da2899SCharles.Forsyth	n1, n2: int;
486*37da2899SCharles.Forsyth
487*37da2899SCharles.Forsyth	case s.peek() {
488*37da2899SCharles.Forsyth	BAD or ')' or ']' or '|' or '?' or '*' or '+' =>
489*37da2899SCharles.Forsyth		return NIL;
490*37da2899SCharles.Forsyth	}
491*37da2899SCharles.Forsyth	prim: refRex;
492*37da2899SCharles.Forsyth	case kind:=s.next() {
493*37da2899SCharles.Forsyth	'(' =>	if(ar.pno < 0) {
494*37da2899SCharles.Forsyth			if(s.peek() == ')') {
495*37da2899SCharles.Forsyth				s.next();
496*37da2899SCharles.Forsyth				prim = newRe(NUL, NONE, NONE, nil, ar, 0, 0);
497*37da2899SCharles.Forsyth			} else {
498*37da2899SCharles.Forsyth				prim = re(s, ar);
499*37da2899SCharles.Forsyth				if(prim==NIL || s.next()!=')')
500*37da2899SCharles.Forsyth					syntax("( with no )");
501*37da2899SCharles.Forsyth			}
502*37da2899SCharles.Forsyth		} else {
503*37da2899SCharles.Forsyth			pno := ++ar.pno;
504*37da2899SCharles.Forsyth			lp := newRe(LPN, NONE, NONE, nil, ar, pno, 0);
505*37da2899SCharles.Forsyth			rp := newRe(RPN, NONE, NONE, nil, ar, pno, 0);
506*37da2899SCharles.Forsyth			if(s.peek() == ')') {
507*37da2899SCharles.Forsyth				s.next();
508*37da2899SCharles.Forsyth				prim = newRe(CAT, lp, rp, nil, ar, 0, 0);
509*37da2899SCharles.Forsyth			} else {
510*37da2899SCharles.Forsyth				if(s.peek() == '?'){
511*37da2899SCharles.Forsyth					s.next();
512*37da2899SCharles.Forsyth					case s.next(){
513*37da2899SCharles.Forsyth						':' => ar.rex[lp].kind = LPN0;
514*37da2899SCharles.Forsyth							ar.rex[rp].kind = RPN0;
515*37da2899SCharles.Forsyth						'=' => ar.rex[lp].kind = LPN1;
516*37da2899SCharles.Forsyth							ar.rex[rp].kind = RPN1;
517*37da2899SCharles.Forsyth						'!' => ar.rex[lp].kind = LPN2;
518*37da2899SCharles.Forsyth							ar.rex[rp].kind = RPN2;
519*37da2899SCharles.Forsyth						* => syntax("bad char after ?");
520*37da2899SCharles.Forsyth					}
521*37da2899SCharles.Forsyth				}
522*37da2899SCharles.Forsyth				prim = re(s, ar);
523*37da2899SCharles.Forsyth				if(prim==NIL || s.next()!=')')
524*37da2899SCharles.Forsyth					syntax("( with no )");
525*37da2899SCharles.Forsyth				else {
526*37da2899SCharles.Forsyth					prim = newRe(CAT, prim, rp, nil, ar, 0, 0);
527*37da2899SCharles.Forsyth					prim = newRe(CAT, lp, prim, nil, ar, 0, 0);
528*37da2899SCharles.Forsyth				}
529*37da2899SCharles.Forsyth			}
530*37da2899SCharles.Forsyth		}
531*37da2899SCharles.Forsyth	'[' =>	prim = newRe(SET, NONE, NONE, newSet(s), ar, 0, 0);
532*37da2899SCharles.Forsyth	* =>	case kind {
533*37da2899SCharles.Forsyth		'.' =>		kind = DOT;
534*37da2899SCharles.Forsyth		'^' =>	kind = HAT;
535*37da2899SCharles.Forsyth		'$' =>	kind = DOL;
536*37da2899SCharles.Forsyth		}
537*37da2899SCharles.Forsyth		(c, set, op) := esc(s, kind, 0);
538*37da2899SCharles.Forsyth		if(set != nil)
539*37da2899SCharles.Forsyth			prim = newRe(SET, NONE, NONE, set, ar, 0, 0);
540*37da2899SCharles.Forsyth		else if(op == LCP){
541*37da2899SCharles.Forsyth			if(c > ar.pno)
542*37da2899SCharles.Forsyth				syntax("\num too big");
543*37da2899SCharles.Forsyth			prim = newRe(LCP, NONE, NONE, nil, ar, 0, 0);
544*37da2899SCharles.Forsyth			ar.rex[prim].ns = ref Nstate(c, c);
545*37da2899SCharles.Forsyth		}
546*37da2899SCharles.Forsyth		else
547*37da2899SCharles.Forsyth			prim = newRe(c, NONE, NONE, nil, ar, 0, 0);
548*37da2899SCharles.Forsyth	}
549*37da2899SCharles.Forsyth	case s.peek() {
550*37da2899SCharles.Forsyth	'*' =>	kind = CLO;
551*37da2899SCharles.Forsyth	'+' =>	kind = PCLO;
552*37da2899SCharles.Forsyth	'?' =>	kind = OPT;
553*37da2899SCharles.Forsyth	'{' =>	s.next();
554*37da2899SCharles.Forsyth		(n1, n2) = drange(s);
555*37da2899SCharles.Forsyth		kind = MNCLO;
556*37da2899SCharles.Forsyth		if(s.peek() != '}')
557*37da2899SCharles.Forsyth			syntax("{ with no }");
558*37da2899SCharles.Forsyth	* =>	return prim;
559*37da2899SCharles.Forsyth	}
560*37da2899SCharles.Forsyth	s.next();
561*37da2899SCharles.Forsyth	greedy := 1;
562*37da2899SCharles.Forsyth	if(s.peek() == '?'){
563*37da2899SCharles.Forsyth		# non-greedy op
564*37da2899SCharles.Forsyth		greedy = 0;
565*37da2899SCharles.Forsyth		s.next();
566*37da2899SCharles.Forsyth	}
567*37da2899SCharles.Forsyth	prim = newRe(kind, prim, NONE, nil, ar, 0, greedy);
568*37da2899SCharles.Forsyth	if(kind == MNCLO)
569*37da2899SCharles.Forsyth		ns := ar.rex[prim].ns = ref Nstate(n1, n2);
570*37da2899SCharles.Forsyth	return prim;
571*37da2899SCharles.Forsyth}
572*37da2899SCharles.Forsyth
573*37da2899SCharles.Forsythesc(s: ref ReStr, char: int, inset: int): (int, ref Set, int)
574*37da2899SCharles.Forsyth{
575*37da2899SCharles.Forsyth	set: ref Set;
576*37da2899SCharles.Forsyth
577*37da2899SCharles.Forsyth	op := 0;
578*37da2899SCharles.Forsyth	if(char == '\\') {
579*37da2899SCharles.Forsyth		char = s.next();
580*37da2899SCharles.Forsyth		case char {
581*37da2899SCharles.Forsyth		'b' =>
582*37da2899SCharles.Forsyth				if(inset)
583*37da2899SCharles.Forsyth					char = '\b';
584*37da2899SCharles.Forsyth				else
585*37da2899SCharles.Forsyth					char = BEET;
586*37da2899SCharles.Forsyth		'B' =>	if(inset)
587*37da2899SCharles.Forsyth					syntax("\\B in set");
588*37da2899SCharles.Forsyth				else
589*37da2899SCharles.Forsyth					char = BEEF;
590*37da2899SCharles.Forsyth		'f' =>		char = '\u000c';
591*37da2899SCharles.Forsyth		'n' =>	char = '\n';
592*37da2899SCharles.Forsyth		'r' =>		char = '\r';
593*37da2899SCharles.Forsyth		't' =>		char = '\t';
594*37da2899SCharles.Forsyth		'v' =>	char = '\v';
595*37da2899SCharles.Forsyth		'0' to '9' =>
596*37da2899SCharles.Forsyth				s.unput();
597*37da2899SCharles.Forsyth				char = digits(s);
598*37da2899SCharles.Forsyth				if(char == 0)
599*37da2899SCharles.Forsyth					char = '\0';
600*37da2899SCharles.Forsyth				else if(inset)
601*37da2899SCharles.Forsyth					syntax("\num in set");
602*37da2899SCharles.Forsyth				else
603*37da2899SCharles.Forsyth					op = LCP;
604*37da2899SCharles.Forsyth		'x' =>	char = hexdigits(s, 2);
605*37da2899SCharles.Forsyth		'u' =>	char = hexdigits(s, 4);
606*37da2899SCharles.Forsyth		'c' =>	char = s.next()%32;
607*37da2899SCharles.Forsyth		'd' or 'D' =>
608*37da2899SCharles.Forsyth				set = newset('0', '9');
609*37da2899SCharles.Forsyth				if(char == 'D')
610*37da2899SCharles.Forsyth					set.neg = 1;
611*37da2899SCharles.Forsyth		's' or 'S' =>
612*37da2899SCharles.Forsyth				set = newset(' ', ' ');
613*37da2899SCharles.Forsyth				addsets(set, "\t\v\u000c\u00a0\n\r\u2028\u2029");
614*37da2899SCharles.Forsyth				if(char == 'S')
615*37da2899SCharles.Forsyth					set.neg = 1;
616*37da2899SCharles.Forsyth		'w' or 'W' =>
617*37da2899SCharles.Forsyth				set = newset('0', '9');
618*37da2899SCharles.Forsyth				addset(set, 'a', 'z');
619*37da2899SCharles.Forsyth				addset(set, 'A', 'Z');
620*37da2899SCharles.Forsyth				addset(set, '_', '_');
621*37da2899SCharles.Forsyth				if(char == 'W')
622*37da2899SCharles.Forsyth					set.neg = 1;
623*37da2899SCharles.Forsyth		* =>
624*37da2899SCharles.Forsyth				;
625*37da2899SCharles.Forsyth		}
626*37da2899SCharles.Forsyth	}
627*37da2899SCharles.Forsyth	if(char == -1){
628*37da2899SCharles.Forsyth		if(inset)
629*37da2899SCharles.Forsyth			syntax("bad set");
630*37da2899SCharles.Forsyth		else
631*37da2899SCharles.Forsyth			syntax("bad character");
632*37da2899SCharles.Forsyth	}
633*37da2899SCharles.Forsyth	return (char, set, op);
634*37da2899SCharles.Forsyth}
635*37da2899SCharles.Forsyth
636*37da2899SCharles.Forsythisdigit(c: int): int
637*37da2899SCharles.Forsyth{
638*37da2899SCharles.Forsyth	return c >= '0' && c <= '9';
639*37da2899SCharles.Forsyth}
640*37da2899SCharles.Forsyth
641*37da2899SCharles.Forsythislower(c: int): int
642*37da2899SCharles.Forsyth{
643*37da2899SCharles.Forsyth	return c >= 'a' && c <= 'z';
644*37da2899SCharles.Forsyth}
645*37da2899SCharles.Forsyth
646*37da2899SCharles.Forsythisupper(c: int): int
647*37da2899SCharles.Forsyth{
648*37da2899SCharles.Forsyth	return c >= 'A' && c <= 'Z';
649*37da2899SCharles.Forsyth}
650*37da2899SCharles.Forsyth
651*37da2899SCharles.Forsythisalpha(c: int): int
652*37da2899SCharles.Forsyth{
653*37da2899SCharles.Forsyth	return islower(c) || isupper(c);
654*37da2899SCharles.Forsyth}
655*37da2899SCharles.Forsyth
656*37da2899SCharles.Forsythhexdigit(c: int): int
657*37da2899SCharles.Forsyth{
658*37da2899SCharles.Forsyth	if(isdigit(c))
659*37da2899SCharles.Forsyth		return c-'0';
660*37da2899SCharles.Forsyth	if('a' <= c && c <= 'f')
661*37da2899SCharles.Forsyth		return c-'a'+10;
662*37da2899SCharles.Forsyth	if('A' <= c && c <= 'F')
663*37da2899SCharles.Forsyth		return c-'A'+10;
664*37da2899SCharles.Forsyth	return -1;
665*37da2899SCharles.Forsyth}
666*37da2899SCharles.Forsyth
667*37da2899SCharles.Forsythdigits(s: ref ReStr): int
668*37da2899SCharles.Forsyth{
669*37da2899SCharles.Forsyth	n := 0;
670*37da2899SCharles.Forsyth	while(isdigit(s.peek()))
671*37da2899SCharles.Forsyth		n = 10*n + s.next() -'0';
672*37da2899SCharles.Forsyth	return n;
673*37da2899SCharles.Forsyth}
674*37da2899SCharles.Forsyth
675*37da2899SCharles.Forsythhexdigits(s: ref ReStr, n: int): int
676*37da2899SCharles.Forsyth{
677*37da2899SCharles.Forsyth	x := 0;
678*37da2899SCharles.Forsyth	for(i := 0; i < n; i++){
679*37da2899SCharles.Forsyth		v := hexdigit(s.next());
680*37da2899SCharles.Forsyth		if(v < 0)
681*37da2899SCharles.Forsyth			return -1;
682*37da2899SCharles.Forsyth		x = 16*x+v;
683*37da2899SCharles.Forsyth	}
684*37da2899SCharles.Forsyth	return x;
685*37da2899SCharles.Forsyth}
686*37da2899SCharles.Forsyth
687*37da2899SCharles.Forsythdrange(s: ref ReStr): (int, int)
688*37da2899SCharles.Forsyth{
689*37da2899SCharles.Forsyth	n1 := n2 := -1;
690*37da2899SCharles.Forsyth	if(isdigit(s.peek()))
691*37da2899SCharles.Forsyth		n1 = digits(s);
692*37da2899SCharles.Forsyth	if(s.peek() == ','){
693*37da2899SCharles.Forsyth		s.next();
694*37da2899SCharles.Forsyth		if(isdigit(s.peek()))
695*37da2899SCharles.Forsyth			n2 = digits(s);
696*37da2899SCharles.Forsyth		else
697*37da2899SCharles.Forsyth			n2 = HUGE;
698*37da2899SCharles.Forsyth	}
699*37da2899SCharles.Forsyth	else
700*37da2899SCharles.Forsyth		n2 = n1;
701*37da2899SCharles.Forsyth	if(n1 < 0 || n1 > n2)
702*37da2899SCharles.Forsyth		syntax("bad number range");
703*37da2899SCharles.Forsyth	return (n1, n2);
704*37da2899SCharles.Forsyth}
705*37da2899SCharles.Forsyth
706*37da2899SCharles.Forsyth# walk the tree adjusting pointers to refer to
707*37da2899SCharles.Forsyth# next state of the finite state machine
708*37da2899SCharles.Forsyth
709*37da2899SCharles.Forsythwalk(r: refRex, succ: refRex, ar: ref Arena)
710*37da2899SCharles.Forsyth{
711*37da2899SCharles.Forsyth	if(r==NONE)
712*37da2899SCharles.Forsyth		return;
713*37da2899SCharles.Forsyth	rex := ar.rex[r];
714*37da2899SCharles.Forsyth	case rex.kind {
715*37da2899SCharles.Forsyth	ALT =>	walk(rex.left, succ, ar);
716*37da2899SCharles.Forsyth		walk(rex.right, succ, ar);
717*37da2899SCharles.Forsyth		return;
718*37da2899SCharles.Forsyth	CAT =>	walk(rex.left, rex.right, ar);
719*37da2899SCharles.Forsyth		walk(rex.right, succ, ar);
720*37da2899SCharles.Forsyth		ar.rex[r] = ar.rex[rex.left];	# optimization
721*37da2899SCharles.Forsyth		return;
722*37da2899SCharles.Forsyth	CLO or PCLO =>
723*37da2899SCharles.Forsyth		end := newRe(OPT, r, succ, nil, ar, 0, rex.greedy); # here's the circularity
724*37da2899SCharles.Forsyth		walk(rex.left, end, ar);
725*37da2899SCharles.Forsyth	OPT =>	walk(rex.left, succ, ar);
726*37da2899SCharles.Forsyth	MNCLO =>
727*37da2899SCharles.Forsyth		ar.ptr++;
728*37da2899SCharles.Forsyth		walk(rex.left, r, ar);
729*37da2899SCharles.Forsyth	LCP =>
730*37da2899SCharles.Forsyth		ar.rex[r].left = newRe(IDLE, NONE, succ, nil, ar, 0, 0);
731*37da2899SCharles.Forsyth	}
732*37da2899SCharles.Forsyth	ar.rex[r].right = succ;
733*37da2899SCharles.Forsyth}
734*37da2899SCharles.Forsyth
735*37da2899SCharles.Forsythprtree(r: refRex, ar: ref Arena, done: list of int, ind: string): list of int
736*37da2899SCharles.Forsyth{
737*37da2899SCharles.Forsyth	sys->print("%s", ind);
738*37da2899SCharles.Forsyth	if(r==NIL){
739*37da2899SCharles.Forsyth		sys->print("NIL\n");
740*37da2899SCharles.Forsyth		return done;
741*37da2899SCharles.Forsyth	}
742*37da2899SCharles.Forsyth	if(r==NONE){
743*37da2899SCharles.Forsyth		sys->print("NONE\n");
744*37da2899SCharles.Forsyth		return done;
745*37da2899SCharles.Forsyth	}
746*37da2899SCharles.Forsyth	printed := 0;
747*37da2899SCharles.Forsyth	for(li := done; li != nil; li = tl li){
748*37da2899SCharles.Forsyth		if(hd li == r){
749*37da2899SCharles.Forsyth			printed = 1;
750*37da2899SCharles.Forsyth			break;
751*37da2899SCharles.Forsyth		}
752*37da2899SCharles.Forsyth	}
753*37da2899SCharles.Forsyth	rex := ar.rex[r];
754*37da2899SCharles.Forsyth	op := "";
755*37da2899SCharles.Forsyth	z := "Z";
756*37da2899SCharles.Forsyth	case rex.kind{
757*37da2899SCharles.Forsyth		ALT => op = "|";
758*37da2899SCharles.Forsyth		CAT => op = "and";
759*37da2899SCharles.Forsyth		DOT => op = ".";
760*37da2899SCharles.Forsyth		SET => op = "[]";
761*37da2899SCharles.Forsyth		HAT => op = "^";
762*37da2899SCharles.Forsyth		DOL => op = "$";
763*37da2899SCharles.Forsyth		NUL => op = "NUL";
764*37da2899SCharles.Forsyth		PCLO => op = "+";
765*37da2899SCharles.Forsyth		CLO => op = "*";
766*37da2899SCharles.Forsyth		OPT => op = "?";
767*37da2899SCharles.Forsyth		LPN => op = "(";
768*37da2899SCharles.Forsyth		RPN => op = ")";
769*37da2899SCharles.Forsyth		LPN0 => op = "?:";
770*37da2899SCharles.Forsyth		RPN0 => op = ":?";
771*37da2899SCharles.Forsyth		LPN1 => op = "?=";
772*37da2899SCharles.Forsyth		RPN1 => op = "=?";
773*37da2899SCharles.Forsyth		LPN2 => op = "?!";
774*37da2899SCharles.Forsyth		RPN2 => op = "!?";
775*37da2899SCharles.Forsyth		BEET => op = "\\b";
776*37da2899SCharles.Forsyth		BEEF => op = "\\B";
777*37da2899SCharles.Forsyth		MNCLO => op = "{}";
778*37da2899SCharles.Forsyth		LCP => op = "n";
779*37da2899SCharles.Forsyth		IDLE => op = "i";
780*37da2899SCharles.Forsyth		* => z[0] = rex.kind; op = z;
781*37da2899SCharles.Forsyth	}
782*37da2899SCharles.Forsyth	if(printed){
783*37da2899SCharles.Forsyth		sys->print("node %d (%d)\n", r, r);
784*37da2899SCharles.Forsyth		return done;
785*37da2899SCharles.Forsyth	}
786*37da2899SCharles.Forsyth	else{
787*37da2899SCharles.Forsyth		if(rex.ns != nil)
788*37da2899SCharles.Forsyth			sys->print("%s [%d-%d] (%d)\n", op, rex.ns.m, rex.ns.n, r);
789*37da2899SCharles.Forsyth		else
790*37da2899SCharles.Forsyth			sys->print("%s (%d)\n", op, r);
791*37da2899SCharles.Forsyth		done = r :: done;
792*37da2899SCharles.Forsyth		ind += "  ";
793*37da2899SCharles.Forsyth		done = prtree(rex.left, ar, done, ind);
794*37da2899SCharles.Forsyth		done  = prtree(rex.right, ar, done, ind);
795*37da2899SCharles.Forsyth		return done;
796*37da2899SCharles.Forsyth	}
797*37da2899SCharles.Forsyth}
798*37da2899SCharles.Forsyth
799*37da2899SCharles.Forsythcompile(e: string, flag: int): (Re, string)
800*37da2899SCharles.Forsyth{
801*37da2899SCharles.Forsyth	if(e == nil)
802*37da2899SCharles.Forsyth		return (nil, "missing expression");
803*37da2899SCharles.Forsyth	s := ref ReStr(e, 0, len e);
804*37da2899SCharles.Forsyth	ar := ref Arena(array[2*s.n] of Rex, 0, 0, (flag&1)-1);
805*37da2899SCharles.Forsyth	start := ar.start = re(s, ar);
806*37da2899SCharles.Forsyth	if(start==NIL || s.n!=0)
807*37da2899SCharles.Forsyth		syntax("invalid regular expression");
808*37da2899SCharles.Forsyth	walk(start, NIL, ar);
809*37da2899SCharles.Forsyth	# prtree(start, ar, nil, "");
810*37da2899SCharles.Forsyth	if(ar.pno < 0)
811*37da2899SCharles.Forsyth		ar.pno = 0;
812*37da2899SCharles.Forsyth	return (ar, nil);
813*37da2899SCharles.Forsyth}
814*37da2899SCharles.Forsyth
815*37da2899SCharles.Forsyth# todo: queue for epsilon and advancing transitions
816*37da2899SCharles.Forsyth
817*37da2899SCharles.ForsythNum: adt{
818*37da2899SCharles.Forsyth	ns: ref Nstate;
819*37da2899SCharles.Forsyth	m: int;
820*37da2899SCharles.Forsyth	n: int;
821*37da2899SCharles.Forsyth};
822*37da2899SCharles.ForsythGaz: adt {
823*37da2899SCharles.Forsyth	pno: int;
824*37da2899SCharles.Forsyth	beg: int;
825*37da2899SCharles.Forsyth	end: int;
826*37da2899SCharles.Forsyth};
827*37da2899SCharles.ForsythTrace: adt {
828*37da2899SCharles.Forsyth	cre: refRex;		# cursor in Re
829*37da2899SCharles.Forsyth	trans: int;		# 0 epsilon transition, 1 advancing transition
830*37da2899SCharles.Forsyth	beg: int;		# where this trace began;
831*37da2899SCharles.Forsyth	end: int;		# where this trace ended if success (-1 by default)
832*37da2899SCharles.Forsyth	gaz: list of Gaz;
833*37da2899SCharles.Forsyth	ns: list of ref Num;
834*37da2899SCharles.Forsyth};
835*37da2899SCharles.ForsythQueue: adt {
836*37da2899SCharles.Forsyth	ptr: int;
837*37da2899SCharles.Forsyth	q: array of Trace;
838*37da2899SCharles.Forsyth};
839*37da2899SCharles.Forsyth
840*37da2899SCharles.Forsythexecute(re: Re, s: string): array of (int, int)
841*37da2899SCharles.Forsyth{
842*37da2899SCharles.Forsyth	return executese(re, s, (-1,-1), 1, 1, 1, 0);
843*37da2899SCharles.Forsyth}
844*37da2899SCharles.Forsyth
845*37da2899SCharles.Forsythexecutese(re: Re, s: string, range: (int, int), bol: int, eol: int, multiline: int, ignorecase: int): array of (int,int)
846*37da2899SCharles.Forsyth{
847*37da2899SCharles.Forsyth	if(re==nil)
848*37da2899SCharles.Forsyth		return nil;
849*37da2899SCharles.Forsyth	(s0, s1) := range;
850*37da2899SCharles.Forsyth	if(s0 < 0)
851*37da2899SCharles.Forsyth		s0 = 0;
852*37da2899SCharles.Forsyth	if(s1 < 0)
853*37da2899SCharles.Forsyth		s1 = len s;
854*37da2899SCharles.Forsyth	match := 0;
855*37da2899SCharles.Forsyth	todo := ref Queue(0, array[2*re.ptr] of Trace);
856*37da2899SCharles.Forsyth	for(i:=s0; i<=s1; i++) {
857*37da2899SCharles.Forsyth		if(!match)		# no leftmost match yet
858*37da2899SCharles.Forsyth			todo.q[todo.ptr++] = Trace(re.start, 0, i, -1, nil, nil);
859*37da2899SCharles.Forsyth		for(k:=0; k<todo.ptr; k++) {
860*37da2899SCharles.Forsyth			q := todo.q[k];
861*37da2899SCharles.Forsyth			if(q.trans)
862*37da2899SCharles.Forsyth				continue;
863*37da2899SCharles.Forsyth			rex := re.rex[q.cre];
864*37da2899SCharles.Forsyth			next0 := next1 := next2 := NONE;
865*37da2899SCharles.Forsyth			case rex.kind {
866*37da2899SCharles.Forsyth			NUL =>
867*37da2899SCharles.Forsyth				next1 = rex.right;
868*37da2899SCharles.Forsyth			DOT =>
869*37da2899SCharles.Forsyth				if(i<len s && !islt(s[i]))
870*37da2899SCharles.Forsyth					next2 = rex.right;
871*37da2899SCharles.Forsyth			HAT =>
872*37da2899SCharles.Forsyth				if(i == s0 && bol)
873*37da2899SCharles.Forsyth					next1 = rex.right;
874*37da2899SCharles.Forsyth				else if(multiline && i > 0 && islt(s[i-1]))
875*37da2899SCharles.Forsyth					next1 = rex.right;
876*37da2899SCharles.Forsyth			DOL =>
877*37da2899SCharles.Forsyth				if(i == s1 && eol)
878*37da2899SCharles.Forsyth					next1 = rex.right;
879*37da2899SCharles.Forsyth				else if(multiline && i < s1 && islt(s[i]))
880*37da2899SCharles.Forsyth					next1 = rex.right;
881*37da2899SCharles.Forsyth			SET =>
882*37da2899SCharles.Forsyth				if(i<len s && member(s[i], rex.set, ignorecase))
883*37da2899SCharles.Forsyth					next2 = rex.right;
884*37da2899SCharles.Forsyth			CAT or
885*37da2899SCharles.Forsyth			PCLO =>
886*37da2899SCharles.Forsyth				next1 = rex.left;
887*37da2899SCharles.Forsyth			ALT or
888*37da2899SCharles.Forsyth			CLO or
889*37da2899SCharles.Forsyth			OPT =>
890*37da2899SCharles.Forsyth				if(rex.kind == ALT || rex.greedy){
891*37da2899SCharles.Forsyth					next0 = rex.left;
892*37da2899SCharles.Forsyth					next1 = rex.right;
893*37da2899SCharles.Forsyth				}
894*37da2899SCharles.Forsyth				else{
895*37da2899SCharles.Forsyth					next0 = rex.right;
896*37da2899SCharles.Forsyth					next1 = rex.left;
897*37da2899SCharles.Forsyth				}
898*37da2899SCharles.Forsyth			LPN =>
899*37da2899SCharles.Forsyth				next1 = rex.right;
900*37da2899SCharles.Forsyth				q.gaz = Gaz(rex.pno,i,-1)::q.gaz;
901*37da2899SCharles.Forsyth			RPN =>
902*37da2899SCharles.Forsyth				next1 = rex.right;
903*37da2899SCharles.Forsyth				for(r:=q.gaz; ; r=tl r) {
904*37da2899SCharles.Forsyth					(pno,beg1,end1) := hd r;
905*37da2899SCharles.Forsyth					if(rex.pno==pno && end1==-1) {
906*37da2899SCharles.Forsyth						q.gaz = Gaz(pno,beg1,i)::q.gaz;
907*37da2899SCharles.Forsyth						break;
908*37da2899SCharles.Forsyth					}
909*37da2899SCharles.Forsyth				}
910*37da2899SCharles.Forsyth			LPN0 or RPN0 or RPN1 or RPN2 =>
911*37da2899SCharles.Forsyth				next1 = rex.right;
912*37da2899SCharles.Forsyth			LPN1 =>
913*37da2899SCharles.Forsyth				(rpn, nxt, nre) := storetree(q.cre, re);
914*37da2899SCharles.Forsyth				m := executese(nre, s, (i, -1), bol, eol, multiline, ignorecase);
915*37da2899SCharles.Forsyth				if(m != nil && m[0].t0 == i){
916*37da2899SCharles.Forsyth					next1 = nxt;
917*37da2899SCharles.Forsyth					for(j := 1; j < len m; j++)
918*37da2899SCharles.Forsyth						if(m[j].t0 >= 0)
919*37da2899SCharles.Forsyth							q.gaz = Gaz(j, m[j].t0, m[j].t1)::q.gaz;
920*37da2899SCharles.Forsyth				}
921*37da2899SCharles.Forsyth				restoretree(LPN1, rpn, nxt, nre);
922*37da2899SCharles.Forsyth			LPN2 =>
923*37da2899SCharles.Forsyth				(rpn, nxt, nre) := storetree(q.cre, re);
924*37da2899SCharles.Forsyth				m := executese(nre, s, (i, -1), bol, eol, multiline, ignorecase);
925*37da2899SCharles.Forsyth				if(m == nil || m[0].t0 != i)
926*37da2899SCharles.Forsyth					next1 = nxt;
927*37da2899SCharles.Forsyth				restoretree(LPN2, rpn, nxt, nre);
928*37da2899SCharles.Forsyth			MNCLO =>
929*37da2899SCharles.Forsyth				num: ref Num;
930*37da2899SCharles.Forsyth
931*37da2899SCharles.Forsyth				(q.ns, num) = nextn(q.cre, q.ns, rex.ns.m, rex.ns.n, re);
932*37da2899SCharles.Forsyth				if(num.m > 0)
933*37da2899SCharles.Forsyth					next1 = rex.left;
934*37da2899SCharles.Forsyth				else if(num.n > 0){
935*37da2899SCharles.Forsyth					if(rex.greedy){
936*37da2899SCharles.Forsyth						next0 = rex.left;
937*37da2899SCharles.Forsyth						next1 = rex.right;
938*37da2899SCharles.Forsyth					}
939*37da2899SCharles.Forsyth					else{
940*37da2899SCharles.Forsyth						next0 = rex.right;
941*37da2899SCharles.Forsyth						next1 = rex.left;
942*37da2899SCharles.Forsyth					}
943*37da2899SCharles.Forsyth				}
944*37da2899SCharles.Forsyth				else{
945*37da2899SCharles.Forsyth					next1 = rex.right;
946*37da2899SCharles.Forsyth					(num.m, num.n) = (-1, -1);
947*37da2899SCharles.Forsyth				}
948*37da2899SCharles.Forsyth			LCP =>
949*37da2899SCharles.Forsyth				pno := rex.ns.m;
950*37da2899SCharles.Forsyth				(beg1, end1) := lcpar(q.gaz, pno);
951*37da2899SCharles.Forsyth				l := end1-beg1;
952*37da2899SCharles.Forsyth				if(beg1 < 0)	# undefined so succeeds
953*37da2899SCharles.Forsyth					next1 = rex.right;
954*37da2899SCharles.Forsyth				else if(i+l <= s1 && eqstr(s[beg1: end1], s[i: i+l], ignorecase)){
955*37da2899SCharles.Forsyth					(q.ns, nil) = nextn(rex.left, q.ns, l, l, re);
956*37da2899SCharles.Forsyth					next1 = rex.left;	# idle
957*37da2899SCharles.Forsyth				}
958*37da2899SCharles.Forsyth			IDLE =>
959*37da2899SCharles.Forsyth				num: ref Num;
960*37da2899SCharles.Forsyth
961*37da2899SCharles.Forsyth				(q.ns, num) = nextn(q.cre, q.ns, -1, -1, re);
962*37da2899SCharles.Forsyth				if(num.m >= 0)
963*37da2899SCharles.Forsyth					next2 = q.cre;
964*37da2899SCharles.Forsyth				else{
965*37da2899SCharles.Forsyth					next1 = rex.right;
966*37da2899SCharles.Forsyth					(num.m, num.n) = (-1, -1);
967*37da2899SCharles.Forsyth				}
968*37da2899SCharles.Forsyth			BEET =>
969*37da2899SCharles.Forsyth				if(iswordc(s, i-1) != iswordc(s, i))
970*37da2899SCharles.Forsyth					next1 = rex.right;
971*37da2899SCharles.Forsyth			BEEF =>
972*37da2899SCharles.Forsyth				if(iswordc(s, i-1) == iswordc(s, i))
973*37da2899SCharles.Forsyth					next1 = rex.right;
974*37da2899SCharles.Forsyth			* =>
975*37da2899SCharles.Forsyth				if(i<len s && (rex.kind==s[i] || (ignorecase && eqcase(rex.kind, s[i]))))
976*37da2899SCharles.Forsyth					next2 = rex.right;
977*37da2899SCharles.Forsyth			}
978*37da2899SCharles.Forsyth			l := k;
979*37da2899SCharles.Forsyth			if(next0 != NONE) {
980*37da2899SCharles.Forsyth				if(next0 != NIL)
981*37da2899SCharles.Forsyth					(k, l) = insert(next0, 0, q.beg, -1, q.gaz, q.ns, todo, k, l);
982*37da2899SCharles.Forsyth				else{
983*37da2899SCharles.Forsyth					match = 1;
984*37da2899SCharles.Forsyth					(k, l) = insert(NIL, 2, q.beg, i, q.gaz, nil, todo, k, l);
985*37da2899SCharles.Forsyth				}
986*37da2899SCharles.Forsyth			}
987*37da2899SCharles.Forsyth			if(next1 != NONE) {
988*37da2899SCharles.Forsyth				if(next1 != NIL)
989*37da2899SCharles.Forsyth					(k, l) = insert(next1, 0, q.beg, -1, q.gaz, q.ns, todo, k, l);
990*37da2899SCharles.Forsyth				else{
991*37da2899SCharles.Forsyth					match = 1;
992*37da2899SCharles.Forsyth					(k, l) = insert(NIL, 2, q.beg, i, q.gaz, nil, todo, k, l);
993*37da2899SCharles.Forsyth				}
994*37da2899SCharles.Forsyth			}
995*37da2899SCharles.Forsyth			if(next2 != NONE) {
996*37da2899SCharles.Forsyth				if(next2 != NIL)
997*37da2899SCharles.Forsyth					(k, l) = insert(next2, 1, q.beg, -1, q.gaz, q.ns, todo, k, l);
998*37da2899SCharles.Forsyth				else{
999*37da2899SCharles.Forsyth					match = 1;
1000*37da2899SCharles.Forsyth					(k, l) = insert(NIL, 2, q.beg, i+1, q.gaz, nil, todo, k, l);
1001*37da2899SCharles.Forsyth				}
1002*37da2899SCharles.Forsyth			}
1003*37da2899SCharles.Forsyth		}
1004*37da2899SCharles.Forsyth		if(!atoe(todo) && match)
1005*37da2899SCharles.Forsyth			break;
1006*37da2899SCharles.Forsyth	}
1007*37da2899SCharles.Forsyth	if(todo.ptr == 0)
1008*37da2899SCharles.Forsyth		return nil;
1009*37da2899SCharles.Forsyth	if(todo.ptr > 1)
1010*37da2899SCharles.Forsyth		rfatal(sys->sprint("todo.ptr = %d", todo.ptr));
1011*37da2899SCharles.Forsyth	if(todo.q[0].trans != 2)
1012*37da2899SCharles.Forsyth		rfatal(sys->sprint("trans = %d", todo.q[0].trans));
1013*37da2899SCharles.Forsyth	if(todo.q[0].cre != NIL)
1014*37da2899SCharles.Forsyth		rfatal(sys->sprint("cre = %d", todo.q[0].cre));
1015*37da2899SCharles.Forsyth	beg := todo.q[0].beg;
1016*37da2899SCharles.Forsyth	end := todo.q[0].end;
1017*37da2899SCharles.Forsyth	gaz := todo.q[0].gaz;
1018*37da2899SCharles.Forsyth	if(beg == -1)
1019*37da2899SCharles.Forsyth		return nil;
1020*37da2899SCharles.Forsyth	result := array[re.pno+1] of { 0 => (beg,end), * => (-1,-1) };
1021*37da2899SCharles.Forsyth	for( ; gaz!=nil; gaz=tl gaz) {
1022*37da2899SCharles.Forsyth		(pno, beg1, end1) := hd gaz;
1023*37da2899SCharles.Forsyth		(rbeg, nil) := result[pno];
1024*37da2899SCharles.Forsyth		if(rbeg==-1 && (beg1|end1)!=-1)
1025*37da2899SCharles.Forsyth			result[pno] = (beg1,end1);
1026*37da2899SCharles.Forsyth	}
1027*37da2899SCharles.Forsyth	return result;
1028*37da2899SCharles.Forsyth}
1029*37da2899SCharles.Forsyth
1030*37da2899SCharles.Forsythbetter(newbeg, newend, oldbeg, oldend: int): int
1031*37da2899SCharles.Forsyth{
1032*37da2899SCharles.Forsyth	return oldbeg==-1 || newbeg<oldbeg ||
1033*37da2899SCharles.Forsyth	       newbeg==oldbeg && newend>oldend;
1034*37da2899SCharles.Forsyth}
1035*37da2899SCharles.Forsyth
1036*37da2899SCharles.Forsythinsert(next: refRex, trans: int, tbeg: int, tend: int, tgaz: list of Gaz, tns: list of ref Num, todo: ref Queue, k: int, l: int): (int, int)
1037*37da2899SCharles.Forsyth{
1038*37da2899SCharles.Forsyth# sys->print("insert %d eps=%d beg=%d end=%d (k, l) = (%d %d) => ", next, trans, tbeg, tend, k, l);
1039*37da2899SCharles.Forsyth	for(j:=0; j<todo.ptr; j++){
1040*37da2899SCharles.Forsyth		if(todo.q[j].trans == trans){
1041*37da2899SCharles.Forsyth			if(todo.q[j].cre == next){
1042*37da2899SCharles.Forsyth				if(better(todo.q[j].beg, todo.q[j].end, tbeg, tend))
1043*37da2899SCharles.Forsyth					return (k, l);
1044*37da2899SCharles.Forsyth				else if(better(tbeg, tend, todo.q[j].beg, todo.q[j].end))
1045*37da2899SCharles.Forsyth					break;
1046*37da2899SCharles.Forsyth				else if(j < k)
1047*37da2899SCharles.Forsyth					return (k, l);
1048*37da2899SCharles.Forsyth				else
1049*37da2899SCharles.Forsyth					break;
1050*37da2899SCharles.Forsyth			}
1051*37da2899SCharles.Forsyth		}
1052*37da2899SCharles.Forsyth	}
1053*37da2899SCharles.Forsyth	if(j < k){
1054*37da2899SCharles.Forsyth		k--;
1055*37da2899SCharles.Forsyth		l--;
1056*37da2899SCharles.Forsyth	}
1057*37da2899SCharles.Forsyth	if(j < todo.ptr){
1058*37da2899SCharles.Forsyth		todo.q[j: ] = todo.q[j+1: todo.ptr];
1059*37da2899SCharles.Forsyth		todo.ptr--;
1060*37da2899SCharles.Forsyth	}
1061*37da2899SCharles.Forsyth	todo.q[l+2: ] = todo.q[l+1: todo.ptr];
1062*37da2899SCharles.Forsyth	todo.ptr++;
1063*37da2899SCharles.Forsyth	todo.q[l+1] = Trace(next, trans, tbeg, tend, tgaz, tns);
1064*37da2899SCharles.Forsyth# for(j=0; j < todo.ptr; j++) sys->print("%d(%d) ", todo.q[j].cre, todo.q[j].trans); sys->print("\n");
1065*37da2899SCharles.Forsyth	return (k, l+1);
1066*37da2899SCharles.Forsyth}
1067*37da2899SCharles.Forsyth
1068*37da2899SCharles.Forsyth# remove epsilon transitions and move advancing transitions to epsilon ones
1069*37da2899SCharles.Forsythatoe(todo: ref Queue): int
1070*37da2899SCharles.Forsyth{
1071*37da2899SCharles.Forsyth	n := 0;
1072*37da2899SCharles.Forsyth	for(j := 0; j < todo.ptr; j++){
1073*37da2899SCharles.Forsyth		if(todo.q[j].trans){
1074*37da2899SCharles.Forsyth			if(todo.q[j].trans == 1){
1075*37da2899SCharles.Forsyth				todo.q[j].trans = 0;
1076*37da2899SCharles.Forsyth				n++;
1077*37da2899SCharles.Forsyth			}
1078*37da2899SCharles.Forsyth		}
1079*37da2899SCharles.Forsyth		else{
1080*37da2899SCharles.Forsyth			todo.q[j: ] = todo.q[j+1: todo.ptr];
1081*37da2899SCharles.Forsyth			todo.ptr--;
1082*37da2899SCharles.Forsyth			j--;
1083*37da2899SCharles.Forsyth		}
1084*37da2899SCharles.Forsyth	}
1085*37da2899SCharles.Forsyth	return n;
1086*37da2899SCharles.Forsyth}
1087*37da2899SCharles.Forsyth
1088*37da2899SCharles.Forsythnextn(re: int, ln: list of ref Num, m: int, n: int, ar: ref Arena): (list of ref Num, ref Num)
1089*37da2899SCharles.Forsyth{
1090*37da2899SCharles.Forsyth	num: ref Num;
1091*37da2899SCharles.Forsyth
1092*37da2899SCharles.Forsyth	ns := ar.rex[re].ns;
1093*37da2899SCharles.Forsyth	for(l := ln; l != nil; l = tl l){
1094*37da2899SCharles.Forsyth		if((hd l).ns == ns){
1095*37da2899SCharles.Forsyth			num = hd l;
1096*37da2899SCharles.Forsyth			break;
1097*37da2899SCharles.Forsyth		}
1098*37da2899SCharles.Forsyth	}
1099*37da2899SCharles.Forsyth	if(num == nil)
1100*37da2899SCharles.Forsyth		ln = (num = ref Num(ns, -1, -1)) :: ln;
1101*37da2899SCharles.Forsyth	if(num.m == -1 && num.n == -1)
1102*37da2899SCharles.Forsyth		(num.m, num.n) = (m, n);
1103*37da2899SCharles.Forsyth	else
1104*37da2899SCharles.Forsyth		(nil, nil) = (--num.m, --num.n);
1105*37da2899SCharles.Forsyth	return (ln, num);
1106*37da2899SCharles.Forsyth}
1107*37da2899SCharles.Forsyth
1108*37da2899SCharles.ForsythASCII : con 128;
1109*37da2899SCharles.ForsythWORD : con 32;
1110*37da2899SCharles.Forsyth
1111*37da2899SCharles.Forsythmem(c: int, set: ref Set): int
1112*37da2899SCharles.Forsyth{
1113*37da2899SCharles.Forsyth	return (set.ascii[c/WORD]>>c%WORD)&1;
1114*37da2899SCharles.Forsyth}
1115*37da2899SCharles.Forsyth
1116*37da2899SCharles.Forsythmember(char: int, set: ref Set, ignorecase: int): int
1117*37da2899SCharles.Forsyth{
1118*37da2899SCharles.Forsyth	if(set.subset != nil){
1119*37da2899SCharles.Forsyth		for(l := set.subset; l != nil; l = tl l)
1120*37da2899SCharles.Forsyth			if(member(char, hd l, ignorecase))
1121*37da2899SCharles.Forsyth				return !set.neg;
1122*37da2899SCharles.Forsyth	}
1123*37da2899SCharles.Forsyth	if(char < 128){
1124*37da2899SCharles.Forsyth		if(ignorecase)
1125*37da2899SCharles.Forsyth			return (mem(tolower(char), set) || mem(toupper(char), set))^set.neg;
1126*37da2899SCharles.Forsyth		else
1127*37da2899SCharles.Forsyth			return ((set.ascii[char/WORD]>>char%WORD)&1)^set.neg;
1128*37da2899SCharles.Forsyth	}
1129*37da2899SCharles.Forsyth	for(l:=set.unicode; l!=nil; l=tl l) {
1130*37da2899SCharles.Forsyth		(beg, end) := hd l;
1131*37da2899SCharles.Forsyth		if(char>=beg && char<=end)
1132*37da2899SCharles.Forsyth			return !set.neg;
1133*37da2899SCharles.Forsyth	}
1134*37da2899SCharles.Forsyth	return set.neg;
1135*37da2899SCharles.Forsyth}
1136*37da2899SCharles.Forsyth
1137*37da2899SCharles.ForsythnewSet(s: ref ReStr): ref Set
1138*37da2899SCharles.Forsyth{
1139*37da2899SCharles.Forsyth	op: int;
1140*37da2899SCharles.Forsyth	set0: ref Set;
1141*37da2899SCharles.Forsyth
1142*37da2899SCharles.Forsyth	set := ref Set(0, array[ASCII/WORD] of {* => 0}, nil, nil);
1143*37da2899SCharles.Forsyth	if(s.peek() == '^') {
1144*37da2899SCharles.Forsyth		set.neg = 1;
1145*37da2899SCharles.Forsyth		s.next();
1146*37da2899SCharles.Forsyth	}
1147*37da2899SCharles.Forsyth	while(s.n > 0) {
1148*37da2899SCharles.Forsyth		char1 := s.next();
1149*37da2899SCharles.Forsyth		if(char1 == ']')
1150*37da2899SCharles.Forsyth			return set;
1151*37da2899SCharles.Forsyth		(char1, set0, op) = esc(s, char1, 1);
1152*37da2899SCharles.Forsyth		if(set0 != nil)
1153*37da2899SCharles.Forsyth			mergeset(set, set0);
1154*37da2899SCharles.Forsyth		char2 := char1;
1155*37da2899SCharles.Forsyth		if(s.peek() == '-') {
1156*37da2899SCharles.Forsyth			if(set0 != nil)
1157*37da2899SCharles.Forsyth				syntax("set in range");
1158*37da2899SCharles.Forsyth			s.next();
1159*37da2899SCharles.Forsyth			char2 = s.next();
1160*37da2899SCharles.Forsyth			if(char2 == ']')
1161*37da2899SCharles.Forsyth				break;
1162*37da2899SCharles.Forsyth			(char2, set0, op) = esc(s, char2, 1);
1163*37da2899SCharles.Forsyth			if(set0 != nil)
1164*37da2899SCharles.Forsyth				syntax("set in range");
1165*37da2899SCharles.Forsyth			if(char2 < char1)
1166*37da2899SCharles.Forsyth				break;
1167*37da2899SCharles.Forsyth		}
1168*37da2899SCharles.Forsyth		addset(set, char1, char2);
1169*37da2899SCharles.Forsyth	}
1170*37da2899SCharles.Forsyth	syntax("bad set");
1171*37da2899SCharles.Forsyth	return nil;
1172*37da2899SCharles.Forsyth}
1173*37da2899SCharles.Forsyth
1174*37da2899SCharles.Forsythaddset(set: ref Set, c1: int, c2: int)
1175*37da2899SCharles.Forsyth{
1176*37da2899SCharles.Forsyth	for(c := c1; c <= c2; c++){
1177*37da2899SCharles.Forsyth		if(c < ASCII)
1178*37da2899SCharles.Forsyth			set.ascii[c/WORD] |= 1<<c%WORD;
1179*37da2899SCharles.Forsyth		else{
1180*37da2899SCharles.Forsyth			set.unicode = (c, c2) :: set.unicode;
1181*37da2899SCharles.Forsyth			break;
1182*37da2899SCharles.Forsyth		}
1183*37da2899SCharles.Forsyth	}
1184*37da2899SCharles.Forsyth}
1185*37da2899SCharles.Forsyth
1186*37da2899SCharles.Forsythaddsets(set: ref Set, s: string)
1187*37da2899SCharles.Forsyth{
1188*37da2899SCharles.Forsyth	for(i := 0; i < len s; i++)
1189*37da2899SCharles.Forsyth		addset(set, s[i], s[i]);
1190*37da2899SCharles.Forsyth}
1191*37da2899SCharles.Forsyth
1192*37da2899SCharles.Forsythmergeset(set: ref Set, set0: ref Set)
1193*37da2899SCharles.Forsyth{
1194*37da2899SCharles.Forsyth	if(!set0.neg){
1195*37da2899SCharles.Forsyth		for(i := 0; i < ASCII/WORD; i++)
1196*37da2899SCharles.Forsyth			set.ascii[i] |= set0.ascii[i];
1197*37da2899SCharles.Forsyth		for(l := set0.unicode; l != nil; l = tl l)
1198*37da2899SCharles.Forsyth			set.unicode = hd l :: set.unicode;
1199*37da2899SCharles.Forsyth	}
1200*37da2899SCharles.Forsyth	else
1201*37da2899SCharles.Forsyth		set.subset = set0 :: set.subset;
1202*37da2899SCharles.Forsyth}
1203*37da2899SCharles.Forsyth
1204*37da2899SCharles.Forsythnewset(c1: int, c2: int): ref Set
1205*37da2899SCharles.Forsyth{
1206*37da2899SCharles.Forsyth	set := ref Set(0, array[ASCII/WORD] of {* => 0}, nil, nil);
1207*37da2899SCharles.Forsyth	addset(set, c1, c2);
1208*37da2899SCharles.Forsyth	return set;
1209*37da2899SCharles.Forsyth}
1210*37da2899SCharles.Forsyth
1211*37da2899SCharles.Forsythstoretree(lpn: int, re: ref Arena): (int, int, ref Arena)
1212*37da2899SCharles.Forsyth{
1213*37da2899SCharles.Forsyth	rpn: int;
1214*37da2899SCharles.Forsyth
1215*37da2899SCharles.Forsyth	rex := re.rex[lpn];
1216*37da2899SCharles.Forsyth	k := rex.kind;
1217*37da2899SCharles.Forsyth	l := 1;
1218*37da2899SCharles.Forsyth	for(;;){
1219*37da2899SCharles.Forsyth		rpn = rex.right;
1220*37da2899SCharles.Forsyth		rex = re.rex[rpn];
1221*37da2899SCharles.Forsyth		if(rex.kind == k)
1222*37da2899SCharles.Forsyth			l++;
1223*37da2899SCharles.Forsyth		else if(rex.kind == k+1 && --l == 0)
1224*37da2899SCharles.Forsyth			break;
1225*37da2899SCharles.Forsyth	}
1226*37da2899SCharles.Forsyth	re.rex[lpn].kind = LPN;
1227*37da2899SCharles.Forsyth	re.rex[rpn].kind = RPN;
1228*37da2899SCharles.Forsyth	nxt := re.rex[rpn].right;
1229*37da2899SCharles.Forsyth	re.rex[rpn].right = NIL;
1230*37da2899SCharles.Forsyth	nre := ref *re;
1231*37da2899SCharles.Forsyth	nre.start = lpn;
1232*37da2899SCharles.Forsyth	return (rpn, nxt, nre);
1233*37da2899SCharles.Forsyth}
1234*37da2899SCharles.Forsyth
1235*37da2899SCharles.Forsythrestoretree(lop: int, rpn: int, nxt: int, re: ref Arena)
1236*37da2899SCharles.Forsyth{
1237*37da2899SCharles.Forsyth	lpn := re.start;
1238*37da2899SCharles.Forsyth	re.rex[lpn].kind = lop;
1239*37da2899SCharles.Forsyth	re.rex[rpn].kind = lop+1;
1240*37da2899SCharles.Forsyth	re.rex[rpn].right = nxt;
1241*37da2899SCharles.Forsyth}
1242*37da2899SCharles.Forsyth
1243*37da2899SCharles.Forsythiswordc(s: string, i: int): int
1244*37da2899SCharles.Forsyth{
1245*37da2899SCharles.Forsyth	if(i < 0 || i >= len s)
1246*37da2899SCharles.Forsyth		return 0;
1247*37da2899SCharles.Forsyth	c := s[i];
1248*37da2899SCharles.Forsyth	return isdigit(c) || isalpha(c) || c == '_';
1249*37da2899SCharles.Forsyth}
1250*37da2899SCharles.Forsyth
1251*37da2899SCharles.Forsythlcpar(gaz: list of Gaz, pno: int): (int, int)
1252*37da2899SCharles.Forsyth{
1253*37da2899SCharles.Forsyth	for(r := gaz; r != nil; r = tl r) {
1254*37da2899SCharles.Forsyth		(pno1, beg1, end1) := hd r;
1255*37da2899SCharles.Forsyth		if(pno == pno1)
1256*37da2899SCharles.Forsyth			return (beg1, end1);
1257*37da2899SCharles.Forsyth	}
1258*37da2899SCharles.Forsyth	return (-1, -1);
1259*37da2899SCharles.Forsyth}
1260*37da2899SCharles.Forsyth
1261*37da2899SCharles.Forsytheqstr(s: string, t: string, ic: int): int
1262*37da2899SCharles.Forsyth{
1263*37da2899SCharles.Forsyth	if(!ic)
1264*37da2899SCharles.Forsyth		return s == t;
1265*37da2899SCharles.Forsyth	if(len s != len t)
1266*37da2899SCharles.Forsyth		return 0;
1267*37da2899SCharles.Forsyth	for(i := 0; i < len s; i++)
1268*37da2899SCharles.Forsyth		if(!eqcase(s[i], t[i]))
1269*37da2899SCharles.Forsyth			return 0;
1270*37da2899SCharles.Forsyth	return 1;
1271*37da2899SCharles.Forsyth}
1272*37da2899SCharles.Forsyth
1273*37da2899SCharles.Forsytheqcase(c1: int, c2: int): int
1274*37da2899SCharles.Forsyth{
1275*37da2899SCharles.Forsyth	return toupper(c1) == toupper(c2);
1276*37da2899SCharles.Forsyth}
1277*37da2899SCharles.Forsyth
1278*37da2899SCharles.Forsythsyntax(s: string)
1279*37da2899SCharles.Forsyth{
1280*37da2899SCharles.Forsyth	runtime(regex, SyntaxError, s);
1281*37da2899SCharles.Forsyth}
1282*37da2899SCharles.Forsyth
1283*37da2899SCharles.Forsythrfatal(s: string)
1284*37da2899SCharles.Forsyth{
1285*37da2899SCharles.Forsyth	runtime(regex, InternalError, s);
1286*37da2899SCharles.Forsyth}
1287