# Source: /inferno-os/module/regex.m (revision 46439007cf417cbd9ac8049bb4122c890097a0fa)
#
# Regex: interface to the regular-expression library.
#
# The first group of declarations is what clients normally import; the
# second group exposes the parse-tree/state-machine representation and is
# declared here only so the implementation and its clients agree on the
# layout of Re.  See regex(2) for the matching semantics.
#
Regex: module {

	# path of the Dis file implementing this module, for use with `load'
	PATH:	con "/dis/lib/regex.dis";

# normally imported identifiers

	# a compiled regular expression: a reference to the Arena holding it
	Re: type ref Arena;

	# compile takes the expression text and an int flag and returns a
	# (Re, string) pair.
	# NOTE(review): regex(2) describes the string as a diagnostic that is
	# set when compilation fails (Re nil), and the flag as enabling the
	# tracking of parenthesised subexpressions -- confirm against regex.b.
	compile:	fn(nil:string,nil:int): (Re, string);

	# execute runs a compiled expression over a string and returns an
	# array of (int, int) pairs.
	# NOTE(review): per regex(2) each pair is (start, end) of a match or
	# submatch, and nil means no match -- confirm against regex.b.
	execute:	fn(nil:Re, nil:string): array of (int, int);

	# executese is execute with three extra arguments: a (int, int) pair
	# `se' and two int flags `bol' and `eol'.
	# NOTE(review): presumably `se' bounds the region of the string to be
	# searched and bol/eol say whether that region's ends count as
	# beginning/end of line -- confirm against regex(2).
	executese:	fn(nil:Re, nil:string, se: (int, int), bol: int, eol: int): array of (int, int);

# internal identifiers, not normally imported

	# node kinds, numbered consecutively from 1<<16 (ALT = 1<<16,
	# CAT = (1<<16)+1, ... RPN = (1<<16)+11); presumably placed there to
	# keep them apart from the character values Rex.kind can also hold
	ALT, CAT, DOT, SET, HAT, DOL, NUL, PCLO, CLO, OPT, LPN, RPN : con (1<<16)+iota;

	refRex : type int;	# used instead of ref Rex to avoid circularity

	Set: adt {				# character class
		neg: int;			# 0 or 1
		ascii : array of int;		# ascii members, bit array
		unicode : list of (int,int);	# non-ascii char ranges
	};

	Rex: adt {		# node in parse of regex, or state of fsm
		kind : int;	# kind of node: char or ALT, CAT, etc
		left : refRex;	# left descendant
		right : refRex;	# right descendant, or next state
		set : ref Set;	# character class
		pno : int;	# NOTE(review): presumably a parenthesis (subexpression) number -- confirm
	};

	Arena: adt {		# free store from which nodes are allocated
		rex : array of Rex;
		ptr : refRex;	# next available space
		start : refRex;	# root of parse, or start of fsm
		pno : int;	# NOTE(review): presumably a count of parenthesised subexpressions -- confirm
	};
};