xref: /inferno-os/appl/cmd/sh/regex.b (revision 37da2899f40661e3e9631e497da8dc59b971cbd0)
1implement Shellbuiltin;
2
3include "sys.m";
4	sys: Sys;
5include "draw.m";
6include "sh.m";
7	sh: Sh;
8	Listnode, Context: import sh;
9	myself: Shellbuiltin;
10include "regex.m";
11	regex: Regex;
12
# Set up the module: load Sys and Regex, remember the shell module handle
# and a handle on this module itself, then register the "match" builtin
# and the "re" substitution builtin with ctxt.
# Load failures are reported through ctxt.fail with the "bad module" tag
# (fail is expected not to return - confirm against sh.m).
# Returns nil on success.
initbuiltin(ctxt: ref Context, shmod: Sh): string
{
	sh = shmod;
	sys = load Sys Sys->PATH;

	# A handle on ourselves is what gets registered for each builtin name.
	if ((myself = load Shellbuiltin "$self") == nil)
		ctxt.fail("bad module", sys->sprint("regex: cannot load self: %r"));

	if ((regex = load Regex Regex->PATH) == nil)
		ctxt.fail("bad module",
			sys->sprint("regex: cannot load %s: %r", Regex->PATH));

	ctxt.addbuiltin("match", myself);
	ctxt.addsbuiltin("re", myself);
	return nil;
}
28
# Accessor for the module handle that initbuiltin stored in myself.
getself(): Shellbuiltin
{
	return myself;
}
33
# Dispatch an ordinary builtin command by the name in the first word of argv.
# Only "match" is handled here; any other name yields nil.
runbuiltin(ctxt: ref Context, nil: Sh,
			argv: list of ref Listnode, nil: int): string
{
	if ((hd argv).word == "match")
		return builtin_match(ctxt, argv);
	return nil;
}
43
# This module provides no "whatis" description: all arguments are ignored
# and nil is always returned.
whatis(nil: ref Sh->Context, nil: Sh, nil: string, nil: int): string
{
	return nil;
}
48
# Dispatch a substitution builtin by the name in the first word of argv.
# Only "re" is handled here; any other name yields nil.
runsbuiltin(ctxt: ref Context, nil: Sh,
			argv: list of ref Listnode): list of ref Listnode
{
	if ((hd argv).word == "re")
		return sbuiltin_re(ctxt, argv);
	return nil;
}
59
# Entry point for ${re op arg...}.
# argv is ("re" op arg...). The leading "re" is dropped and the remainder
# (beginning with op) is handed to the routine implementing that operation:
#	g, v	-> sbuiltin_sel (v sets its vflag)
#	s, sg	-> sbuiltin_sub (sg sets its gflag)
#	m, M	-> sbuiltin_match (M sets its aflag)
#	mg	-> sbuiltin_gmatch
# A missing or unrecognised op is reported via ctxt.fail("usage", ...).
sbuiltin_re(ctxt: ref Context, argv: list of ref Listnode): list of ref Listnode
{
	usage := "usage: re (g|v|s|sg|m|mg|M) arg...";

	args := tl argv;
	if (args == nil)
		ctxt.fail("usage", usage);

	op := (hd args).word;
	if (op == "g" || op == "v")
		return sbuiltin_sel(ctxt, args, op == "v");
	if (op == "s" || op == "sg")
		return sbuiltin_sub(ctxt, args, op == "sg");
	if (op == "m")
		return sbuiltin_match(ctxt, args, 0);
	if (op == "mg")
		return sbuiltin_gmatch(ctxt, args);
	if (op == "M")
		return sbuiltin_match(ctxt, args, 1);

	ctxt.fail("usage", usage);
	return nil;
}
84
# Implements ${re m regex arg} (aflag 0) and ${re M regex arg} (aflag 1).
# argv is (op regex arg). The regex is compiled with aflag passed straight
# through as the flag to regex->compile (presumably selecting whether
# parenthesised subexpressions are reported - confirm against regex(2)),
# then executed against arg.
# Returns nil if there is no match; otherwise one list element per entry
# of the array returned by regex->execute, in array order. Entries whose
# start offset is -1 become nil words (see elem).
sbuiltin_match(ctxt: ref Context, argv: list of ref Listnode, aflag: int): list of ref Listnode
{
	# Exactly op, regex and arg are required, so the usage text must name
	# both operands (it previously mentioned only "arg").
	if (len argv != 3)
		ctxt.fail("usage", "usage: re " + (hd argv).word + " regex arg");
	argv = tl argv;
	re := getregex(ctxt, word(hd argv), aflag);
	w := word(hd tl argv);
	a := regex->execute(re, w);
	if (a == nil)
		return nil;
	# Walk the array backwards so that prepending yields forward order.
	ret: list of ref Listnode;
	for (i := len a - 1; i >= 0; i--)
		ret = ref Listnode(nil, elem(a, i, w)) :: ret;
	return ret;
}
100
# Implements ${re mg regex arg}: collect every successive match of regex
# in arg. argv is ("mg" regex arg).
# Each search starts where the previous match ended; the text of each
# overall match (entry 0 of the result array) becomes one list element.
# Returns the matches in the order found, or nil if there are none.
sbuiltin_gmatch(ctxt: ref Context, argv: list of ref Listnode): list of ref Listnode
{
	# Exactly op, regex and arg are required, so the usage text must name
	# both operands (it previously mentioned only "arg").
	if (len argv != 3)
		ctxt.fail("usage", "usage: re mg regex arg");
	argv = tl argv;
	re := getregex(ctxt, word(hd argv), 0);
	w := word(hd tl argv);
	ret, nret: list of ref Listnode;
	beg := 0;
	# The fourth argument (beg == 0) is true only for the first search,
	# i.e. only when the search range starts at the beginning of w.
	while ((a := regex->executese(re, w, (beg, len w), beg == 0, 1)) != nil) {
		(s, e) := a[0];
		ret = ref Listnode(nil, w[s:e]) :: ret;
		# An empty match would leave beg where it is; stop rather than loop.
		if (s == e)
			break;
		beg = e;
	}
	# Matches were prepended, so reverse to restore the order found.
	for (; ret != nil; ret = tl ret)
		nret = hd ret :: nret;
	return nret;
}
121
# Implements ${re g regex [arg...]} and ${re v regex [arg...]}.
# argv is (op regex arg...). Returns, in their original order, those args
# for which regex->execute finds a match (vflag == 0) or finds no match
# (vflag != 0). The original Listnodes are returned, not copies.
sbuiltin_sel(ctxt: ref Context, argv: list of ref Listnode, vflag: int): list of ref Listnode
{
	cmd := (hd argv).word;
	argv = tl argv;
	# The operation is invoked as "re g"/"re v", so include the "re" prefix
	# in the usage text, consistent with the other re operations.
	if (argv == nil)
		ctxt.fail("usage", "usage: re " + cmd + " regex [arg...]");
	re := getregex(ctxt, word(hd argv), 0);
	ret, nret: list of ref Listnode;
	for (argv = tl argv; argv != nil; argv = tl argv)
		# XOR with vflag inverts the sense of the test for "v".
		if (vflag ^ (regex->execute(re, word(hd argv)) != nil))
			ret = hd argv :: ret;
	# Selected args were prepended, so reverse to restore input order.
	for (; ret != nil; ret = tl ret)
		nret = hd ret :: nret;
	return nret;
}
137
# Implements ${re s regex subs [arg...]} and ${re sg regex subs [arg...]}.
# argv is (op regex subs arg...). Each arg is passed through substitute
# with the compiled regex and the subs template; gflag is forwarded to
# substitute, which uses it to decide whether to continue after the first
# match. Returns one new Listnode per arg, in input order, holding the
# resulting string (whether or not anything matched).
sbuiltin_sub(ctxt: ref Context, argv: list of ref Listnode, gflag: int): list of ref Listnode
{
	cmd := (hd argv).word;
	argv = tl argv;
	# The operation is invoked as "re s"/"re sg", so include the "re" prefix
	# in the usage text, consistent with the other re operations.
	if (argv == nil || tl argv == nil)
		ctxt.fail("usage", "usage: re " + cmd + " regex subs [arg...]");
	# Compile flag 1: substitute indexes the match array with \N escapes
	# (presumably this flag makes subexpression matches available - confirm
	# against regex(2)).
	re := getregex(ctxt, word(hd argv), 1);
	subs := word(hd tl argv);
	ret, nret: list of ref Listnode;
	for (argv = tl tl argv; argv != nil; argv = tl argv)
		ret = ref Listnode(nil, substitute(word(hd argv), re, subs, gflag).t1) :: ret;
	# Results were prepended, so reverse to restore input order.
	for (; ret != nil; ret = tl ret)
		nret = hd ret :: nret;
	return nret;
}
153
# Implements the "match regexp [arg...]" command.
# argv is ("match" regexp arg...). Returns nil when every arg matches
# the regexp (including when there are no args), and the string
# "no match" as soon as one arg fails to match.
builtin_match(ctxt: ref Context, argv: list of ref Listnode): string
{
	rest := tl argv;
	if (rest == nil)
		ctxt.fail("usage", "usage: match regexp [arg...]");

	re := getregex(ctxt, word(hd rest), 0);
	rest = tl rest;
	while (rest != nil) {
		if (regex->execute(re, word(hd rest)) == nil)
			return "no match";
		rest = tl rest;
	}
	return nil;
}
164
# Replace text in w that matches re with the expansion of subs.
# In subs, a backslash followed by a digit N is replaced by elem(a, N, w)
# for the current match array a; a backslash followed by any other
# character stands for that character; a trailing backslash is literal.
# Only the first match is replaced unless gflag is non-zero, in which case
# matching resumes after each replacement until no match is found, an
# empty match occurs, or the end of w is reached.
# Returns (matched, result) where matched is 1 if at least one match was
# found and 0 otherwise.
substitute(w: string, re: Regex->Re, subs: string, gflag: int): (int, string)
{
	found := 0;
	out := "";
	pos := 0;
	nsubs := len subs;

	for (;;) {
		m := regex->executese(re, w, (pos, len w), pos == 0, 1);
		if (m == nil)
			break;
		found = 1;

		# Copy the unmatched text preceding this match.
		(ms, me) := m[0];
		out += w[pos:ms];

		# Expand the substitution template for this match.
		i := 0;
		while (i < nsubs) {
			c := subs[i++];
			if (c == '\\' && i < nsubs) {
				c = subs[i++];
				if (c >= '0' && c <= '9') {
					out += elem(m, c - '0', w);
					continue;
				}
			}
			out[len out] = c;
		}

		pos = me;
		# Stop after an empty match, after the first match when not
		# global, or once the whole of w has been consumed.
		if (ms == me || !gflag || pos >= len w)
			break;
	}
	return (found, out + w[pos:]);
}
193
# Return the slice of w described by entry i of the match array a.
# Yields nil when i lies outside a, or when the entry's start offset is -1.
# XXX an out-of-range index (e.g. an invalid backslash escape in a
# substitution) could raise a failure here instead of yielding nil.
elem(a: array of (int, int), i: int, w: string): string
{
	if (i >= 0 && i < len a) {
		(lo, hi) := a[i];
		if (lo != -1)
			return w[lo:hi];
	}
	return nil;
}
203
# Compile the regular expression text res with the given flag via
# regex->compile. On failure, report through ctxt.fail with the "bad regex"
# tag, quoting res and appending the error string that compile returned.
# XXX compiled expressions could be cached here if it proved worthwhile.
getregex(ctxt: ref Context, res: string, flag: int): Regex->Re
{
	(compiled, why) := regex->compile(res, flag);
	if (compiled != nil)
		return compiled;
	ctxt.fail("bad regex", "regex: bad regex \"" + res + "\": " + why);
	return compiled;
}
212
# Return the string form of list node n.
# If n has no word but does have a command, the command is converted with
# sh->cmd2string and the result is stored back into n.word before being
# returned. A node with neither yields nil.
word(n: ref Listnode): string
{
	if (n.word == nil && n.cmd != nil)
		n.word = sh->cmd2string(n.cmd);
	return n.word;
}
221