xref: /openbsd-src/usr.bin/mg/interpreter.c (revision de8cc8edbc71bd3e3bc7fbffa27ba0e564c37d8b)
1 /*      $OpenBSD: interpreter.c,v 1.6 2021/02/24 14:17:18 lum Exp $	*/
2 /*
3  * This file is in the public domain.
4  *
5  * Author: Mark Lumsden <mark@showcomplex.com>
6  */
7 
8 /*
9  * This file attempts to add some 'scripting' functionality into mg.
10  *
11  * The initial goal is to give mg the ability to use its existing functions
12  * and structures in a linked-up way. Hopefully resulting in user definable
13  * functions. The syntax is 'scheme' like but currently it is not a scheme
14  * interpreter.
15  *
16  * At the moment there is no manual page reference to this file. The code below
17  * is liable to change, so use at your own risk!
18  *
19  * If you do want to do some testing, you can add some lines to your .mg file
20  * like:
21  *
22  * 1. Give multiple arguments to a function that usually would accept only one:
23  * (find-file a.txt b.txt c.txt)
24  *
25  * 2. Define a list:
26  * (define myfiles(list d.txt e.txt))
27  *
28  * 3. Use the previously defined list:
29  * (find-file myfiles)
30  *
31  * To do:
32  * 1. multiline parsing - currently only single lines supported.
33  * 2. parsing for '(' and ')' throughout whole string and evaluate correctly.
34  * 3. conditional execution.
35  * 4. define single value variables (define i 0)
36  * 5. deal with quotes around a string: "x x"
37  * 6. oh so many things....
38  * [...]
39  * n. implement user definable functions.
40  */
41 #include <sys/queue.h>
42 #include <regex.h>
43 #include <signal.h>
44 #include <stdio.h>
45 #include <stdlib.h>
46 #include <string.h>
47 
48 #include "def.h"
49 #include "funmap.h"
50 
51 #ifdef  MGLOG
52 #include "kbd.h"
53 #include "log.h"
54 #endif
55 
56 static int	 multiarg(char *);
57 static int	 isvar(char **, char **, int);
58 static int	 foundvar(char *);
59 static int	 foundlist(char *);
60 
61 
/*
 * One variable defined while a buffer is being evaluated.
 * Entries are chained together on the singly-linked list 'varhead'.
 */
struct varentry {
	SLIST_ENTRY(varentry) entry;	/* link to the next variable	*/
	char	*name;			/* name of the variable		*/
	char	*vals;			/* its value(s), as one string	*/
	int	 count;			/* how many values 'vals' holds	*/
};

/* Head type of the variable list, and the (initially empty) list itself. */
SLIST_HEAD(vlisthead, varentry);
struct vlisthead varhead = SLIST_HEAD_INITIALIZER(varhead);
72 
73 /*
74  * Pass a list of arguments to a function.
75  */
76 static int
77 multiarg(char *funstr)
78 {
79 	regex_t  regex_buff;
80 	PF	 funcp;
81 	char	 excbuf[BUFSIZE], argbuf[BUFSIZE], *contbuf, tmpbuf[BUFSIZE];
82 	char	*cmdp, *argp, *fendp, *endp, *p, *t, *s = " ";
83 	int	 singlecmd = 0, spc, numparams, numspc;
84 	int	 inlist, foundlst = 0, eolst, rpar, sizof, fin;
85 
86 	contbuf = NULL;
87 	endp = strrchr(funstr, ')');
88 	if (endp == NULL) {
89 		ewprintf("No closing parenthesis found");
90 		return(FALSE);
91 	}
92 	p = endp + 1;
93 	if (*p != '\0')
94 		*p = '\0';
95 	/* we now know that string starts with '(' and ends with ')' */
96 	if (regcomp(&regex_buff, "^[(][\t ]*[)]$", REG_EXTENDED)) {
97 		regfree(&regex_buff);
98 		return (dobeep_msg("Could not compile regex"));
99 	}
100 	if (!regexec(&regex_buff, funstr, 0, NULL, 0)) {
101 		regfree(&regex_buff);
102 		return (dobeep_msg("No command found"));
103 	}
104 	/* currently there are no mg commands that don't have a letter */
105 	if (regcomp(&regex_buff, "^[(][\t ]*[A-Za-z-]+[\t ]*[)]$",
106 	    REG_EXTENDED)) {
107 		regfree(&regex_buff);
108 		return (dobeep_msg("Could not compile regex"));
109 	}
110 	if (!regexec(&regex_buff, funstr, 0, NULL, 0))
111 		singlecmd = 1;
112 
113 	regfree(&regex_buff);
114 	p = funstr + 1;		/* move past first '(' char.	*/
115 	cmdp = skipwhite(p);	/* find first char of command.	*/
116 
117 	if (singlecmd) {
118 		/* remove ')', then check for spaces at the end */
119 		cmdp[strlen(cmdp) - 1] = '\0';
120 		if ((fendp = strchr(cmdp, ' ')) != NULL)
121 			*fendp = '\0';
122 		else if ((fendp = strchr(cmdp, '\t')) != NULL)
123 			*fendp = '\0';
124 		return(excline(cmdp));
125 	}
126 	if ((fendp = strchr(cmdp, ' ')) == NULL)
127 		fendp = strchr(cmdp, '\t');
128 
129 	*fendp = '\0';
130 	/*
131 	 * If no extant mg command found, just return.
132 	 */
133 	if ((funcp = name_function(cmdp)) == NULL)
134 		return (dobeep_msgs("Unknown command: ", cmdp));
135 
136 	numparams = numparams_function(funcp);
137 	if (numparams == 0)
138 		return (dobeep_msgs("Command takes no arguments: ", cmdp));
139 
140 	/* now find the first argument */
141 	if (fendp)
142 		p = fendp + 1;
143 	else
144 		p = "";
145 	p = skipwhite(p);
146 	if (strlcpy(argbuf, p, sizeof(argbuf)) >= sizeof(argbuf))
147 		return (dobeep_msg("strlcpy error"));
148 	argp = argbuf;
149 	numspc = spc = 1; /* initially fake a space so we find first argument */
150 	inlist = eolst = fin = rpar = 0;
151 
152 	for (p = argp; fin == 0; p++) {
153 #ifdef  MGLOG
154 		mglog_execbuf("", excbuf, argbuf, argp, eolst, inlist, cmdp,
155 		    p, contbuf);
156 #endif
157 		if (foundlst) {
158 			foundlst = 0;
159 			p--;	/* otherwise 1st arg is missed from list. */
160 		}
161 		if (*p == ')') {
162 			rpar = 1;
163 			*p = '\0';
164 		}
165 		if (*p == ' ' || *p == '\t' || *p == '\0') {
166 			if (spc == 1)
167 				continue;
168 			if (spc == 0 && (numspc % numparams == 0)) {
169 				if (*p == '\0')
170 					eolst = 1;
171 				else
172 					eolst = 0;
173 				*p = '\0'; 	/* terminate arg string */
174 				endp = p + 1;
175 				excbuf[0] = '\0';
176 				/* Is arg a var? */
177 				if (!inlist) {
178 					sizof = sizeof(tmpbuf);
179 					t = tmpbuf;
180 					if (isvar(&argp, &t, sizof)) {
181 						if ((contbuf = strndup(endp,
182 						    BUFSIZE)) == NULL)
183 							return(FALSE);
184 						*p = ' ';
185 						(void)(strlcpy(argbuf, tmpbuf,
186 						    sizof) >= sizof);
187 						p = argp = argbuf;
188 						spc = 1;
189 						foundlst = inlist = 1;
190 						continue;
191 					}
192 				}
193 				if (strlcpy(excbuf, cmdp, sizeof(excbuf))
194 				     >= sizeof(excbuf))
195 					return (dobeep_msg("strlcpy error"));
196 				if (strlcat(excbuf, s, sizeof(excbuf))
197 				    >= sizeof(excbuf))
198 					return (dobeep_msg("strlcat error"));
199 				if (strlcat(excbuf, argp, sizeof(excbuf))
200 				    >= sizeof(excbuf))
201 					return (dobeep_msg("strlcat error"));
202 
203 				excline(excbuf);
204 #ifdef  MGLOG
205 				mglog_execbuf("  ", excbuf, argbuf, argp,
206 				    eolst, inlist, cmdp, p, contbuf);
207 #endif
208 				*p = ' ';	/* so 'for' loop can continue */
209 				if (eolst) {
210 					if (contbuf != NULL) {
211 						(void)strlcpy(argbuf, contbuf,
212 						    sizeof(argbuf));
213 						free(contbuf);
214 						contbuf = NULL;
215 						p = argp = argbuf;
216 						foundlst = 1;
217 						inlist = 0;
218 						if (rpar)
219 							fin = 1;
220 						continue;
221 					}
222 					spc = 1;
223 					inlist = 0;
224 				}
225 				if (eolst && rpar)
226 					fin = 1;
227 			}
228 			numspc++;
229 			spc = 1;
230 		} else {
231 			if (spc == 1)
232 				if ((numparams == 1) ||
233 				    ((numspc + 1) % numparams) == 0)
234 					argp = p;
235 			spc = 0;
236 		}
237 	}
238 	return (TRUE);
239 }
240 
241 
242 /*
243  * Is an item a value or a variable?
244  */
245 static int
246 isvar(char **argp, char **tmpbuf, int sizof)
247 {
248 	struct varentry *v1 = NULL;
249 
250 	if (SLIST_EMPTY(&varhead))
251 		return (FALSE);
252 #ifdef  MGLOG
253 	mglog_isvar(*tmpbuf, *argp, sizof);
254 #endif
255 	SLIST_FOREACH(v1, &varhead, entry) {
256 		if (strcmp(*argp, v1->name) == 0) {
257 			(void)(strlcpy(*tmpbuf, v1->vals, sizof) >= sizof);
258 			return (TRUE);
259 		}
260 	}
261 	return (FALSE);
262 }
263 
264 
265 /*
266  * The (define string _must_ adhere to the regex in foundparen.
267  * This is not the correct way to do parsing but it does highlight
268  * the issues.
269  */
270 static int
271 foundlist(char *defstr)
272 {
273 	struct varentry *vt, *v1 = NULL;
274 	const char	 e[2] = "e", t[2] = "t";
275 	char		*p, *vnamep, *vendp = NULL, *valp, *o;
276 	int		 spc;
277 
278 
279 	p = defstr + 1;         /* move past first '(' char.    */
280 	p = skipwhite(p);    	/* find first char of 'define'. */
281 	p = strstr(p, e);	/* find first 'e' in 'define'.	*/
282 	p = strstr(++p, e);	/* find second 'e' in 'define'.	*/
283 	p++;			/* move past second 'e'.	*/
284 	vnamep = skipwhite(p);  /* find first char of var name. */
285 	vendp = vnamep;
286 
287 	/* now find the end of the list name */
288 	while (1) {
289 		++vendp;
290 		if (*vendp == '(' || *vendp == ' ' || *vendp == '\t')
291 			break;
292 	}
293 	*vendp = '\0';
294 	/*
295 	 * Check list name is not an existing function.
296 	 * Although could this be allowed? Shouldn't context dictate?
297 	 */
298 	if (name_function(vnamep) != NULL)
299 		return(dobeep_msgs("Variable/function name clash:", vnamep));
300 
301 	p = ++vendp;
302 	p = strstr(p, t);	/* find 't' in 'list'.	*/
303 	valp = skipwhite(++p);	/* find first value	*/
304 	/*
305 	 * Now we have the name of the list starting at 'vnamep',
306 	 * and the first value is at 'valp', record the details
307 	 * in a linked list. But first remove variable, if existing already.
308 	 */
309 	if (!SLIST_EMPTY(&varhead)) {
310 		SLIST_FOREACH_SAFE(v1, &varhead, entry, vt) {
311 			if (strcmp(vnamep, v1->name) == 0)
312 				SLIST_REMOVE(&varhead, v1, varentry, entry);
313 		}
314 	}
315 	if ((v1 = malloc(sizeof(struct varentry))) == NULL)
316 		return (ABORT);
317 	SLIST_INSERT_HEAD(&varhead, v1, entry);
318 	if ((v1->name = strndup(vnamep, BUFSIZE)) == NULL)
319 		return(dobeep_msg("strndup error"));
320 	v1->count = 0;
321 	vendp = NULL;
322 
323 	/* initially fake a space so we find first value */
324 	spc = 1;
325 	/* now loop through values in list value string while counting them */
326 	for (p = valp; *p != '\0'; p++) {
327 		if (*p == ' ' || *p == '\t') {
328 			if (spc == 0)
329 				vendp = p;
330 			spc = 1;
331 		} else if (*p == ')') {
332 			o = p - 1;
333 			if (*o != ' ' && *o != '\t')
334 				vendp = p;
335 			break;
336 		} else {
337 			if (spc == 1)
338 				v1->count++;
339 			spc = 0;
340 		}
341 	}
342 	if (vendp)
343 		*vendp = '\0';
344 
345 	if ((v1->vals = strndup(valp, BUFSIZE)) == NULL)
346 		return(dobeep_msg("strndup error"));
347 
348 	return (TRUE);
349 }
350 
351 
352 /*
353  * to do
354  */
355 static int
356 foundvar(char *funstr)
357 {
358 	ewprintf("to do");
359 	return (TRUE);
360 }
361 
362 /*
363  * Finished with evaluation, so clean up any vars.
364  */
365 int
366 clearvars(void)
367 {
368 	struct varentry	*v1 = NULL;
369 
370 	while (!SLIST_EMPTY(&varhead)) {
371 		v1 = SLIST_FIRST(&varhead);
372 		SLIST_REMOVE_HEAD(&varhead, entry);
373 		free(v1->vals);
374 		free(v1->name);
375 		free(v1);
376 	}
377 	return (FALSE);
378 }
379 
/*
 * Test 'str' against the extended regular expression 'pattern'.
 * Returns 1 on a match, 0 on no match and -1 if the pattern does not
 * compile. A compiled pattern is always released again before returning.
 */
static int
paren_regmatch(const char *pattern, const char *str)
{
	regex_t	 regex_buff;
	int	 nomatch;

	if (regcomp(&regex_buff, pattern, REG_EXTENDED))
		return (-1);
	nomatch = regexec(&regex_buff, str, 0, NULL, 0);
	regfree(&regex_buff);

	return (nomatch == 0);
}

/*
 * Line has a '(' as the first non-white char.
 * Do some very basic parsing of line with '(' as the first character.
 * Multi-line not supported at the moment, To do.
 *
 * A list 'define' is passed to foundlist(), a single variable 'define'
 * to foundvar() and any other 'define' is rejected. All remaining lines
 * are passed to multiarg(). Returns whatever the chosen handler returns,
 * or the result of dobeep_msg() on an error.
 */
int
foundparen(char *funstr)
{
	char	*regs;
	int	 match;

	/* Does the line have a list 'define' like: */
	/* (define alist(list 1 2 3 4)) */
	regs = "^[(][\t ]*define[\t ]+[^\t (]+[\t ]*[(][\t ]*list[\t ]+"\
		"[^\t ]+.*[)][\t ]*[)]";
	if ((match = paren_regmatch(regs, funstr)) == -1)
		return(dobeep_msg("Could not compile regex"));
	if (match)
		return(foundlist(funstr));

	/* Does the line have a single variable 'define' like: */
	/* (define i 0) */
	regs = "^[(][\t ]*define[\t ]+[^\t (]+[\t ]*[^\t (]+[\t ]*[)]";
	if ((match = paren_regmatch(regs, funstr)) == -1)
		return(dobeep_msg("Could not compile regex"));
	if (match)
		return(foundvar(funstr));

	/* Does the line have an unrecognised 'define' */
	regs = "^[(][\t ]*define[\t ]+";
	if ((match = paren_regmatch(regs, funstr)) == -1)
		return(dobeep_msg("Could not compile regex"));
	if (match)
		return(dobeep_msg("Invalid use of define"));

	return(multiarg(funstr));
}
427