xref: /openbsd-src/usr.bin/m4/gnum4.c (revision b2ea75c1b17e1a9a339660e7ed45cd24946b230e)
1 /* $OpenBSD: gnum4.c,v 1.9 2001/07/28 05:36:18 pvalchev Exp $ */
2 
3 /*
4  * Copyright (c) 1999 Marc Espie
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions
8  * are met:
9  * 1. Redistributions of source code must retain the above copyright
10  *    notice, this list of conditions and the following disclaimer.
11  * 2. Redistributions in binary form must reproduce the above copyright
12  *    notice, this list of conditions and the following disclaimer in the
13  *    documentation and/or other materials provided with the distribution.
14  *
15  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
16  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
19  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
21  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
22  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
23  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
24  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
25  * SUCH DAMAGE.
26  */
27 
28 /*
29  * functions needed to support gnu-m4 extensions, including a fake freezing
30  */
31 
32 #include <sys/param.h>
33 #include <sys/types.h>
34 #include <sys/wait.h>
35 #include <ctype.h>
36 #include <paths.h>
37 #include <regex.h>
38 #include <stddef.h>
39 #include <stdlib.h>
40 #include <stdio.h>
41 #include <string.h>
42 #include <err.h>
43 #include <errno.h>
44 #include <unistd.h>
45 #include "mdef.h"
46 #include "stdd.h"
47 #include "extern.h"
48 
49 
50 int mimic_gnu = 0;
51 
52 /*
53  * Support for include path search
 * First search in the current directory.
55  * If not found, and the path is not absolute, include path kicks in.
56  * First, -I options, in the order found on the command line.
57  * Then M4PATH env variable
58  */
59 
/* One directory of the include search path; entries form a singly linked list. */
struct path_entry {
	char *name;			/* directory name */
	struct path_entry *next;	/* following entry, NULL at the tail */
};

/* Head and tail of the search list. */
struct path_entry *first, *last;

static struct input_file *dopath(struct input_file *, const char *);
static void ensure_m4path(void);
static struct path_entry *new_path_entry(const char *);
68 
69 static struct path_entry *
70 new_path_entry(dirname)
71 	const char *dirname;
72 {
73 	struct path_entry *n;
74 
75 	n = malloc(sizeof(struct path_entry));
76 	if (!n)
77 		errx(1, "out of memory");
78 	n->name = strdup(dirname);
79 	if (!n->name)
80 		errx(1, "out of memory");
81 	n->next = 0;
82 	return n;
83 }
84 
85 void
86 addtoincludepath(dirname)
87 	const char *dirname;
88 {
89 	struct path_entry *n;
90 
91 	n = new_path_entry(dirname);
92 
93 	if (last) {
94 		last->next = n;
95 		last = n;
96 	}
97 	else
98 		last = first = n;
99 }
100 
101 static void
102 ensure_m4path()
103 {
104 	static int envpathdone = 0;
105 	char *envpath;
106 	char *sweep;
107 	char *path;
108 
109 	if (envpathdone)
110 		return;
111 	envpathdone = TRUE;
112 	envpath = getenv("M4PATH");
113 	if (!envpath)
114 		return;
115 	/* for portability: getenv result is read-only */
116 	envpath = strdup(envpath);
117 	if (!envpath)
118 		errx(1, "out of memory");
119 	for (sweep = envpath;
120 	    (path = strsep(&sweep, ":")) != NULL;)
121 	    addtoincludepath(path);
122 	free(envpath);
123 }
124 
125 static
126 struct input_file *
127 dopath(i, filename)
128 	struct input_file *i;
129 	const char *filename;
130 {
131 	char path[MAXPATHLEN];
132 	struct path_entry *pe;
133 	FILE *f;
134 
135 	for (pe = first; pe; pe = pe->next) {
136 		snprintf(path, sizeof(path), "%s/%s", pe->name, filename);
137 		if ((f = fopen(path, "r")) != 0) {
138 			set_input(i, f, path);
139 			return i;
140 		}
141 	}
142 	return NULL;
143 }
144 
/*
 * Open filename for reading and attach it to i via set_input().
 * The name is tried as given first; if that fails and the name is not
 * absolute, the include search list (with M4PATH loaded on demand) is
 * consulted.  Returns i on success, NULL if nothing could be opened.
 */
struct input_file *
fopen_trypath(struct input_file *i, const char *filename)
{
	FILE *f = fopen(filename, "r");

	if (f == NULL) {
		/* absolute names are never searched along the include path */
		if (filename[0] == '/')
			return NULL;
		ensure_m4path();
		return dopath(i, filename);
	}
	set_input(i, f, filename);
	return i;
}
164 
165 void
166 doindir(argv, argc)
167 	const char *argv[];
168 	int argc;
169 {
170 	ndptr p;
171 
172 	p = lookup(argv[2]);
173 	if (p == NULL)
174 		errx(1, "undefined macro %s", argv[2]);
175 	argv[1] = p->defn;
176 	if (p->type == MACRTYPE)
177 		expand(argv+1, argc-1);
178 	else
179 		eval(argv+1, argc-1, p->type);
180 }
181 
/*
 * Invoke the builtin named by argv[2] with the remaining arguments.
 * Exits with an error if builtin_type() does not recognize the name.
 */
void
dobuiltin(const char *argv[], int argc)
{
	int type;

	/* eval() is handed argv+1, so this clears the callee's slot 0 */
	argv[1] = NULL;
	type = builtin_type(argv[2]);
	if (type == -1)
		errx(1, "unknown builtin %s", argv[2]);
	eval(argv+1, argc-1, type);
}
195 
196 
/*
 * We need some temporary buffer space, as pb pushes BACK and substitution
 * proceeds forward: text is accumulated here front to back, then fetched
 * with getstring() and pushed back in one piece.
 */
static char *buffer;		/* scratch area, NULL until first use */
static size_t bufsize = 0;	/* bytes allocated for buffer */
static size_t current = 0;	/* bytes of buffer currently in use */

static void grow_buffer(size_t);
static void addchars(const char *, size_t);
static void addchar(char);
static char *twiddle(const char *);
static char *getstring(void);
static void exit_regerror(int, regex_t *);
static void do_subst(const char *, regex_t *, const char *, regmatch_t *);
static void do_regexpindex(const char *, regex_t *, regmatch_t *);
static void do_regexp(const char *, regex_t *, const char *, regmatch_t *);
static void add_sub(int, const char *, regex_t *, regmatch_t *);
static void add_replace(const char *, regex_t *, const char *, regmatch_t *);

/*
 * Make sure the scratch buffer has room for `extra' more bytes.
 * The size starts at 1024 and is doubled as many times as needed, so a
 * single large request can never outrun the allocation.  Exits with
 * "out of memory" on allocation failure or size overflow.
 */
static void
grow_buffer(size_t extra)
{
	size_t needed, size;
	char *grown;

	if (extra > (size_t)-1 - current)
		errx(1, "out of memory");
	needed = current + extra;
	if (needed <= bufsize)
		return;

	size = bufsize;
	do {
		if (size == 0)
			size = 1024;
		else if (size > (size_t)-1 / 2)
			size = needed;	/* doubling would wrap around */
		else
			size *= 2;
	} while (size < needed);

	grown = realloc(buffer, size);
	if (grown == NULL)
		errx(1, "out of memory");
	buffer = grown;
	bufsize = size;
}

/*
 * Append the n bytes starting at c to the scratch buffer.
 */
static void
addchars(const char *c, size_t n)
{
	if (n == 0)
		return;
	grow_buffer(n);
	memcpy(buffer+current, c, n);
	current += n;
}

/*
 * Append the single character c to the scratch buffer.
 */
static void
addchar(char c)
{
	grow_buffer(1);
	buffer[current++] = c;
}

/*
 * NUL-terminate the accumulated text, reset the buffer to empty and
 * return the text.  The returned pointer is the scratch buffer itself,
 * so the next addchar()/addchars() call overwrites it.
 */
static char *
getstring(void)
{
	addchar('\0');
	current = 0;
	return buffer;
}
257 
258 
/*
 * Report the regex error code er for the compiled expression re and exit.
 * regerror() is called once without a buffer to learn the message size,
 * then again to format the message.
 */
static void
exit_regerror(int er, regex_t *re)
{
	size_t needed = regerror(er, re, NULL, 0);
	char *message = xalloc(needed);

	regerror(er, re, message, needed);
	errx(1, "regular expression error: %s", message);
}
272 
/*
 * Append the text matched by subexpression n (0 is the whole match) of
 * string to the scratch buffer.  A number larger than re->re_nsub only
 * produces a warning.
 */
static void
add_sub(int n, const char *string, regex_t *re, regmatch_t *pm)
{
	if (n > re->re_nsub) {
		warnx("No subexpression %d", n);
		return;
	}
	/* Subexpressions that did not match are not an error. */
	if (pm[n].rm_so == -1 || pm[n].rm_eo == -1)
		return;
	addchars(string + pm[n].rm_so, pm[n].rm_eo - pm[n].rm_so);
}
290 
291 /* Add replacement string to the output buffer, recognizing special
292  * constructs and replacing them with substrings of the original string.
293  */
294 static void
295 add_replace(string, re, replace, pm)
296 	const char *string;
297 	regex_t *re;
298 	const char *replace;
299 	regmatch_t *pm;
300 {
301 	const char *p;
302 
303 	for (p = replace; *p != '\0'; p++) {
304 		if (*p == '&' && !mimic_gnu) {
305 			add_sub(0, string, re, pm);
306 			continue;
307 		}
308 		if (*p == '\\') {
309 			if (p[1] == '\\') {
310 				addchar(p[1]);
311 				continue;
312 			}
313 			if (p[1] == '&') {
314 				if (mimic_gnu)
315 					add_sub(0, string, re, pm);
316 				else
317 					addchar(p[1]);
318 				p++;
319 				continue;
320 			}
321 			if (isdigit(p[1])) {
322 				add_sub(*(++p) - '0', string, re, pm);
323 				continue;
324 			}
325 		}
326 	    	addchar(*p);
327 	}
328 }
329 
/*
 * Replace every match of re in string: the unmatched stretches and the
 * expanded replacements are appended to the scratch buffer, and whatever
 * follows the last match is pushed back with pbstr().  pm must have room
 * for re->re_nsub+1 entries.  Exits through exit_regerror() on a regexec
 * failure other than "no match".
 */
static void
do_subst(const char *string, regex_t *re, const char *replace, regmatch_t *pm)
{
	const char *start = string;
	const char *last_match = NULL;
	int error;

	for (;;) {
		/* once past the start of the subject, `^' must not match at
		 * the point where the search resumes */
		error = regexec(re, string, re->re_nsub+1, pm,
		    string == start ? 0 : REG_NOTBOL);
		if (error != 0)
			break;

		/* NULL length matches are special... We use the `vi-mode'
		 * rule: don't allow a NULL-match at the last match
		 * position.  Positions are compared as pointers because
		 * the offsets in pm are relative to a moving string.
		 */
		if (pm[0].rm_so == pm[0].rm_eo &&
		    string + pm[0].rm_so == last_match) {
			if (*string == '\0')
				return;
			addchar(*string);
			string++;
			continue;
		}
		last_match = string + pm[0].rm_so;
		addchars(string, pm[0].rm_so);
		add_replace(string, re, replace, pm);
		string += pm[0].rm_eo;
	}
	if (error != REG_NOMATCH)
		exit_regerror(error, re);
	pbstr(string);
}
362 
/*
 * Match re against string once.  On a match, expand replace against it
 * and push the result back; when nothing matches, push back nothing.
 * Any other regexec result goes to exit_regerror().
 */
static void
do_regexp(const char *string, regex_t *re, const char *replace, regmatch_t *pm)
{
	int error = regexec(re, string, re->re_nsub+1, pm, 0);

	if (error == 0) {
		add_replace(string, re, replace, pm);
		pbstr(getstring());
	} else if (error != REG_NOMATCH) {
		exit_regerror(error, re);
	}
}
383 
/*
 * Match re against string once and push back the offset at which the
 * match starts, or -1 when nothing matches.  Any other regexec result
 * goes to exit_regerror().
 */
static void
do_regexpindex(const char *string, regex_t *re, regmatch_t *pm)
{
	int error = regexec(re, string, re->re_nsub+1, pm, 0);

	if (error == 0)
		pbunsigned(pm[0].rm_so);
	else if (error == REG_NOMATCH)
		pbnum(-1);
	else
		exit_regerror(error, re);
}
403 
/*
 * In Gnu m4 mode, parentheses for backmatch don't work like POSIX 1003.2
 * says. So we twiddle with the regexp before passing it to regcomp:
 * `\(' and `\)' lose their backslash, bare `(' and `)' gain one.
 * Every other backslash escape is copied through as a pair.
 * The result lives in the scratch buffer (see getstring()).
 * NOTE: parentheses inside bracket expressions get no special treatment.
 */
static char *
twiddle(const char *p)
{
	/* This could use strcspn for speed... */
	while (*p != '\0') {
		if (*p == '\\' && p[1] != '\0') {
			if (p[1] == '(' || p[1] == ')') {
				addchar(p[1]);
			} else {
				/* keep the pair together, so that an escaped
				 * backslash is not taken as the start of a
				 * new escape on the next round */
				addchar(*p);
				addchar(p[1]);
			}
			p += 2;
			continue;
		}
		if (*p == '(' || *p == ')')
			addchar('\\');

		addchar(*p);
		p++;
	}
	return getstring();
}
426 
427 /* patsubst(string, regexp, opt replacement) */
428 /* argv[2]: string
429  * argv[3]: regexp
430  * argv[4]: opt rep
431  */
432 void
433 dopatsubst(argv, argc)
434 	const char *argv[];
435 	int argc;
436 {
437 	int error;
438 	regex_t re;
439 	regmatch_t *pmatch;
440 
441 	if (argc <= 3) {
442 		warnx("Too few arguments to patsubst");
443 		return;
444 	}
445 	error = regcomp(&re, mimic_gnu ? twiddle(argv[3]) : argv[3],
446 	    REG_EXTENDED);
447 	if (error != 0)
448 		exit_regerror(error, &re);
449 
450 	pmatch = xalloc(sizeof(regmatch_t) * (re.re_nsub+1));
451 	do_subst(argv[2], &re,
452 	    argc != 4 && argv[4] != NULL ? argv[4] : "", pmatch);
453 	pbstr(getstring());
454 	free(pmatch);
455 	regfree(&re);
456 }
457 
458 void
459 doregexp(argv, argc)
460 	const char *argv[];
461 	int argc;
462 {
463 	int error;
464 	regex_t re;
465 	regmatch_t *pmatch;
466 
467 	if (argc <= 3) {
468 		warnx("Too few arguments to regexp");
469 		return;
470 	}
471 	error = regcomp(&re, mimic_gnu ? twiddle(argv[3]) : argv[3],
472 	    REG_EXTENDED);
473 	if (error != 0)
474 		exit_regerror(error, &re);
475 
476 	pmatch = xalloc(sizeof(regmatch_t) * (re.re_nsub+1));
477 	if (argv[4] == NULL || argc == 4)
478 		do_regexpindex(argv[2], &re, pmatch);
479 	else
480 		do_regexp(argv[2], &re, argv[4], pmatch);
481 	free(pmatch);
482 	regfree(&re);
483 }
484 
485 void
486 doesyscmd(cmd)
487 	const char *cmd;
488 {
489 	int p[2];
490 	pid_t pid, cpid;
491 	char *argv[4];
492 	int cc;
493 	int status;
494 
495 	/* Follow gnu m4 documentation: first flush buffers. */
496 	fflush(NULL);
497 
498 	argv[0] = "sh";
499 	argv[1] = "-c";
500 	argv[2] = (char *)cmd;
501 	argv[3] = NULL;
502 
503 	/* Just set up standard output, share stderr and stdin with m4 */
504 	if (pipe(p) == -1)
505 		err(1, "bad pipe");
506 	switch(cpid = fork()) {
507 	case -1:
508 		err(1, "bad fork");
509 		/* NOTREACHED */
510 	case 0:
511 		(void) close(p[0]);
512 		(void) dup2(p[1], 1);
513 		(void) close(p[1]);
514 		execv(_PATH_BSHELL, argv);
515 		exit(1);
516 	default:
517 		/* Read result in two stages, since m4's buffer is
518 		 * pushback-only. */
519 		(void) close(p[1]);
520 		do {
521 			char result[BUFSIZE];
522 			cc = read(p[0], result, sizeof result);
523 			if (cc > 0)
524 				addchars(result, cc);
525 		} while (cc > 0 || (cc == -1 && errno == EINTR));
526 
527 		(void) close(p[0]);
528 		while ((pid = wait(&status)) != cpid && pid >= 0)
529 			continue;
530 		pbstr(getstring());
531 	}
532 }
533