xref: /openbsd-src/usr.bin/m4/gnum4.c (revision f2da64fbbbf1b03f09f390ab01267c93dfd77c4c)
1 /* $OpenBSD: gnum4.c,v 1.50 2015/04/29 00:13:26 millert Exp $ */
2 
3 /*
4  * Copyright (c) 1999 Marc Espie
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions
8  * are met:
9  * 1. Redistributions of source code must retain the above copyright
10  *    notice, this list of conditions and the following disclaimer.
11  * 2. Redistributions in binary form must reproduce the above copyright
12  *    notice, this list of conditions and the following disclaimer in the
13  *    documentation and/or other materials provided with the distribution.
14  *
15  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
16  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
19  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
21  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
22  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
23  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
24  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
25  * SUCH DAMAGE.
26  */
27 
28 /*
29  * functions needed to support gnu-m4 extensions, including a fake freezing
30  */
31 
32 #include <sys/types.h>
33 #include <sys/wait.h>
34 #include <ctype.h>
35 #include <err.h>
36 #include <paths.h>
37 #include <regex.h>
38 #include <stddef.h>
39 #include <stdlib.h>
40 #include <stdint.h>
41 #include <stdio.h>
42 #include <string.h>
43 #include <errno.h>
44 #include <unistd.h>
45 #include <limits.h>
46 #include "mdef.h"
47 #include "stdd.h"
48 #include "extern.h"
49 
50 
51 int mimic_gnu = 0;
52 
53 /*
54  * Support for include path search
55  * First search in the current directory.
56  * If not found, and the path is not absolute, include path kicks in.
57  * First, -I options, in the order found on the command line.
58  * Then M4PATH env variable
59  */
60 
/* One directory of the include search path, kept in a singly linked list. */
struct path_entry {
	char *name;			/* directory name */
	struct path_entry *next;	/* following entry, NULL on the last one */
};

/* Head and tail of the include search path list (both NULL when empty). */
struct path_entry *first, *last;
65 
66 static struct path_entry *new_path_entry(const char *);
67 static void ensure_m4path(void);
68 static struct input_file *dopath(struct input_file *, const char *);
69 
70 static struct path_entry *
71 new_path_entry(const char *dirname)
72 {
73 	struct path_entry *n;
74 
75 	n = malloc(sizeof(struct path_entry));
76 	if (!n)
77 		errx(1, "out of memory");
78 	n->name = xstrdup(dirname);
79 	n->next = 0;
80 	return n;
81 }
82 
83 void
84 addtoincludepath(const char *dirname)
85 {
86 	struct path_entry *n;
87 
88 	n = new_path_entry(dirname);
89 
90 	if (last) {
91 		last->next = n;
92 		last = n;
93 	}
94 	else
95 		last = first = n;
96 }
97 
98 static void
99 ensure_m4path()
100 {
101 	static int envpathdone = 0;
102 	char *envpath;
103 	char *sweep;
104 	char *path;
105 
106 	if (envpathdone)
107 		return;
108 	envpathdone = TRUE;
109 	envpath = getenv("M4PATH");
110 	if (!envpath)
111 		return;
112 	/* for portability: getenv result is read-only */
113 	envpath = xstrdup(envpath);
114 	for (sweep = envpath;
115 	    (path = strsep(&sweep, ":")) != NULL;)
116 	    addtoincludepath(path);
117 	free(envpath);
118 }
119 
120 static
121 struct input_file *
122 dopath(struct input_file *i, const char *filename)
123 {
124 	char path[PATH_MAX];
125 	struct path_entry *pe;
126 	FILE *f;
127 
128 	for (pe = first; pe; pe = pe->next) {
129 		snprintf(path, sizeof(path), "%s/%s", pe->name, filename);
130 		if ((f = fopen(path, "r")) != 0) {
131 			set_input(i, f, path);
132 			return i;
133 		}
134 	}
135 	return NULL;
136 }
137 
/*
 * Open filename for reading and attach it to i.  The name is tried as
 * given first; if that fails and the name is not absolute, the include
 * search path (-I directories, then M4PATH) is consulted.
 * Returns i on success, NULL when the file cannot be opened.
 */
struct input_file *
fopen_trypath(struct input_file *i, const char *filename)
{
	FILE *f = fopen(filename, "r");

	if (f == NULL) {
		/* absolute names are never looked up along the path */
		if (filename[0] == '/')
			return NULL;
		ensure_m4path();
		return dopath(i, filename);
	}
	set_input(i, f, filename);
	return i;
}
155 
156 void
157 doindir(const char *argv[], int argc)
158 {
159 	ndptr n;
160 	struct macro_definition *p;
161 
162 	n = lookup(argv[2]);
163 	if (n == NULL || (p = macro_getdef(n)) == NULL)
164 		m4errx(1, "indir: undefined macro %s.", argv[2]);
165 	argv[1] = p->defn;
166 
167 	eval(argv+1, argc-1, p->type, is_traced(n));
168 }
169 
170 void
171 dobuiltin(const char *argv[], int argc)
172 {
173 	ndptr p;
174 
175 	argv[1] = NULL;
176 	p = macro_getbuiltin(argv[2]);
177 	if (p != NULL)
178 		eval(argv+1, argc-1, macro_builtin_type(p), is_traced(p));
179 	else
180 		m4errx(1, "unknown builtin %s.", argv[2]);
181 }
182 
183 
/*
 * Scratch buffer used to assemble results: pushback (pb*) inserts text
 * BACKwards while substitution produces it forwards, so the output is
 * accumulated here first and pushed back in one piece.
 */
static char *buffer = NULL;	/* storage, grown on demand */
static size_t bufsize = 0;	/* number of bytes allocated for buffer */
static size_t current = 0;	/* number of bytes of buffer in use */
189 
190 static void addchars(const char *, size_t);
191 static void addchar(int);
192 static char *twiddle(const char *);
193 static char *getstring(void);
194 static void exit_regerror(int, regex_t *, const char *);
195 static void do_subst(const char *, regex_t *, const char *, const char *,
196     regmatch_t *);
197 static void do_regexpindex(const char *, regex_t *, const char *, regmatch_t *);
198 static void do_regexp(const char *, regex_t *, const char *, const char *,
199     regmatch_t *);
200 static void add_sub(int, const char *, regex_t *, regmatch_t *);
201 static void add_replace(const char *, regex_t *, const char *, regmatch_t *);
202 #define addconstantstring(s) addchars((s), sizeof(s)-1)
203 
204 static void
205 addchars(const char *c, size_t n)
206 {
207 	if (n == 0)
208 		return;
209 	while (current + n > bufsize) {
210 		if (bufsize == 0)
211 			bufsize = 1024;
212 		else if (bufsize <= SIZE_MAX/2) {
213 			bufsize *= 2;
214 		} else {
215 			errx(1, "size overflow");
216 		}
217 		buffer = xrealloc(buffer, bufsize, NULL);
218 	}
219 	memcpy(buffer+current, c, n);
220 	current += n;
221 }
222 
223 static void
224 addchar(int c)
225 {
226 	if (current +1 > bufsize) {
227 		if (bufsize == 0)
228 			bufsize = 1024;
229 		else
230 			bufsize *= 2;
231 		buffer = xrealloc(buffer, bufsize, NULL);
232 	}
233 	buffer[current++] = c;
234 }
235 
236 static char *
237 getstring()
238 {
239 	addchar('\0');
240 	current = 0;
241 	return buffer;
242 }
243 
244 
/*
 * Report regex error code er for the expression text source through
 * m4errx().  regerror() is called twice: once to learn the message size,
 * once to fill the buffer allocated for it.
 */
static void
exit_regerror(int er, regex_t *re, const char *source)
{
	size_t	needed = regerror(er, re, NULL, 0);
	char	*msg = xalloc(needed,
	    "malloc in regerror: %lu", (unsigned long)needed);

	regerror(er, re, msg, needed);
	m4errx(1, "regular expression error in %s: %s.", source, msg);
}
257 
/*
 * Append the text matched by subexpression n (0 is the whole match) to
 * the scratch buffer.  string is the text regexec() was run on and pm its
 * match array.  A subexpression number larger than the regexp provides
 * only produces a warning.
 */
static void
add_sub(int n, const char *string, regex_t *re, regmatch_t *pm)
{
	const regmatch_t *m;

	if (n > re->re_nsub) {
		warnx("No subexpression %d", n);
		return;
	}
	m = &pm[n];
	/* Subexpressions that did not match are not an error. */
	if (m->rm_so == -1 || m->rm_eo == -1)
		return;
	addchars(string + m->rm_so, m->rm_eo - m->rm_so);
}
271 
272 /* Add replacement string to the output buffer, recognizing special
273  * constructs and replacing them with substrings of the original string.
274  */
275 static void
276 add_replace(const char *string, regex_t *re, const char *replace, regmatch_t *pm)
277 {
278 	const char *p;
279 
280 	for (p = replace; *p != '\0'; p++) {
281 		if (*p == '&' && !mimic_gnu) {
282 			add_sub(0, string, re, pm);
283 			continue;
284 		}
285 		if (*p == '\\') {
286 			if (p[1] == '\\') {
287 				addchar(p[1]);
288 				p++;
289 				continue;
290 			}
291 			if (p[1] == '&') {
292 				if (mimic_gnu)
293 					add_sub(0, string, re, pm);
294 				else
295 					addchar(p[1]);
296 				p++;
297 				continue;
298 			}
299 			if (isdigit((unsigned char)p[1])) {
300 				add_sub(*(++p) - '0', string, re, pm);
301 				continue;
302 			}
303 		}
304 		addchar(*p);
305 	}
306 }
307 
/*
 * Replace every match of re in string by the expansion of replace.
 * Substituted text is accumulated in the scratch buffer; the unmatched
 * tail of string is pushed back directly with pbstr().  source is the
 * regexp text, used only for error reporting.
 */
static void
do_subst(const char *string, regex_t *re, const char *source,
    const char *replace, regmatch_t *pm)
{
	const char *prev_match = NULL;
	int eflags = 0;
	int rc;

	for (;;) {
		rc = regexec(re, string, re->re_nsub + 1, pm, eflags);
		if (rc != 0)
			break;

		/*
		 * The next search starts at a beginning of line only
		 * when this match ended on a newline.
		 */
		if (pm[0].rm_eo != 0)
			eflags = (string[pm[0].rm_eo - 1] == '\n') ?
			    0 : REG_NOTBOL;

		/* NULL length matches are special... We use the `vi-mode'
		 * rule: don't allow a NULL-match at the last match
		 * position.  Copy one character and search again instead.
		 */
		if (pm[0].rm_so == pm[0].rm_eo &&
		    string + pm[0].rm_so == prev_match) {
			if (*string == '\0')
				return;
			addchar(*string);
			eflags = (*string == '\n') ? 0 : REG_NOTBOL;
			string++;
			continue;
		}
		prev_match = string + pm[0].rm_so;
		/* text before the match, then the expanded replacement */
		addchars(string, pm[0].rm_so);
		add_replace(string, re, replace, pm);
		string += pm[0].rm_eo;
	}
	if (rc != REG_NOMATCH)
		exit_regerror(rc, re, source);
	pbstr(string);
}
348 
/*
 * Match re against string once.  On success the expansion of replace is
 * pushed back; when there is no match nothing is pushed back.  Any other
 * regexec() result is reported through exit_regerror().
 */
static void
do_regexp(const char *string, regex_t *re, const char *source,
    const char *replace, regmatch_t *pm)
{
	int rc = regexec(re, string, re->re_nsub + 1, pm, 0);

	if (rc == 0) {
		add_replace(string, re, replace, pm);
		pbstr(getstring());
	} else if (rc != REG_NOMATCH) {
		exit_regerror(rc, re, source);
	}
}
366 
/*
 * Match re against string once and push back the offset at which the
 * match starts, or -1 when there is no match.  Any other regexec()
 * result is reported through exit_regerror().
 */
static void
do_regexpindex(const char *string, regex_t *re, const char *source,
    regmatch_t *pm)
{
	int rc = regexec(re, string, re->re_nsub + 1, pm, 0);

	if (rc == 0)
		pbunsigned(pm[0].rm_so);
	else if (rc == REG_NOMATCH)
		pbnum(-1);
	else
		exit_regerror(rc, re, source);
}
384 
/* In Gnu m4 mode, parentheses for backmatch don't work like POSIX 1003.2
 * says. So we twiddle with the regexp before passing it to regcomp.
 *
 * The translation swaps the meaning of escaped and unescaped ( ) |,
 * escapes a leading + (after an optional ^), and rewrites \w, \W, \< and
 * \> into bracket expressions.  The result lives in the scratch buffer,
 * so it is only valid until the next addchar()/addchars() call.
 */
static char *
twiddle(const char *p)
{
	/* + at start of regexp is a normal character for Gnu m4 */
	if (*p == '^') {
		addchar(*p);
		p++;
	}
	if (*p == '+') {
		addchar('\\');
	}
	/* This could use strcspn for speed... */
	while (*p != '\0') {
		if (*p == '\\') {
			switch(p[1]) {
			case '\0':
				/*
				 * Trailing backslash: copy it alone and stop
				 * here, so that we never step past the
				 * terminator.  regcomp gets to judge it.
				 */
				addchar(*p);
				return getstring();
			case '(':
			case ')':
			case '|':
				addchar(p[1]);
				break;
			case 'w':
				addconstantstring("[_a-zA-Z0-9]");
				break;
			case 'W':
				addconstantstring("[^_a-zA-Z0-9]");
				break;
			case '<':
				addconstantstring("[[:<:]]");
				break;
			case '>':
				addconstantstring("[[:>:]]");
				break;
			default:
				addchars(p, 2);
				break;
			}
			p+=2;
			continue;
		}
		/* unescaped ( ) | are ordinary characters for Gnu m4 */
		if (*p == '(' || *p == ')' || *p == '|')
			addchar('\\');

		addchar(*p);
		p++;
	}
	return getstring();
}
435 
436 /* patsubst(string, regexp, opt replacement) */
437 /* argv[2]: string
438  * argv[3]: regexp
439  * argv[4]: opt rep
440  */
441 void
442 dopatsubst(const char *argv[], int argc)
443 {
444 	if (argc <= 3) {
445 		warnx("Too few arguments to patsubst");
446 		return;
447 	}
448 	/* special case: empty regexp */
449 	if (argv[3][0] == '\0') {
450 		const char *s;
451 		size_t len;
452 		if (argc > 4 && argv[4])
453 			len = strlen(argv[4]);
454 		else
455 			len = 0;
456 		for (s = argv[2]; *s != '\0'; s++) {
457 			addchars(argv[4], len);
458 			addchar(*s);
459 		}
460 	} else {
461 		int error;
462 		regex_t re;
463 		regmatch_t *pmatch;
464 		int mode = REG_EXTENDED;
465 		const char *source;
466 		size_t l = strlen(argv[3]);
467 
468 		if (!mimic_gnu ||
469 		    (argv[3][0] == '^') ||
470 		    (l > 0 && argv[3][l-1] == '$'))
471 			mode |= REG_NEWLINE;
472 
473 		source = mimic_gnu ? twiddle(argv[3]) : argv[3];
474 		error = regcomp(&re, source, mode);
475 		if (error != 0)
476 			exit_regerror(error, &re, source);
477 
478 		pmatch = xreallocarray(NULL, re.re_nsub+1, sizeof(regmatch_t),
479 		    NULL);
480 		do_subst(argv[2], &re, source,
481 		    argc > 4 && argv[4] != NULL ? argv[4] : "", pmatch);
482 		free(pmatch);
483 		regfree(&re);
484 	}
485 	pbstr(getstring());
486 }
487 
488 void
489 doregexp(const char *argv[], int argc)
490 {
491 	int error;
492 	regex_t re;
493 	regmatch_t *pmatch;
494 	const char *source;
495 
496 	if (argc <= 3) {
497 		warnx("Too few arguments to regexp");
498 		return;
499 	}
500 	/* special gnu case */
501 	if (argv[3][0] == '\0' && mimic_gnu) {
502 		if (argc == 4 || argv[4] == NULL)
503 			return;
504 		else
505 			pbstr(argv[4]);
506 	}
507 	source = mimic_gnu ? twiddle(argv[3]) : argv[3];
508 	error = regcomp(&re, source, REG_EXTENDED|REG_NEWLINE);
509 	if (error != 0)
510 		exit_regerror(error, &re, source);
511 
512 	pmatch = xreallocarray(NULL, re.re_nsub+1, sizeof(regmatch_t), NULL);
513 	if (argc == 4 || argv[4] == NULL)
514 		do_regexpindex(argv[2], &re, source, pmatch);
515 	else
516 		do_regexp(argv[2], &re, source, argv[4], pmatch);
517 	free(pmatch);
518 	regfree(&re);
519 }
520 
/*
 * Read one numeric field (width or precision) of a format specifier.
 * A `*' takes the number from the next macro argument, advancing *pos;
 * otherwise the digits are parsed from the format itself.  *fmt is left
 * on the first character after the field.
 */
static long
format_field_number(const char **fmt, const char *argv[], int argc, int *pos)
{
	char *end;
	long value;

	if (**fmt == '*') {
		++*fmt;
		if (*pos >= argc)
			m4errx(1,
			    "Format with too many format specifiers.");
		return strtol(argv[(*pos)++], NULL, 10);
	}
	value = strtol(*fmt, &end, 10);
	*fmt = end;
	return value;
}

/*
 * format(fmt, args...): a small printf look-alike.
 * argv[2] is the format, arguments are consumed from argv[3] onwards.
 * Supported specifiers are %s and %c, each with an optional width
 * (negative for left adjustment) and an optional `.precision' limiting
 * the number of characters copied; either may be given as `*'.  `%%'
 * yields a single %.  The result is pushed back.
 */
void
doformat(const char *argv[], int argc)
{
	const char *format = argv[2];
	const char *thisarg;
	char temp[2];
	long width, extra;
	size_t l;
	int left_padded;
	int pos = 3;

	while (*format != 0) {
		/* ordinary characters are copied through */
		if (*format != '%') {
			addchar(*format++);
			continue;
		}

		format++;
		if (*format == '%') {
			addchar(*format++);
			continue;
		}
		if (*format == 0) {
			/* lone % at the end of the format */
			addchar('%');
			break;
		}

		width = format_field_number(&format, argv, argc, &pos);
		left_padded = width < 0;
		if (left_padded)
			width = -width;

		if (*format == '.') {
			format++;
			extra = format_field_number(&format, argv, argc,
			    &pos);
		} else {
			extra = LONG_MAX;
		}

		if (pos >= argc)
			m4errx(1, "Format with too many format specifiers.");
		switch(*format) {
		case 's':
			thisarg = argv[pos++];
			break;
		case 'c':
			/* the argument is the numeric character code */
			temp[0] = strtoul(argv[pos++], NULL, 10);
			temp[1] = 0;
			thisarg = temp;
			break;
		default:
			m4errx(1, "Unsupported format specification: %s.",
			    argv[2]);
		}
		format++;

		l = strlen(thisarg);
		if (l > extra)
			l = extra;
		if (left_padded) {
			addchars(thisarg, l);
			while (l < width--)
				addchar(' ');
		} else {
			while (l < width--)
				addchar(' ');
			addchars(thisarg, l);
		}
	}
	pbstr(getstring());
}
609 
610 void
611 doesyscmd(const char *cmd)
612 {
613 	int p[2];
614 	pid_t pid, cpid;
615 	char *argv[4];
616 	int cc;
617 	int status;
618 
619 	/* Follow gnu m4 documentation: first flush buffers. */
620 	fflush(NULL);
621 
622 	argv[0] = "sh";
623 	argv[1] = "-c";
624 	argv[2] = (char *)cmd;
625 	argv[3] = NULL;
626 
627 	/* Just set up standard output, share stderr and stdin with m4 */
628 	if (pipe(p) == -1)
629 		err(1, "bad pipe");
630 	switch(cpid = fork()) {
631 	case -1:
632 		err(1, "bad fork");
633 		/* NOTREACHED */
634 	case 0:
635 		(void) close(p[0]);
636 		(void) dup2(p[1], 1);
637 		(void) close(p[1]);
638 		execv(_PATH_BSHELL, argv);
639 		exit(1);
640 	default:
641 		/* Read result in two stages, since m4's buffer is
642 		 * pushback-only. */
643 		(void) close(p[1]);
644 		do {
645 			char result[BUFSIZE];
646 			cc = read(p[0], result, sizeof result);
647 			if (cc > 0)
648 				addchars(result, cc);
649 		} while (cc > 0 || (cc == -1 && errno == EINTR));
650 
651 		(void) close(p[0]);
652 		while ((pid = wait(&status)) != cpid && pid >= 0)
653 			continue;
654 		pbstr(getstring());
655 	}
656 }
657 
658 void
659 getdivfile(const char *name)
660 {
661 	FILE *f;
662 	int c;
663 
664 	f = fopen(name, "r");
665 	if (!f)
666 		return;
667 
668 	while ((c = getc(f))!= EOF)
669 		putc(c, active);
670 	(void) fclose(f);
671 }
672