xref: /netbsd-src/usr.bin/sed/process.c (revision da5f4674a3fc214be3572d358b66af40ab9401e7)
1 /*	$NetBSD: process.c,v 1.33 2003/08/07 11:15:50 agc Exp $	*/
2 
3 /*-
4  * Copyright (c) 1992, 1993, 1994
5  *	The Regents of the University of California.  All rights reserved.
6  *
7  * This code is derived from software contributed to Berkeley by
8  * Diomidis Spinellis of Imperial College, University of London.
9  *
10  * Redistribution and use in source and binary forms, with or without
11  * modification, are permitted provided that the following conditions
12  * are met:
13  * 1. Redistributions of source code must retain the above copyright
14  *    notice, this list of conditions and the following disclaimer.
15  * 2. Redistributions in binary form must reproduce the above copyright
16  *    notice, this list of conditions and the following disclaimer in the
17  *    documentation and/or other materials provided with the distribution.
18  * 3. Neither the name of the University nor the names of its contributors
19  *    may be used to endorse or promote products derived from this software
20  *    without specific prior written permission.
21  *
22  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
23  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
24  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
25  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
26  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
27  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
28  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
29  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
30  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
31  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32  * SUCH DAMAGE.
33  */
34 
35 /*-
36  * Copyright (c) 1992 Diomidis Spinellis.
37  *
38  * This code is derived from software contributed to Berkeley by
39  * Diomidis Spinellis of Imperial College, University of London.
40  *
41  * Redistribution and use in source and binary forms, with or without
42  * modification, are permitted provided that the following conditions
43  * are met:
44  * 1. Redistributions of source code must retain the above copyright
45  *    notice, this list of conditions and the following disclaimer.
46  * 2. Redistributions in binary form must reproduce the above copyright
47  *    notice, this list of conditions and the following disclaimer in the
48  *    documentation and/or other materials provided with the distribution.
49  * 3. All advertising materials mentioning features or use of this software
50  *    must display the following acknowledgement:
51  *	This product includes software developed by the University of
52  *	California, Berkeley and its contributors.
53  * 4. Neither the name of the University nor the names of its contributors
54  *    may be used to endorse or promote products derived from this software
55  *    without specific prior written permission.
56  *
57  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
58  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
59  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
60  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
61  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
62  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
63  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
64  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
65  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
66  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
67  * SUCH DAMAGE.
68  */
69 
70 #include <sys/cdefs.h>
71 #ifndef lint
72 #if 0
73 static char sccsid[] = "@(#)process.c	8.6 (Berkeley) 4/20/94";
74 #else
75 __RCSID("$NetBSD: process.c,v 1.33 2003/08/07 11:15:50 agc Exp $");
76 #endif
77 #endif /* not lint */
78 
79 #include <sys/types.h>
80 #include <sys/stat.h>
81 #include <sys/ioctl.h>
82 #include <sys/uio.h>
83 
84 #include <ctype.h>
85 #include <errno.h>
86 #include <fcntl.h>
87 #include <limits.h>
88 #include <regex.h>
89 #include <stdio.h>
90 #include <stdlib.h>
91 #include <string.h>
92 #include <unistd.h>
93 
94 #include "defs.h"
95 #include "extern.h"
96 
97 static SPACE HS, PS, SS;	/* Hold, pattern and substitute spaces. */
98 #define	pd		PS.deleted	/* Nonzero: pattern space is not auto-printed. */
99 #define	ps		PS.space	/* Pattern space text. */
100 #define	psl		PS.len	/* Pattern space length in bytes. */
101 #define	hs		HS.space	/* Hold space text (NULL until first used). */
102 #define	hsl		HS.len	/* Hold space length in bytes. */
103 
104 static inline int	 applies(struct s_command *);
105 static void		 flush_appends(void);
106 static void		 lputs(char *);
107 static inline int	 regexec_e(regex_t *, const char *, int, int, size_t);
108 static void		 regsub(SPACE *, char *, char *);
109 static int		 substitute(struct s_command *);
110 
111 struct s_appends *appends;	/* Array of pointers to strings to append. */
112 static int appendx;		/* Index into appends array. */
113 int appendnum;			/* Size of appends array. */
114 
115 static int lastaddr;		/* Set by applies if last address of a range. */
116 static int sdone;		/* If any substitutes since last line input. */
117 				/* Last RE executed; reused by regexec_e for an empty RE. */
118 static regex_t *defpreg;
119 size_t maxnsub;			/* regexec_e requests maxnsub + 1 match slots. */
120 regmatch_t *match;		/* Match offsets filled in by regexec_e. */
121 
122 #define OUT(s) { fwrite(s, sizeof(u_char), psl, stdout); } /* Always writes psl bytes of s. */
123 
124 void
125 process(void)
126 {
127 	struct s_command *cp;
128 	SPACE tspace;
129 	size_t len, oldpsl;
130 	char *p;
131 
132 	oldpsl = 0;
133 	for (linenum = 0; mf_fgets(&PS, REPLACE);) {
134 		pd = 0;
135 top:
136 		cp = prog;
137 redirect:
138 		while (cp != NULL) {
139 			if (!applies(cp)) {
140 				cp = cp->next;
141 				continue;
142 			}
143 			switch (cp->code) {
144 			case '{':
145 				cp = cp->u.c;
146 				goto redirect;
147 			case 'a':
148 				if (appendx >= appendnum)
149 					appends = xrealloc(appends,
150 					    sizeof(struct s_appends) *
151 					    (appendnum *= 2));
152 				appends[appendx].type = AP_STRING;
153 				appends[appendx].s = cp->t;
154 				appends[appendx].len = strlen(cp->t);
155 				appendx++;
156 				break;
157 			case 'b':
158 				cp = cp->u.c;
159 				goto redirect;
160 			case 'c':
161 				pd = 1;
162 				psl = 0;
163 				if (cp->a2 == NULL || lastaddr)
164 					(void)printf("%s", cp->t);
165 				break;
166 			case 'd':
167 				pd = 1;
168 				goto new;
169 			case 'D':
170 				if (psl == 0)
171 					pd = 1;
172 				if (pd)
173 					goto new;
174 				if ((p = memchr(ps, '\n', psl - 1)) == NULL) {
175 					pd = 1;
176 					goto new;
177 				} else {
178 					psl -= (p + 1) - ps;
179 					memmove(ps, p + 1, psl);
180 					goto top;
181 				}
182 			case 'g':
183 				cspace(&PS, hs, hsl, REPLACE);
184 				break;
185 			case 'G':
186 				if (hs == NULL)
187 					cspace(&HS, "\n", 1, REPLACE);
188 				cspace(&PS, hs, hsl, 0);
189 				break;
190 			case 'h':
191 				cspace(&HS, ps, psl, REPLACE);
192 				break;
193 			case 'H':
194 				cspace(&HS, ps, psl, 0);
195 				break;
196 			case 'i':
197 				(void)printf("%s", cp->t);
198 				break;
199 			case 'l':
200 				lputs(ps);
201 				break;
202 			case 'n':
203 				if (!nflag && !pd)
204 					OUT(ps)
205 				flush_appends();
206 				if (!mf_fgets(&PS, REPLACE))
207 					exit(0);
208 				pd = 0;
209 				break;
210 			case 'N':
211 				flush_appends();
212 				if (!mf_fgets(&PS, 0)) {
213 					if (!nflag && !pd)
214 						OUT(ps)
215 					exit(0);
216 				}
217 				break;
218 			case 'p':
219 				if (pd)
220 					break;
221 				OUT(ps)
222 				break;
223 			case 'P':
224 				if (pd)
225 					break;
226 				if ((p = memchr(ps, '\n', psl - 1)) != NULL) {
227 					oldpsl = psl;
228 					psl = (p + 1) - ps;
229 				}
230 				OUT(ps)
231 				if (p != NULL)
232 					psl = oldpsl;
233 				break;
234 			case 'q':
235 				if (!nflag && !pd)
236 					OUT(ps)
237 				flush_appends();
238 				exit(0);
239 			case 'r':
240 				if (appendx >= appendnum)
241 					appends = xrealloc(appends,
242 					    sizeof(struct s_appends) *
243 					    (appendnum *= 2));
244 				appends[appendx].type = AP_FILE;
245 				appends[appendx].s = cp->t;
246 				appends[appendx].len = strlen(cp->t);
247 				appendx++;
248 				break;
249 			case 's':
250 				sdone |= substitute(cp);
251 				break;
252 			case 't':
253 				if (sdone) {
254 					sdone = 0;
255 					cp = cp->u.c;
256 					goto redirect;
257 				}
258 				break;
259 			case 'w':
260 				if (pd)
261 					break;
262 				if (cp->u.fd == -1 && (cp->u.fd = open(cp->t,
263 				    O_WRONLY|O_APPEND|O_CREAT|O_TRUNC,
264 				    DEFFILEMODE)) == -1)
265 					err(FATAL, "%s: %s",
266 					    cp->t, strerror(errno));
267 				if (write(cp->u.fd, ps, psl) != psl)
268 					err(FATAL, "%s: %s",
269 					    cp->t, strerror(errno));
270 				break;
271 			case 'x':
272 				if (hs == NULL)
273 					cspace(&HS, "\n", 1, REPLACE);
274 				tspace = PS;
275 				PS = HS;
276 				HS = tspace;
277 				break;
278 			case 'y':
279 				if (pd)
280 					break;
281 				for (p = ps, len = psl; --len; ++p)
282 					*p = cp->u.y[(int)*p];
283 				break;
284 			case ':':
285 			case '}':
286 				break;
287 			case '=':
288 				(void)printf("%lu\n", linenum);
289 			}
290 			cp = cp->next;
291 		} /* for all cp */
292 
293 new:		if (!nflag && !pd)
294 			OUT(ps)
295 		flush_appends();
296 	} /* for all lines */
297 }
298 
/*
 * TRUE if the address passed matches the current program state
 * (lastline, linenumber, ps).  An AT_RE address is matched against the
 * pattern space, an AT_LINE address against linenum; any other address
 * type yields lastline.  The expansion is fully parenthesized so it can
 * be combined with other operators safely.
 */
#define	MATCH(a)							\
	((a)->type == AT_RE ? regexec_e((a)->u.r, ps, 0, 1, psl) :	\
	    (a)->type == AT_LINE ? linenum == (a)->u.l : lastline)
306 
307 /*
308  * Return TRUE if the command applies to the current line.  Sets the inrange
309  * flag to process ranges.  Interprets the non-select (``!'') flag.
310  */
311 static inline int
312 applies(struct s_command *cp)
313 {
314 	int r;
315 
316 	lastaddr = 0;
317 	if (cp->a1 == NULL && cp->a2 == NULL)
318 		r = 1;
319 	else if (cp->a2) {
320 		if (cp->inrange) {
321 			if (MATCH(cp->a2)) {
322 				cp->inrange = 0;
323 				lastaddr = 1;
324 			}
325 			r = 1;
326 		} else if (MATCH(cp->a1)) {
327 			/*
328 			 * If the second address is a number less than or
329 			 * equal to the line number first selected, only
330 			 * one line shall be selected.
331 			 *	-- POSIX 1003.2
332 			 */
333 			if (cp->a2->type == AT_LINE &&
334 			    linenum >= cp->a2->u.l)
335 				lastaddr = 1;
336 			else
337 				cp->inrange = 1;
338 			r = 1;
339 		} else
340 			r = 0;
341 	} else
342 		r = MATCH(cp->a1);
343 	return (cp->nonsel ? ! r : r);
344 }
345 
346 /*
347  * substitute --
348  *	Do substitutions in the pattern space.  Currently, we build a
349  *	copy of the new pattern space in the substitute space structure
350  *	and then swap them.
351  */
352 static int
353 substitute(struct s_command *cp)
354 {
355 	SPACE tspace;
356 	regex_t *re;
357 	size_t re_off, slen;
358 	int lastempty, n;
359 	char *s;
360 
361 	s = ps;
362 	re = cp->u.s->re;
363 	if (re == NULL) {
364 		if (defpreg != NULL && cp->u.s->maxbref > defpreg->re_nsub) {
365 			linenum = cp->u.s->linenum;
366 			err(COMPILE, "\\%d not defined in the RE",
367 			    cp->u.s->maxbref);
368 		}
369 	}
370 	if (!regexec_e(re, s, 0, 0, psl))
371 		return (0);
372 
373 	SS.len = 0;				/* Clean substitute space. */
374 	slen = psl;
375 	n = cp->u.s->n;
376 	lastempty = 1;
377 
378 	switch (n) {
379 	case 0:					/* Global */
380 		do {
381 			if (lastempty || match[0].rm_so != match[0].rm_eo) {
382 				/* Locate start of replaced string. */
383 				re_off = match[0].rm_so;
384 				/* Copy leading retained string. */
385 				cspace(&SS, s, re_off, APPEND);
386 				/* Add in regular expression. */
387 				regsub(&SS, s, cp->u.s->new);
388 			}
389 
390 			/* Move past this match. */
391 			if (match[0].rm_so != match[0].rm_eo) {
392 				s += match[0].rm_eo;
393 				slen -= match[0].rm_eo;
394 				lastempty = 0;
395 			} else {
396 				if (match[0].rm_so == 0)
397 					cspace(&SS,
398 					    s, match[0].rm_so + 1, APPEND);
399 				else
400 					cspace(&SS,
401 					    s + match[0].rm_so, 1, APPEND);
402 				s += match[0].rm_so + 1;
403 				slen -= match[0].rm_so + 1;
404 				lastempty = 1;
405 			}
406 		} while (slen > 0 && regexec_e(re, s, REG_NOTBOL, 0, slen));
407 		/* Copy trailing retained string. */
408 		if (slen > 0)
409 			cspace(&SS, s, slen, APPEND);
410 		break;
411 	default:				/* Nth occurrence */
412 		while (--n) {
413 			s += match[0].rm_eo;
414 			slen -= match[0].rm_eo;
415 			if (!regexec_e(re, s, REG_NOTBOL, 0, slen))
416 				return (0);
417 		}
418 		/* FALLTHROUGH */
419 	case 1:					/* 1st occurrence */
420 		/* Locate start of replaced string. */
421 		re_off = match[0].rm_so + (s - ps);
422 		/* Copy leading retained string. */
423 		cspace(&SS, ps, re_off, APPEND);
424 		/* Add in regular expression. */
425 		regsub(&SS, s, cp->u.s->new);
426 		/* Copy trailing retained string. */
427 		s += match[0].rm_eo;
428 		slen -= match[0].rm_eo;
429 		cspace(&SS, s, slen, APPEND);
430 		break;
431 	}
432 
433 	/*
434 	 * Swap the substitute space and the pattern space, and make sure
435 	 * that any leftover pointers into stdio memory get lost.
436 	 */
437 	tspace = PS;
438 	PS = SS;
439 	SS = tspace;
440 	SS.space = SS.back;
441 
442 	/* Handle the 'p' flag. */
443 	if (cp->u.s->p)
444 		OUT(ps)
445 
446 	/* Handle the 'w' flag. */
447 	if (cp->u.s->wfile && !pd) {
448 		if (cp->u.s->wfd == -1 && (cp->u.s->wfd = open(cp->u.s->wfile,
449 		    O_WRONLY|O_APPEND|O_CREAT|O_TRUNC, DEFFILEMODE)) == -1)
450 			err(FATAL, "%s: %s", cp->u.s->wfile, strerror(errno));
451 		if (write(cp->u.s->wfd, ps, psl) != psl)
452 			err(FATAL, "%s: %s", cp->u.s->wfile, strerror(errno));
453 	}
454 	return (1);
455 }
456 
457 /*
458  * Flush append requests.  Always called before reading a line,
459  * therefore it also resets the substitution done (sdone) flag.
460  */
461 static void
462 flush_appends(void)
463 {
464 	FILE *f;
465 	int count, i;
466 	char buf[8 * 1024];
467 
468 	for (i = 0; i < appendx; i++)
469 		switch (appends[i].type) {
470 		case AP_STRING:
471 			fwrite(appends[i].s, sizeof(char), appends[i].len,
472 			    stdout);
473 			break;
474 		case AP_FILE:
475 			/*
476 			 * Read files probably shouldn't be cached.  Since
477 			 * it's not an error to read a non-existent file,
478 			 * it's possible that another program is interacting
479 			 * with the sed script through the file system.  It
480 			 * would be truly bizarre, but possible.  It's probably
481 			 * not that big a performance win, anyhow.
482 			 */
483 			if ((f = fopen(appends[i].s, "r")) == NULL)
484 				break;
485 			while ((count =
486 			    fread(buf, sizeof(char), sizeof(buf), f)) > 0)
487 				(void)fwrite(buf, sizeof(char), count, stdout);
488 			(void)fclose(f);
489 			break;
490 		}
491 	if (ferror(stdout))
492 		err(FATAL, "stdout: %s", strerror(errno ? errno : EIO));
493 	appendx = sdone = 0;
494 }
495 
496 static void
497 lputs(char *s)
498 {
499 	int count;
500 	char *escapes, *p;
501 	struct winsize win;
502 	static int termwidth = -1;
503 
504 	if (termwidth == -1) {
505 		if ((p = getenv("COLUMNS")) != NULL)
506 			termwidth = atoi(p);
507 		else if (ioctl(STDOUT_FILENO, TIOCGWINSZ, &win) == 0 &&
508 		    win.ws_col > 0)
509 			termwidth = win.ws_col;
510 		else
511 			termwidth = 60;
512 	}
513 	for (count = 0; *s; ++s) {
514 		if (count >= termwidth) {
515 			(void)printf("\\\n");
516 			count = 0;
517 		}
518 		if (isascii((unsigned char)*s) && isprint((unsigned char)*s) &&
519 		    *s != '\\') {
520 			(void)putchar(*s);
521 			count++;
522 		} else {
523 			escapes = "\\\a\b\f\n\r\t\v";
524 			(void)putchar('\\');
525 			if ((p = strchr(escapes, *s)) != NULL) {
526 				(void)putchar("\\abfnrtv"[p - escapes]);
527 				count += 2;
528 			} else {
529 				(void)printf("%03o", *(u_char *)s);
530 				count += 4;
531 			}
532 		}
533 	}
534 	(void)putchar('$');
535 	(void)putchar('\n');
536 	if (ferror(stdout))
537 		err(FATAL, "stdout: %s", strerror(errno ? errno : EIO));
538 }
539 
540 static inline int
541 regexec_e(regex_t *preg, const char *string, int eflags, int nomatch, size_t slen)
542 {
543 	int eval;
544 #ifndef REG_STARTEND
545 	char *buf;
546 #endif
547 
548 	if (preg == NULL) {
549 		if (defpreg == NULL)
550 			err(FATAL, "first RE may not be empty");
551 	} else
552 		defpreg = preg;
553 
554 	/* Set anchors, discounting trailing newline (if any). */
555 	if (slen > 0 && string[slen - 1] == '\n')
556 		slen--;
557 
558 #ifndef REG_STARTEND
559 	if ((buf = malloc(slen + 1)) == NULL)
560 		err(1, NULL);
561 	(void)memcpy(buf, string, slen);
562 	buf[slen] = '\0';
563 	eval = regexec(defpreg, buf,
564 	    nomatch ? 0 : maxnsub + 1, match, eflags);
565 	free(buf);
566 #else
567 	match[0].rm_so = 0;
568 	match[0].rm_eo = slen;
569 	eval = regexec(defpreg, string,
570 	    nomatch ? 0 : maxnsub + 1, match, eflags | REG_STARTEND);
571 #endif
572 	switch(eval) {
573 	case 0:
574 		return (1);
575 	case REG_NOMATCH:
576 		return (0);
577 	}
578 	err(FATAL, "RE error: %s", strregerror(eval, defpreg));
579 	/* NOTREACHED */
580 	return (0);
581 }
582 
583 /*
584  * regsub - perform substitutions after a regexp match
585  * Based on a routine by Henry Spencer
586  */
587 static void
588 regsub(SPACE *sp, char *string, char *src)
589 {
590 	int len, no;
591 	char c, *dst;
592 
593 #define	NEEDSP(reqlen)							\
594 	if (sp->len >= sp->blen - (reqlen) - 1) {			\
595 		sp->blen += (reqlen) + 1024;				\
596 		sp->space = sp->back = xrealloc(sp->back, sp->blen);	\
597 		dst = sp->space + sp->len;				\
598 	}
599 
600 	dst = sp->space + sp->len;
601 	while ((c = *src++) != '\0') {
602 		if (c == '&')
603 			no = 0;
604 		else if (c == '\\' && isdigit((unsigned char)*src))
605 			no = *src++ - '0';
606 		else
607 			no = -1;
608 		if (no < 0) {		/* Ordinary character. */
609  			if (c == '\\' && (*src == '\\' || *src == '&'))
610  				c = *src++;
611 			NEEDSP(1);
612  			*dst++ = c;
613 			++sp->len;
614  		} else if (match[no].rm_so != -1 && match[no].rm_eo != -1) {
615 			len = match[no].rm_eo - match[no].rm_so;
616 			NEEDSP(len);
617 			memmove(dst, string + match[no].rm_so, len);
618 			dst += len;
619 			sp->len += len;
620 		}
621 	}
622 	NEEDSP(1);
623 	*dst = '\0';
624 }
625 
626 /*
627  * aspace --
628  *	Append the source space to the destination space, allocating new
629  *	space as necessary.
630  */
631 void
632 cspace(SPACE *sp, char *p, size_t len, enum e_spflag spflag)
633 {
634 	size_t tlen;
635 
636 	/* Make sure SPACE has enough memory and ramp up quickly. */
637 	tlen = sp->len + len + 1;
638 	if (tlen > sp->blen) {
639 		sp->blen = tlen + 1024;
640 		sp->space = sp->back = xrealloc(sp->back, sp->blen);
641 	}
642 
643 	if (spflag == REPLACE)
644 		sp->len = 0;
645 
646 	memmove(sp->space + sp->len, p, len);
647 
648 	sp->space[sp->len += len] = '\0';
649 }
650 
651 /*
652  * Close all cached opened files and report any errors
653  */
654 void
655 cfclose(struct s_command *cp, struct s_command *end)
656 {
657 
658 	for (; cp != end; cp = cp->next)
659 		switch(cp->code) {
660 		case 's':
661 			if (cp->u.s->wfd != -1 && close(cp->u.s->wfd))
662 				err(FATAL,
663 				    "%s: %s", cp->u.s->wfile, strerror(errno));
664 			cp->u.s->wfd = -1;
665 			break;
666 		case 'w':
667 			if (cp->u.fd != -1 && close(cp->u.fd))
668 				err(FATAL, "%s: %s", cp->t, strerror(errno));
669 			cp->u.fd = -1;
670 			break;
671 		case '{':
672 			cfclose(cp->u.c, cp->next);
673 			break;
674 		}
675 }
676