xref: /netbsd-src/usr.bin/sed/process.c (revision 23c8222edbfb0f0932d88a8351d3a0cf817dfb9e)
1 /*	$NetBSD: process.c,v 1.35 2003/11/07 04:44:57 itojun Exp $	*/
2 
3 /*-
4  * Copyright (c) 1992, 1993, 1994
5  *	The Regents of the University of California.  All rights reserved.
6  *
7  * This code is derived from software contributed to Berkeley by
8  * Diomidis Spinellis of Imperial College, University of London.
9  *
10  * Redistribution and use in source and binary forms, with or without
11  * modification, are permitted provided that the following conditions
12  * are met:
13  * 1. Redistributions of source code must retain the above copyright
14  *    notice, this list of conditions and the following disclaimer.
15  * 2. Redistributions in binary form must reproduce the above copyright
16  *    notice, this list of conditions and the following disclaimer in the
17  *    documentation and/or other materials provided with the distribution.
18  * 3. Neither the name of the University nor the names of its contributors
19  *    may be used to endorse or promote products derived from this software
20  *    without specific prior written permission.
21  *
22  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
23  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
24  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
25  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
26  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
27  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
28  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
29  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
30  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
31  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32  * SUCH DAMAGE.
33  */
34 
35 /*-
36  * Copyright (c) 1992 Diomidis Spinellis.
37  *
38  * This code is derived from software contributed to Berkeley by
39  * Diomidis Spinellis of Imperial College, University of London.
40  *
41  * Redistribution and use in source and binary forms, with or without
42  * modification, are permitted provided that the following conditions
43  * are met:
44  * 1. Redistributions of source code must retain the above copyright
45  *    notice, this list of conditions and the following disclaimer.
46  * 2. Redistributions in binary form must reproduce the above copyright
47  *    notice, this list of conditions and the following disclaimer in the
48  *    documentation and/or other materials provided with the distribution.
49  * 3. All advertising materials mentioning features or use of this software
50  *    must display the following acknowledgement:
51  *	This product includes software developed by the University of
52  *	California, Berkeley and its contributors.
53  * 4. Neither the name of the University nor the names of its contributors
54  *    may be used to endorse or promote products derived from this software
55  *    without specific prior written permission.
56  *
57  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
58  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
59  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
60  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
61  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
62  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
63  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
64  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
65  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
66  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
67  * SUCH DAMAGE.
68  */
69 
70 #include <sys/cdefs.h>
71 #ifndef lint
72 #if 0
73 static char sccsid[] = "@(#)process.c	8.6 (Berkeley) 4/20/94";
74 #else
75 __RCSID("$NetBSD: process.c,v 1.35 2003/11/07 04:44:57 itojun Exp $");
76 #endif
77 #endif /* not lint */
78 
79 #include <sys/types.h>
80 #include <sys/stat.h>
81 #include <sys/ioctl.h>
82 #include <sys/uio.h>
83 
84 #include <ctype.h>
85 #include <errno.h>
86 #include <fcntl.h>
87 #include <limits.h>
88 #include <regex.h>
89 #include <stdio.h>
90 #include <stdlib.h>
91 #include <string.h>
92 #include <unistd.h>
93 
94 #include "defs.h"
95 #include "extern.h"
96 
97 static SPACE HS, PS, SS;
98 #define	pd		PS.deleted
99 #define	ps		PS.space
100 #define	psl		PS.len
101 #define	hs		HS.space
102 #define	hsl		HS.len
103 
104 static inline int	 applies(struct s_command *);
105 static void		 flush_appends(void);
106 static void		 lputs(char *);
107 static inline int	 regexec_e(regex_t *, const char *, int, int, size_t);
108 static void		 regsub(SPACE *, char *, char *);
109 static int		 substitute(struct s_command *);
110 
111 struct s_appends *appends;	/* Array of pointers to strings to append. */
112 static int appendx;		/* Index into appends array. */
113 int appendnum;			/* Size of appends array. */
114 
115 static int lastaddr;		/* Set by applies if last address of a range. */
116 static int sdone;		/* If any substitutes since last line input. */
117 				/* Iov structure for 'w' commands. */
118 static regex_t *defpreg;
119 size_t maxnsub;
120 regmatch_t *match;
121 
122 #define OUT(s) { fwrite(s, sizeof(u_char), psl, stdout); }
123 
124 void
125 process(void)
126 {
127 	struct s_command *cp;
128 	SPACE tspace;
129 	size_t len, oldpsl;
130 	char *p;
131 
132 	oldpsl = 0;
133 	for (linenum = 0; mf_fgets(&PS, REPLACE);) {
134 		pd = 0;
135 top:
136 		cp = prog;
137 redirect:
138 		while (cp != NULL) {
139 			if (!applies(cp)) {
140 				cp = cp->next;
141 				continue;
142 			}
143 			switch (cp->code) {
144 			case '{':
145 				cp = cp->u.c;
146 				goto redirect;
147 			case 'a':
148 				if (appendx >= appendnum) {
149 					appends = xrealloc(appends,
150 					    sizeof(struct s_appends) *
151 					    (appendnum * 2));
152 					appendnum *= 2;
153 				}
154 				appends[appendx].type = AP_STRING;
155 				appends[appendx].s = cp->t;
156 				appends[appendx].len = strlen(cp->t);
157 				appendx++;
158 				break;
159 			case 'b':
160 				cp = cp->u.c;
161 				goto redirect;
162 			case 'c':
163 				pd = 1;
164 				psl = 0;
165 				if (cp->a2 == NULL || lastaddr)
166 					(void)printf("%s", cp->t);
167 				break;
168 			case 'd':
169 				pd = 1;
170 				goto new;
171 			case 'D':
172 				if (psl == 0)
173 					pd = 1;
174 				if (pd)
175 					goto new;
176 				if ((p = memchr(ps, '\n', psl - 1)) == NULL) {
177 					pd = 1;
178 					goto new;
179 				} else {
180 					psl -= (p + 1) - ps;
181 					memmove(ps, p + 1, psl);
182 					goto top;
183 				}
184 			case 'g':
185 				cspace(&PS, hs, hsl, REPLACE);
186 				break;
187 			case 'G':
188 				if (hs == NULL)
189 					cspace(&HS, "\n", 1, REPLACE);
190 				cspace(&PS, hs, hsl, 0);
191 				break;
192 			case 'h':
193 				cspace(&HS, ps, psl, REPLACE);
194 				break;
195 			case 'H':
196 				cspace(&HS, ps, psl, 0);
197 				break;
198 			case 'i':
199 				(void)printf("%s", cp->t);
200 				break;
201 			case 'l':
202 				lputs(ps);
203 				break;
204 			case 'n':
205 				if (!nflag && !pd)
206 					OUT(ps)
207 				flush_appends();
208 				if (!mf_fgets(&PS, REPLACE))
209 					exit(0);
210 				pd = 0;
211 				break;
212 			case 'N':
213 				flush_appends();
214 				if (!mf_fgets(&PS, 0)) {
215 					if (!nflag && !pd)
216 						OUT(ps)
217 					exit(0);
218 				}
219 				break;
220 			case 'p':
221 				if (pd)
222 					break;
223 				OUT(ps)
224 				break;
225 			case 'P':
226 				if (pd)
227 					break;
228 				if ((p = memchr(ps, '\n', psl - 1)) != NULL) {
229 					oldpsl = psl;
230 					psl = (p + 1) - ps;
231 				}
232 				OUT(ps)
233 				if (p != NULL)
234 					psl = oldpsl;
235 				break;
236 			case 'q':
237 				if (!nflag && !pd)
238 					OUT(ps)
239 				flush_appends();
240 				exit(0);
241 			case 'r':
242 				if (appendx >= appendnum) {
243 					appends = xrealloc(appends,
244 					    sizeof(struct s_appends) *
245 					    (appendnum * 2));
246 					appendnum *= 2;
247 				}
248 				appends[appendx].type = AP_FILE;
249 				appends[appendx].s = cp->t;
250 				appends[appendx].len = strlen(cp->t);
251 				appendx++;
252 				break;
253 			case 's':
254 				sdone |= substitute(cp);
255 				break;
256 			case 't':
257 				if (sdone) {
258 					sdone = 0;
259 					cp = cp->u.c;
260 					goto redirect;
261 				}
262 				break;
263 			case 'w':
264 				if (pd)
265 					break;
266 				if (cp->u.fd == -1 && (cp->u.fd = open(cp->t,
267 				    O_WRONLY|O_APPEND|O_CREAT|O_TRUNC,
268 				    DEFFILEMODE)) == -1)
269 					err(FATAL, "%s: %s",
270 					    cp->t, strerror(errno));
271 				if (write(cp->u.fd, ps, psl) != psl)
272 					err(FATAL, "%s: %s",
273 					    cp->t, strerror(errno));
274 				break;
275 			case 'x':
276 				if (hs == NULL)
277 					cspace(&HS, "\n", 1, REPLACE);
278 				tspace = PS;
279 				PS = HS;
280 				HS = tspace;
281 				break;
282 			case 'y':
283 				if (pd)
284 					break;
285 				for (p = ps, len = psl; --len; ++p)
286 					*p = cp->u.y[(int)*p];
287 				break;
288 			case ':':
289 			case '}':
290 				break;
291 			case '=':
292 				(void)printf("%lu\n", linenum);
293 			}
294 			cp = cp->next;
295 		} /* for all cp */
296 
297 new:		if (!nflag && !pd)
298 			OUT(ps)
299 		flush_appends();
300 	} /* for all lines */
301 }
302 
303 /*
304  * TRUE if the address passed matches the current program state
305  * (lastline, linenumber, ps).
306  */
307 #define	MATCH(a)						\
308 	(a)->type == AT_RE ? regexec_e((a)->u.r, ps, 0, 1, psl) :	\
309 	    (a)->type == AT_LINE ? linenum == (a)->u.l : lastline
310 
311 /*
312  * Return TRUE if the command applies to the current line.  Sets the inrange
313  * flag to process ranges.  Interprets the non-select (``!'') flag.
314  */
315 static inline int
316 applies(struct s_command *cp)
317 {
318 	int r;
319 
320 	lastaddr = 0;
321 	if (cp->a1 == NULL && cp->a2 == NULL)
322 		r = 1;
323 	else if (cp->a2) {
324 		if (cp->inrange) {
325 			if (MATCH(cp->a2)) {
326 				cp->inrange = 0;
327 				lastaddr = 1;
328 			}
329 			r = 1;
330 		} else if (MATCH(cp->a1)) {
331 			/*
332 			 * If the second address is a number less than or
333 			 * equal to the line number first selected, only
334 			 * one line shall be selected.
335 			 *	-- POSIX 1003.2
336 			 */
337 			if (cp->a2->type == AT_LINE &&
338 			    linenum >= cp->a2->u.l)
339 				lastaddr = 1;
340 			else
341 				cp->inrange = 1;
342 			r = 1;
343 		} else
344 			r = 0;
345 	} else
346 		r = MATCH(cp->a1);
347 	return (cp->nonsel ? ! r : r);
348 }
349 
350 /*
351  * substitute --
352  *	Do substitutions in the pattern space.  Currently, we build a
353  *	copy of the new pattern space in the substitute space structure
354  *	and then swap them.
355  */
356 static int
357 substitute(struct s_command *cp)
358 {
359 	SPACE tspace;
360 	regex_t *re;
361 	size_t re_off, slen;
362 	int lastempty, n;
363 	char *s;
364 
365 	s = ps;
366 	re = cp->u.s->re;
367 	if (re == NULL) {
368 		if (defpreg != NULL && cp->u.s->maxbref > defpreg->re_nsub) {
369 			linenum = cp->u.s->linenum;
370 			err(COMPILE, "\\%d not defined in the RE",
371 			    cp->u.s->maxbref);
372 		}
373 	}
374 	if (!regexec_e(re, s, 0, 0, psl))
375 		return (0);
376 
377 	SS.len = 0;				/* Clean substitute space. */
378 	slen = psl;
379 	n = cp->u.s->n;
380 	lastempty = 1;
381 
382 	switch (n) {
383 	case 0:					/* Global */
384 		do {
385 			if (lastempty || match[0].rm_so != match[0].rm_eo) {
386 				/* Locate start of replaced string. */
387 				re_off = match[0].rm_so;
388 				/* Copy leading retained string. */
389 				cspace(&SS, s, re_off, APPEND);
390 				/* Add in regular expression. */
391 				regsub(&SS, s, cp->u.s->new);
392 			}
393 
394 			/* Move past this match. */
395 			if (match[0].rm_so != match[0].rm_eo) {
396 				s += match[0].rm_eo;
397 				slen -= match[0].rm_eo;
398 				lastempty = 0;
399 			} else {
400 				if (match[0].rm_so == 0)
401 					cspace(&SS,
402 					    s, match[0].rm_so + 1, APPEND);
403 				else
404 					cspace(&SS,
405 					    s + match[0].rm_so, 1, APPEND);
406 				s += match[0].rm_so + 1;
407 				slen -= match[0].rm_so + 1;
408 				lastempty = 1;
409 			}
410 		} while (slen > 0 && regexec_e(re, s, REG_NOTBOL, 0, slen));
411 		/* Copy trailing retained string. */
412 		if (slen > 0)
413 			cspace(&SS, s, slen, APPEND);
414 		break;
415 	default:				/* Nth occurrence */
416 		while (--n) {
417 			s += match[0].rm_eo;
418 			slen -= match[0].rm_eo;
419 			if (!regexec_e(re, s, REG_NOTBOL, 0, slen))
420 				return (0);
421 		}
422 		/* FALLTHROUGH */
423 	case 1:					/* 1st occurrence */
424 		/* Locate start of replaced string. */
425 		re_off = match[0].rm_so + (s - ps);
426 		/* Copy leading retained string. */
427 		cspace(&SS, ps, re_off, APPEND);
428 		/* Add in regular expression. */
429 		regsub(&SS, s, cp->u.s->new);
430 		/* Copy trailing retained string. */
431 		s += match[0].rm_eo;
432 		slen -= match[0].rm_eo;
433 		cspace(&SS, s, slen, APPEND);
434 		break;
435 	}
436 
437 	/*
438 	 * Swap the substitute space and the pattern space, and make sure
439 	 * that any leftover pointers into stdio memory get lost.
440 	 */
441 	tspace = PS;
442 	PS = SS;
443 	SS = tspace;
444 	SS.space = SS.back;
445 
446 	/* Handle the 'p' flag. */
447 	if (cp->u.s->p)
448 		OUT(ps)
449 
450 	/* Handle the 'w' flag. */
451 	if (cp->u.s->wfile && !pd) {
452 		if (cp->u.s->wfd == -1 && (cp->u.s->wfd = open(cp->u.s->wfile,
453 		    O_WRONLY|O_APPEND|O_CREAT|O_TRUNC, DEFFILEMODE)) == -1)
454 			err(FATAL, "%s: %s", cp->u.s->wfile, strerror(errno));
455 		if (write(cp->u.s->wfd, ps, psl) != psl)
456 			err(FATAL, "%s: %s", cp->u.s->wfile, strerror(errno));
457 	}
458 	return (1);
459 }
460 
461 /*
462  * Flush append requests.  Always called before reading a line,
463  * therefore it also resets the substitution done (sdone) flag.
464  */
465 static void
466 flush_appends(void)
467 {
468 	FILE *f;
469 	int count, i;
470 	char buf[8 * 1024];
471 
472 	for (i = 0; i < appendx; i++)
473 		switch (appends[i].type) {
474 		case AP_STRING:
475 			fwrite(appends[i].s, sizeof(char), appends[i].len,
476 			    stdout);
477 			break;
478 		case AP_FILE:
479 			/*
480 			 * Read files probably shouldn't be cached.  Since
481 			 * it's not an error to read a non-existent file,
482 			 * it's possible that another program is interacting
483 			 * with the sed script through the file system.  It
484 			 * would be truly bizarre, but possible.  It's probably
485 			 * not that big a performance win, anyhow.
486 			 */
487 			if ((f = fopen(appends[i].s, "r")) == NULL)
488 				break;
489 			while ((count =
490 			    fread(buf, sizeof(char), sizeof(buf), f)) > 0)
491 				(void)fwrite(buf, sizeof(char), count, stdout);
492 			(void)fclose(f);
493 			break;
494 		}
495 	if (ferror(stdout))
496 		err(FATAL, "stdout: %s", strerror(errno ? errno : EIO));
497 	appendx = sdone = 0;
498 }
499 
500 static void
501 lputs(char *s)
502 {
503 	int count;
504 	char *escapes, *p;
505 	struct winsize win;
506 	static int termwidth = -1;
507 
508 	if (termwidth == -1) {
509 		if ((p = getenv("COLUMNS")) != NULL)
510 			termwidth = atoi(p);
511 		else if (ioctl(STDOUT_FILENO, TIOCGWINSZ, &win) == 0 &&
512 		    win.ws_col > 0)
513 			termwidth = win.ws_col;
514 		else
515 			termwidth = 60;
516 	}
517 	for (count = 0; *s; ++s) {
518 		if (count >= termwidth) {
519 			(void)printf("\\\n");
520 			count = 0;
521 		}
522 		if (isascii((unsigned char)*s) && isprint((unsigned char)*s) &&
523 		    *s != '\\') {
524 			(void)putchar(*s);
525 			count++;
526 		} else {
527 			escapes = "\\\a\b\f\n\r\t\v";
528 			(void)putchar('\\');
529 			if ((p = strchr(escapes, *s)) != NULL) {
530 				(void)putchar("\\abfnrtv"[p - escapes]);
531 				count += 2;
532 			} else {
533 				(void)printf("%03o", *(u_char *)s);
534 				count += 4;
535 			}
536 		}
537 	}
538 	(void)putchar('$');
539 	(void)putchar('\n');
540 	if (ferror(stdout))
541 		err(FATAL, "stdout: %s", strerror(errno ? errno : EIO));
542 }
543 
544 static inline int
545 regexec_e(regex_t *preg, const char *string, int eflags, int nomatch, size_t slen)
546 {
547 	int eval;
548 #ifndef REG_STARTEND
549 	char *buf;
550 #endif
551 
552 	if (preg == NULL) {
553 		if (defpreg == NULL)
554 			err(FATAL, "first RE may not be empty");
555 	} else
556 		defpreg = preg;
557 
558 	/* Set anchors, discounting trailing newline (if any). */
559 	if (slen > 0 && string[slen - 1] == '\n')
560 		slen--;
561 
562 #ifndef REG_STARTEND
563 	if ((buf = malloc(slen + 1)) == NULL)
564 		err(1, NULL);
565 	(void)memcpy(buf, string, slen);
566 	buf[slen] = '\0';
567 	eval = regexec(defpreg, buf,
568 	    nomatch ? 0 : maxnsub + 1, match, eflags);
569 	free(buf);
570 #else
571 	match[0].rm_so = 0;
572 	match[0].rm_eo = slen;
573 	eval = regexec(defpreg, string,
574 	    nomatch ? 0 : maxnsub + 1, match, eflags | REG_STARTEND);
575 #endif
576 	switch(eval) {
577 	case 0:
578 		return (1);
579 	case REG_NOMATCH:
580 		return (0);
581 	}
582 	err(FATAL, "RE error: %s", strregerror(eval, defpreg));
583 	/* NOTREACHED */
584 	return (0);
585 }
586 
587 /*
588  * regsub - perform substitutions after a regexp match
589  * Based on a routine by Henry Spencer
590  */
591 static void
592 regsub(SPACE *sp, char *string, char *src)
593 {
594 	int len, no;
595 	char c, *dst;
596 
597 #define	NEEDSP(reqlen)							\
598 	if (sp->len + (reqlen) + 1 >= sp->blen) {			\
599 		size_t newlen = sp->blen + (reqlen) + 1024;		\
600 		sp->space = sp->back = xrealloc(sp->back, newlen);	\
601 		sp->blen = newlen;					\
602 		dst = sp->space + sp->len;				\
603 	}
604 
605 	dst = sp->space + sp->len;
606 	while ((c = *src++) != '\0') {
607 		if (c == '&')
608 			no = 0;
609 		else if (c == '\\' && isdigit((unsigned char)*src))
610 			no = *src++ - '0';
611 		else
612 			no = -1;
613 		if (no < 0) {		/* Ordinary character. */
614  			if (c == '\\' && (*src == '\\' || *src == '&'))
615  				c = *src++;
616 			NEEDSP(1);
617  			*dst++ = c;
618 			++sp->len;
619  		} else if (match[no].rm_so != -1 && match[no].rm_eo != -1) {
620 			len = match[no].rm_eo - match[no].rm_so;
621 			NEEDSP(len);
622 			memmove(dst, string + match[no].rm_so, len);
623 			dst += len;
624 			sp->len += len;
625 		}
626 	}
627 	NEEDSP(1);
628 	*dst = '\0';
629 }
630 
631 /*
632  * aspace --
633  *	Append the source space to the destination space, allocating new
634  *	space as necessary.
635  */
636 void
637 cspace(SPACE *sp, char *p, size_t len, enum e_spflag spflag)
638 {
639 	size_t tlen;
640 
641 	/* Make sure SPACE has enough memory and ramp up quickly. */
642 	tlen = sp->len + len + 1;
643 	if (tlen > sp->blen) {
644 		size_t newlen = tlen + 1024;
645 		sp->space = sp->back = xrealloc(sp->back, newlen);
646 		sp->blen = newlen;
647 	}
648 
649 	if (spflag == REPLACE)
650 		sp->len = 0;
651 
652 	memmove(sp->space + sp->len, p, len);
653 
654 	sp->space[sp->len += len] = '\0';
655 }
656 
657 /*
658  * Close all cached opened files and report any errors
659  */
660 void
661 cfclose(struct s_command *cp, struct s_command *end)
662 {
663 
664 	for (; cp != end; cp = cp->next)
665 		switch(cp->code) {
666 		case 's':
667 			if (cp->u.s->wfd != -1 && close(cp->u.s->wfd))
668 				err(FATAL,
669 				    "%s: %s", cp->u.s->wfile, strerror(errno));
670 			cp->u.s->wfd = -1;
671 			break;
672 		case 'w':
673 			if (cp->u.fd != -1 && close(cp->u.fd))
674 				err(FATAL, "%s: %s", cp->t, strerror(errno));
675 			cp->u.fd = -1;
676 			break;
677 		case '{':
678 			cfclose(cp->u.c, cp->next);
679 			break;
680 		}
681 }
682