xref: /csrg-svn/usr.bin/sed/process.c (revision 56064)
1 /*-
2  * Copyright (c) 1992 Diomidis Spinellis.
3  * Copyright (c) 1992 The Regents of the University of California.
4  * All rights reserved.
5  *
6  * This code is derived from software contributed to Berkeley by
7  * Diomidis Spinellis of Imperial College, University of London.
8  *
9  * %sccs.include.redist.c%
10  */
11 
12 #ifndef lint
13 static char sccsid[] = "@(#)process.c	5.4 (Berkeley) 08/27/92";
14 #endif /* not lint */
15 
16 #include <sys/types.h>
17 #include <sys/stat.h>
18 #include <sys/ioctl.h>
19 #include <sys/uio.h>
20 
21 #include <ctype.h>
22 #include <errno.h>
23 #include <fcntl.h>
24 #include <limits.h>
25 #include <regex.h>
26 #include <stdio.h>
27 #include <stdlib.h>
28 #include <string.h>
29 #include <unistd.h>
30 
31 #include "defs.h"
32 #include "extern.h"
33 
/*
 * A SPACE is a growable text buffer.  `space' is the text currently in
 * use; it is either equal to `back' or points at memory the SPACE does
 * not own (the code below copies such text into `back' before changing
 * its size).
 */
typedef struct {
	char	*space;		/* Text currently in use. */
	size_t	 len;		/* Number of bytes of text in use. */
	int	 deleted;	/* Nonzero once the text has been deleted. */
	char	*back;		/* Memory owned by this SPACE. */
	size_t	 blen;		/* Number of bytes allocated at `back'. */
} SPACE;

/* The hold, pattern and substitute spaces, in that order. */
static SPACE HS;
static SPACE PS;
static SPACE SS;

/* Shorthands for the hold space fields. */
#define	hs		HS.space
#define	hsl		HS.len

/* Shorthands for the pattern space fields. */
#define	pd		PS.deleted
#define	ps		PS.space
#define	psl		PS.len
48 static inline int	 applies __P((struct s_command *));
49 static void		 cspace __P((SPACE *, char *, size_t, int));
50 static void		 flush_appends __P((void));
51 static void		 lputs __P((char *));
52 static inline int	 regexec_e __P((regex_t *, const char *,
53 			    size_t, regmatch_t [], int));
54 static void		 regsub __P((regmatch_t *, char *, char *, SPACE *));
55 static int		 substitute __P((struct s_command *));
56 
/* Queue of pending 'a' and 'r' output, flushed by flush_appends(). */
struct s_appends *appends;	/* Array of queued append requests. */
static int appendx;		/* Number of requests currently queued. */
int appendnum;			/* Number of slots allocated in appends. */

static int lastaddr;		/* Set by applies if last address of a range. */
static int sdone;		/* If any substitutes since last line input. */

/*
 * Iov structure for 'w' commands: slot 0 is filled in with the text to
 * write, slot 1 is the fixed trailing newline.
 */
static struct iovec iov[2] = { { NULL, 0 }, { "\n", 1 } };

/* Most recently used RE and the match count recorded with it. */
static regex_t *defpreg;
static size_t defnmatch;
68 
69 void
70 process()
71 {
72 	struct s_command *cp;
73 	SPACE tspace;
74 	size_t len;
75 	char oldc, *p;
76 
77 	for (linenum = 0; ps = mf_fgets(&psl);) {
78 		pd = 0;
79 		cp = prog;
80 redirect:
81 		while (cp != NULL) {
82 			if (!applies(cp)) {
83 				cp = cp->next;
84 				continue;
85 			}
86 			switch (cp->code) {
87 			case '{':
88 				cp = cp->u.c;
89 				goto redirect;
90 			case 'a':
91 				if (appendx >= appendnum)
92 					appends = xrealloc(appends,
93 					    sizeof(struct s_appends) *
94 					    (appendnum *= 2));
95 				appends[appendx].type = AP_STRING;
96 				appends[appendx].s = cp->t;
97 				appendx++;
98 				break;
99 			case 'b':
100 				cp = cp->u.c;
101 				goto redirect;
102 			case 'c':
103 				pd = 1;
104 				psl = 0;
105 				if (cp->a2 == NULL || lastaddr)
106 					(void)printf("%s", cp->t);
107 				break;
108 			case 'd':
109 				if (pd)
110 					goto new;
111 				pd = 1;
112 				goto new;
113 			case 'D':
114 				if (pd)
115 					goto new;
116 				if ((p = strchr(ps, '\n')) == NULL)
117 					pd = 1;
118 				else {
119 					psl -= (p - ps) - 1;
120 					memmove(ps, p + 1, psl);
121 				}
122 				goto new;
123 			case 'g':
124 				ps = hs;
125 				psl = hsl;
126 				break;
127 			case 'G':
128 				cspace(&PS, hs, hsl, 1);
129 				break;
130 			case 'h':
131 				cspace(&HS, ps, psl, 0);
132 				break;
133 			case 'H':
134 				cspace(&HS, ps, psl, 1);
135 				break;
136 			case 'i':
137 				(void)printf("%s", cp->t);
138 				break;
139 			case 'l':
140 				lputs(ps);
141 				break;
142 			case 'n':
143 				if (!nflag && !pd)
144 					(void)printf("%s\n", ps);
145 				flush_appends();
146 				ps = mf_fgets(&psl);
147 #ifdef HISTORIC_PRACTICE
148 				if (ps == NULL)
149 					exit(0);
150 #endif
151 				pd = 0;
152 				break;
153 			case 'N':
154 				flush_appends();
155 				if (ps != PS.back)
156 					cspace(&PS, NULL, 0, 0);
157 				if ((p = mf_fgets(&len)) == NULL) {
158 					if (!nflag && !pd)
159 						(void)printf("%s\n", ps);
160 					exit(0);
161 				}
162 				cspace(&PS, p, len, 1);
163 				break;
164 			case 'p':
165 				if (pd)
166 					break;
167 				(void)printf("%s\n", ps);
168 				break;
169 			case 'P':
170 				if (pd)
171 					break;
172 				if ((p = strchr(ps, '\n')) != NULL) {
173 					oldc = *p;
174 					*p = '\0';
175 				}
176 				(void)printf("%s\n", ps);
177 				if (p != NULL)
178 					*p = oldc;
179 				break;
180 			case 'q':
181 				if (!nflag && !pd)
182 					(void)printf("%s\n", ps);
183 				flush_appends();
184 				exit(0);
185 			case 'r':
186 				if (appendx >= appendnum)
187 					appends = xrealloc(appends,
188 					    sizeof(struct s_appends) *
189 					    (appendnum *= 2));
190 				appends[appendx].type = AP_FILE;
191 				appends[appendx].s = cp->t;
192 				appendx++;
193 				break;
194 			case 's':
195 				sdone = substitute(cp);
196 				break;
197 			case 't':
198 				if (sdone) {
199 					sdone = 0;
200 					cp = cp->u.c;
201 					goto redirect;
202 				}
203 				break;
204 			case 'w':
205 				if (pd)
206 					break;
207 				if (cp->u.fd == -1 && (cp->u.fd = open(cp->t,
208 				    O_WRONLY|O_APPEND|O_CREAT|O_TRUNC,
209 				    DEFFILEMODE)) == -1)
210 					err(FATAL, "%s: %s\n",
211 					    cp->t, strerror(errno));
212 				iov[0].iov_base = ps;
213 				iov[0].iov_len = psl;
214 				if (writev(cp->u.fd, iov, 2) != psl + 1)
215 					err(FATAL, "%s: %s\n",
216 					    cp->t, strerror(errno));
217 				break;
218 			case 'x':
219 				tspace = PS;
220 				PS = HS;
221 				HS = tspace;
222 				break;
223 			case 'y':
224 				if (pd)
225 					break;
226 				for (p = ps, len = psl; len--; ++p)
227 					*p = cp->u.y[*p];
228 				break;
229 			case ':':
230 			case '}':
231 				break;
232 			case '=':
233 				(void)printf("%lu\n", linenum);
234 			}
235 			cp = cp->next;
236 		} /* for all cp */
237 
238 new:		if (!nflag && !pd)
239 			(void)printf("%s\n", ps);
240 		flush_appends();
241 	} /* for all lines */
242 }
243 
/*
 * TRUE if the address passed matches the current program state
 * (lastline, linenum, ps).  An AT_RE address is matched against the
 * pattern space, an AT_LINE address against the current line number,
 * and any other address type yields lastline.  The expansion is fully
 * parenthesized so that it can be combined with other operators.
 */
#define	MATCH(a)							\
	((a)->type == AT_RE ?						\
	    regexec_e((a)->u.r, ps, 0, NULL, 0) :			\
	    ((a)->type == AT_LINE ? linenum == (a)->u.l : lastline))
252 
253 /*
254  * Return TRUE if the command applies to the current line.  Sets the inrange
255  * flag to process ranges.  Interprets the non-select (``!'') flag.
256  */
257 static inline int
258 applies(cp)
259 	struct s_command *cp;
260 {
261 	int r;
262 
263 	lastaddr = 0;
264 	if (cp->a1 == NULL && cp->a2 == NULL)
265 		r = 1;
266 	else if (cp->a2)
267 		if (cp->inrange) {
268 			if (MATCH(cp->a2)) {
269 				cp->inrange = 0;
270 				lastaddr = 1;
271 			}
272 			r = 1;
273 		} else if (MATCH(cp->a1)) {
274 			/*
275 			 * If the second address is a number less than or
276 			 * equal to the line number first selected, only
277 			 * one line shall be selected.
278 			 *	-- POSIX 1003.2
279 			 */
280 			if (cp->a2->type == AT_LINE &&
281 			    linenum >= cp->a2->u.l)
282 				lastaddr = 1;
283 			else
284 				cp->inrange = 1;
285 			r = 1;
286 		} else
287 			r = 0;
288 	else
289 		r = MATCH(cp->a1);
290 	return (cp->nonsel ? ! r : r);
291 }
292 
293 /*
294  * substitute --
295  *	Do substitutions in the pattern space.  Currently, we build a
296  *	copy of the new pattern space in the substitute space structure
297  *	and then swap them.
298  */
299 static int
300 substitute(cp)
301 	struct s_command *cp;
302 {
303 	SPACE tspace;
304 	regex_t *re;
305 	size_t nsub;
306 	int n, re_off;
307 	char *endp, *s;
308 
309 	s = ps;
310 	re = cp->u.s->re;
311 	if (re == NULL) {
312 		nsub = 1;
313 		if (defpreg != NULL && cp->u.s->maxbref > defnmatch) {
314 			linenum = cp->u.s->linenum;
315 			err(COMPILE, "\\%d not defined in the RE",
316 			    cp->u.s->maxbref);
317 		}
318 	} else
319 		nsub = re->re_nsub + 1;
320 	if (!regexec_e(re, s, nsub, cp->u.s->pmatch, 0))
321 		return (0);
322 
323 	SS.len = 0;				/* Clean substitute space. */
324 	n = cp->u.s->n;
325 	switch (n) {
326 	case 0:					/* Global */
327 		do {
328 			/* Locate start of replaced string. */
329 			re_off = cp->u.s->pmatch[0].rm_so;
330 			/* Locate end of replaced string + 1. */
331 			endp = s + cp->u.s->pmatch[0].rm_eo;
332 			/* Copy leading retained string. */
333 			cspace(&SS, s, re_off, 0);
334 			/* Add in regular expression. */
335 			regsub(cp->u.s->pmatch, s, cp->u.s->new, &SS);
336 			/* Move past this match. */
337 			s += cp->u.s->pmatch[0].rm_eo;
338 		} while(regexec_e(re, s, nsub, cp->u.s->pmatch, REG_NOTBOL));
339 		/* Copy trailing retained string. */
340 		cspace(&SS, s, strlen(s), 0);
341 		break;
342 	default:				/* Nth occurrence */
343 		while (--n) {
344 			s += cp->u.s->pmatch[0].rm_eo;
345 			if (!regexec_e(re,
346 			    s, nsub, cp->u.s->pmatch, REG_NOTBOL))
347 				return (0);
348 		}
349 		/* FALLTHROUGH */
350 	case 1:					/* 1st occurrence */
351 		/* Locate start of replaced string. */
352 		re_off = cp->u.s->pmatch[0].rm_so + s - ps;
353 		/* Copy leading retained string. */
354 		cspace(&SS, ps, re_off, 0);
355 		/* Add in regular expression. */
356 		regsub(cp->u.s->pmatch, s, cp->u.s->new, &SS);
357 		/* Copy trailing retained string. */
358 		s += cp->u.s->pmatch[0].rm_eo;
359 		cspace(&SS, s, strlen(s), 0);
360 		break;
361 	}
362 
363 	/*
364 	 * Swap the substitute space and the pattern space, and make sure
365 	 * that any leftover pointers into stdio memory get lost.
366 	 */
367 	tspace = PS;
368 	PS = SS;
369 	SS = tspace;
370 	SS.space = SS.back;
371 
372 	/* Handle the 'p' flag. */
373 	if (cp->u.s->p)
374 		(void)printf("%s\n", ps);
375 
376 	/* Handle the 'w' flag. */
377 	if (cp->u.s->wfile && !pd) {
378 		if (cp->u.s->wfd == -1 && (cp->u.s->wfd = open(cp->u.s->wfile,
379 		    O_WRONLY|O_APPEND|O_CREAT|O_TRUNC, DEFFILEMODE)) == -1)
380 			err(FATAL, "%s: %s\n", cp->u.s->wfile, strerror(errno));
381 		iov[0].iov_base = ps;
382 		iov[0].iov_len = psl;
383 		if (writev(cp->u.s->wfd, iov, 2) != psl + 1)
384 			err(FATAL, "%s: %s\n", cp->u.s->wfile, strerror(errno));
385 	}
386 	return (1);
387 }
388 
389 /*
390  * Flush append requests.  Always called before reading a line,
391  * therefore it also resets the substitution done (sdone) flag.
392  */
393 static void
394 flush_appends()
395 {
396 	FILE *f;
397 	int count, i;
398 	char buf[8 * 1024];
399 
400 	for (i = 0; i < appendx; i++)
401 		switch (appends[i].type) {
402 		case AP_STRING:
403 			(void)printf("%s", appends[i].s);
404 			break;
405 		case AP_FILE:
406 			/*
407 			 * Read files probably shouldn't be cached.  Since
408 			 * it's not an error to read a non-existent file,
409 			 * it's possible that another program is interacting
410 			 * with the sed script through the file system.  It
411 			 * would be truly bizarre, but possible.  It's probably
412 			 * not that big a performance win, anyhow.
413 			 */
414 			if ((f = fopen(appends[i].s, "r")) == NULL)
415 				break;
416 			while (count = fread(buf, 1, sizeof(buf), f))
417 				(void)fwrite(buf, 1, count, stdout);
418 			(void)fclose(f);
419 			break;
420 		}
421 	if (ferror(stdout))
422 		err(FATAL, "stdout: %s", strerror(errno ? errno : EIO));
423 	appendx = 0;
424 	sdone = 0;
425 }
426 
427 static void
428 lputs(s)
429 	register char *s;
430 {
431 	register int count;
432 	register char *escapes, *p;
433 	struct winsize win;
434 	static int termwidth = -1;
435 
436 	if (termwidth == -1)
437 		if (p = getenv("COLUMNS"))
438 			termwidth = atoi(p);
439 		else if (ioctl(STDOUT_FILENO, TIOCGWINSZ, &win) == 0 &&
440 		    win.ws_col > 0)
441 			termwidth = win.ws_col;
442 		else
443 			termwidth = 60;
444 
445 	for (count = 0; *s; ++s) {
446 		if (count >= termwidth) {
447 			(void)printf("\\\n");
448 			count = 0;
449 		}
450 		if (isascii(*s) && isprint(*s) && *s != '\\') {
451 			(void)putchar(*s);
452 			count++;
453 		} else {
454 			escapes = "\\\a\b\f\n\r\t\v";
455 			(void)putchar('\\');
456 			if (p = strchr(escapes, *s)) {
457 				(void)putchar("\\abfnrtv"[p - escapes]);
458 				count += 2;
459 			} else {
460 				(void)printf("%03o", (u_char)*s);
461 				count += 4;
462 			}
463 		}
464 	}
465 	(void)putchar('$');
466 	(void)putchar('\n');
467 	if (ferror(stdout))
468 		err(FATAL, "stdout: %s", strerror(errno ? errno : EIO));
469 }
470 
471 static inline int
472 regexec_e(preg, string, nmatch, pmatch, eflags)
473 	regex_t *preg;
474 	const char *string;
475 	size_t nmatch;
476 	regmatch_t pmatch[];
477 	int eflags;
478 {
479 	int eval;
480 
481 	if (preg == NULL) {
482 		if (defpreg == NULL)
483 			err(FATAL, "first RE may not be empty");
484 	} else {
485 		defpreg = preg;
486 		defnmatch = nmatch;
487 	}
488 
489 	eval = regexec(defpreg,
490 	    string, pmatch == NULL ? 0 : defnmatch, pmatch, eflags);
491 	switch(eval) {
492 	case 0:
493 		return (1);
494 	case REG_NOMATCH:
495 		return (0);
496 	}
497 	err(FATAL, "RE error: %s", strregerror(eval, defpreg));
498 	/* NOTREACHED */
499 }
500 
501 /*
502  * regsub - perform substitutions after a regexp match
503  * Based on a routine by Henry Spencer
504  */
505 static void
506 regsub(pmatch, string, src, sp)
507 	regmatch_t *pmatch;
508 	char *string, *src;
509 	SPACE *sp;
510 {
511 	register int len, no;
512 	register char c, *dst;
513 
514 #define	NEEDSP(reqlen)							\
515 	if (sp->len >= sp->blen - (reqlen) - 1) {			\
516 		sp->blen += (reqlen) + 1024;				\
517 		sp->space = sp->back = xrealloc(sp->back, sp->blen);	\
518 		dst = sp->space + sp->len;				\
519 	}
520 
521 	dst = sp->space + sp->len;
522 	while ((c = *src++) != '\0') {
523 		if (c == '&')
524 			no = 0;
525 		else if (c == '\\' && isdigit(*src))
526 			no = *src++ - '0';
527 		else
528 			no = -1;
529 		if (no < 0) {		/* Ordinary character. */
530  			if (c == '\\' && (*src == '\\' || *src == '&'))
531  				c = *src++;
532 			NEEDSP(1);
533  			*dst++ = c;
534 			++sp->len;
535  		} else if (pmatch[no].rm_so != -1 && pmatch[no].rm_eo != -1) {
536 			len = pmatch[no].rm_eo - pmatch[no].rm_so;
537 			NEEDSP(len);
538 			memmove(dst, string + pmatch[no].rm_so, len);
539 			dst += len;
540 			sp->len += len;
541 		}
542 	}
543 	NEEDSP(1);
544 	*dst = '\0';
545 }
546 
547 /*
548  * aspace --
549  *	Append the source space to the destination space, allocating new
550  *	space as necessary.
551  */
552 static void
553 cspace(sp, p, len, append)
554 	SPACE *sp;
555 	char *p;
556 	size_t len;
557 	int append;
558 {
559 	size_t tlen;
560 	int needcopy;
561 
562 	/* Current pointer may point to something else at the moment. */
563 	needcopy = sp->space != sp->back;
564 
565 	/*
566 	 * Make sure SPACE has enough memory and ramp up quickly.
567 	 * Add in two extra bytes, one for the newline, one for a
568 	 * terminating NULL.
569 	 */
570 	tlen = sp->len + len + 2;
571 	if (tlen > sp->blen) {
572 		sp->blen = tlen + 1024;
573 		sp->back = xrealloc(sp->back, sp->blen);
574 	}
575 
576 	if (needcopy)
577 		memmove(sp->back, sp->space, sp->len + 1);
578 	sp->space = sp->back;
579 
580 	/* May just be copying out of a stdio buffer. */
581 	if (len == NULL)
582 		return;
583 
584 	/* Append a separating newline. */
585 	if (append)
586 		sp->space[sp->len++] = '\n';
587 
588 	/* Append the new stuff, plus its terminating NULL. */
589 	memmove(sp->space + sp->len, p, len + 1);
590 	sp->len += len;
591 }
592 
593 /*
594  * Close all cached opened files and report any errors
595  */
596 void
597 cfclose(cp)
598 	register struct s_command *cp;
599 {
600 
601 	for (; cp != NULL; cp = cp->next)
602 		switch(cp->code) {
603 		case 's':
604 			if (cp->u.s->wfd != -1 && close(cp->u.s->wfd))
605 				err(FATAL,
606 				    "%s: %s", cp->u.s->wfile, strerror(errno));
607 			cp->u.s->wfd = -1;
608 			break;
609 		case 'w':
610 			if (cp->u.fd != -1 && close(cp->u.fd))
611 				err(FATAL, "%s: %s", cp->t, strerror(errno));
612 			cp->u.fd = -1;
613 			break;
614 		case '{':
615 			cfclose(cp->u.c);
616 			break;
617 		}
618 }
619