xref: /csrg-svn/usr.bin/sed/process.c (revision 56070)
1 /*-
2  * Copyright (c) 1992 Diomidis Spinellis.
3  * Copyright (c) 1992 The Regents of the University of California.
4  * All rights reserved.
5  *
6  * This code is derived from software contributed to Berkeley by
7  * Diomidis Spinellis of Imperial College, University of London.
8  *
9  * %sccs.include.redist.c%
10  */
11 
12 #ifndef lint
13 static char sccsid[] = "@(#)process.c	5.5 (Berkeley) 08/27/92";
14 #endif /* not lint */
15 
16 #include <sys/types.h>
17 #include <sys/stat.h>
18 #include <sys/ioctl.h>
19 #include <sys/uio.h>
20 
21 #include <ctype.h>
22 #include <errno.h>
23 #include <fcntl.h>
24 #include <limits.h>
25 #include <regex.h>
26 #include <stdio.h>
27 #include <stdlib.h>
28 #include <string.h>
29 #include <unistd.h>
30 
31 #include "defs.h"
32 #include "extern.h"
33 
/*
 * A SPACE is a counted string buffer.  `space' is the pointer currently in
 * use and may differ from `back', the backing memory of `blen' bytes that
 * the buffer routines in this file (re)allocate.
 */
typedef struct {
	char *space;		/* Current space pointer. */
	size_t len;		/* Current length. */
	int deleted;		/* If deleted. */
	char *back;		/* Backing memory. */
	size_t blen;		/* Backing memory length. */
} SPACE;

/* Hold space, pattern space and substitute (scratch) space. */
static SPACE HS, PS, SS;

/* Shorthand for the pattern space (p*) and hold space (h*) fields. */
#define	pd		PS.deleted
#define	ps		PS.space
#define	psl		PS.len
#define	hs		HS.space
#define	hsl		HS.len

/* File-local helpers, defined further down. */
static inline int	 applies __P((struct s_command *));
static void		 cspace __P((SPACE *, char *, size_t, int));
static void		 flush_appends __P((void));
static void		 lputs __P((char *));
static inline int	 regexec_e __P((regex_t *, const char *,
			    size_t, regmatch_t [], int));
static void		 regsub __P((regmatch_t *, char *, char *, SPACE *));
static int		 substitute __P((struct s_command *));

/* Pending append requests, written out by flush_appends(). */
struct s_appends *appends;	/* Array of pointers to strings to append. */
static int appendx;		/* Index into appends array. */
int appendnum;			/* Size of appends array. */

static int lastaddr;		/* Set by applies if last address of a range. */
static int sdone;		/* If any substitutes since last line input. */

/*
 * Iov structure for 'w' commands: slot 0 is filled in with the pattern
 * space before each writev(), slot 1 is the fixed trailing newline.
 */
static struct iovec iov[2] = { { NULL, 0 }, { "\n", 1 } };

/* Most recently used regular expression and the match count that goes with it. */
static regex_t *defpreg;
static size_t defnmatch;
68 
69 void
70 process()
71 {
72 	struct s_command *cp;
73 	SPACE tspace;
74 	size_t len;
75 	char oldc, *p;
76 
77 	for (linenum = 0; ps = mf_fgets(&psl);) {
78 		pd = 0;
79 		cp = prog;
80 redirect:
81 		while (cp != NULL) {
82 			if (!applies(cp)) {
83 				cp = cp->next;
84 				continue;
85 			}
86 			switch (cp->code) {
87 			case '{':
88 				cp = cp->u.c;
89 				goto redirect;
90 			case 'a':
91 				if (appendx >= appendnum)
92 					appends = xrealloc(appends,
93 					    sizeof(struct s_appends) *
94 					    (appendnum *= 2));
95 				appends[appendx].type = AP_STRING;
96 				appends[appendx].s = cp->t;
97 				appendx++;
98 				break;
99 			case 'b':
100 				cp = cp->u.c;
101 				goto redirect;
102 			case 'c':
103 				pd = 1;
104 				psl = 0;
105 				if (cp->a2 == NULL || lastaddr)
106 					(void)printf("%s", cp->t);
107 				break;
108 			case 'd':
109 				pd = 1;
110 				goto new;
111 			case 'D':
112 				if (pd)
113 					goto new;
114 				if ((p = strchr(ps, '\n')) == NULL)
115 					pd = 1;
116 				else {
117 					psl -= (p - ps) - 1;
118 					memmove(ps, p + 1, psl);
119 				}
120 				goto new;
121 			case 'g':
122 				ps = hs;
123 				psl = hsl;
124 				break;
125 			case 'G':
126 				cspace(&PS, hs, hsl, 1);
127 				break;
128 			case 'h':
129 				cspace(&HS, ps, psl, 0);
130 				break;
131 			case 'H':
132 				cspace(&HS, ps, psl, 1);
133 				break;
134 			case 'i':
135 				(void)printf("%s", cp->t);
136 				break;
137 			case 'l':
138 				lputs(ps);
139 				break;
140 			case 'n':
141 				if (!nflag && !pd)
142 					(void)printf("%s\n", ps);
143 				flush_appends();
144 				ps = mf_fgets(&psl);
145 #ifdef HISTORIC_PRACTICE
146 				if (ps == NULL)
147 					exit(0);
148 #endif
149 				pd = 0;
150 				break;
151 			case 'N':
152 				flush_appends();
153 				if (ps != PS.back)
154 					cspace(&PS, NULL, 0, 0);
155 				if ((p = mf_fgets(&len)) == NULL) {
156 					if (!nflag && !pd)
157 						(void)printf("%s\n", ps);
158 					exit(0);
159 				}
160 				cspace(&PS, p, len, 1);
161 				break;
162 			case 'p':
163 				if (pd)
164 					break;
165 				(void)printf("%s\n", ps);
166 				break;
167 			case 'P':
168 				if (pd)
169 					break;
170 				if ((p = strchr(ps, '\n')) != NULL) {
171 					oldc = *p;
172 					*p = '\0';
173 				}
174 				(void)printf("%s\n", ps);
175 				if (p != NULL)
176 					*p = oldc;
177 				break;
178 			case 'q':
179 				if (!nflag && !pd)
180 					(void)printf("%s\n", ps);
181 				flush_appends();
182 				exit(0);
183 			case 'r':
184 				if (appendx >= appendnum)
185 					appends = xrealloc(appends,
186 					    sizeof(struct s_appends) *
187 					    (appendnum *= 2));
188 				appends[appendx].type = AP_FILE;
189 				appends[appendx].s = cp->t;
190 				appendx++;
191 				break;
192 			case 's':
193 				sdone = substitute(cp);
194 				break;
195 			case 't':
196 				if (sdone) {
197 					sdone = 0;
198 					cp = cp->u.c;
199 					goto redirect;
200 				}
201 				break;
202 			case 'w':
203 				if (pd)
204 					break;
205 				if (cp->u.fd == -1 && (cp->u.fd = open(cp->t,
206 				    O_WRONLY|O_APPEND|O_CREAT|O_TRUNC,
207 				    DEFFILEMODE)) == -1)
208 					err(FATAL, "%s: %s\n",
209 					    cp->t, strerror(errno));
210 				iov[0].iov_base = ps;
211 				iov[0].iov_len = psl;
212 				if (writev(cp->u.fd, iov, 2) != psl + 1)
213 					err(FATAL, "%s: %s\n",
214 					    cp->t, strerror(errno));
215 				break;
216 			case 'x':
217 				tspace = PS;
218 				PS = HS;
219 				HS = tspace;
220 				break;
221 			case 'y':
222 				if (pd)
223 					break;
224 				for (p = ps, len = psl; len--; ++p)
225 					*p = cp->u.y[*p];
226 				break;
227 			case ':':
228 			case '}':
229 				break;
230 			case '=':
231 				(void)printf("%lu\n", linenum);
232 			}
233 			cp = cp->next;
234 		} /* for all cp */
235 
236 new:		if (!nflag && !pd)
237 			(void)printf("%s\n", ps);
238 		flush_appends();
239 	} /* for all lines */
240 }
241 
242 /*
243  * TRUE if the address passed matches the current program state
244  * (lastline, linenumber, ps).
245  */
246 #define	MATCH(a)							\
247 	(a)->type == AT_RE ?						\
248 	    regexec_e((a)->u.r, ps, 0, NULL, 0) :			\
249 	    (a)->type == AT_LINE ? linenum == (a)->u.l : lastline
250 
251 /*
252  * Return TRUE if the command applies to the current line.  Sets the inrange
253  * flag to process ranges.  Interprets the non-select (``!'') flag.
254  */
255 static inline int
256 applies(cp)
257 	struct s_command *cp;
258 {
259 	int r;
260 
261 	lastaddr = 0;
262 	if (cp->a1 == NULL && cp->a2 == NULL)
263 		r = 1;
264 	else if (cp->a2)
265 		if (cp->inrange) {
266 			if (MATCH(cp->a2)) {
267 				cp->inrange = 0;
268 				lastaddr = 1;
269 			}
270 			r = 1;
271 		} else if (MATCH(cp->a1)) {
272 			/*
273 			 * If the second address is a number less than or
274 			 * equal to the line number first selected, only
275 			 * one line shall be selected.
276 			 *	-- POSIX 1003.2
277 			 */
278 			if (cp->a2->type == AT_LINE &&
279 			    linenum >= cp->a2->u.l)
280 				lastaddr = 1;
281 			else
282 				cp->inrange = 1;
283 			r = 1;
284 		} else
285 			r = 0;
286 	else
287 		r = MATCH(cp->a1);
288 	return (cp->nonsel ? ! r : r);
289 }
290 
291 /*
292  * substitute --
293  *	Do substitutions in the pattern space.  Currently, we build a
294  *	copy of the new pattern space in the substitute space structure
295  *	and then swap them.
296  */
297 static int
298 substitute(cp)
299 	struct s_command *cp;
300 {
301 	SPACE tspace;
302 	regex_t *re;
303 	size_t nsub;
304 	int n, re_off;
305 	char *endp, *s;
306 
307 	s = ps;
308 	re = cp->u.s->re;
309 	if (re == NULL) {
310 		nsub = 1;
311 		if (defpreg != NULL && cp->u.s->maxbref > defnmatch) {
312 			linenum = cp->u.s->linenum;
313 			err(COMPILE, "\\%d not defined in the RE",
314 			    cp->u.s->maxbref);
315 		}
316 	} else
317 		nsub = re->re_nsub + 1;
318 	if (!regexec_e(re, s, nsub, cp->u.s->pmatch, 0))
319 		return (0);
320 
321 	SS.len = 0;				/* Clean substitute space. */
322 	n = cp->u.s->n;
323 	switch (n) {
324 	case 0:					/* Global */
325 		do {
326 			/* Locate start of replaced string. */
327 			re_off = cp->u.s->pmatch[0].rm_so;
328 			/* Locate end of replaced string + 1. */
329 			endp = s + cp->u.s->pmatch[0].rm_eo;
330 			/* Copy leading retained string. */
331 			cspace(&SS, s, re_off, 0);
332 			/* Add in regular expression. */
333 			regsub(cp->u.s->pmatch, s, cp->u.s->new, &SS);
334 			/* Move past this match. */
335 			s += cp->u.s->pmatch[0].rm_eo;
336 		} while(regexec_e(re, s, nsub, cp->u.s->pmatch, REG_NOTBOL));
337 		/* Copy trailing retained string. */
338 		cspace(&SS, s, strlen(s), 0);
339 		break;
340 	default:				/* Nth occurrence */
341 		while (--n) {
342 			s += cp->u.s->pmatch[0].rm_eo;
343 			if (!regexec_e(re,
344 			    s, nsub, cp->u.s->pmatch, REG_NOTBOL))
345 				return (0);
346 		}
347 		/* FALLTHROUGH */
348 	case 1:					/* 1st occurrence */
349 		/* Locate start of replaced string. */
350 		re_off = cp->u.s->pmatch[0].rm_so + s - ps;
351 		/* Copy leading retained string. */
352 		cspace(&SS, ps, re_off, 0);
353 		/* Add in regular expression. */
354 		regsub(cp->u.s->pmatch, s, cp->u.s->new, &SS);
355 		/* Copy trailing retained string. */
356 		s += cp->u.s->pmatch[0].rm_eo;
357 		cspace(&SS, s, strlen(s), 0);
358 		break;
359 	}
360 
361 	/*
362 	 * Swap the substitute space and the pattern space, and make sure
363 	 * that any leftover pointers into stdio memory get lost.
364 	 */
365 	tspace = PS;
366 	PS = SS;
367 	SS = tspace;
368 	SS.space = SS.back;
369 
370 	/* Handle the 'p' flag. */
371 	if (cp->u.s->p)
372 		(void)printf("%s\n", ps);
373 
374 	/* Handle the 'w' flag. */
375 	if (cp->u.s->wfile && !pd) {
376 		if (cp->u.s->wfd == -1 && (cp->u.s->wfd = open(cp->u.s->wfile,
377 		    O_WRONLY|O_APPEND|O_CREAT|O_TRUNC, DEFFILEMODE)) == -1)
378 			err(FATAL, "%s: %s\n", cp->u.s->wfile, strerror(errno));
379 		iov[0].iov_base = ps;
380 		iov[0].iov_len = psl;
381 		if (writev(cp->u.s->wfd, iov, 2) != psl + 1)
382 			err(FATAL, "%s: %s\n", cp->u.s->wfile, strerror(errno));
383 	}
384 	return (1);
385 }
386 
387 /*
388  * Flush append requests.  Always called before reading a line,
389  * therefore it also resets the substitution done (sdone) flag.
390  */
391 static void
392 flush_appends()
393 {
394 	FILE *f;
395 	int count, i;
396 	char buf[8 * 1024];
397 
398 	for (i = 0; i < appendx; i++)
399 		switch (appends[i].type) {
400 		case AP_STRING:
401 			(void)printf("%s", appends[i].s);
402 			break;
403 		case AP_FILE:
404 			/*
405 			 * Read files probably shouldn't be cached.  Since
406 			 * it's not an error to read a non-existent file,
407 			 * it's possible that another program is interacting
408 			 * with the sed script through the file system.  It
409 			 * would be truly bizarre, but possible.  It's probably
410 			 * not that big a performance win, anyhow.
411 			 */
412 			if ((f = fopen(appends[i].s, "r")) == NULL)
413 				break;
414 			while (count = fread(buf, 1, sizeof(buf), f))
415 				(void)fwrite(buf, 1, count, stdout);
416 			(void)fclose(f);
417 			break;
418 		}
419 	if (ferror(stdout))
420 		err(FATAL, "stdout: %s", strerror(errno ? errno : EIO));
421 	appendx = 0;
422 	sdone = 0;
423 }
424 
425 static void
426 lputs(s)
427 	register char *s;
428 {
429 	register int count;
430 	register char *escapes, *p;
431 	struct winsize win;
432 	static int termwidth = -1;
433 
434 	if (termwidth == -1)
435 		if (p = getenv("COLUMNS"))
436 			termwidth = atoi(p);
437 		else if (ioctl(STDOUT_FILENO, TIOCGWINSZ, &win) == 0 &&
438 		    win.ws_col > 0)
439 			termwidth = win.ws_col;
440 		else
441 			termwidth = 60;
442 
443 	for (count = 0; *s; ++s) {
444 		if (count >= termwidth) {
445 			(void)printf("\\\n");
446 			count = 0;
447 		}
448 		if (isascii(*s) && isprint(*s) && *s != '\\') {
449 			(void)putchar(*s);
450 			count++;
451 		} else {
452 			escapes = "\\\a\b\f\n\r\t\v";
453 			(void)putchar('\\');
454 			if (p = strchr(escapes, *s)) {
455 				(void)putchar("\\abfnrtv"[p - escapes]);
456 				count += 2;
457 			} else {
458 				(void)printf("%03o", (u_char)*s);
459 				count += 4;
460 			}
461 		}
462 	}
463 	(void)putchar('$');
464 	(void)putchar('\n');
465 	if (ferror(stdout))
466 		err(FATAL, "stdout: %s", strerror(errno ? errno : EIO));
467 }
468 
469 static inline int
470 regexec_e(preg, string, nmatch, pmatch, eflags)
471 	regex_t *preg;
472 	const char *string;
473 	size_t nmatch;
474 	regmatch_t pmatch[];
475 	int eflags;
476 {
477 	int eval;
478 
479 	if (preg == NULL) {
480 		if (defpreg == NULL)
481 			err(FATAL, "first RE may not be empty");
482 	} else {
483 		defpreg = preg;
484 		defnmatch = nmatch;
485 	}
486 
487 	eval = regexec(defpreg,
488 	    string, pmatch == NULL ? 0 : defnmatch, pmatch, eflags);
489 	switch(eval) {
490 	case 0:
491 		return (1);
492 	case REG_NOMATCH:
493 		return (0);
494 	}
495 	err(FATAL, "RE error: %s", strregerror(eval, defpreg));
496 	/* NOTREACHED */
497 }
498 
499 /*
500  * regsub - perform substitutions after a regexp match
501  * Based on a routine by Henry Spencer
502  */
503 static void
504 regsub(pmatch, string, src, sp)
505 	regmatch_t *pmatch;
506 	char *string, *src;
507 	SPACE *sp;
508 {
509 	register int len, no;
510 	register char c, *dst;
511 
512 #define	NEEDSP(reqlen)							\
513 	if (sp->len >= sp->blen - (reqlen) - 1) {			\
514 		sp->blen += (reqlen) + 1024;				\
515 		sp->space = sp->back = xrealloc(sp->back, sp->blen);	\
516 		dst = sp->space + sp->len;				\
517 	}
518 
519 	dst = sp->space + sp->len;
520 	while ((c = *src++) != '\0') {
521 		if (c == '&')
522 			no = 0;
523 		else if (c == '\\' && isdigit(*src))
524 			no = *src++ - '0';
525 		else
526 			no = -1;
527 		if (no < 0) {		/* Ordinary character. */
528  			if (c == '\\' && (*src == '\\' || *src == '&'))
529  				c = *src++;
530 			NEEDSP(1);
531  			*dst++ = c;
532 			++sp->len;
533  		} else if (pmatch[no].rm_so != -1 && pmatch[no].rm_eo != -1) {
534 			len = pmatch[no].rm_eo - pmatch[no].rm_so;
535 			NEEDSP(len);
536 			memmove(dst, string + pmatch[no].rm_so, len);
537 			dst += len;
538 			sp->len += len;
539 		}
540 	}
541 	NEEDSP(1);
542 	*dst = '\0';
543 }
544 
545 /*
546  * aspace --
547  *	Append the source space to the destination space, allocating new
548  *	space as necessary.
549  */
550 static void
551 cspace(sp, p, len, append)
552 	SPACE *sp;
553 	char *p;
554 	size_t len;
555 	int append;
556 {
557 	size_t tlen;
558 	int needcopy;
559 
560 	/* Current pointer may point to something else at the moment. */
561 	needcopy = sp->space != sp->back;
562 
563 	/*
564 	 * Make sure SPACE has enough memory and ramp up quickly.
565 	 * Add in two extra bytes, one for the newline, one for a
566 	 * terminating NULL.
567 	 */
568 	tlen = sp->len + len + 2;
569 	if (tlen > sp->blen) {
570 		sp->blen = tlen + 1024;
571 		sp->back = xrealloc(sp->back, sp->blen);
572 	}
573 
574 	if (needcopy)
575 		memmove(sp->back, sp->space, sp->len + 1);
576 	sp->space = sp->back;
577 
578 	/* May just be copying out of a stdio buffer. */
579 	if (len == NULL)
580 		return;
581 
582 	/* Append a separating newline. */
583 	if (append)
584 		sp->space[sp->len++] = '\n';
585 
586 	/* Append the new stuff, plus its terminating NULL. */
587 	memmove(sp->space + sp->len, p, len + 1);
588 	sp->len += len;
589 }
590 
591 /*
592  * Close all cached opened files and report any errors
593  */
594 void
595 cfclose(cp)
596 	register struct s_command *cp;
597 {
598 
599 	for (; cp != NULL; cp = cp->next)
600 		switch(cp->code) {
601 		case 's':
602 			if (cp->u.s->wfd != -1 && close(cp->u.s->wfd))
603 				err(FATAL,
604 				    "%s: %s", cp->u.s->wfile, strerror(errno));
605 			cp->u.s->wfd = -1;
606 			break;
607 		case 'w':
608 			if (cp->u.fd != -1 && close(cp->u.fd))
609 				err(FATAL, "%s: %s", cp->t, strerror(errno));
610 			cp->u.fd = -1;
611 			break;
612 		case '{':
613 			cfclose(cp->u.c);
614 			break;
615 		}
616 }
617