xref: /csrg-svn/usr.bin/sed/process.c (revision 55997)
1 /*-
2  * Copyright (c) 1992 Diomidis Spinellis.
3  * Copyright (c) 1992 The Regents of the University of California.
4  * All rights reserved.
5  *
6  * This code is derived from software contributed to Berkeley by
7  * Diomidis Spinellis of Imperial College, University of London.
8  *
9  * %sccs.include.redist.c%
10  */
11 
12 #ifndef lint
13 static char sccsid[] = "@(#)process.c	5.1 (Berkeley) 08/23/92";
14 #endif /* not lint */
15 
16 #include <sys/types.h>
17 #include <sys/stat.h>
18 #include <sys/ioctl.h>
19 #include <sys/uio.h>
20 
21 #include <ctype.h>
22 #include <errno.h>
23 #include <fcntl.h>
24 #include <limits.h>
25 #include <regex.h>
26 #include <stdio.h>
27 #include <stdlib.h>
28 #include <string.h>
29 #include <unistd.h>
30 
31 #include "defs.h"
32 #include "extern.h"
33 
/*
 * A SPACE describes one of the text buffers used while running the script.
 * The text currently in use is reached through "space"; "back" is the
 * memory owned by the SPACE.  The two pointers can differ, in which case
 * cspace() copies the text into the backing memory before extending it.
 */
typedef struct {
	char *space;		/* Text currently held by this space. */
	size_t len;		/* Length of that text. */
	int deleted;		/* Non-zero once the text has been deleted. */
	char *back;		/* Backing memory owned by this space. */
	size_t blen;		/* Size of the backing memory. */
} SPACE;

static SPACE HS;		/* Hold space. */
static SPACE PS;		/* Pattern space. */
static SPACE SS;		/* Substitute (scratch) space. */

/* Shorthand for the pattern space and hold space fields used most often. */
#define	ps		PS.space
#define	psl		PS.len
#define	pd		PS.deleted
#define	hs		HS.space
#define	hsl		HS.len
47 
48 static inline int	 applies __P((struct s_command *));
49 static void		 cspace __P((SPACE *, char *, size_t, int));
50 static void		 flush_appends __P((void));
51 static void		 lputs __P((char *));
52 static inline int	 match __P((struct s_addr *));
53 static int		 regexec_check __P((regex_t *, const char *,
54 			    int, regmatch_t[], int));
55 static void		 regsub __P((regmatch_t *, char *, char *, SPACE *));
56 static int		 substitute __P((struct s_command *));
57 
/*
 * Append requests queued by the 'a' and 'r' commands; they are written out
 * and the queue is emptied by flush_appends().
 */
struct s_appends *appends;	/* Array of queued append requests. */
int appendnum;			/* Size of appends array. */
static int appendx;		/* Index of the next free appends slot. */

static int lastaddr;		/* Set by applies if last address of a range. */
static int sdone;		/* If any substitutes since last line input. */

/*
 * Iov structure for 'w' commands: the first element is pointed at the
 * text to write, the second always supplies the trailing newline.
 */
static struct iovec iov[2] = {
	{ NULL, 0 },
	{ "\n", 1 }
};
66 
67 void
68 process()
69 {
70 	struct s_command *cp;
71 	SPACE tspace;
72 	size_t len;
73 	char oldc, *p;
74 
75 	for (linenum = 0; ps = mf_fgets(&psl);) {
76 		pd = 0;
77 		cp = prog;
78 redirect:
79 		while (cp != NULL) {
80 			if (!applies(cp)) {
81 				cp = cp->next;
82 				continue;
83 			}
84 			switch (cp->code) {
85 			case '{':
86 				cp = cp->u.c;
87 				goto redirect;
88 			case 'a':
89 				if (appendx >= appendnum)
90 					appends = xrealloc(appends,
91 					    sizeof(struct s_appends) *
92 					    (appendnum *= 2));
93 				appends[appendx].type = AP_STRING;
94 				appends[appendx].s = cp->t;
95 				appendx++;
96 				break;
97 			case 'b':
98 				cp = cp->u.c;
99 				goto redirect;
100 			case 'c':
101 				pd = 1;
102 				psl = 0;
103 				if (cp->a2 == NULL || lastaddr)
104 					(void)printf("%s", cp->t);
105 				break;
106 			case 'd':
107 				if (pd)
108 					goto new;
109 				psl = 0;
110 				ps[0] = '\0';
111 				goto new;
112 			case 'D':
113 				if (pd)
114 					goto new;
115 				if ((p = strchr(ps, '\n')) == NULL) {
116 					psl = 0;
117 					ps[0] = '\0';
118 				} else {
119 					psl -= (p - ps) - 1;
120 					memmove(ps, p + 1, psl);
121 				}
122 				goto new;
123 			case 'g':
124 				ps = hs;
125 				psl = hsl;
126 				break;
127 			case 'G':
128 				cspace(&PS, hs, hsl, 1);
129 				break;
130 			case 'h':
131 				cspace(&HS, ps, psl, 0);
132 				break;
133 			case 'H':
134 				cspace(&HS, ps, psl, 1);
135 				break;
136 			case 'i':
137 				(void)printf("%s", cp->t);
138 				break;
139 			case 'l':
140 				lputs(ps);
141 				break;
142 			case 'n':
143 				if (!nflag && !pd)
144 					(void)printf("%s\n", ps);
145 				flush_appends();
146 				ps = mf_fgets(&psl);
147 #ifdef HISTORIC_PRACTICE
148 				if (ps == NULL)
149 					exit(0);
150 #endif
151 				pd = 0;
152 				break;
153 			case 'N':
154 				flush_appends();
155 				if (ps != PS.back)
156 					cspace(&PS, NULL, 0, 0);
157 				if ((p = mf_fgets(&len)) == NULL) {
158 					if (!nflag && !pd)
159 						(void)printf("%s\n", ps);
160 					exit(0);
161 				}
162 				cspace(&PS, p, len, 1);
163 				break;
164 			case 'p':
165 				if (pd)
166 					break;
167 				(void)printf("%s\n", ps);
168 				break;
169 			case 'P':
170 				if (pd)
171 					break;
172 				if ((p = strchr(ps, '\n')) != NULL) {
173 					oldc = *p;
174 					*p = '\0';
175 				}
176 				(void)printf("%s\n", ps);
177 				if (p != NULL)
178 					*p = oldc;
179 				break;
180 			case 'q':
181 				if (!nflag && !pd)
182 					(void)printf("%s\n", ps);
183 				flush_appends();
184 				exit(0);
185 			case 'r':
186 				if (appendx >= appendnum)
187 					appends = xrealloc(appends,
188 					    sizeof(struct s_appends) *
189 					    (appendnum *= 2));
190 				appends[appendx].type = AP_FILE;
191 				appends[appendx].s = cp->t;
192 				appendx++;
193 				break;
194 			case 's':
195 				sdone = substitute(cp);
196 				break;
197 			case 't':
198 				if (sdone) {
199 					sdone = 0;
200 					cp = cp->u.c;
201 					goto redirect;
202 				}
203 				break;
204 			case 'w':
205 				if (pd)
206 					break;
207 				if (cp->u.fd == -1 && (cp->u.fd = open(cp->t,
208 				    O_WRONLY|O_APPEND|O_CREAT|O_TRUNC,
209 				    DEFFILEMODE)) == -1)
210 					err(FATAL, "%s: %s\n",
211 					    cp->t, strerror(errno));
212 				iov[0].iov_base = ps;
213 				iov[0].iov_len = psl;
214 				if (writev(cp->u.fd, iov, 2) != psl + 1)
215 					err(FATAL, "%s: %s\n",
216 					    cp->t, strerror(errno));
217 				break;
218 			case 'x':
219 				tspace = PS;
220 				PS = HS;
221 				HS = tspace;
222 				break;
223 			case 'y':
224 				if (pd)
225 					break;
226 				for (p = ps, len = psl; len--; ++p)
227 					*p = cp->u.y[*p];
228 				break;
229 			case ':':
230 			case '}':
231 				break;
232 			case '=':
233 				(void)printf("%lu\n", linenum);
234 			}
235 			cp = cp->next;
236 		} /* for all cp */
237 
238 new:		if (!nflag && !pd)
239 			(void)printf("%s\n", ps);
240 		flush_appends();
241 	} /* for all lines */
242 }
243 
244 /*
245  * Return TRUE if the command applies to the current line.  Sets the inrange
246  * flag to process ranges.  Interprets the non-select (``!'') flag.
247  */
248 static inline int
249 applies(cp)
250 	struct s_command *cp;
251 {
252 	int r;
253 
254 	lastaddr = 0;
255 	if (cp->a1 == NULL && cp->a2 == NULL)
256 		r = 1;
257 	else if (cp->a2)
258 		if (cp->inrange) {
259 			if (match(cp->a2)) {
260 				cp->inrange = 0;
261 				lastaddr = 1;
262 			}
263 			r = 1;
264 		} else if (match(cp->a1)) {
265 			/*
266 			 * If the second address is a number less than or
267 			 * equal to the line number first selected, only
268 			 * one line shall be selected.
269 			 *	-- POSIX 1003.2
270 			 */
271 			if (cp->a2->type == AT_LINE &&
272 			    linenum >= cp->a2->u.l)
273 				lastaddr = 1;
274 			else
275 				cp->inrange = 1;
276 			r = 1;
277 		} else
278 			r = 0;
279 	else
280 		r = match(cp->a1);
281 	return (cp->nonsel ? ! r : r);
282 }
283 
284 /*
285  * Return TRUE if the address passed matches the current program
286  * state (linenumber, ps, lastline)
287  */
288 static int inline
289 match(a)
290 	struct s_addr *a;
291 {
292 	int eval;
293 
294 	switch (a->type) {
295 	case AT_RE:
296 		switch (eval = regexec(a->u.r, ps, 0, NULL, 0)) {
297 		case 0:
298 			return (1);
299 		case REG_NOMATCH:
300 			return (0);
301 		default:
302 			err(FATAL, "RE error: %s", strregerror(eval, a->u.r));
303 		}
304 	case AT_LINE:
305 		return (linenum == a->u.l);
306 	case AT_LAST:
307 		return (lastline);
308 	}
309 	/* NOTREACHED */
310 }
311 
312 /*
313  * substitute --
314  *	Do substitutions in the pattern space.  Currently, we build a
315  *	copy of the new pattern space in the substitute space structure
316  *	and then swap them.
317  */
318 static int
319 substitute(cp)
320 	struct s_command *cp;
321 {
322 	SPACE tspace;
323 	static regex_t *re;
324 	int n, re_off;
325 	char *endp, *s;
326 
327 	s = ps;
328 	re = &cp->u.s->re;
329 	if (regexec_check(re,
330 	    s, re->re_nsub + 1, cp->u.s->pmatch, 0) == REG_NOMATCH)
331 		return (0);
332 
333 	SS.len = 0;				/* Clean substitute space. */
334 	n = cp->u.s->n;
335 	switch (n) {
336 	case 0:					/* Global */
337 		do {
338 			/* Locate start of replaced string. */
339 			re_off = cp->u.s->pmatch[0].rm_so;
340 			/* Locate end of replaced string + 1. */
341 			endp = s + cp->u.s->pmatch[0].rm_eo;
342 			/* Copy leading retained string. */
343 			cspace(&SS, s, re_off, 0);
344 			/* Add in regular expression. */
345 			regsub(cp->u.s->pmatch, s, cp->u.s->new, &SS);
346 			/* Move past this match. */
347 			s += cp->u.s->pmatch[0].rm_eo;
348 		} while(regexec_check(re, s, re->re_nsub + 1,
349 		    cp->u.s->pmatch, REG_NOTBOL) != REG_NOMATCH);
350 		/* Copy trailing retained string. */
351 		cspace(&SS, s, strlen(s), 0);
352 		break;
353 	default:				/* Nth occurrence */
354 		while (--n) {
355 			s += cp->u.s->pmatch[0].rm_eo;
356 			if (regexec_check(re, s, re->re_nsub + 1,
357 			    cp->u.s->pmatch, REG_NOTBOL) == REG_NOMATCH)
358 				return (0);
359 		}
360 		/* FALLTHROUGH */
361 	case 1:					/* 1st occurrence */
362 		/* Locate start of replaced string. */
363 		re_off = cp->u.s->pmatch[0].rm_so + s - ps;
364 		/* Copy leading retained string. */
365 		cspace(&SS, ps, re_off, 0);
366 		/* Add in regular expression. */
367 		regsub(cp->u.s->pmatch, s, cp->u.s->new, &SS);
368 		/* Copy trailing retained string. */
369 		s += cp->u.s->pmatch[0].rm_eo;
370 		cspace(&SS, s, strlen(s), 0);
371 		break;
372 	}
373 
374 	/*
375 	 * Swap the substitute space and the pattern space, and make sure
376 	 * that any leftover pointers into stdio memory get lost.
377 	 */
378 	tspace = PS;
379 	PS = SS;
380 	SS = tspace;
381 	SS.space = SS.back;
382 
383 	/* Handle the 'p' flag. */
384 	if (cp->u.s->p)
385 		(void)printf("%s\n", ps);
386 
387 	/* Handle the 'w' flag. */
388 	if (cp->u.s->wfile && !pd) {
389 		if (cp->u.s->wfd == -1 && (cp->u.s->wfd = open(cp->u.s->wfile,
390 		    O_WRONLY|O_APPEND|O_CREAT|O_TRUNC, DEFFILEMODE)) == -1)
391 			err(FATAL, "%s: %s\n", cp->u.s->wfile, strerror(errno));
392 		iov[0].iov_base = ps;
393 		iov[0].iov_len = psl;
394 		if (writev(cp->u.s->wfd, iov, 2) != psl + 1)
395 			err(FATAL, "%s: %s\n", cp->u.s->wfile, strerror(errno));
396 	}
397 	return (1);
398 }
399 
400 /*
401  * Flush append requests.  Always called before reading a line,
402  * therefore it also resets the substitution done (sdone) flag.
403  */
404 static void
405 flush_appends()
406 {
407 	FILE *f;
408 	int count, i;
409 	char buf[8 * 1024];
410 
411 	for (i = 0; i < appendx; i++)
412 		switch (appends[i].type) {
413 		case AP_STRING:
414 			(void)printf("%s", appends[i].s);
415 			break;
416 		case AP_FILE:
417 			/*
418 			 * Read files probably shouldn't be cached.  Since
419 			 * it's not an error to read a non-existent file,
420 			 * it's possible that another program is interacting
421 			 * with the sed script through the file system.  It
422 			 * would be truly bizarre, but possible.  It's probably
423 			 * not that big a performance win, anyhow.
424 			 */
425 			if ((f = fopen(appends[i].s, "r")) == NULL)
426 				break;
427 			while (count = fread(buf, 1, sizeof(buf), f))
428 				(void)fwrite(buf, 1, count, stdout);
429 			(void)fclose(f);
430 			break;
431 		}
432 	if (ferror(stdout))
433 		err(FATAL, "stdout: %s", strerror(errno ? errno : EIO));
434 	appendx = 0;
435 	sdone = 0;
436 }
437 
438 static void
439 lputs(s)
440 	register char *s;
441 {
442 	register int count;
443 	register char *escapes, *p;
444 	struct winsize win;
445 	static int termwidth = -1;
446 
447 	if (termwidth == -1)
448 		if (p = getenv("COLUMNS"))
449 			termwidth = atoi(p);
450 		else if (ioctl(STDOUT_FILENO, TIOCGWINSZ, &win) == 0 &&
451 		    win.ws_col > 0)
452 			termwidth = win.ws_col;
453 		else
454 			termwidth = 60;
455 
456 	for (count = 0; *s; ++s) {
457 		if (count >= termwidth) {
458 			(void)printf("\\\n");
459 			count = 0;
460 		}
461 		if (isascii(*s) && isprint(*s) && *s != '\\') {
462 			(void)putchar(*s);
463 			count++;
464 		} else {
465 			escapes = "\\\a\b\f\n\r\t\v";
466 			(void)putchar('\\');
467 			if (p = strchr(escapes, *s)) {
468 				(void)putchar("\\abfnrtv"[p - escapes]);
469 				count += 2;
470 			} else {
471 				(void)printf("%03o", (u_char)*s);
472 				count += 4;
473 			}
474 		}
475 	}
476 	(void)putchar('$');
477 	(void)putchar('\n');
478 	if (ferror(stdout))
479 		err(FATAL, "stdout: %s", strerror(errno ? errno : EIO));
480 }
481 
482 /*
483  * Regexec with checking for errors
484  */
485 static int
486 regexec_check(preg, string, nmatch, pmatch, eflags)
487 	regex_t *preg;
488 	const char *string;
489 	int nmatch;
490 	regmatch_t pmatch[];
491 	int eflags;
492 {
493 	int eval;
494 
495 	switch (eval = regexec(preg, string, nmatch, pmatch, eflags)) {
496 	case 0:
497 		return (0);
498 	case REG_NOMATCH:
499 		return (REG_NOMATCH);
500 	default:
501 		err(FATAL, "RE error: %s", strregerror(eval, preg));
502 	}
503 	/* NOTREACHED */
504 }
505 
506 /*
507  * regsub - perform substitutions after a regexp match
508  * Based on a routine by Henry Spencer
509  */
510 static void
511 regsub(pmatch, string, src, sp)
512 	regmatch_t *pmatch;
513 	char *string, *src;
514 	SPACE *sp;
515 {
516 	register int len, no;
517 	register char c, *dst;
518 
519 #define	NEEDSP(reqlen)							\
520 	if (sp->len >= sp->blen - (reqlen) - 1) {			\
521 		sp->blen += (reqlen) + 1024;				\
522 		sp->space = sp->back = xrealloc(sp->back, sp->blen);	\
523 		dst = sp->space + sp->len;				\
524 	}
525 
526 	dst = sp->space + sp->len;
527 	while ((c = *src++) != '\0') {
528 		if (c == '&')
529 			no = 0;
530 		else if (c == '\\' && isdigit(*src))
531 			no = *src++ - '0';
532 		else
533 			no = -1;
534 		if (no < 0) {		/* Ordinary character. */
535  			if (c == '\\' && (*src == '\\' || *src == '&'))
536  				c = *src++;
537 			NEEDSP(1);
538  			*dst++ = c;
539 			++sp->len;
540  		} else if (pmatch[no].rm_so != -1 && pmatch[no].rm_eo != -1) {
541 			len = pmatch[no].rm_eo - pmatch[no].rm_so;
542 			NEEDSP(len);
543 			memmove(dst, string + pmatch[no].rm_so, len);
544 			dst += len;
545 			sp->len += len;
546 		}
547 	}
548 	NEEDSP(1);
549 	*dst = '\0';
550 }
551 
552 /*
553  * aspace --
554  *	Append the source space to the destination space, allocating new
555  *	space as necessary.
556  */
557 static void
558 cspace(sp, p, len, append)
559 	SPACE *sp;
560 	char *p;
561 	size_t len;
562 	int append;
563 {
564 	size_t tlen;
565 	int needcopy;
566 
567 	/* Current pointer may point to something else at the moment. */
568 	needcopy = sp->space != sp->back;
569 
570 	/*
571 	 * Make sure SPACE has enough memory and ramp up quickly.
572 	 * Add in two extra bytes, one for the newline, one for a
573 	 * terminating NULL.
574 	 */
575 	tlen = sp->len + len + 2;
576 	if (tlen > sp->blen) {
577 		sp->blen = tlen + 1024;
578 		sp->back = xrealloc(sp->back, sp->blen);
579 	}
580 
581 	if (needcopy)
582 		memmove(sp->back, sp->space, sp->len + 1);
583 	sp->space = sp->back;
584 
585 	/* May just be copying out of a stdio buffer. */
586 	if (len == NULL)
587 		return;
588 
589 	/* Append a separating newline. */
590 	if (append)
591 		sp->space[sp->len++] = '\n';
592 
593 	/* Append the new stuff, plus its terminating NULL. */
594 	memmove(sp->space + sp->len, p, len + 1);
595 	sp->len += len;
596 }
597 
598 /*
599  * Close all cached opened files and report any errors
600  */
601 void
602 cfclose(cp)
603 	register struct s_command *cp;
604 {
605 
606 	for (; cp != NULL; cp = cp->next)
607 		switch(cp->code) {
608 		case 's':
609 			if (cp->u.s->wfd != -1 && close(cp->u.s->wfd))
610 				err(FATAL,
611 				    "%s: %s", cp->u.s->wfile, strerror(errno));
612 			break;
613 		case 'w':
614 			if (cp->u.fd != -1 && close(cp->u.fd))
615 				err(FATAL, "%s: %s", cp->t, strerror(errno));
616 			break;
617 		case '{':
618 			cfclose(cp->u.c);
619 			break;
620 		}
621 }
622