xref: /csrg-svn/usr.bin/sed/process.c (revision 56005)
1 /*-
2  * Copyright (c) 1992 Diomidis Spinellis.
3  * Copyright (c) 1992 The Regents of the University of California.
4  * All rights reserved.
5  *
6  * This code is derived from software contributed to Berkeley by
7  * Diomidis Spinellis of Imperial College, University of London.
8  *
9  * %sccs.include.redist.c%
10  */
11 
12 #ifndef lint
13 static char sccsid[] = "@(#)process.c	5.2 (Berkeley) 08/24/92";
14 #endif /* not lint */
15 
16 #include <sys/types.h>
17 #include <sys/stat.h>
18 #include <sys/ioctl.h>
19 #include <sys/uio.h>
20 
21 #include <ctype.h>
22 #include <errno.h>
23 #include <fcntl.h>
24 #include <limits.h>
25 #include <regex.h>
26 #include <stdio.h>
27 #include <stdlib.h>
28 #include <string.h>
29 #include <unistd.h>
30 
31 #include "defs.h"
32 #include "extern.h"
33 
/*
 * A SPACE is one of the text buffers the script operates on.  `space' is
 * the pointer commands read through; it normally equals `back', the
 * memory owned by the structure, but may temporarily point at memory
 * owned by someone else (for example the line handed back by mf_fgets())
 * until cspace() copies the text into `back'.
 */
typedef struct {
	char *space;		/* Current space pointer. */
	size_t len;		/* Current length (terminator not counted). */
	int deleted;		/* If deleted (suppresses output). */
	char *back;		/* Backing memory, grown with xrealloc(). */
	size_t blen;		/* Backing memory length in bytes. */
} SPACE;
/*
 * HS is used by the g/G/h/H/x commands, PS is the pattern space each
 * command works on, and SS is the scratch space substitute() builds its
 * result in before swapping it with PS.
 */
static SPACE HS, PS, SS;
/* Shorthand for the fields that are used all over this file. */
#define	pd		PS.deleted
#define	ps		PS.space
#define	psl		PS.len
#define	hs		HS.space
#define	hsl		HS.len
47 
/* Forward declarations of the helpers private to this file. */
static inline int	 applies __P((struct s_command *));
static void		 cspace __P((SPACE *, char *, size_t, int));
static void		 flush_appends __P((void));
static void		 lputs __P((char *));
static inline int	 match __P((struct s_addr *));
static int		 regexec_check __P((regex_t *, const char *,
			    int, regmatch_t[], int));
static void		 regsub __P((regmatch_t *, char *, char *, SPACE *));
static int		 substitute __P((struct s_command *));
57 
/*
 * Output queued by the 'a' and 'r' commands; flush_appends() writes the
 * first appendx entries and resets appendx to zero.
 */
struct s_appends *appends;	/* Array of pointers to strings to append. */
static int appendx;		/* Index into appends array. */
int appendnum;			/* Size of appends array. */

static int lastaddr;		/* Set by applies if last address of a range. */
static int sdone;		/* If any substitutes since last line input. */
				/*
				 * Iov structure for 'w' commands: slot 0 is
				 * pointed at the pattern space before each
				 * writev(), slot 1 is the trailing newline.
				 */
static struct iovec iov[2] = { NULL, 0, "\n", 1 };
66 
67 void
68 process()
69 {
70 	struct s_command *cp;
71 	SPACE tspace;
72 	size_t len;
73 	char oldc, *p;
74 
75 	for (linenum = 0; ps = mf_fgets(&psl);) {
76 		pd = 0;
77 		cp = prog;
78 redirect:
79 		while (cp != NULL) {
80 			if (!applies(cp)) {
81 				cp = cp->next;
82 				continue;
83 			}
84 			switch (cp->code) {
85 			case '{':
86 				cp = cp->u.c;
87 				goto redirect;
88 			case 'a':
89 				if (appendx >= appendnum)
90 					appends = xrealloc(appends,
91 					    sizeof(struct s_appends) *
92 					    (appendnum *= 2));
93 				appends[appendx].type = AP_STRING;
94 				appends[appendx].s = cp->t;
95 				appendx++;
96 				break;
97 			case 'b':
98 				cp = cp->u.c;
99 				goto redirect;
100 			case 'c':
101 				pd = 1;
102 				psl = 0;
103 				if (cp->a2 == NULL || lastaddr)
104 					(void)printf("%s", cp->t);
105 				break;
106 			case 'd':
107 				if (pd)
108 					goto new;
109 				pd = 1;
110 				goto new;
111 			case 'D':
112 				if (pd)
113 					goto new;
114 				if ((p = strchr(ps, '\n')) == NULL)
115 					pd = 1;
116 				else {
117 					psl -= (p - ps) - 1;
118 					memmove(ps, p + 1, psl);
119 				}
120 				goto new;
121 			case 'g':
122 				ps = hs;
123 				psl = hsl;
124 				break;
125 			case 'G':
126 				cspace(&PS, hs, hsl, 1);
127 				break;
128 			case 'h':
129 				cspace(&HS, ps, psl, 0);
130 				break;
131 			case 'H':
132 				cspace(&HS, ps, psl, 1);
133 				break;
134 			case 'i':
135 				(void)printf("%s", cp->t);
136 				break;
137 			case 'l':
138 				lputs(ps);
139 				break;
140 			case 'n':
141 				if (!nflag && !pd)
142 					(void)printf("%s\n", ps);
143 				flush_appends();
144 				ps = mf_fgets(&psl);
145 #ifdef HISTORIC_PRACTICE
146 				if (ps == NULL)
147 					exit(0);
148 #endif
149 				pd = 0;
150 				break;
151 			case 'N':
152 				flush_appends();
153 				if (ps != PS.back)
154 					cspace(&PS, NULL, 0, 0);
155 				if ((p = mf_fgets(&len)) == NULL) {
156 					if (!nflag && !pd)
157 						(void)printf("%s\n", ps);
158 					exit(0);
159 				}
160 				cspace(&PS, p, len, 1);
161 				break;
162 			case 'p':
163 				if (pd)
164 					break;
165 				(void)printf("%s\n", ps);
166 				break;
167 			case 'P':
168 				if (pd)
169 					break;
170 				if ((p = strchr(ps, '\n')) != NULL) {
171 					oldc = *p;
172 					*p = '\0';
173 				}
174 				(void)printf("%s\n", ps);
175 				if (p != NULL)
176 					*p = oldc;
177 				break;
178 			case 'q':
179 				if (!nflag && !pd)
180 					(void)printf("%s\n", ps);
181 				flush_appends();
182 				exit(0);
183 			case 'r':
184 				if (appendx >= appendnum)
185 					appends = xrealloc(appends,
186 					    sizeof(struct s_appends) *
187 					    (appendnum *= 2));
188 				appends[appendx].type = AP_FILE;
189 				appends[appendx].s = cp->t;
190 				appendx++;
191 				break;
192 			case 's':
193 				sdone = substitute(cp);
194 				break;
195 			case 't':
196 				if (sdone) {
197 					sdone = 0;
198 					cp = cp->u.c;
199 					goto redirect;
200 				}
201 				break;
202 			case 'w':
203 				if (pd)
204 					break;
205 				if (cp->u.fd == -1 && (cp->u.fd = open(cp->t,
206 				    O_WRONLY|O_APPEND|O_CREAT|O_TRUNC,
207 				    DEFFILEMODE)) == -1)
208 					err(FATAL, "%s: %s\n",
209 					    cp->t, strerror(errno));
210 				iov[0].iov_base = ps;
211 				iov[0].iov_len = psl;
212 				if (writev(cp->u.fd, iov, 2) != psl + 1)
213 					err(FATAL, "%s: %s\n",
214 					    cp->t, strerror(errno));
215 				break;
216 			case 'x':
217 				tspace = PS;
218 				PS = HS;
219 				HS = tspace;
220 				break;
221 			case 'y':
222 				if (pd)
223 					break;
224 				for (p = ps, len = psl; len--; ++p)
225 					*p = cp->u.y[*p];
226 				break;
227 			case ':':
228 			case '}':
229 				break;
230 			case '=':
231 				(void)printf("%lu\n", linenum);
232 			}
233 			cp = cp->next;
234 		} /* for all cp */
235 
236 new:		if (!nflag && !pd)
237 			(void)printf("%s\n", ps);
238 		flush_appends();
239 	} /* for all lines */
240 }
241 
242 /*
243  * Return TRUE if the command applies to the current line.  Sets the inrange
244  * flag to process ranges.  Interprets the non-select (``!'') flag.
245  */
246 static inline int
247 applies(cp)
248 	struct s_command *cp;
249 {
250 	int r;
251 
252 	lastaddr = 0;
253 	if (cp->a1 == NULL && cp->a2 == NULL)
254 		r = 1;
255 	else if (cp->a2)
256 		if (cp->inrange) {
257 			if (match(cp->a2)) {
258 				cp->inrange = 0;
259 				lastaddr = 1;
260 			}
261 			r = 1;
262 		} else if (match(cp->a1)) {
263 			/*
264 			 * If the second address is a number less than or
265 			 * equal to the line number first selected, only
266 			 * one line shall be selected.
267 			 *	-- POSIX 1003.2
268 			 */
269 			if (cp->a2->type == AT_LINE &&
270 			    linenum >= cp->a2->u.l)
271 				lastaddr = 1;
272 			else
273 				cp->inrange = 1;
274 			r = 1;
275 		} else
276 			r = 0;
277 	else
278 		r = match(cp->a1);
279 	return (cp->nonsel ? ! r : r);
280 }
281 
282 /*
283  * Return TRUE if the address passed matches the current program
284  * state (linenumber, ps, lastline)
285  */
286 static int inline
287 match(a)
288 	struct s_addr *a;
289 {
290 	int eval;
291 
292 	switch (a->type) {
293 	case AT_RE:
294 		switch (eval = regexec(a->u.r, ps, 0, NULL, 0)) {
295 		case 0:
296 			return (1);
297 		case REG_NOMATCH:
298 			return (0);
299 		default:
300 			err(FATAL, "RE error: %s", strregerror(eval, a->u.r));
301 		}
302 	case AT_LINE:
303 		return (linenum == a->u.l);
304 	case AT_LAST:
305 		return (lastline);
306 	}
307 	/* NOTREACHED */
308 }
309 
310 /*
311  * substitute --
312  *	Do substitutions in the pattern space.  Currently, we build a
313  *	copy of the new pattern space in the substitute space structure
314  *	and then swap them.
315  */
316 static int
317 substitute(cp)
318 	struct s_command *cp;
319 {
320 	SPACE tspace;
321 	static regex_t *re;
322 	int n, re_off;
323 	char *endp, *s;
324 
325 	s = ps;
326 	re = &cp->u.s->re;
327 	if (regexec_check(re,
328 	    s, re->re_nsub + 1, cp->u.s->pmatch, 0) == REG_NOMATCH)
329 		return (0);
330 
331 	SS.len = 0;				/* Clean substitute space. */
332 	n = cp->u.s->n;
333 	switch (n) {
334 	case 0:					/* Global */
335 		do {
336 			/* Locate start of replaced string. */
337 			re_off = cp->u.s->pmatch[0].rm_so;
338 			/* Locate end of replaced string + 1. */
339 			endp = s + cp->u.s->pmatch[0].rm_eo;
340 			/* Copy leading retained string. */
341 			cspace(&SS, s, re_off, 0);
342 			/* Add in regular expression. */
343 			regsub(cp->u.s->pmatch, s, cp->u.s->new, &SS);
344 			/* Move past this match. */
345 			s += cp->u.s->pmatch[0].rm_eo;
346 		} while(regexec_check(re, s, re->re_nsub + 1,
347 		    cp->u.s->pmatch, REG_NOTBOL) != REG_NOMATCH);
348 		/* Copy trailing retained string. */
349 		cspace(&SS, s, strlen(s), 0);
350 		break;
351 	default:				/* Nth occurrence */
352 		while (--n) {
353 			s += cp->u.s->pmatch[0].rm_eo;
354 			if (regexec_check(re, s, re->re_nsub + 1,
355 			    cp->u.s->pmatch, REG_NOTBOL) == REG_NOMATCH)
356 				return (0);
357 		}
358 		/* FALLTHROUGH */
359 	case 1:					/* 1st occurrence */
360 		/* Locate start of replaced string. */
361 		re_off = cp->u.s->pmatch[0].rm_so + s - ps;
362 		/* Copy leading retained string. */
363 		cspace(&SS, ps, re_off, 0);
364 		/* Add in regular expression. */
365 		regsub(cp->u.s->pmatch, s, cp->u.s->new, &SS);
366 		/* Copy trailing retained string. */
367 		s += cp->u.s->pmatch[0].rm_eo;
368 		cspace(&SS, s, strlen(s), 0);
369 		break;
370 	}
371 
372 	/*
373 	 * Swap the substitute space and the pattern space, and make sure
374 	 * that any leftover pointers into stdio memory get lost.
375 	 */
376 	tspace = PS;
377 	PS = SS;
378 	SS = tspace;
379 	SS.space = SS.back;
380 
381 	/* Handle the 'p' flag. */
382 	if (cp->u.s->p)
383 		(void)printf("%s\n", ps);
384 
385 	/* Handle the 'w' flag. */
386 	if (cp->u.s->wfile && !pd) {
387 		if (cp->u.s->wfd == -1 && (cp->u.s->wfd = open(cp->u.s->wfile,
388 		    O_WRONLY|O_APPEND|O_CREAT|O_TRUNC, DEFFILEMODE)) == -1)
389 			err(FATAL, "%s: %s\n", cp->u.s->wfile, strerror(errno));
390 		iov[0].iov_base = ps;
391 		iov[0].iov_len = psl;
392 		if (writev(cp->u.s->wfd, iov, 2) != psl + 1)
393 			err(FATAL, "%s: %s\n", cp->u.s->wfile, strerror(errno));
394 	}
395 	return (1);
396 }
397 
398 /*
399  * Flush append requests.  Always called before reading a line,
400  * therefore it also resets the substitution done (sdone) flag.
401  */
402 static void
403 flush_appends()
404 {
405 	FILE *f;
406 	int count, i;
407 	char buf[8 * 1024];
408 
409 	for (i = 0; i < appendx; i++)
410 		switch (appends[i].type) {
411 		case AP_STRING:
412 			(void)printf("%s", appends[i].s);
413 			break;
414 		case AP_FILE:
415 			/*
416 			 * Read files probably shouldn't be cached.  Since
417 			 * it's not an error to read a non-existent file,
418 			 * it's possible that another program is interacting
419 			 * with the sed script through the file system.  It
420 			 * would be truly bizarre, but possible.  It's probably
421 			 * not that big a performance win, anyhow.
422 			 */
423 			if ((f = fopen(appends[i].s, "r")) == NULL)
424 				break;
425 			while (count = fread(buf, 1, sizeof(buf), f))
426 				(void)fwrite(buf, 1, count, stdout);
427 			(void)fclose(f);
428 			break;
429 		}
430 	if (ferror(stdout))
431 		err(FATAL, "stdout: %s", strerror(errno ? errno : EIO));
432 	appendx = 0;
433 	sdone = 0;
434 }
435 
436 static void
437 lputs(s)
438 	register char *s;
439 {
440 	register int count;
441 	register char *escapes, *p;
442 	struct winsize win;
443 	static int termwidth = -1;
444 
445 	if (termwidth == -1)
446 		if (p = getenv("COLUMNS"))
447 			termwidth = atoi(p);
448 		else if (ioctl(STDOUT_FILENO, TIOCGWINSZ, &win) == 0 &&
449 		    win.ws_col > 0)
450 			termwidth = win.ws_col;
451 		else
452 			termwidth = 60;
453 
454 	for (count = 0; *s; ++s) {
455 		if (count >= termwidth) {
456 			(void)printf("\\\n");
457 			count = 0;
458 		}
459 		if (isascii(*s) && isprint(*s) && *s != '\\') {
460 			(void)putchar(*s);
461 			count++;
462 		} else {
463 			escapes = "\\\a\b\f\n\r\t\v";
464 			(void)putchar('\\');
465 			if (p = strchr(escapes, *s)) {
466 				(void)putchar("\\abfnrtv"[p - escapes]);
467 				count += 2;
468 			} else {
469 				(void)printf("%03o", (u_char)*s);
470 				count += 4;
471 			}
472 		}
473 	}
474 	(void)putchar('$');
475 	(void)putchar('\n');
476 	if (ferror(stdout))
477 		err(FATAL, "stdout: %s", strerror(errno ? errno : EIO));
478 }
479 
480 /*
481  * Regexec with checking for errors
482  */
483 static int
484 regexec_check(preg, string, nmatch, pmatch, eflags)
485 	regex_t *preg;
486 	const char *string;
487 	int nmatch;
488 	regmatch_t pmatch[];
489 	int eflags;
490 {
491 	int eval;
492 
493 	switch (eval = regexec(preg, string, nmatch, pmatch, eflags)) {
494 	case 0:
495 		return (0);
496 	case REG_NOMATCH:
497 		return (REG_NOMATCH);
498 	default:
499 		err(FATAL, "RE error: %s", strregerror(eval, preg));
500 	}
501 	/* NOTREACHED */
502 }
503 
504 /*
505  * regsub - perform substitutions after a regexp match
506  * Based on a routine by Henry Spencer
507  */
508 static void
509 regsub(pmatch, string, src, sp)
510 	regmatch_t *pmatch;
511 	char *string, *src;
512 	SPACE *sp;
513 {
514 	register int len, no;
515 	register char c, *dst;
516 
517 #define	NEEDSP(reqlen)							\
518 	if (sp->len >= sp->blen - (reqlen) - 1) {			\
519 		sp->blen += (reqlen) + 1024;				\
520 		sp->space = sp->back = xrealloc(sp->back, sp->blen);	\
521 		dst = sp->space + sp->len;				\
522 	}
523 
524 	dst = sp->space + sp->len;
525 	while ((c = *src++) != '\0') {
526 		if (c == '&')
527 			no = 0;
528 		else if (c == '\\' && isdigit(*src))
529 			no = *src++ - '0';
530 		else
531 			no = -1;
532 		if (no < 0) {		/* Ordinary character. */
533  			if (c == '\\' && (*src == '\\' || *src == '&'))
534  				c = *src++;
535 			NEEDSP(1);
536  			*dst++ = c;
537 			++sp->len;
538  		} else if (pmatch[no].rm_so != -1 && pmatch[no].rm_eo != -1) {
539 			len = pmatch[no].rm_eo - pmatch[no].rm_so;
540 			NEEDSP(len);
541 			memmove(dst, string + pmatch[no].rm_so, len);
542 			dst += len;
543 			sp->len += len;
544 		}
545 	}
546 	NEEDSP(1);
547 	*dst = '\0';
548 }
549 
550 /*
551  * aspace --
552  *	Append the source space to the destination space, allocating new
553  *	space as necessary.
554  */
555 static void
556 cspace(sp, p, len, append)
557 	SPACE *sp;
558 	char *p;
559 	size_t len;
560 	int append;
561 {
562 	size_t tlen;
563 	int needcopy;
564 
565 	/* Current pointer may point to something else at the moment. */
566 	needcopy = sp->space != sp->back;
567 
568 	/*
569 	 * Make sure SPACE has enough memory and ramp up quickly.
570 	 * Add in two extra bytes, one for the newline, one for a
571 	 * terminating NULL.
572 	 */
573 	tlen = sp->len + len + 2;
574 	if (tlen > sp->blen) {
575 		sp->blen = tlen + 1024;
576 		sp->back = xrealloc(sp->back, sp->blen);
577 	}
578 
579 	if (needcopy)
580 		memmove(sp->back, sp->space, sp->len + 1);
581 	sp->space = sp->back;
582 
583 	/* May just be copying out of a stdio buffer. */
584 	if (len == NULL)
585 		return;
586 
587 	/* Append a separating newline. */
588 	if (append)
589 		sp->space[sp->len++] = '\n';
590 
591 	/* Append the new stuff, plus its terminating NULL. */
592 	memmove(sp->space + sp->len, p, len + 1);
593 	sp->len += len;
594 }
595 
596 /*
597  * Close all cached opened files and report any errors
598  */
599 void
600 cfclose(cp)
601 	register struct s_command *cp;
602 {
603 
604 	for (; cp != NULL; cp = cp->next)
605 		switch(cp->code) {
606 		case 's':
607 			if (cp->u.s->wfd != -1 && close(cp->u.s->wfd))
608 				err(FATAL,
609 				    "%s: %s", cp->u.s->wfile, strerror(errno));
610 			break;
611 		case 'w':
612 			if (cp->u.fd != -1 && close(cp->u.fd))
613 				err(FATAL, "%s: %s", cp->t, strerror(errno));
614 			break;
615 		case '{':
616 			cfclose(cp->u.c);
617 			break;
618 		}
619 }
620