xref: /openbsd-src/usr.bin/sed/process.c (revision ae3cb403620ab940fbaabb3055fac045a63d56b7)
1 /*	$OpenBSD: process.c,v 1.33 2017/12/13 16:06:34 millert Exp $	*/
2 
3 /*-
4  * Copyright (c) 1992 Diomidis Spinellis.
5  * Copyright (c) 1992, 1993
6  *	The Regents of the University of California.  All rights reserved.
7  *
8  * This code is derived from software contributed to Berkeley by
9  * Diomidis Spinellis of Imperial College, University of London.
10  *
11  * Redistribution and use in source and binary forms, with or without
12  * modification, are permitted provided that the following conditions
13  * are met:
14  * 1. Redistributions of source code must retain the above copyright
15  *    notice, this list of conditions and the following disclaimer.
16  * 2. Redistributions in binary form must reproduce the above copyright
17  *    notice, this list of conditions and the following disclaimer in the
18  *    documentation and/or other materials provided with the distribution.
19  * 3. Neither the name of the University nor the names of its contributors
20  *    may be used to endorse or promote products derived from this software
21  *    without specific prior written permission.
22  *
23  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
24  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
25  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
26  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
27  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
28  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
29  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
30  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
31  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
32  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
33  * SUCH DAMAGE.
34  */
35 
36 #include <sys/types.h>
37 #include <sys/stat.h>
38 #include <sys/uio.h>
39 
40 #include <ctype.h>
41 #include <errno.h>
42 #include <fcntl.h>
43 #include <limits.h>
44 #include <regex.h>
45 #include <stdio.h>
46 #include <stdlib.h>
47 #include <string.h>
48 #include <unistd.h>
49 
50 #include "defs.h"
51 #include "extern.h"
52 
/*
 * Working buffers: PS is the pattern space, SS the substitute space that
 * substitute() builds its result in, and HS the buffer that the g, G, h, H
 * and x commands copy to and from.
 */
53 static SPACE HS, PS, SS;
/* Shorthand for the fields of PS and HS that are used throughout this file. */
54 #define	pd		PS.deleted
55 #define	ps		PS.space
56 #define	psl		PS.len
57 #define	psanl		PS.append_newline
58 #define	hs		HS.space
59 #define	hsl		HS.len
60 
/* Forward declarations of the file-local helpers defined further down. */
61 static inline int	 applies(struct s_command *);
62 static void		 flush_appends(void);
63 static void		 lputs(char *);
64 static inline int	 regexec_e(regex_t *, const char *, int, int, size_t,
65 			     size_t);
66 static void		 regsub(SPACE *, char *, char *);
67 static int		 substitute(struct s_command *);
68 
69 struct s_appends *appends;	/* Queued 'a' texts and 'r' file names. */
70 static size_t appendx;		/* Index into appends array. */
71 size_t appendnum;		/* Size of appends array. */
72 
73 static int lastaddr;		/* Set by applies if last address of a range. */
74 static int sdone;		/* If any substitutes since last line input. */
75 				/* Most recent RE; used when an empty RE is given. */
76 static regex_t *defpreg;
77 size_t maxnsub;		/* regexec_e() requests maxnsub + 1 match slots. */
78 regmatch_t *match;		/* Match offsets filled in by regexec(). */
79 
/*
 * Write the pattern space to outfile, followed by a newline when the
 * append_newline flag of the pattern space is set.
 */
80 #define OUT() do {\
81 	fwrite(ps, 1, psl, outfile);\
82 	if (psanl) fputc('\n', outfile);\
83 } while (0)
84 
85 void
86 process(void)
87 {
88 	struct s_command *cp;
89 	SPACE tspace;
90 	size_t len, oldpsl;
91 	char *p;
92 
93 	for (linenum = 0; mf_fgets(&PS, REPLACE);) {
94 		pd = 0;
95 top:
96 		cp = prog;
97 redirect:
98 		while (cp != NULL) {
99 			if (!applies(cp)) {
100 				cp = cp->next;
101 				continue;
102 			}
103 			switch (cp->code) {
104 			case '{':
105 				cp = cp->u.c;
106 				goto redirect;
107 			case 'a':
108 				if (appendx >= appendnum) {
109 					appends = xreallocarray(appends,
110 					    appendnum,
111 					    2 * sizeof(struct s_appends));
112 					appendnum *= 2;
113 				}
114 				appends[appendx].type = AP_STRING;
115 				appends[appendx].s = cp->t;
116 				appends[appendx].len = strlen(cp->t);
117 				appendx++;
118 				break;
119 			case 'b':
120 				cp = cp->u.c;
121 				goto redirect;
122 			case 'c':
123 				pd = 1;
124 				psl = 0;
125 				if (cp->a2 == NULL || lastaddr || lastline())
126 					(void)fprintf(outfile, "%s", cp->t);
127 				break;
128 			case 'd':
129 				pd = 1;
130 				goto new;
131 			case 'D':
132 				if (pd)
133 					goto new;
134 				if (psl == 0 ||
135 				    (p = memchr(ps, '\n', psl)) == NULL) {
136 					pd = 1;
137 					goto new;
138 				} else {
139 					psl -= (p + 1) - ps;
140 					memmove(ps, p + 1, psl);
141 					goto top;
142 				}
143 			case 'g':
144 				cspace(&PS, hs, hsl, REPLACE);
145 				break;
146 			case 'G':
147 				cspace(&PS, "\n", 1, 0);
148 				cspace(&PS, hs, hsl, 0);
149 				break;
150 			case 'h':
151 				cspace(&HS, ps, psl, REPLACE);
152 				break;
153 			case 'H':
154 				cspace(&HS, "\n", 1, 0);
155 				cspace(&HS, ps, psl, 0);
156 				break;
157 			case 'i':
158 				(void)fprintf(outfile, "%s", cp->t);
159 				break;
160 			case 'l':
161 				lputs(ps);
162 				break;
163 			case 'n':
164 				if (!nflag && !pd)
165 					OUT();
166 				flush_appends();
167 				if (!mf_fgets(&PS, REPLACE))
168 					exit(0);
169 				pd = 0;
170 				break;
171 			case 'N':
172 				flush_appends();
173 				cspace(&PS, "\n", 1, 0);
174 				if (!mf_fgets(&PS, 0))
175 					exit(0);
176 				break;
177 			case 'p':
178 				if (pd)
179 					break;
180 				OUT();
181 				break;
182 			case 'P':
183 				if (pd)
184 					break;
185 				if ((p = memchr(ps, '\n', psl)) != NULL) {
186 					oldpsl = psl;
187 					psl = p - ps;
188 					psanl = 1;
189 					OUT();
190 					psl = oldpsl;
191 				} else {
192 					OUT();
193 				}
194 				break;
195 			case 'q':
196 				if (!nflag && !pd)
197 					OUT();
198 				flush_appends();
199 				exit(0);
200 			case 'r':
201 				if (appendx >= appendnum) {
202 					appends = xreallocarray(appends,
203 					    appendnum,
204 					    2 * sizeof(struct s_appends));
205 					appendnum *= 2;
206 				}
207 				appends[appendx].type = AP_FILE;
208 				appends[appendx].s = cp->t;
209 				appends[appendx].len = strlen(cp->t);
210 				appendx++;
211 				break;
212 			case 's':
213 				sdone |= substitute(cp);
214 				break;
215 			case 't':
216 				if (sdone) {
217 					sdone = 0;
218 					cp = cp->u.c;
219 					goto redirect;
220 				}
221 				break;
222 			case 'w':
223 				if (pd)
224 					break;
225 				if (cp->u.fd == -1 && (cp->u.fd = open(cp->t,
226 				    O_WRONLY|O_APPEND|O_CREAT|O_TRUNC,
227 				    DEFFILEMODE)) == -1)
228 					error(FATAL, "%s: %s",
229 					    cp->t, strerror(errno));
230 				if ((size_t)write(cp->u.fd, ps, psl) != psl ||
231 				    write(cp->u.fd, "\n", 1) != 1)
232 					error(FATAL, "%s: %s",
233 					    cp->t, strerror(errno));
234 				break;
235 			case 'x':
236 				if (hs == NULL)
237 					cspace(&HS, "", 0, REPLACE);
238 				tspace = PS;
239 				PS = HS;
240 				psanl = tspace.append_newline;
241 				HS = tspace;
242 				break;
243 			case 'y':
244 				if (pd || psl == 0)
245 					break;
246 				for (p = ps, len = psl; len--; ++p)
247 					*p = cp->u.y[(unsigned char)*p];
248 				break;
249 			case ':':
250 			case '}':
251 				break;
252 			case '=':
253 				(void)fprintf(outfile, "%lu\n", linenum);
254 			}
255 			cp = cp->next;
256 		} /* for all cp */
257 
258 new:		if (!nflag && !pd)
259 			OUT();
260 		flush_appends();
261 	} /* for all lines */
262 }
263 
264 /*
265  * TRUE if the address passed matches the current program state
266  * (lastline, linenumber, ps).
267  */
268 #define	MATCH(a)						\
269 	(a)->type == AT_RE ? regexec_e((a)->u.r, ps, 0, 1, 0, psl) :	\
270 	    (a)->type == AT_LINE ? linenum == (a)->u.l : lastline()
271 
272 /*
273  * Return TRUE if the command applies to the current line.  Sets the inrange
274  * flag to process ranges.  Interprets the non-select (``!'') flag.
275  */
276 static inline int
277 applies(struct s_command *cp)
278 {
279 	int r;
280 
281 	lastaddr = 0;
282 	if (cp->a1 == NULL && cp->a2 == NULL)
283 		r = 1;
284 	else if (cp->a2)
285 		if (cp->inrange) {
286 			if (MATCH(cp->a2)) {
287 				cp->inrange = 0;
288 				lastaddr = 1;
289 			}
290 			r = 1;
291 		} else if (MATCH(cp->a1)) {
292 			/*
293 			 * If the second address is a number less than or
294 			 * equal to the line number first selected, only
295 			 * one line shall be selected.
296 			 *	-- POSIX 1003.2
297 			 */
298 			if (cp->a2->type == AT_LINE &&
299 			    linenum >= cp->a2->u.l)
300 				lastaddr = 1;
301 			else
302 				cp->inrange = 1;
303 			r = 1;
304 		} else
305 			r = 0;
306 	else
307 		r = MATCH(cp->a1);
308 	return (cp->nonsel ? !r : r);
309 }
310 
311 /*
312  * Reset all inrange markers.
313  */
314 void
315 resetranges(void)
316 {
317 	struct s_command *cp;
318 
319 	for (cp = prog; cp; cp = cp->code == '{' ? cp->u.c : cp->next)
320 		if (cp->a2)
321 			cp->inrange = 0;
322 }
323 
324 /*
325  * substitute --
326  *	Do substitutions in the pattern space.  Currently, we build a
327  *	copy of the new pattern space in the substitute space structure
328  *	and then swap them.
329  */
330 static int
331 substitute(struct s_command *cp)
332 {
333 	SPACE tspace;
334 	regex_t *re;
335 	regoff_t slen;
336 	int n, lastempty;
337 	regoff_t le = 0;
338 	char *s;
339 
340 	s = ps;
341 	re = cp->u.s->re;
342 	if (re == NULL) {
343 		if (defpreg != NULL && cp->u.s->maxbref > defpreg->re_nsub) {
344 			linenum = cp->u.s->linenum;
345 			error(COMPILE, "\\%d not defined in the RE",
346 			    cp->u.s->maxbref);
347 		}
348 	}
349 	if (!regexec_e(re, ps, 0, 0, 0, psl))
350 		return (0);
351 
352 	SS.len = 0;				/* Clean substitute space. */
353 	slen = psl;
354 	n = cp->u.s->n;
355 	lastempty = 1;
356 
357 	do {
358 		/* Copy the leading retained string. */
359 		if (n <= 1 && (match[0].rm_so > le))
360 			cspace(&SS, s, match[0].rm_so - le, APPEND);
361 
362 		/* Skip zero-length matches right after other matches. */
363 		if (lastempty || (match[0].rm_so - le) ||
364 		    match[0].rm_so != match[0].rm_eo) {
365 			if (n <= 1) {
366 				/* Want this match: append replacement. */
367 				regsub(&SS, ps, cp->u.s->new);
368 				if (n == 1)
369 					n = -1;
370 			} else {
371 				/* Want a later match: append original. */
372 				if (match[0].rm_eo - le)
373 					cspace(&SS, s, match[0].rm_eo - le,
374 					    APPEND);
375 				n--;
376 			}
377 		}
378 
379 		/* Move past this match. */
380 		s = ps + match[0].rm_eo;
381 		slen = psl - match[0].rm_eo;
382 		le = match[0].rm_eo;
383 
384 		/*
385 		 * After a zero-length match, advance one byte,
386 		 * and at the end of the line, terminate.
387 		 */
388 		if (match[0].rm_so == match[0].rm_eo) {
389 			if (*s == '\0' || *s == '\n')
390 				slen = -1;
391 			else
392 				slen--;
393 			if (*s != '\0') {
394 				cspace(&SS, s++, 1, APPEND);
395 				le++;
396 			}
397 			lastempty = 1;
398 		} else
399 			lastempty = 0;
400 
401 	} while (n >= 0 && slen >= 0 &&
402 	    regexec_e(re, ps, REG_NOTBOL, 0, le, psl));
403 
404 	/* Did not find the requested number of matches. */
405 	if (n > 0)
406 		return (0);
407 
408 	/* Copy the trailing retained string. */
409 	if (slen > 0)
410 		cspace(&SS, s, slen, APPEND);
411 
412 	/*
413 	 * Swap the substitute space and the pattern space, and make sure
414 	 * that any leftover pointers into stdio memory get lost.
415 	 */
416 	tspace = PS;
417 	PS = SS;
418 	psanl = tspace.append_newline;
419 	SS = tspace;
420 	SS.space = SS.back;
421 
422 	/* Handle the 'p' flag. */
423 	if (cp->u.s->p)
424 		OUT();
425 
426 	/* Handle the 'w' flag. */
427 	if (cp->u.s->wfile && !pd) {
428 		if (cp->u.s->wfd == -1 && (cp->u.s->wfd = open(cp->u.s->wfile,
429 		    O_WRONLY|O_APPEND|O_CREAT|O_TRUNC, DEFFILEMODE)) == -1)
430 			error(FATAL, "%s: %s", cp->u.s->wfile, strerror(errno));
431 		if ((size_t)write(cp->u.s->wfd, ps, psl) != psl ||
432 		    write(cp->u.s->wfd, "\n", 1) != 1)
433 			error(FATAL, "%s: %s", cp->u.s->wfile, strerror(errno));
434 	}
435 	return (1);
436 }
437 
438 /*
439  * Flush append requests.  Always called before reading a line,
440  * therefore it also resets the substitution done (sdone) flag.
441  */
442 static void
443 flush_appends(void)
444 {
445 	FILE *f;
446 	size_t count, idx;
447 	char buf[8 * 1024];
448 
449 	for (idx = 0; idx < appendx; idx++)
450 		switch (appends[idx].type) {
451 		case AP_STRING:
452 			fwrite(appends[idx].s, sizeof(char), appends[idx].len,
453 			    outfile);
454 			break;
455 		case AP_FILE:
456 			/*
457 			 * Read files probably shouldn't be cached.  Since
458 			 * it's not an error to read a non-existent file,
459 			 * it's possible that another program is interacting
460 			 * with the sed script through the file system.  It
461 			 * would be truly bizarre, but possible.  It's probably
462 			 * not that big a performance win, anyhow.
463 			 */
464 			if ((f = fopen(appends[idx].s, "r")) == NULL)
465 				break;
466 			while ((count = fread(buf, sizeof(char), sizeof(buf), f)))
467 				(void)fwrite(buf, sizeof(char), count, outfile);
468 			(void)fclose(f);
469 			break;
470 		}
471 	if (ferror(outfile))
472 		error(FATAL, "%s: %s", outfname, strerror(errno ? errno : EIO));
473 	appendx = sdone = 0;
474 }
475 
476 static void
477 lputs(char *s)
478 {
479 	int count;
480 	extern int termwidth;
481 	const char *escapes;
482 	char *p;
483 
484 	for (count = 0; *s; ++s) {
485 		if (count >= termwidth) {
486 			(void)fprintf(outfile, "\\\n");
487 			count = 0;
488 		}
489 		if (isascii((unsigned char)*s) && isprint((unsigned char)*s)
490 		    && *s != '\\') {
491 			(void)fputc(*s, outfile);
492 			count++;
493 		} else if (*s == '\n') {
494 			(void)fputc('$', outfile);
495 			(void)fputc('\n', outfile);
496 			count = 0;
497 		} else {
498 			escapes = "\\\a\b\f\r\t\v";
499 			(void)fputc('\\', outfile);
500 			if ((p = strchr(escapes, *s))) {
501 				(void)fputc("\\abfrtv"[p - escapes], outfile);
502 				count += 2;
503 			} else {
504 				(void)fprintf(outfile, "%03o", *(u_char *)s);
505 				count += 4;
506 			}
507 		}
508 	}
509 	(void)fputc('$', outfile);
510 	(void)fputc('\n', outfile);
511 	if (ferror(outfile))
512 		error(FATAL, "%s: %s", outfname, strerror(errno ? errno : EIO));
513 }
514 
515 static inline int
516 regexec_e(regex_t *preg, const char *string, int eflags,
517     int nomatch, size_t start, size_t stop)
518 {
519 	int eval;
520 
521 	if (preg == NULL) {
522 		if (defpreg == NULL)
523 			error(FATAL, "first RE may not be empty");
524 	} else
525 		defpreg = preg;
526 
527 	/* Set anchors */
528 	match[0].rm_so = start;
529 	match[0].rm_eo = stop;
530 
531 	eval = regexec(defpreg, string,
532 	    nomatch ? 0 : maxnsub + 1, match, eflags | REG_STARTEND);
533 	switch (eval) {
534 	case 0:
535 		return (1);
536 	case REG_NOMATCH:
537 		return (0);
538 	}
539 	error(FATAL, "RE error: %s", strregerror(eval, defpreg));
540 }
541 
542 /*
543  * regsub - perform substitutions after a regexp match
544  * Based on a routine by Henry Spencer
545  */
546 static void
547 regsub(SPACE *sp, char *string, char *src)
548 {
549 	int len, no;
550 	char c, *dst;
551 
552 #define	NEEDSP(reqlen)							\
553 	if (sp->len + (reqlen) + 1 >= sp->blen) {			\
554 		size_t newlen = sp->blen + (reqlen) + 1024;		\
555 		sp->space = sp->back = xrealloc(sp->back, newlen);	\
556 		sp->blen = newlen;					\
557 		dst = sp->space + sp->len;				\
558 	}
559 
560 	dst = sp->space + sp->len;
561 	while ((c = *src++) != '\0') {
562 		if (c == '&')
563 			no = 0;
564 		else if (c == '\\' && isdigit((unsigned char)*src))
565 			no = *src++ - '0';
566 		else
567 			no = -1;
568 		if (no < 0) {		/* Ordinary character. */
569 			if (c == '\\' && (*src == '\\' || *src == '&'))
570 				c = *src++;
571 			NEEDSP(1);
572 			*dst++ = c;
573 			++sp->len;
574 		} else if (match[no].rm_so != -1 && match[no].rm_eo != -1) {
575 			len = match[no].rm_eo - match[no].rm_so;
576 			NEEDSP(len);
577 			memmove(dst, string + match[no].rm_so, len);
578 			dst += len;
579 			sp->len += len;
580 		}
581 	}
582 	NEEDSP(1);
583 	*dst = '\0';
584 }
585 
586 /*
587  * aspace --
588  *	Append the source space to the destination space, allocating new
589  *	space as necessary.
590  */
591 void
592 cspace(SPACE *sp, const char *p, size_t len, enum e_spflag spflag)
593 {
594 	size_t tlen;
595 
596 	/* Make sure SPACE has enough memory and ramp up quickly. */
597 	tlen = sp->len + len + 1;
598 	if (tlen > sp->blen) {
599 		size_t newlen = tlen + 1024;
600 		sp->space = sp->back = xrealloc(sp->back, newlen);
601 		sp->blen = newlen;
602 	}
603 
604 	if (spflag == REPLACE)
605 		sp->len = 0;
606 
607 	memmove(sp->space + sp->len, p, len);
608 
609 	sp->space[sp->len += len] = '\0';
610 }
611 
612 /*
613  * Close all cached opened files and report any errors
614  */
615 void
616 cfclose(struct s_command *cp, struct s_command *end)
617 {
618 
619 	for (; cp != end; cp = cp->next)
620 		switch (cp->code) {
621 		case 's':
622 			if (cp->u.s->wfd != -1 && close(cp->u.s->wfd))
623 				error(FATAL,
624 				    "%s: %s", cp->u.s->wfile, strerror(errno));
625 			cp->u.s->wfd = -1;
626 			break;
627 		case 'w':
628 			if (cp->u.fd != -1 && close(cp->u.fd))
629 				error(FATAL, "%s: %s", cp->t, strerror(errno));
630 			cp->u.fd = -1;
631 			break;
632 		case '{':
633 			cfclose(cp->u.c, cp->next);
634 			break;
635 		}
636 }
637