xref: /openbsd-src/usr.bin/sed/process.c (revision fb8aa7497fded39583f40e800732f9c046411717)
1 /*	$OpenBSD: process.c,v 1.28 2016/05/30 18:10:29 martijn Exp $	*/
2 
3 /*-
4  * Copyright (c) 1992 Diomidis Spinellis.
5  * Copyright (c) 1992, 1993
6  *	The Regents of the University of California.  All rights reserved.
7  *
8  * This code is derived from software contributed to Berkeley by
9  * Diomidis Spinellis of Imperial College, University of London.
10  *
11  * Redistribution and use in source and binary forms, with or without
12  * modification, are permitted provided that the following conditions
13  * are met:
14  * 1. Redistributions of source code must retain the above copyright
15  *    notice, this list of conditions and the following disclaimer.
16  * 2. Redistributions in binary form must reproduce the above copyright
17  *    notice, this list of conditions and the following disclaimer in the
18  *    documentation and/or other materials provided with the distribution.
19  * 3. Neither the name of the University nor the names of its contributors
20  *    may be used to endorse or promote products derived from this software
21  *    without specific prior written permission.
22  *
23  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
24  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
25  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
26  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
27  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
28  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
29  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
30  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
31  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
32  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
33  * SUCH DAMAGE.
34  */
35 
36 #include <sys/types.h>
37 #include <sys/stat.h>
38 #include <sys/uio.h>
39 
40 #include <ctype.h>
41 #include <errno.h>
42 #include <fcntl.h>
43 #include <limits.h>
44 #include <regex.h>
45 #include <stdio.h>
46 #include <stdlib.h>
47 #include <string.h>
48 #include <unistd.h>
49 
50 #include "defs.h"
51 #include "extern.h"
52 
/*
 * Working buffers used by the commands below: HS is the hold space, PS the
 * pattern space and SS the scratch space in which substitute() assembles
 * its result before swapping it with PS.
 */
static SPACE HS, PS, SS;
/* Shorthand for the fields of the pattern space (PS) and hold space (HS). */
#define	pd		PS.deleted
#define	ps		PS.space
#define	psl		PS.len
#define	psanl		PS.append_newline
#define	hs		HS.space
#define	hsl		HS.len

static inline int	 applies(struct s_command *);
static void		 flush_appends(void);
static void		 lputs(char *);
static inline int	 regexec_e(regex_t *, const char *, int, int, size_t,
			     size_t);
static void		 regsub(SPACE *, char *, char *);
static int		 substitute(struct s_command *);

struct s_appends *appends;	/* Array of pointers to strings to append. */
static int appendx;		/* Index into appends array. */
size_t appendnum;		/* Size of appends array. */

static int lastaddr;		/* Set by applies if last address of a range. */
static int sdone;		/* If any substitutes since last line input. */
/*
 * Most recently used compiled RE.  regexec_e() updates it on every call
 * with a non-NULL RE and falls back to it when handed a NULL (empty) RE.
 */
static regex_t *defpreg;
/*
 * regexec_e() asks regexec() to fill maxnsub + 1 entries of match[].
 * Both are set up outside this file section.
 */
size_t maxnsub;
regmatch_t *match;

/*
 * Write the pattern space to outfile, followed by a newline when the
 * pattern space is flagged as needing one (psanl).
 */
#define OUT() do {\
	fwrite(ps, 1, psl, outfile);\
	if (psanl) fputc('\n', outfile);\
} while (0)
84 
85 void
86 process(void)
87 {
88 	struct s_command *cp;
89 	SPACE tspace;
90 	size_t len, oldpsl;
91 	char *p;
92 	int oldpsanl;
93 
94 	for (linenum = 0; mf_fgets(&PS, REPLACE);) {
95 		pd = 0;
96 top:
97 		cp = prog;
98 redirect:
99 		while (cp != NULL) {
100 			if (!applies(cp)) {
101 				cp = cp->next;
102 				continue;
103 			}
104 			switch (cp->code) {
105 			case '{':
106 				cp = cp->u.c;
107 				goto redirect;
108 			case 'a':
109 				if (appendx >= appendnum) {
110 					appends = xreallocarray(appends,
111 					    appendnum,
112 					    2 * sizeof(struct s_appends));
113 					appendnum *= 2;
114 				}
115 				appends[appendx].type = AP_STRING;
116 				appends[appendx].s = cp->t;
117 				appends[appendx].len = strlen(cp->t);
118 				appendx++;
119 				break;
120 			case 'b':
121 				cp = cp->u.c;
122 				goto redirect;
123 			case 'c':
124 				pd = 1;
125 				psl = 0;
126 				if (cp->a2 == NULL || lastaddr || lastline())
127 					(void)fprintf(outfile, "%s", cp->t);
128 				break;
129 			case 'd':
130 				pd = 1;
131 				goto new;
132 			case 'D':
133 				if (pd)
134 					goto new;
135 				if (psl == 0 ||
136 				    (p = memchr(ps, '\n', psl)) == NULL) {
137 					pd = 1;
138 					goto new;
139 				} else {
140 					psl -= (p + 1) - ps;
141 					memmove(ps, p + 1, psl);
142 					goto top;
143 				}
144 			case 'g':
145 				cspace(&PS, hs, hsl, REPLACE);
146 				break;
147 			case 'G':
148 				cspace(&PS, "\n", 1, 0);
149 				cspace(&PS, hs, hsl, 0);
150 				break;
151 			case 'h':
152 				cspace(&HS, ps, psl, REPLACE);
153 				break;
154 			case 'H':
155 				cspace(&HS, "\n", 1, 0);
156 				cspace(&HS, ps, psl, 0);
157 				break;
158 			case 'i':
159 				(void)fprintf(outfile, "%s", cp->t);
160 				break;
161 			case 'l':
162 				lputs(ps);
163 				break;
164 			case 'n':
165 				if (!nflag && !pd)
166 					OUT();
167 				flush_appends();
168 				if (!mf_fgets(&PS, REPLACE))
169 					exit(0);
170 				pd = 0;
171 				break;
172 			case 'N':
173 				flush_appends();
174 				cspace(&PS, "\n", 1, 0);
175 				if (!mf_fgets(&PS, 0))
176 					exit(0);
177 				break;
178 			case 'p':
179 				if (pd)
180 					break;
181 				OUT();
182 				break;
183 			case 'P':
184 				if (pd)
185 					break;
186 				if ((p = memchr(ps, '\n', psl)) != NULL) {
187 					oldpsl = psl;
188 					oldpsanl = psanl;
189 					psl = p - ps;
190 					psanl = 1;
191 					OUT();
192 					psl = oldpsl;
193 				} else {
194 					OUT();
195 				}
196 				break;
197 			case 'q':
198 				if (!nflag && !pd)
199 					OUT();
200 				flush_appends();
201 				exit(0);
202 			case 'r':
203 				if (appendx >= appendnum) {
204 					appends = xreallocarray(appends,
205 					    appendnum,
206 					    2 * sizeof(struct s_appends));
207 					appendnum *= 2;
208 				}
209 				appends[appendx].type = AP_FILE;
210 				appends[appendx].s = cp->t;
211 				appends[appendx].len = strlen(cp->t);
212 				appendx++;
213 				break;
214 			case 's':
215 				sdone |= substitute(cp);
216 				break;
217 			case 't':
218 				if (sdone) {
219 					sdone = 0;
220 					cp = cp->u.c;
221 					goto redirect;
222 				}
223 				break;
224 			case 'w':
225 				if (pd)
226 					break;
227 				if (cp->u.fd == -1 && (cp->u.fd = open(cp->t,
228 				    O_WRONLY|O_APPEND|O_CREAT|O_TRUNC,
229 				    DEFFILEMODE)) == -1)
230 					error(FATAL, "%s: %s",
231 					    cp->t, strerror(errno));
232 				if (write(cp->u.fd, ps, psl) != psl ||
233 				    write(cp->u.fd, "\n", 1) != 1)
234 					error(FATAL, "%s: %s",
235 					    cp->t, strerror(errno));
236 				break;
237 			case 'x':
238 				if (hs == NULL)
239 					cspace(&HS, "", 0, REPLACE);
240 				tspace = PS;
241 				PS = HS;
242 				psanl = tspace.append_newline;
243 				HS = tspace;
244 				break;
245 			case 'y':
246 				if (pd || psl == 0)
247 					break;
248 				for (p = ps, len = psl; len--; ++p)
249 					*p = cp->u.y[(unsigned char)*p];
250 				break;
251 			case ':':
252 			case '}':
253 				break;
254 			case '=':
255 				(void)fprintf(outfile, "%lu\n", linenum);
256 			}
257 			cp = cp->next;
258 		} /* for all cp */
259 
260 new:		if (!nflag && !pd)
261 			OUT();
262 		flush_appends();
263 	} /* for all lines */
264 }
265 
/*
 * TRUE if the address passed matches the current program state
 * (lastline, linenumber, ps).  The expansion is wrapped in parentheses so
 * that the macro can be embedded in a larger expression without operator
 * precedence surprises.
 */
#define	MATCH(a)						\
	((a)->type == AT_RE ? regexec_e((a)->u.r, ps, 0, 1, 0, psl) :	\
	    (a)->type == AT_LINE ? linenum == (a)->u.l : lastline())
273 
274 /*
275  * Return TRUE if the command applies to the current line.  Sets the inrange
276  * flag to process ranges.  Interprets the non-select (``!'') flag.
277  */
278 static inline int
279 applies(struct s_command *cp)
280 {
281 	int r;
282 
283 	lastaddr = 0;
284 	if (cp->a1 == NULL && cp->a2 == NULL)
285 		r = 1;
286 	else if (cp->a2)
287 		if (cp->inrange) {
288 			if (MATCH(cp->a2)) {
289 				cp->inrange = 0;
290 				lastaddr = 1;
291 			}
292 			r = 1;
293 		} else if (MATCH(cp->a1)) {
294 			/*
295 			 * If the second address is a number less than or
296 			 * equal to the line number first selected, only
297 			 * one line shall be selected.
298 			 *	-- POSIX 1003.2
299 			 */
300 			if (cp->a2->type == AT_LINE &&
301 			    linenum >= cp->a2->u.l)
302 				lastaddr = 1;
303 			else
304 				cp->inrange = 1;
305 			r = 1;
306 		} else
307 			r = 0;
308 	else
309 		r = MATCH(cp->a1);
310 	return (cp->nonsel ? !r : r);
311 }
312 
313 /*
314  * Reset all inrange markers.
315  */
316 void
317 resetranges(void)
318 {
319 	struct s_command *cp;
320 
321 	for (cp = prog; cp; cp = cp->code == '{' ? cp->u.c : cp->next)
322 		if (cp->a2)
323 			cp->inrange = 0;
324 }
325 
/*
 * substitute --
 *	Do substitutions in the pattern space.  Currently, we build a
 *	copy of the new pattern space in the substitute space structure
 *	and then swap them.
 *
 *	cp is the 's' command being executed; its u.s member carries the
 *	compiled RE, the replacement text, the occurrence count and the
 *	'p'/'w' flags.  Returns 1 if the pattern space was rewritten and
 *	0 if the RE did not match or the requested occurrence was not found.
 */
static int
substitute(struct s_command *cp)
{
	SPACE tspace;
	regex_t *re;
	regoff_t slen;		/* Bytes of ps left after the last match. */
	int n, lastempty;
	size_t le = 0;		/* Offset in ps just past the last match. */
	char *s;		/* Start of the not yet copied part of ps. */

	s = ps;
	re = cp->u.s->re;
	if (re == NULL) {
		/*
		 * Empty RE: regexec_e() will reuse defpreg, so check that it
		 * defines every back-reference the replacement uses.
		 */
		if (defpreg != NULL && cp->u.s->maxbref > defpreg->re_nsub) {
			linenum = cp->u.s->linenum;
			error(COMPILE, "\\%d not defined in the RE",
			    cp->u.s->maxbref);
		}
	}
	if (!regexec_e(re, ps, 0, 0, 0, psl))
		return (0);

	SS.len = 0;				/* Clean substitute space. */
	slen = psl;
	/*
	 * n counts down to the wanted occurrence: matches are replaced while
	 * n <= 1, and n is set to -1 once the single wanted match (n == 1)
	 * has been replaced so that the loop stops.  A value of 0 never
	 * changes, so every match is replaced.
	 */
	n = cp->u.s->n;
	lastempty = 1;

	do {
		/* Copy the leading retained string. */
		if (n <= 1 && (match[0].rm_so > le))
			cspace(&SS, s, match[0].rm_so - le, APPEND);

		/* Skip zero-length matches right after other matches. */
		if (lastempty || (match[0].rm_so - le) ||
		    match[0].rm_so != match[0].rm_eo) {
			if (n <= 1) {
				/* Want this match: append replacement. */
				regsub(&SS, ps, cp->u.s->new);
				if (n == 1)
					n = -1;
			} else {
				/* Want a later match: append original. */
				if (match[0].rm_eo - le)
					cspace(&SS, s, match[0].rm_eo - le,
					    APPEND);
				n--;
			}
		}

		/* Move past this match. */
		s = ps + match[0].rm_eo;
		slen = psl - match[0].rm_eo;
		le = match[0].rm_eo;

		/*
		 * After a zero-length match, advance one byte,
		 * and at the end of the line, terminate.
		 */
		if (match[0].rm_so == match[0].rm_eo) {
			if (*s == '\0' || *s == '\n')
				slen = -1;
			else
				slen--;
			if (*s != '\0') {
			 	cspace(&SS, s++, 1, APPEND);
				le++;
			}
			lastempty = 1;
		} else
			lastempty = 0;

		/*
		 * Search again from offset le; REG_NOTBOL keeps '^' from
		 * matching at the restart position.
		 */
	} while (n >= 0 && slen >= 0 &&
	    regexec_e(re, ps, REG_NOTBOL, 0, le, psl));

	/* Did not find the requested number of matches. */
	if (n > 1)
		return (0);

	/* Copy the trailing retained string. */
	if (slen > 0)
		cspace(&SS, s, slen, APPEND);

	/*
	 * Swap the substitute space and the pattern space, and make sure
	 * that any leftover pointers into stdio memory get lost.
	 * The newline flag of the old pattern space is carried over.
	 */
	tspace = PS;
	PS = SS;
	psanl = tspace.append_newline;
	SS = tspace;
	SS.space = SS.back;

	/* Handle the 'p' flag. */
	if (cp->u.s->p)
		OUT();

	/* Handle the 'w' flag. */
	if (cp->u.s->wfile && !pd) {
		/* The output file is opened lazily on first use. */
		if (cp->u.s->wfd == -1 && (cp->u.s->wfd = open(cp->u.s->wfile,
		    O_WRONLY|O_APPEND|O_CREAT|O_TRUNC, DEFFILEMODE)) == -1)
			error(FATAL, "%s: %s", cp->u.s->wfile, strerror(errno));
		if (write(cp->u.s->wfd, ps, psl) != psl ||
		    write(cp->u.s->wfd, "\n", 1) != 1)
			error(FATAL, "%s: %s", cp->u.s->wfile, strerror(errno));
	}
	return (1);
}
439 
440 /*
441  * Flush append requests.  Always called before reading a line,
442  * therefore it also resets the substitution done (sdone) flag.
443  */
444 static void
445 flush_appends(void)
446 {
447 	FILE *f;
448 	int count, i;
449 	char buf[8 * 1024];
450 
451 	for (i = 0; i < appendx; i++)
452 		switch (appends[i].type) {
453 		case AP_STRING:
454 			fwrite(appends[i].s, sizeof(char), appends[i].len,
455 			    outfile);
456 			break;
457 		case AP_FILE:
458 			/*
459 			 * Read files probably shouldn't be cached.  Since
460 			 * it's not an error to read a non-existent file,
461 			 * it's possible that another program is interacting
462 			 * with the sed script through the file system.  It
463 			 * would be truly bizarre, but possible.  It's probably
464 			 * not that big a performance win, anyhow.
465 			 */
466 			if ((f = fopen(appends[i].s, "r")) == NULL)
467 				break;
468 			while ((count = fread(buf, sizeof(char), sizeof(buf), f)))
469 				(void)fwrite(buf, sizeof(char), count, outfile);
470 			(void)fclose(f);
471 			break;
472 		}
473 	if (ferror(outfile))
474 		error(FATAL, "%s: %s", outfname, strerror(errno ? errno : EIO));
475 	appendx = sdone = 0;
476 }
477 
478 static void
479 lputs(char *s)
480 {
481 	int count;
482 	extern int termwidth;
483 	const char *escapes;
484 	char *p;
485 
486 	for (count = 0; *s; ++s) {
487 		if (count >= termwidth) {
488 			(void)fprintf(outfile, "\\\n");
489 			count = 0;
490 		}
491 		if (isascii((unsigned char)*s) && isprint((unsigned char)*s)
492 		    && *s != '\\') {
493 			(void)fputc(*s, outfile);
494 			count++;
495 		} else if (*s == '\n') {
496 			(void)fputc('$', outfile);
497 			(void)fputc('\n', outfile);
498 			count = 0;
499 		} else {
500 			escapes = "\\\a\b\f\r\t\v";
501 			(void)fputc('\\', outfile);
502 			if ((p = strchr(escapes, *s))) {
503 				(void)fputc("\\abfrtv"[p - escapes], outfile);
504 				count += 2;
505 			} else {
506 				(void)fprintf(outfile, "%03o", *(u_char *)s);
507 				count += 4;
508 			}
509 		}
510 	}
511 	(void)fputc('$', outfile);
512 	(void)fputc('\n', outfile);
513 	if (ferror(outfile))
514 		error(FATAL, "%s: %s", outfname, strerror(errno ? errno : EIO));
515 }
516 
517 static inline int
518 regexec_e(regex_t *preg, const char *string, int eflags,
519     int nomatch, size_t start, size_t stop)
520 {
521 	int eval;
522 
523 	if (preg == NULL) {
524 		if (defpreg == NULL)
525 			error(FATAL, "first RE may not be empty");
526 	} else
527 		defpreg = preg;
528 
529 	/* Set anchors */
530 	match[0].rm_so = start;
531 	match[0].rm_eo = stop;
532 
533 	eval = regexec(defpreg, string,
534 	    nomatch ? 0 : maxnsub + 1, match, eflags | REG_STARTEND);
535 	switch (eval) {
536 	case 0:
537 		return (1);
538 	case REG_NOMATCH:
539 		return (0);
540 	}
541 	error(FATAL, "RE error: %s", strregerror(eval, defpreg));
542 	/* NOTREACHED */
543 }
544 
545 /*
546  * regsub - perform substitutions after a regexp match
547  * Based on a routine by Henry Spencer
548  */
549 static void
550 regsub(SPACE *sp, char *string, char *src)
551 {
552 	int len, no;
553 	char c, *dst;
554 
555 #define	NEEDSP(reqlen)							\
556 	if (sp->len + (reqlen) + 1 >= sp->blen) {			\
557 		size_t newlen = sp->blen + (reqlen) + 1024;		\
558 		sp->space = sp->back = xrealloc(sp->back, newlen);	\
559 		sp->blen = newlen;					\
560 		dst = sp->space + sp->len;				\
561 	}
562 
563 	dst = sp->space + sp->len;
564 	while ((c = *src++) != '\0') {
565 		if (c == '&')
566 			no = 0;
567 		else if (c == '\\' && isdigit((unsigned char)*src))
568 			no = *src++ - '0';
569 		else
570 			no = -1;
571 		if (no < 0) {		/* Ordinary character. */
572  			if (c == '\\' && (*src == '\\' || *src == '&'))
573  				c = *src++;
574 			NEEDSP(1);
575  			*dst++ = c;
576 			++sp->len;
577  		} else if (match[no].rm_so != -1 && match[no].rm_eo != -1) {
578 			len = match[no].rm_eo - match[no].rm_so;
579 			NEEDSP(len);
580 			memmove(dst, string + match[no].rm_so, len);
581 			dst += len;
582 			sp->len += len;
583 		}
584 	}
585 	NEEDSP(1);
586 	*dst = '\0';
587 }
588 
589 /*
590  * aspace --
591  *	Append the source space to the destination space, allocating new
592  *	space as necessary.
593  */
594 void
595 cspace(SPACE *sp, const char *p, size_t len, enum e_spflag spflag)
596 {
597 	size_t tlen;
598 
599 	/* Make sure SPACE has enough memory and ramp up quickly. */
600 	tlen = sp->len + len + 1;
601 	if (tlen > sp->blen) {
602 		size_t newlen = tlen + 1024;
603 		sp->space = sp->back = xrealloc(sp->back, newlen);
604 		sp->blen = newlen;
605 	}
606 
607 	if (spflag == REPLACE)
608 		sp->len = 0;
609 
610 	memmove(sp->space + sp->len, p, len);
611 
612 	sp->space[sp->len += len] = '\0';
613 }
614 
615 /*
616  * Close all cached opened files and report any errors
617  */
618 void
619 cfclose(struct s_command *cp, struct s_command *end)
620 {
621 
622 	for (; cp != end; cp = cp->next)
623 		switch (cp->code) {
624 		case 's':
625 			if (cp->u.s->wfd != -1 && close(cp->u.s->wfd))
626 				error(FATAL,
627 				    "%s: %s", cp->u.s->wfile, strerror(errno));
628 			cp->u.s->wfd = -1;
629 			break;
630 		case 'w':
631 			if (cp->u.fd != -1 && close(cp->u.fd))
632 				error(FATAL, "%s: %s", cp->t, strerror(errno));
633 			cp->u.fd = -1;
634 			break;
635 		case '{':
636 			cfclose(cp->u.c, cp->next);
637 			break;
638 		}
639 }
640