1 /*-
2 * Copyright (c) 1992 Diomidis Spinellis.
3 * Copyright (c) 1992, 1993, 1994
4 * The Regents of the University of California. All rights reserved.
5 *
6 * This code is derived from software contributed to Berkeley by
7 * Diomidis Spinellis of Imperial College, University of London.
8 *
9 * %sccs.include.redist.c%
10 */
11
12 #ifndef lint
13 static char sccsid[] = "@(#)process.c 8.6 (Berkeley) 04/20/94";
14 #endif /* not lint */
15
16 #include <sys/types.h>
17 #include <sys/stat.h>
18 #include <sys/ioctl.h>
19 #include <sys/uio.h>
20
21 #include <ctype.h>
22 #include <errno.h>
23 #include <fcntl.h>
24 #include <limits.h>
25 #include <regex.h>
26 #include <stdio.h>
27 #include <stdlib.h>
28 #include <string.h>
29 #include <unistd.h>
30
31 #include "defs.h"
32 #include "extern.h"
33
34 static SPACE HS, PS, SS;
35 #define pd PS.deleted
36 #define ps PS.space
37 #define psl PS.len
38 #define hs HS.space
39 #define hsl HS.len
40
41 static inline int applies __P((struct s_command *));
42 static void flush_appends __P((void));
43 static void lputs __P((char *));
44 static inline int regexec_e __P((regex_t *, const char *, int, int, size_t));
45 static void regsub __P((SPACE *, char *, char *));
46 static int substitute __P((struct s_command *));
47
48 struct s_appends *appends; /* Array of pointers to strings to append. */
49 static int appendx; /* Index into appends array. */
50 int appendnum; /* Size of appends array. */
51
52 static int lastaddr; /* Set by applies if last address of a range. */
53 static int sdone; /* If any substitutes since last line input. */
54 /* Iov structure for 'w' commands. */
55 static regex_t *defpreg;
56 size_t maxnsub;
57 regmatch_t *match;
58
59 #define OUT(s) { fwrite(s, sizeof(u_char), psl, stdout); }
60
61 void
process()62 process()
63 {
64 struct s_command *cp;
65 SPACE tspace;
66 size_t len;
67 char oldc, *p;
68
69 for (linenum = 0; mf_fgets(&PS, REPLACE);) {
70 pd = 0;
71 cp = prog;
72 redirect:
73 while (cp != NULL) {
74 if (!applies(cp)) {
75 cp = cp->next;
76 continue;
77 }
78 switch (cp->code) {
79 case '{':
80 cp = cp->u.c;
81 goto redirect;
82 case 'a':
83 if (appendx >= appendnum)
84 appends = xrealloc(appends,
85 sizeof(struct s_appends) *
86 (appendnum *= 2));
87 appends[appendx].type = AP_STRING;
88 appends[appendx].s = cp->t;
89 appends[appendx].len = strlen(cp->t);
90 appendx++;
91 break;
92 case 'b':
93 cp = cp->u.c;
94 goto redirect;
95 case 'c':
96 pd = 1;
97 psl = 0;
98 if (cp->a2 == NULL || lastaddr)
99 (void)printf("%s", cp->t);
100 break;
101 case 'd':
102 pd = 1;
103 goto new;
104 case 'D':
105 if (pd)
106 goto new;
107 if ((p = memchr(ps, '\n', psl)) == NULL)
108 pd = 1;
109 else {
110 psl -= (p - ps) + 1;
111 memmove(ps, p + 1, psl);
112 }
113 goto new;
114 case 'g':
115 cspace(&PS, hs, hsl, REPLACE);
116 break;
117 case 'G':
118 cspace(&PS, hs, hsl, 0);
119 break;
120 case 'h':
121 cspace(&HS, ps, psl, REPLACE);
122 break;
123 case 'H':
124 cspace(&HS, ps, psl, 0);
125 break;
126 case 'i':
127 (void)printf("%s", cp->t);
128 break;
129 case 'l':
130 lputs(ps);
131 break;
132 case 'n':
133 if (!nflag && !pd)
134 OUT(ps)
135 flush_appends();
136 if (!mf_fgets(&PS, REPLACE))
137 exit(0);
138 pd = 0;
139 break;
140 case 'N':
141 flush_appends();
142 if (!mf_fgets(&PS, 0)) {
143 if (!nflag && !pd)
144 OUT(ps)
145 exit(0);
146 }
147 break;
148 case 'p':
149 if (pd)
150 break;
151 OUT(ps)
152 break;
153 case 'P':
154 if (pd)
155 break;
156 if ((p = memchr(ps, '\n', psl)) != NULL) {
157 oldc = *p;
158 *p = '\0';
159 }
160 OUT(ps)
161 if (p != NULL)
162 *p = oldc;
163 break;
164 case 'q':
165 if (!nflag && !pd)
166 OUT(ps)
167 flush_appends();
168 exit(0);
169 case 'r':
170 if (appendx >= appendnum)
171 appends = xrealloc(appends,
172 sizeof(struct s_appends) *
173 (appendnum *= 2));
174 appends[appendx].type = AP_FILE;
175 appends[appendx].s = cp->t;
176 appends[appendx].len = strlen(cp->t);
177 appendx++;
178 break;
179 case 's':
180 sdone |= substitute(cp);
181 break;
182 case 't':
183 if (sdone) {
184 sdone = 0;
185 cp = cp->u.c;
186 goto redirect;
187 }
188 break;
189 case 'w':
190 if (pd)
191 break;
192 if (cp->u.fd == -1 && (cp->u.fd = open(cp->t,
193 O_WRONLY|O_APPEND|O_CREAT|O_TRUNC,
194 DEFFILEMODE)) == -1)
195 err(FATAL, "%s: %s\n",
196 cp->t, strerror(errno));
197 if (write(cp->u.fd, ps, psl) != psl)
198 err(FATAL, "%s: %s\n",
199 cp->t, strerror(errno));
200 break;
201 case 'x':
202 if (hs == NULL)
203 cspace(&HS, "", 0, REPLACE);
204 tspace = PS;
205 PS = HS;
206 HS = tspace;
207 break;
208 case 'y':
209 if (pd)
210 break;
211 for (p = ps, len = psl; --len; ++p)
212 *p = cp->u.y[*p];
213 break;
214 case ':':
215 case '}':
216 break;
217 case '=':
218 (void)printf("%lu\n", linenum);
219 }
220 cp = cp->next;
221 } /* for all cp */
222
223 new: if (!nflag && !pd)
224 OUT(ps)
225 flush_appends();
226 } /* for all lines */
227 }
228
/*
 * TRUE if the address passed matches the current program state
 * (lastline, linenum, ps): a regular-expression address is matched
 * against the pattern space, a line-number address is compared with
 * linenum, and any other address type yields lastline.
 *
 * The whole expansion is parenthesized so that the macro can be used as
 * an operand (e.g. !MATCH(a)) without the conditional operator capturing
 * the surrounding expression.
 */
#define	MATCH(a)							\
	((a)->type == AT_RE ? regexec_e((a)->u.r, ps, 0, 1, psl) :	\
	    (a)->type == AT_LINE ? linenum == (a)->u.l : lastline)
236
237 /*
238 * Return TRUE if the command applies to the current line. Sets the inrange
239 * flag to process ranges. Interprets the non-select (``!'') flag.
240 */
241 static inline int
applies(cp)242 applies(cp)
243 struct s_command *cp;
244 {
245 int r;
246
247 lastaddr = 0;
248 if (cp->a1 == NULL && cp->a2 == NULL)
249 r = 1;
250 else if (cp->a2)
251 if (cp->inrange) {
252 if (MATCH(cp->a2)) {
253 cp->inrange = 0;
254 lastaddr = 1;
255 }
256 r = 1;
257 } else if (MATCH(cp->a1)) {
258 /*
259 * If the second address is a number less than or
260 * equal to the line number first selected, only
261 * one line shall be selected.
262 * -- POSIX 1003.2
263 */
264 if (cp->a2->type == AT_LINE &&
265 linenum >= cp->a2->u.l)
266 lastaddr = 1;
267 else
268 cp->inrange = 1;
269 r = 1;
270 } else
271 r = 0;
272 else
273 r = MATCH(cp->a1);
274 return (cp->nonsel ? ! r : r);
275 }
276
277 /*
278 * substitute --
279 * Do substitutions in the pattern space. Currently, we build a
280 * copy of the new pattern space in the substitute space structure
281 * and then swap them.
282 */
283 static int
substitute(cp)284 substitute(cp)
285 struct s_command *cp;
286 {
287 SPACE tspace;
288 regex_t *re;
289 size_t re_off, slen;
290 int lastempty, n;
291 char *s;
292
293 s = ps;
294 re = cp->u.s->re;
295 if (re == NULL) {
296 if (defpreg != NULL && cp->u.s->maxbref > defpreg->re_nsub) {
297 linenum = cp->u.s->linenum;
298 err(COMPILE, "\\%d not defined in the RE",
299 cp->u.s->maxbref);
300 }
301 }
302 if (!regexec_e(re, s, 0, 0, psl))
303 return (0);
304
305 SS.len = 0; /* Clean substitute space. */
306 slen = psl;
307 n = cp->u.s->n;
308 lastempty = 1;
309
310 switch (n) {
311 case 0: /* Global */
312 do {
313 if (lastempty || match[0].rm_so != match[0].rm_eo) {
314 /* Locate start of replaced string. */
315 re_off = match[0].rm_so;
316 /* Copy leading retained string. */
317 cspace(&SS, s, re_off, APPEND);
318 /* Add in regular expression. */
319 regsub(&SS, s, cp->u.s->new);
320 }
321
322 /* Move past this match. */
323 if (match[0].rm_so != match[0].rm_eo) {
324 s += match[0].rm_eo;
325 slen -= match[0].rm_eo;
326 lastempty = 0;
327 } else {
328 if (match[0].rm_so == 0)
329 cspace(&SS,
330 s, match[0].rm_so + 1, APPEND);
331 else
332 cspace(&SS,
333 s + match[0].rm_so, 1, APPEND);
334 s += match[0].rm_so + 1;
335 slen -= match[0].rm_so + 1;
336 lastempty = 1;
337 }
338 } while (slen > 0 && regexec_e(re, s, REG_NOTBOL, 0, slen));
339 /* Copy trailing retained string. */
340 if (slen > 0)
341 cspace(&SS, s, slen, APPEND);
342 break;
343 default: /* Nth occurrence */
344 while (--n) {
345 s += match[0].rm_eo;
346 slen -= match[0].rm_eo;
347 if (!regexec_e(re, s, REG_NOTBOL, 0, slen))
348 return (0);
349 }
350 /* FALLTHROUGH */
351 case 1: /* 1st occurrence */
352 /* Locate start of replaced string. */
353 re_off = match[0].rm_so + (s - ps);
354 /* Copy leading retained string. */
355 cspace(&SS, ps, re_off, APPEND);
356 /* Add in regular expression. */
357 regsub(&SS, s, cp->u.s->new);
358 /* Copy trailing retained string. */
359 s += match[0].rm_eo;
360 slen -= match[0].rm_eo;
361 cspace(&SS, s, slen, APPEND);
362 break;
363 }
364
365 /*
366 * Swap the substitute space and the pattern space, and make sure
367 * that any leftover pointers into stdio memory get lost.
368 */
369 tspace = PS;
370 PS = SS;
371 SS = tspace;
372 SS.space = SS.back;
373
374 /* Handle the 'p' flag. */
375 if (cp->u.s->p)
376 OUT(ps)
377
378 /* Handle the 'w' flag. */
379 if (cp->u.s->wfile && !pd) {
380 if (cp->u.s->wfd == -1 && (cp->u.s->wfd = open(cp->u.s->wfile,
381 O_WRONLY|O_APPEND|O_CREAT|O_TRUNC, DEFFILEMODE)) == -1)
382 err(FATAL, "%s: %s\n", cp->u.s->wfile, strerror(errno));
383 if (write(cp->u.s->wfd, ps, psl) != psl)
384 err(FATAL, "%s: %s\n", cp->u.s->wfile, strerror(errno));
385 }
386 return (1);
387 }
388
389 /*
390 * Flush append requests. Always called before reading a line,
391 * therefore it also resets the substitution done (sdone) flag.
392 */
393 static void
flush_appends()394 flush_appends()
395 {
396 FILE *f;
397 int count, i;
398 char buf[8 * 1024];
399
400 for (i = 0; i < appendx; i++)
401 switch (appends[i].type) {
402 case AP_STRING:
403 fwrite(appends[i].s, sizeof(char), appends[i].len,
404 stdout);
405 break;
406 case AP_FILE:
407 /*
408 * Read files probably shouldn't be cached. Since
409 * it's not an error to read a non-existent file,
410 * it's possible that another program is interacting
411 * with the sed script through the file system. It
412 * would be truly bizarre, but possible. It's probably
413 * not that big a performance win, anyhow.
414 */
415 if ((f = fopen(appends[i].s, "r")) == NULL)
416 break;
417 while (count = fread(buf, sizeof(char), sizeof(buf), f))
418 (void)fwrite(buf, sizeof(char), count, stdout);
419 (void)fclose(f);
420 break;
421 }
422 if (ferror(stdout))
423 err(FATAL, "stdout: %s", strerror(errno ? errno : EIO));
424 appendx = sdone = 0;
425 }
426
427 static void
lputs(s)428 lputs(s)
429 register char *s;
430 {
431 register int count;
432 register char *escapes, *p;
433 struct winsize win;
434 static int termwidth = -1;
435
436 if (termwidth == -1)
437 if (p = getenv("COLUMNS"))
438 termwidth = atoi(p);
439 else if (ioctl(STDOUT_FILENO, TIOCGWINSZ, &win) == 0 &&
440 win.ws_col > 0)
441 termwidth = win.ws_col;
442 else
443 termwidth = 60;
444
445 for (count = 0; *s; ++s) {
446 if (count >= termwidth) {
447 (void)printf("\\\n");
448 count = 0;
449 }
450 if (isascii(*s) && isprint(*s) && *s != '\\') {
451 (void)putchar(*s);
452 count++;
453 } else {
454 escapes = "\\\a\b\f\n\r\t\v";
455 (void)putchar('\\');
456 if (p = strchr(escapes, *s)) {
457 (void)putchar("\\abfnrtv"[p - escapes]);
458 count += 2;
459 } else {
460 (void)printf("%03o", *(u_char *)s);
461 count += 4;
462 }
463 }
464 }
465 (void)putchar('$');
466 (void)putchar('\n');
467 if (ferror(stdout))
468 err(FATAL, "stdout: %s", strerror(errno ? errno : EIO));
469 }
470
471 static inline int
regexec_e(preg,string,eflags,nomatch,slen)472 regexec_e(preg, string, eflags, nomatch, slen)
473 regex_t *preg;
474 const char *string;
475 int eflags, nomatch;
476 size_t slen;
477 {
478 int eval;
479
480 if (preg == NULL) {
481 if (defpreg == NULL)
482 err(FATAL, "first RE may not be empty");
483 } else
484 defpreg = preg;
485
486 /* Set anchors, discounting trailing newline (if any). */
487 if (slen > 0 && string[slen - 1] == '\n')
488 slen--;
489 match[0].rm_so = 0;
490 match[0].rm_eo = slen;
491
492 eval = regexec(defpreg, string,
493 nomatch ? 0 : maxnsub + 1, match, eflags | REG_STARTEND);
494 switch(eval) {
495 case 0:
496 return (1);
497 case REG_NOMATCH:
498 return (0);
499 }
500 err(FATAL, "RE error: %s", strregerror(eval, defpreg));
501 /* NOTREACHED */
502 }
503
504 /*
505 * regsub - perform substitutions after a regexp match
506 * Based on a routine by Henry Spencer
507 */
508 static void
regsub(sp,string,src)509 regsub(sp, string, src)
510 SPACE *sp;
511 char *string, *src;
512 {
513 register int len, no;
514 register char c, *dst;
515
516 #define NEEDSP(reqlen) \
517 if (sp->len >= sp->blen - (reqlen) - 1) { \
518 sp->blen += (reqlen) + 1024; \
519 sp->space = sp->back = xrealloc(sp->back, sp->blen); \
520 dst = sp->space + sp->len; \
521 }
522
523 dst = sp->space + sp->len;
524 while ((c = *src++) != '\0') {
525 if (c == '&')
526 no = 0;
527 else if (c == '\\' && isdigit(*src))
528 no = *src++ - '0';
529 else
530 no = -1;
531 if (no < 0) { /* Ordinary character. */
532 if (c == '\\' && (*src == '\\' || *src == '&'))
533 c = *src++;
534 NEEDSP(1);
535 *dst++ = c;
536 ++sp->len;
537 } else if (match[no].rm_so != -1 && match[no].rm_eo != -1) {
538 len = match[no].rm_eo - match[no].rm_so;
539 NEEDSP(len);
540 memmove(dst, string + match[no].rm_so, len);
541 dst += len;
542 sp->len += len;
543 }
544 }
545 NEEDSP(1);
546 *dst = '\0';
547 }
548
549 /*
550 * aspace --
551 * Append the source space to the destination space, allocating new
552 * space as necessary.
553 */
554 void
cspace(sp,p,len,spflag)555 cspace(sp, p, len, spflag)
556 SPACE *sp;
557 char *p;
558 size_t len;
559 enum e_spflag spflag;
560 {
561 size_t tlen;
562
563 /* Make sure SPACE has enough memory and ramp up quickly. */
564 tlen = sp->len + len + 1;
565 if (tlen > sp->blen) {
566 sp->blen = tlen + 1024;
567 sp->space = sp->back = xrealloc(sp->back, sp->blen);
568 }
569
570 if (spflag == REPLACE)
571 sp->len = 0;
572
573 memmove(sp->space + sp->len, p, len);
574
575 sp->space[sp->len += len] = '\0';
576 }
577
578 /*
579 * Close all cached opened files and report any errors
580 */
581 void
cfclose(cp,end)582 cfclose(cp, end)
583 register struct s_command *cp, *end;
584 {
585
586 for (; cp != end; cp = cp->next)
587 switch(cp->code) {
588 case 's':
589 if (cp->u.s->wfd != -1 && close(cp->u.s->wfd))
590 err(FATAL,
591 "%s: %s", cp->u.s->wfile, strerror(errno));
592 cp->u.s->wfd = -1;
593 break;
594 case 'w':
595 if (cp->u.fd != -1 && close(cp->u.fd))
596 err(FATAL, "%s: %s", cp->t, strerror(errno));
597 cp->u.fd = -1;
598 break;
599 case '{':
600 cfclose(cp->u.c, cp->next);
601 break;
602 }
603 }
604