1 /*- 2 * Copyright (c) 1992 Diomidis Spinellis. 3 * Copyright (c) 1992 The Regents of the University of California. 4 * All rights reserved. 5 * 6 * This code is derived from software contributed to Berkeley by 7 * Diomidis Spinellis of Imperial College, University of London. 8 * 9 * Redistribution and use in source and binary forms, with or without 10 * modification, are permitted provided that the following conditions 11 * are met: 12 * 1. Redistributions of source code must retain the above copyright 13 * notice, this list of conditions and the following disclaimer. 14 * 2. Redistributions in binary form must reproduce the above copyright 15 * notice, this list of conditions and the following disclaimer in the 16 * documentation and/or other materials provided with the distribution. 17 * 3. All advertising materials mentioning features or use of this software 18 * must display the following acknowledgement: 19 * This product includes software developed by the University of 20 * California, Berkeley and its contributors. 21 * 4. Neither the name of the University nor the names of its contributors 22 * may be used to endorse or promote products derived from this software 23 * without specific prior written permission. 24 * 25 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 26 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 27 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 28 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 29 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 30 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 31 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 32 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 33 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 34 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 35 * SUCH DAMAGE. 36 */ 37 38 #ifndef lint 39 /*static char sccsid[] = "from: @(#)process.c 5.10 (Berkeley) 12/2/92";*/ 40 static char rcsid[] = "$Id: process.c,v 1.7 1993/08/14 20:00:00 mycroft Exp $"; 41 #endif /* not lint */ 42 43 #include <sys/types.h> 44 #include <sys/stat.h> 45 #include <sys/ioctl.h> 46 #include <sys/uio.h> 47 48 #include <ctype.h> 49 #include <errno.h> 50 #include <fcntl.h> 51 #include <limits.h> 52 #include <regex.h> 53 #include <stdio.h> 54 #include <stdlib.h> 55 #include <string.h> 56 #include <unistd.h> 57 58 #include "defs.h" 59 #include "extern.h" 60 61 static SPACE HS = {""}, PS, SS; 62 #define pd PS.deleted 63 #define ps PS.space 64 #define psl PS.len 65 #define hs HS.space 66 #define hsl HS.len 67 68 static inline int applies __P((struct s_command *)); 69 static void flush_appends __P((void)); 70 static void lputs __P((char *)); 71 static inline int regexec_e __P((regex_t *, const char *, int, int)); 72 static void regsub __P((SPACE *, char *, char *)); 73 static int substitute __P((struct s_command *)); 74 75 struct s_appends *appends; /* Array of pointers to strings to append. */ 76 static int appendx; /* Index into appends array. */ 77 int appendnum; /* Size of appends array. */ 78 79 static int lastaddr; /* Set by applies if last address of a range. */ 80 static int sdone; /* If any substitutes since last line input. */ 81 /* Iov structure for 'w' commands. */ 82 static struct iovec iov[2] = { NULL, 0, "\n", 1 }; 83 84 static regex_t *defpreg; 85 size_t maxnsub; 86 regmatch_t *match; 87 88 void 89 process() 90 { 91 struct s_command *cp; 92 SPACE tspace; 93 size_t len; 94 int r; 95 char oldc, *p; 96 97 for (linenum = 0; mf_fgets(&PS, REPLACE);) { 98 pd = 0; 99 cp = prog; 100 redirect: 101 while (cp != NULL) { 102 if (!applies(cp)) { 103 cp = cp->next; 104 continue; 105 } 106 switch (cp->code) { 107 case '{': 108 cp = cp->u.c; 109 goto redirect; 110 case 'a': 111 if (appendx >= appendnum) 112 appends = xrealloc(appends, 113 sizeof(struct s_appends) * 114 (appendnum *= 2)); 115 appends[appendx].type = AP_STRING; 116 appends[appendx].s = cp->t; 117 appendx++; 118 break; 119 case 'b': 120 cp = cp->u.c; 121 goto redirect; 122 case 'c': 123 pd = 1; 124 psl = 0; 125 if (cp->a2 == NULL || lastaddr) 126 (void)printf("%s", cp->t); 127 break; 128 case 'd': 129 pd = 1; 130 goto new; 131 case 'D': 132 if (pd) 133 goto new; 134 if ((p = strchr(ps, '\n')) == NULL) 135 pd = 1; 136 else { 137 psl -= (p - ps) - 1; 138 memmove(ps, p + 1, psl); 139 } 140 goto new; 141 case 'g': 142 cspace(&PS, hs, hsl, REPLACE); 143 break; 144 case 'G': 145 cspace(&PS, hs, hsl, APPENDNL); 146 break; 147 case 'h': 148 cspace(&HS, ps, psl, REPLACE); 149 break; 150 case 'H': 151 cspace(&HS, ps, psl, APPENDNL); 152 break; 153 case 'i': 154 (void)printf("%s", cp->t); 155 break; 156 case 'l': 157 lputs(ps); 158 break; 159 case 'n': 160 if (!nflag && !pd) 161 (void)printf("%s\n", ps); 162 flush_appends(); 163 r = mf_fgets(&PS, REPLACE); 164 #ifdef HISTORIC_PRACTICE 165 if (!r) 166 exit(0); 167 #endif 168 pd = 0; 169 break; 170 case 'N': 171 flush_appends(); 172 if (!mf_fgets(&PS, APPENDNL)) { 173 if (!nflag && !pd) 174 (void)printf("%s\n", ps); 175 exit(0); 176 } 177 break; 178 case 'p': 179 if (pd) 180 break; 181 (void)printf("%s\n", ps); 182 break; 183 case 'P': 184 if (pd) 185 break; 186 if ((p = strchr(ps, '\n')) != NULL) { 187 oldc = *p; 188 *p = '\0'; 189 } 190 (void)printf("%s\n", ps); 191 if (p != NULL) 192 *p = oldc; 193 break; 194 case 'q': 195 if (!nflag && !pd) 196 (void)printf("%s\n", ps); 197 flush_appends(); 198 exit(0); 199 case 'r': 200 if (appendx >= appendnum) 201 appends = xrealloc(appends, 202 sizeof(struct s_appends) * 203 (appendnum *= 2)); 204 appends[appendx].type = AP_FILE; 205 appends[appendx].s = cp->t; 206 appendx++; 207 break; 208 case 's': 209 sdone |= substitute(cp); 210 break; 211 case 't': 212 if (sdone) { 213 sdone = 0; 214 cp = cp->u.c; 215 goto redirect; 216 } 217 break; 218 case 'w': 219 if (pd) 220 break; 221 if (cp->u.fd == -1 && (cp->u.fd = open(cp->t, 222 O_WRONLY|O_APPEND|O_CREAT|O_TRUNC, 223 DEFFILEMODE)) == -1) 224 err(FATAL, "%s: %s\n", 225 cp->t, strerror(errno)); 226 iov[0].iov_base = ps; 227 iov[0].iov_len = psl; 228 if (writev(cp->u.fd, iov, 2) != psl + 1) 229 err(FATAL, "%s: %s\n", 230 cp->t, strerror(errno)); 231 break; 232 case 'x': 233 tspace = PS; 234 PS = HS; 235 HS = tspace; 236 break; 237 case 'y': 238 if (pd) 239 break; 240 for (p = ps, len = psl; len--; ++p) 241 *p = cp->u.y[*p]; 242 break; 243 case ':': 244 case '}': 245 break; 246 case '=': 247 (void)printf("%lu\n", linenum); 248 } 249 cp = cp->next; 250 } /* for all cp */ 251 252 new: if (!nflag && !pd) 253 (void)printf("%s\n", ps); 254 flush_appends(); 255 } /* for all lines */ 256 } 257 258 /* 259 * TRUE if the address passed matches the current program state 260 * (lastline, linenumber, ps). 261 */ 262 #define MATCH(a) \ 263 (a)->type == AT_RE ? regexec_e((a)->u.r, ps, 0, 1) : \ 264 (a)->type == AT_LINE ? linenum == (a)->u.l : lastline 265 266 /* 267 * Return TRUE if the command applies to the current line. Sets the inrange 268 * flag to process ranges. Interprets the non-select (``!'') flag. 269 */ 270 static inline int 271 applies(cp) 272 struct s_command *cp; 273 { 274 int r; 275 276 lastaddr = 0; 277 if (cp->a1 == NULL && cp->a2 == NULL) 278 r = 1; 279 else if (cp->a2) 280 if (cp->inrange) { 281 if (MATCH(cp->a2)) { 282 cp->inrange = 0; 283 lastaddr = 1; 284 } 285 r = 1; 286 } else if (MATCH(cp->a1)) { 287 /* 288 * If the second address is a number less than or 289 * equal to the line number first selected, only 290 * one line shall be selected. 291 * -- POSIX 1003.2 292 */ 293 if (cp->a2->type == AT_LINE && 294 linenum >= cp->a2->u.l) 295 lastaddr = 1; 296 else 297 cp->inrange = 1; 298 r = 1; 299 } else 300 r = 0; 301 else 302 r = MATCH(cp->a1); 303 return (cp->nonsel ? ! r : r); 304 } 305 306 /* 307 * substitute -- 308 * Do substitutions in the pattern space. Currently, we build a 309 * copy of the new pattern space in the substitute space structure 310 * and then swap them. 311 */ 312 static int 313 substitute(cp) 314 struct s_command *cp; 315 { 316 SPACE tspace; 317 regex_t *re; 318 size_t re_off; 319 size_t re_eoff; 320 int n; 321 char *s; 322 char *eos; 323 324 s = ps; 325 eos = s + strlen(s); 326 re = cp->u.s->re; 327 if (re == NULL) { 328 if (defpreg != NULL && cp->u.s->maxbref > defpreg->re_nsub) { 329 linenum = cp->u.s->linenum; 330 err(COMPILE, "\\%d not defined in the RE", 331 cp->u.s->maxbref); 332 } 333 } 334 if (!regexec_e(re, s, 0, 0)) 335 return (0); 336 337 SS.len = 0; /* Clean substitute space. */ 338 n = cp->u.s->n; 339 switch (n) { 340 case 0: /* Global */ 341 do { 342 /* Locate start of replaced string. */ 343 re_off = match[0].rm_so; 344 re_eoff = match[0].rm_eo; 345 /* Copy leading retained string. */ 346 cspace(&SS, s, re_off, APPEND); 347 /* Add in regular expression. */ 348 regsub(&SS, s, cp->u.s->new); 349 /* Move past this match. */ 350 s += match[0].rm_eo; 351 } while(*s && re_eoff && regexec_e(re, s, REG_NOTBOL, 0)); 352 if (eos - s > 0 && !re_eoff) 353 err(FATAL, "infinite substitution loop"); 354 /* Copy trailing retained string. */ 355 cspace(&SS, s, strlen(s), APPEND); 356 break; 357 default: /* Nth occurrence */ 358 while (--n) { 359 s += match[0].rm_eo; 360 if (!regexec_e(re, s, REG_NOTBOL, 0)) 361 return (0); 362 } 363 /* FALLTHROUGH */ 364 case 1: /* 1st occurrence */ 365 /* Locate start of replaced string. */ 366 re_off = match[0].rm_so + (s - ps); 367 /* Copy leading retained string. */ 368 cspace(&SS, ps, re_off, APPEND); 369 /* Add in regular expression. */ 370 regsub(&SS, s, cp->u.s->new); 371 /* Copy trailing retained string. */ 372 s += match[0].rm_eo; 373 cspace(&SS, s, strlen(s), APPEND); 374 break; 375 } 376 377 /* 378 * Swap the substitute space and the pattern space, and make sure 379 * that any leftover pointers into stdio memory get lost. 380 */ 381 tspace = PS; 382 PS = SS; 383 SS = tspace; 384 SS.space = SS.back; 385 386 /* Handle the 'p' flag. */ 387 if (cp->u.s->p) 388 (void)printf("%s\n", ps); 389 390 /* Handle the 'w' flag. */ 391 if (cp->u.s->wfile && !pd) { 392 if (cp->u.s->wfd == -1 && (cp->u.s->wfd = open(cp->u.s->wfile, 393 O_WRONLY|O_APPEND|O_CREAT|O_TRUNC, DEFFILEMODE)) == -1) 394 err(FATAL, "%s: %s\n", cp->u.s->wfile, strerror(errno)); 395 iov[0].iov_base = ps; 396 iov[0].iov_len = psl; 397 if (writev(cp->u.s->wfd, iov, 2) != psl + 1) 398 err(FATAL, "%s: %s\n", cp->u.s->wfile, strerror(errno)); 399 } 400 return (1); 401 } 402 403 /* 404 * Flush append requests. Always called before reading a line, 405 * therefore it also resets the substitution done (sdone) flag. 406 */ 407 static void 408 flush_appends() 409 { 410 FILE *f; 411 int count, i; 412 char buf[8 * 1024]; 413 414 for (i = 0; i < appendx; i++) 415 switch (appends[i].type) { 416 case AP_STRING: 417 (void)printf("%s", appends[i].s); 418 break; 419 case AP_FILE: 420 /* 421 * Read files probably shouldn't be cached. Since 422 * it's not an error to read a non-existent file, 423 * it's possible that another program is interacting 424 * with the sed script through the file system. It 425 * would be truly bizarre, but possible. It's probably 426 * not that big a performance win, anyhow. 427 */ 428 if ((f = fopen(appends[i].s, "r")) == NULL) 429 break; 430 while (count = fread(buf, 1, sizeof(buf), f)) 431 (void)fwrite(buf, 1, count, stdout); 432 (void)fclose(f); 433 break; 434 } 435 if (ferror(stdout)) 436 err(FATAL, "stdout: %s", strerror(errno ? errno : EIO)); 437 appendx = sdone = 0; 438 } 439 440 static void 441 lputs(s) 442 register char *s; 443 { 444 register int count; 445 register char *escapes, *p; 446 struct winsize win; 447 static int termwidth = -1; 448 449 if (termwidth == -1) 450 if (p = getenv("COLUMNS")) 451 termwidth = atoi(p); 452 else if (ioctl(STDOUT_FILENO, TIOCGWINSZ, &win) == 0 && 453 win.ws_col > 0) 454 termwidth = win.ws_col; 455 else 456 termwidth = 60; 457 458 for (count = 0; *s; ++s) { 459 if (count >= termwidth) { 460 (void)printf("\\\n"); 461 count = 0; 462 } 463 if (isascii(*s) && isprint(*s) && *s != '\\') { 464 (void)putchar(*s); 465 count++; 466 } else { 467 escapes = "\\\a\b\f\n\r\t\v"; 468 (void)putchar('\\'); 469 if (p = strchr(escapes, *s)) { 470 (void)putchar("\\abfnrtv"[p - escapes]); 471 count += 2; 472 } else { 473 (void)printf("%03o", (u_char)*s); 474 count += 4; 475 } 476 } 477 } 478 (void)putchar('$'); 479 (void)putchar('\n'); 480 if (ferror(stdout)) 481 err(FATAL, "stdout: %s", strerror(errno ? errno : EIO)); 482 } 483 484 static inline int 485 regexec_e(preg, string, eflags, nomatch) 486 regex_t *preg; 487 const char *string; 488 int eflags, nomatch; 489 { 490 int eval; 491 492 if (preg == NULL) { 493 if (defpreg == NULL) 494 err(FATAL, "first RE may not be empty"); 495 } else 496 defpreg = preg; 497 498 eval = regexec(defpreg, string, 499 nomatch ? 0 : maxnsub + 1, match, eflags); 500 switch(eval) { 501 case 0: 502 return (1); 503 case REG_NOMATCH: 504 return (0); 505 } 506 err(FATAL, "RE error: %s", strregerror(eval, defpreg)); 507 /* NOTREACHED */ 508 } 509 510 /* 511 * regsub - perform substitutions after a regexp match 512 * Based on a routine by Henry Spencer 513 */ 514 static void 515 regsub(sp, string, src) 516 SPACE *sp; 517 char *string, *src; 518 { 519 register int len, no; 520 register char c, *dst; 521 522 #define NEEDSP(reqlen) \ 523 if (sp->len >= sp->blen - (reqlen) - 1) { \ 524 sp->blen += (reqlen) + 1024; \ 525 sp->space = sp->back = xrealloc(sp->back, sp->blen); \ 526 dst = sp->space + sp->len; \ 527 } 528 529 dst = sp->space + sp->len; 530 while ((c = *src++) != '\0') { 531 if (c == '&') 532 no = 0; 533 else if (c == '\\' && isdigit(*src)) 534 no = *src++ - '0'; 535 else 536 no = -1; 537 if (no < 0) { /* Ordinary character. */ 538 if (c == '\\' && (*src == '\\' || *src == '&')) 539 c = *src++; 540 NEEDSP(1); 541 *dst++ = c; 542 ++sp->len; 543 } else if (match[no].rm_so != -1 && match[no].rm_eo != -1) { 544 len = match[no].rm_eo - match[no].rm_so; 545 NEEDSP(len); 546 memmove(dst, string + match[no].rm_so, len); 547 dst += len; 548 sp->len += len; 549 } 550 } 551 NEEDSP(1); 552 *dst = '\0'; 553 } 554 555 /* 556 * aspace -- 557 * Append the source space to the destination space, allocating new 558 * space as necessary. 559 */ 560 void 561 cspace(sp, p, len, spflag) 562 SPACE *sp; 563 char *p; 564 size_t len; 565 enum e_spflag spflag; 566 { 567 size_t tlen; 568 569 /* 570 * Make sure SPACE has enough memory and ramp up quickly. Appends 571 * need two extra bytes, one for the newline, one for a terminating 572 * NULL. 573 */ 574 /* tlen = sp->len + len + spflag == APPENDNL ? 2 : 1; */ 575 tlen = sp->len + len + (spflag == APPENDNL ? 2 : 1); /* XXX */ 576 if (tlen > sp->blen) { 577 sp->blen = tlen + 1024; 578 sp->space = sp->back = xrealloc(sp->back, sp->blen); 579 } 580 581 if (spflag == APPENDNL) 582 sp->space[sp->len++] = '\n'; 583 else if (spflag == REPLACE) 584 sp->len = 0; 585 586 memmove(sp->space + sp->len, p, len); 587 sp->space[sp->len += len] = '\0'; 588 } 589 590 /* 591 * Close all cached opened files and report any errors 592 */ 593 void 594 cfclose(cp, end) 595 register struct s_command *cp, *end; 596 { 597 598 for (; cp != end; cp = cp->next) 599 switch(cp->code) { 600 case 's': 601 if (cp->u.s->wfd != -1 && close(cp->u.s->wfd)) 602 err(FATAL, 603 "%s: %s", cp->u.s->wfile, strerror(errno)); 604 cp->u.s->wfd = -1; 605 break; 606 case 'w': 607 if (cp->u.fd != -1 && close(cp->u.fd)) 608 err(FATAL, "%s: %s", cp->t, strerror(errno)); 609 cp->u.fd = -1; 610 break; 611 case '{': 612 cfclose(cp->u.c, cp->next); 613 break; 614 } 615 } 616