/* $NetBSD: process.c,v 1.35 2003/11/07 04:44:57 itojun Exp $ */

/*-
 * Copyright (c) 1992, 1993, 1994
 *	The Regents of the University of California.  All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * Diomidis Spinellis of Imperial College, University of London.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

/*-
 * Copyright (c) 1992 Diomidis Spinellis.
 *
 * This code is derived from software contributed to Berkeley by
 * Diomidis Spinellis of Imperial College, University of London.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
68 */ 69 70 #include <sys/cdefs.h> 71 #ifndef lint 72 #if 0 73 static char sccsid[] = "@(#)process.c 8.6 (Berkeley) 4/20/94"; 74 #else 75 __RCSID("$NetBSD: process.c,v 1.35 2003/11/07 04:44:57 itojun Exp $"); 76 #endif 77 #endif /* not lint */ 78 79 #include <sys/types.h> 80 #include <sys/stat.h> 81 #include <sys/ioctl.h> 82 #include <sys/uio.h> 83 84 #include <ctype.h> 85 #include <errno.h> 86 #include <fcntl.h> 87 #include <limits.h> 88 #include <regex.h> 89 #include <stdio.h> 90 #include <stdlib.h> 91 #include <string.h> 92 #include <unistd.h> 93 94 #include "defs.h" 95 #include "extern.h" 96 97 static SPACE HS, PS, SS; 98 #define pd PS.deleted 99 #define ps PS.space 100 #define psl PS.len 101 #define hs HS.space 102 #define hsl HS.len 103 104 static inline int applies(struct s_command *); 105 static void flush_appends(void); 106 static void lputs(char *); 107 static inline int regexec_e(regex_t *, const char *, int, int, size_t); 108 static void regsub(SPACE *, char *, char *); 109 static int substitute(struct s_command *); 110 111 struct s_appends *appends; /* Array of pointers to strings to append. */ 112 static int appendx; /* Index into appends array. */ 113 int appendnum; /* Size of appends array. */ 114 115 static int lastaddr; /* Set by applies if last address of a range. */ 116 static int sdone; /* If any substitutes since last line input. */ 117 /* Iov structure for 'w' commands. 
*/ 118 static regex_t *defpreg; 119 size_t maxnsub; 120 regmatch_t *match; 121 122 #define OUT(s) { fwrite(s, sizeof(u_char), psl, stdout); } 123 124 void 125 process(void) 126 { 127 struct s_command *cp; 128 SPACE tspace; 129 size_t len, oldpsl; 130 char *p; 131 132 oldpsl = 0; 133 for (linenum = 0; mf_fgets(&PS, REPLACE);) { 134 pd = 0; 135 top: 136 cp = prog; 137 redirect: 138 while (cp != NULL) { 139 if (!applies(cp)) { 140 cp = cp->next; 141 continue; 142 } 143 switch (cp->code) { 144 case '{': 145 cp = cp->u.c; 146 goto redirect; 147 case 'a': 148 if (appendx >= appendnum) { 149 appends = xrealloc(appends, 150 sizeof(struct s_appends) * 151 (appendnum * 2)); 152 appendnum *= 2; 153 } 154 appends[appendx].type = AP_STRING; 155 appends[appendx].s = cp->t; 156 appends[appendx].len = strlen(cp->t); 157 appendx++; 158 break; 159 case 'b': 160 cp = cp->u.c; 161 goto redirect; 162 case 'c': 163 pd = 1; 164 psl = 0; 165 if (cp->a2 == NULL || lastaddr) 166 (void)printf("%s", cp->t); 167 break; 168 case 'd': 169 pd = 1; 170 goto new; 171 case 'D': 172 if (psl == 0) 173 pd = 1; 174 if (pd) 175 goto new; 176 if ((p = memchr(ps, '\n', psl - 1)) == NULL) { 177 pd = 1; 178 goto new; 179 } else { 180 psl -= (p + 1) - ps; 181 memmove(ps, p + 1, psl); 182 goto top; 183 } 184 case 'g': 185 cspace(&PS, hs, hsl, REPLACE); 186 break; 187 case 'G': 188 if (hs == NULL) 189 cspace(&HS, "\n", 1, REPLACE); 190 cspace(&PS, hs, hsl, 0); 191 break; 192 case 'h': 193 cspace(&HS, ps, psl, REPLACE); 194 break; 195 case 'H': 196 cspace(&HS, ps, psl, 0); 197 break; 198 case 'i': 199 (void)printf("%s", cp->t); 200 break; 201 case 'l': 202 lputs(ps); 203 break; 204 case 'n': 205 if (!nflag && !pd) 206 OUT(ps) 207 flush_appends(); 208 if (!mf_fgets(&PS, REPLACE)) 209 exit(0); 210 pd = 0; 211 break; 212 case 'N': 213 flush_appends(); 214 if (!mf_fgets(&PS, 0)) { 215 if (!nflag && !pd) 216 OUT(ps) 217 exit(0); 218 } 219 break; 220 case 'p': 221 if (pd) 222 break; 223 OUT(ps) 224 break; 225 case 
'P': 226 if (pd) 227 break; 228 if ((p = memchr(ps, '\n', psl - 1)) != NULL) { 229 oldpsl = psl; 230 psl = (p + 1) - ps; 231 } 232 OUT(ps) 233 if (p != NULL) 234 psl = oldpsl; 235 break; 236 case 'q': 237 if (!nflag && !pd) 238 OUT(ps) 239 flush_appends(); 240 exit(0); 241 case 'r': 242 if (appendx >= appendnum) { 243 appends = xrealloc(appends, 244 sizeof(struct s_appends) * 245 (appendnum * 2)); 246 appendnum *= 2; 247 } 248 appends[appendx].type = AP_FILE; 249 appends[appendx].s = cp->t; 250 appends[appendx].len = strlen(cp->t); 251 appendx++; 252 break; 253 case 's': 254 sdone |= substitute(cp); 255 break; 256 case 't': 257 if (sdone) { 258 sdone = 0; 259 cp = cp->u.c; 260 goto redirect; 261 } 262 break; 263 case 'w': 264 if (pd) 265 break; 266 if (cp->u.fd == -1 && (cp->u.fd = open(cp->t, 267 O_WRONLY|O_APPEND|O_CREAT|O_TRUNC, 268 DEFFILEMODE)) == -1) 269 err(FATAL, "%s: %s", 270 cp->t, strerror(errno)); 271 if (write(cp->u.fd, ps, psl) != psl) 272 err(FATAL, "%s: %s", 273 cp->t, strerror(errno)); 274 break; 275 case 'x': 276 if (hs == NULL) 277 cspace(&HS, "\n", 1, REPLACE); 278 tspace = PS; 279 PS = HS; 280 HS = tspace; 281 break; 282 case 'y': 283 if (pd) 284 break; 285 for (p = ps, len = psl; --len; ++p) 286 *p = cp->u.y[(int)*p]; 287 break; 288 case ':': 289 case '}': 290 break; 291 case '=': 292 (void)printf("%lu\n", linenum); 293 } 294 cp = cp->next; 295 } /* for all cp */ 296 297 new: if (!nflag && !pd) 298 OUT(ps) 299 flush_appends(); 300 } /* for all lines */ 301 } 302 303 /* 304 * TRUE if the address passed matches the current program state 305 * (lastline, linenumber, ps). 306 */ 307 #define MATCH(a) \ 308 (a)->type == AT_RE ? regexec_e((a)->u.r, ps, 0, 1, psl) : \ 309 (a)->type == AT_LINE ? linenum == (a)->u.l : lastline 310 311 /* 312 * Return TRUE if the command applies to the current line. Sets the inrange 313 * flag to process ranges. Interprets the non-select (``!'') flag. 
314 */ 315 static inline int 316 applies(struct s_command *cp) 317 { 318 int r; 319 320 lastaddr = 0; 321 if (cp->a1 == NULL && cp->a2 == NULL) 322 r = 1; 323 else if (cp->a2) { 324 if (cp->inrange) { 325 if (MATCH(cp->a2)) { 326 cp->inrange = 0; 327 lastaddr = 1; 328 } 329 r = 1; 330 } else if (MATCH(cp->a1)) { 331 /* 332 * If the second address is a number less than or 333 * equal to the line number first selected, only 334 * one line shall be selected. 335 * -- POSIX 1003.2 336 */ 337 if (cp->a2->type == AT_LINE && 338 linenum >= cp->a2->u.l) 339 lastaddr = 1; 340 else 341 cp->inrange = 1; 342 r = 1; 343 } else 344 r = 0; 345 } else 346 r = MATCH(cp->a1); 347 return (cp->nonsel ? ! r : r); 348 } 349 350 /* 351 * substitute -- 352 * Do substitutions in the pattern space. Currently, we build a 353 * copy of the new pattern space in the substitute space structure 354 * and then swap them. 355 */ 356 static int 357 substitute(struct s_command *cp) 358 { 359 SPACE tspace; 360 regex_t *re; 361 size_t re_off, slen; 362 int lastempty, n; 363 char *s; 364 365 s = ps; 366 re = cp->u.s->re; 367 if (re == NULL) { 368 if (defpreg != NULL && cp->u.s->maxbref > defpreg->re_nsub) { 369 linenum = cp->u.s->linenum; 370 err(COMPILE, "\\%d not defined in the RE", 371 cp->u.s->maxbref); 372 } 373 } 374 if (!regexec_e(re, s, 0, 0, psl)) 375 return (0); 376 377 SS.len = 0; /* Clean substitute space. */ 378 slen = psl; 379 n = cp->u.s->n; 380 lastempty = 1; 381 382 switch (n) { 383 case 0: /* Global */ 384 do { 385 if (lastempty || match[0].rm_so != match[0].rm_eo) { 386 /* Locate start of replaced string. */ 387 re_off = match[0].rm_so; 388 /* Copy leading retained string. */ 389 cspace(&SS, s, re_off, APPEND); 390 /* Add in regular expression. */ 391 regsub(&SS, s, cp->u.s->new); 392 } 393 394 /* Move past this match. 
*/ 395 if (match[0].rm_so != match[0].rm_eo) { 396 s += match[0].rm_eo; 397 slen -= match[0].rm_eo; 398 lastempty = 0; 399 } else { 400 if (match[0].rm_so == 0) 401 cspace(&SS, 402 s, match[0].rm_so + 1, APPEND); 403 else 404 cspace(&SS, 405 s + match[0].rm_so, 1, APPEND); 406 s += match[0].rm_so + 1; 407 slen -= match[0].rm_so + 1; 408 lastempty = 1; 409 } 410 } while (slen > 0 && regexec_e(re, s, REG_NOTBOL, 0, slen)); 411 /* Copy trailing retained string. */ 412 if (slen > 0) 413 cspace(&SS, s, slen, APPEND); 414 break; 415 default: /* Nth occurrence */ 416 while (--n) { 417 s += match[0].rm_eo; 418 slen -= match[0].rm_eo; 419 if (!regexec_e(re, s, REG_NOTBOL, 0, slen)) 420 return (0); 421 } 422 /* FALLTHROUGH */ 423 case 1: /* 1st occurrence */ 424 /* Locate start of replaced string. */ 425 re_off = match[0].rm_so + (s - ps); 426 /* Copy leading retained string. */ 427 cspace(&SS, ps, re_off, APPEND); 428 /* Add in regular expression. */ 429 regsub(&SS, s, cp->u.s->new); 430 /* Copy trailing retained string. */ 431 s += match[0].rm_eo; 432 slen -= match[0].rm_eo; 433 cspace(&SS, s, slen, APPEND); 434 break; 435 } 436 437 /* 438 * Swap the substitute space and the pattern space, and make sure 439 * that any leftover pointers into stdio memory get lost. 440 */ 441 tspace = PS; 442 PS = SS; 443 SS = tspace; 444 SS.space = SS.back; 445 446 /* Handle the 'p' flag. */ 447 if (cp->u.s->p) 448 OUT(ps) 449 450 /* Handle the 'w' flag. */ 451 if (cp->u.s->wfile && !pd) { 452 if (cp->u.s->wfd == -1 && (cp->u.s->wfd = open(cp->u.s->wfile, 453 O_WRONLY|O_APPEND|O_CREAT|O_TRUNC, DEFFILEMODE)) == -1) 454 err(FATAL, "%s: %s", cp->u.s->wfile, strerror(errno)); 455 if (write(cp->u.s->wfd, ps, psl) != psl) 456 err(FATAL, "%s: %s", cp->u.s->wfile, strerror(errno)); 457 } 458 return (1); 459 } 460 461 /* 462 * Flush append requests. Always called before reading a line, 463 * therefore it also resets the substitution done (sdone) flag. 
464 */ 465 static void 466 flush_appends(void) 467 { 468 FILE *f; 469 int count, i; 470 char buf[8 * 1024]; 471 472 for (i = 0; i < appendx; i++) 473 switch (appends[i].type) { 474 case AP_STRING: 475 fwrite(appends[i].s, sizeof(char), appends[i].len, 476 stdout); 477 break; 478 case AP_FILE: 479 /* 480 * Read files probably shouldn't be cached. Since 481 * it's not an error to read a non-existent file, 482 * it's possible that another program is interacting 483 * with the sed script through the file system. It 484 * would be truly bizarre, but possible. It's probably 485 * not that big a performance win, anyhow. 486 */ 487 if ((f = fopen(appends[i].s, "r")) == NULL) 488 break; 489 while ((count = 490 fread(buf, sizeof(char), sizeof(buf), f)) > 0) 491 (void)fwrite(buf, sizeof(char), count, stdout); 492 (void)fclose(f); 493 break; 494 } 495 if (ferror(stdout)) 496 err(FATAL, "stdout: %s", strerror(errno ? errno : EIO)); 497 appendx = sdone = 0; 498 } 499 500 static void 501 lputs(char *s) 502 { 503 int count; 504 char *escapes, *p; 505 struct winsize win; 506 static int termwidth = -1; 507 508 if (termwidth == -1) { 509 if ((p = getenv("COLUMNS")) != NULL) 510 termwidth = atoi(p); 511 else if (ioctl(STDOUT_FILENO, TIOCGWINSZ, &win) == 0 && 512 win.ws_col > 0) 513 termwidth = win.ws_col; 514 else 515 termwidth = 60; 516 } 517 for (count = 0; *s; ++s) { 518 if (count >= termwidth) { 519 (void)printf("\\\n"); 520 count = 0; 521 } 522 if (isascii((unsigned char)*s) && isprint((unsigned char)*s) && 523 *s != '\\') { 524 (void)putchar(*s); 525 count++; 526 } else { 527 escapes = "\\\a\b\f\n\r\t\v"; 528 (void)putchar('\\'); 529 if ((p = strchr(escapes, *s)) != NULL) { 530 (void)putchar("\\abfnrtv"[p - escapes]); 531 count += 2; 532 } else { 533 (void)printf("%03o", *(u_char *)s); 534 count += 4; 535 } 536 } 537 } 538 (void)putchar('$'); 539 (void)putchar('\n'); 540 if (ferror(stdout)) 541 err(FATAL, "stdout: %s", strerror(errno ? 
errno : EIO)); 542 } 543 544 static inline int 545 regexec_e(regex_t *preg, const char *string, int eflags, int nomatch, size_t slen) 546 { 547 int eval; 548 #ifndef REG_STARTEND 549 char *buf; 550 #endif 551 552 if (preg == NULL) { 553 if (defpreg == NULL) 554 err(FATAL, "first RE may not be empty"); 555 } else 556 defpreg = preg; 557 558 /* Set anchors, discounting trailing newline (if any). */ 559 if (slen > 0 && string[slen - 1] == '\n') 560 slen--; 561 562 #ifndef REG_STARTEND 563 if ((buf = malloc(slen + 1)) == NULL) 564 err(1, NULL); 565 (void)memcpy(buf, string, slen); 566 buf[slen] = '\0'; 567 eval = regexec(defpreg, buf, 568 nomatch ? 0 : maxnsub + 1, match, eflags); 569 free(buf); 570 #else 571 match[0].rm_so = 0; 572 match[0].rm_eo = slen; 573 eval = regexec(defpreg, string, 574 nomatch ? 0 : maxnsub + 1, match, eflags | REG_STARTEND); 575 #endif 576 switch(eval) { 577 case 0: 578 return (1); 579 case REG_NOMATCH: 580 return (0); 581 } 582 err(FATAL, "RE error: %s", strregerror(eval, defpreg)); 583 /* NOTREACHED */ 584 return (0); 585 } 586 587 /* 588 * regsub - perform substitutions after a regexp match 589 * Based on a routine by Henry Spencer 590 */ 591 static void 592 regsub(SPACE *sp, char *string, char *src) 593 { 594 int len, no; 595 char c, *dst; 596 597 #define NEEDSP(reqlen) \ 598 if (sp->len + (reqlen) + 1 >= sp->blen) { \ 599 size_t newlen = sp->blen + (reqlen) + 1024; \ 600 sp->space = sp->back = xrealloc(sp->back, newlen); \ 601 sp->blen = newlen; \ 602 dst = sp->space + sp->len; \ 603 } 604 605 dst = sp->space + sp->len; 606 while ((c = *src++) != '\0') { 607 if (c == '&') 608 no = 0; 609 else if (c == '\\' && isdigit((unsigned char)*src)) 610 no = *src++ - '0'; 611 else 612 no = -1; 613 if (no < 0) { /* Ordinary character. 
*/ 614 if (c == '\\' && (*src == '\\' || *src == '&')) 615 c = *src++; 616 NEEDSP(1); 617 *dst++ = c; 618 ++sp->len; 619 } else if (match[no].rm_so != -1 && match[no].rm_eo != -1) { 620 len = match[no].rm_eo - match[no].rm_so; 621 NEEDSP(len); 622 memmove(dst, string + match[no].rm_so, len); 623 dst += len; 624 sp->len += len; 625 } 626 } 627 NEEDSP(1); 628 *dst = '\0'; 629 } 630 631 /* 632 * aspace -- 633 * Append the source space to the destination space, allocating new 634 * space as necessary. 635 */ 636 void 637 cspace(SPACE *sp, char *p, size_t len, enum e_spflag spflag) 638 { 639 size_t tlen; 640 641 /* Make sure SPACE has enough memory and ramp up quickly. */ 642 tlen = sp->len + len + 1; 643 if (tlen > sp->blen) { 644 size_t newlen = tlen + 1024; 645 sp->space = sp->back = xrealloc(sp->back, newlen); 646 sp->blen = newlen; 647 } 648 649 if (spflag == REPLACE) 650 sp->len = 0; 651 652 memmove(sp->space + sp->len, p, len); 653 654 sp->space[sp->len += len] = '\0'; 655 } 656 657 /* 658 * Close all cached opened files and report any errors 659 */ 660 void 661 cfclose(struct s_command *cp, struct s_command *end) 662 { 663 664 for (; cp != end; cp = cp->next) 665 switch(cp->code) { 666 case 's': 667 if (cp->u.s->wfd != -1 && close(cp->u.s->wfd)) 668 err(FATAL, 669 "%s: %s", cp->u.s->wfile, strerror(errno)); 670 cp->u.s->wfd = -1; 671 break; 672 case 'w': 673 if (cp->u.fd != -1 && close(cp->u.fd)) 674 err(FATAL, "%s: %s", cp->t, strerror(errno)); 675 cp->u.fd = -1; 676 break; 677 case '{': 678 cfclose(cp->u.c, cp->next); 679 break; 680 } 681 } 682