1 /* $NetBSD: process.c,v 1.38 2009/04/13 07:29:55 lukem Exp $ */ 2 3 /*- 4 * Copyright (c) 1992, 1993, 1994 5 * The Regents of the University of California. All rights reserved. 6 * 7 * This code is derived from software contributed to Berkeley by 8 * Diomidis Spinellis of Imperial College, University of London. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 3. Neither the name of the University nor the names of its contributors 19 * may be used to endorse or promote products derived from this software 20 * without specific prior written permission. 21 * 22 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 23 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 24 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 25 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 26 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 27 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 28 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 29 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 30 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 31 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 32 * SUCH DAMAGE. 33 */ 34 35 /*- 36 * Copyright (c) 1992 Diomidis Spinellis. 37 * 38 * This code is derived from software contributed to Berkeley by 39 * Diomidis Spinellis of Imperial College, University of London. 40 * 41 * Redistribution and use in source and binary forms, with or without 42 * modification, are permitted provided that the following conditions 43 * are met: 44 * 1. Redistributions of source code must retain the above copyright 45 * notice, this list of conditions and the following disclaimer. 46 * 2. Redistributions in binary form must reproduce the above copyright 47 * notice, this list of conditions and the following disclaimer in the 48 * documentation and/or other materials provided with the distribution. 49 * 3. All advertising materials mentioning features or use of this software 50 * must display the following acknowledgement: 51 * This product includes software developed by the University of 52 * California, Berkeley and its contributors. 53 * 4. Neither the name of the University nor the names of its contributors 54 * may be used to endorse or promote products derived from this software 55 * without specific prior written permission. 56 * 57 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 58 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 59 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 60 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 61 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 62 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 63 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 64 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 65 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 66 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 67 * SUCH DAMAGE. 68 */ 69 70 #if HAVE_NBTOOL_CONFIG_H 71 #include "nbtool_config.h" 72 #endif 73 74 #include <sys/cdefs.h> 75 #ifndef lint 76 #if 0 77 static char sccsid[] = "@(#)process.c 8.6 (Berkeley) 4/20/94"; 78 #else 79 __RCSID("$NetBSD: process.c,v 1.38 2009/04/13 07:29:55 lukem Exp $"); 80 #endif 81 #endif /* not lint */ 82 83 #include <sys/types.h> 84 #include <sys/stat.h> 85 #include <sys/ioctl.h> 86 #include <sys/uio.h> 87 88 #include <ctype.h> 89 #include <errno.h> 90 #include <fcntl.h> 91 #include <limits.h> 92 #include <regex.h> 93 #include <stdio.h> 94 #include <stdlib.h> 95 #include <string.h> 96 #include <unistd.h> 97 98 #include "defs.h" 99 #include "extern.h" 100 101 static SPACE HS, PS, SS; 102 #define pd PS.deleted 103 #define ps PS.space 104 #define psl PS.len 105 #define hs HS.space 106 #define hsl HS.len 107 108 static inline int applies(struct s_command *); 109 static void flush_appends(void); 110 static void lputs(char *); 111 static inline int regexec_e(regex_t *, const char *, int, int, size_t); 112 static void regsub(SPACE *, char *, char *); 113 static int substitute(struct s_command *); 114 115 struct s_appends *appends; /* Array of pointers to strings to append. */ 116 static int appendx; /* Index into appends array. */ 117 int appendnum; /* Size of appends array. */ 118 119 static int lastaddr; /* Set by applies if last address of a range. */ 120 static int sdone; /* If any substitutes since last line input. */ 121 /* Iov structure for 'w' commands. */ 122 static regex_t *defpreg; 123 size_t maxnsub; 124 regmatch_t *match; 125 126 #define OUT(s) { fwrite(s, sizeof(u_char), psl, stdout); } 127 128 void 129 process(void) 130 { 131 struct s_command *cp; 132 SPACE tspace; 133 size_t len, oldpsl; 134 char *p; 135 136 oldpsl = 0; 137 for (linenum = 0; mf_fgets(&PS, REPLACE);) { 138 pd = 0; 139 top: 140 cp = prog; 141 redirect: 142 while (cp != NULL) { 143 if (!applies(cp)) { 144 cp = cp->next; 145 continue; 146 } 147 switch (cp->code) { 148 case '{': 149 cp = cp->u.c; 150 goto redirect; 151 case 'a': 152 if (appendx >= appendnum) { 153 appends = xrealloc(appends, 154 sizeof(struct s_appends) * 155 (appendnum * 2)); 156 appendnum *= 2; 157 } 158 appends[appendx].type = AP_STRING; 159 appends[appendx].s = cp->t; 160 appends[appendx].len = strlen(cp->t); 161 appendx++; 162 break; 163 case 'b': 164 cp = cp->u.c; 165 goto redirect; 166 case 'c': 167 pd = 1; 168 psl = 0; 169 if (cp->a2 == NULL || lastaddr) 170 (void)printf("%s", cp->t); 171 break; 172 case 'd': 173 pd = 1; 174 goto new; 175 case 'D': 176 if (psl == 0) 177 pd = 1; 178 if (pd) 179 goto new; 180 if ((p = memchr(ps, '\n', psl - 1)) == NULL) { 181 pd = 1; 182 goto new; 183 } else { 184 psl -= (p + 1) - ps; 185 memmove(ps, p + 1, psl); 186 goto top; 187 } 188 case 'g': 189 cspace(&PS, hs, hsl, REPLACE); 190 break; 191 case 'G': 192 if (hs == NULL) 193 cspace(&HS, "\n", 1, REPLACE); 194 cspace(&PS, hs, hsl, 0); 195 break; 196 case 'h': 197 cspace(&HS, ps, psl, REPLACE); 198 break; 199 case 'H': 200 cspace(&HS, ps, psl, 0); 201 break; 202 case 'i': 203 (void)printf("%s", cp->t); 204 break; 205 case 'l': 206 lputs(ps); 207 break; 208 case 'n': 209 if (!nflag && !pd) 210 OUT(ps) 211 flush_appends(); 212 if (!mf_fgets(&PS, REPLACE)) 213 exit(0); 214 pd = 0; 215 break; 216 case 'N': 217 flush_appends(); 218 if (!mf_fgets(&PS, 0)) { 219 if (!nflag && !pd) 220 OUT(ps) 221 exit(0); 222 } 223 break; 224 case 'p': 225 if (pd) 226 break; 227 OUT(ps) 228 break; 229 case 'P': 230 if (pd) 231 break; 232 if ((p = memchr(ps, '\n', psl - 1)) != NULL) { 233 oldpsl = psl; 234 psl = (p + 1) - ps; 235 } 236 OUT(ps) 237 if (p != NULL) 238 psl = oldpsl; 239 break; 240 case 'q': 241 if (!nflag && !pd) 242 OUT(ps) 243 flush_appends(); 244 exit(0); 245 case 'r': 246 if (appendx >= appendnum) { 247 appends = xrealloc(appends, 248 sizeof(struct s_appends) * 249 (appendnum * 2)); 250 appendnum *= 2; 251 } 252 appends[appendx].type = AP_FILE; 253 appends[appendx].s = cp->t; 254 appends[appendx].len = strlen(cp->t); 255 appendx++; 256 break; 257 case 's': 258 sdone |= substitute(cp); 259 break; 260 case 't': 261 if (sdone) { 262 sdone = 0; 263 cp = cp->u.c; 264 goto redirect; 265 } 266 break; 267 case 'w': 268 if (pd) 269 break; 270 if (cp->u.fd == -1 && (cp->u.fd = open(cp->t, 271 O_WRONLY|O_APPEND|O_CREAT|O_TRUNC, 272 DEFFILEMODE)) == -1) 273 err(FATAL, "%s: %s", 274 cp->t, strerror(errno)); 275 if ((size_t)write(cp->u.fd, ps, psl) != psl) 276 err(FATAL, "%s: %s", 277 cp->t, strerror(errno)); 278 break; 279 case 'x': 280 if (hs == NULL) 281 cspace(&HS, "\n", 1, REPLACE); 282 tspace = PS; 283 PS = HS; 284 HS = tspace; 285 break; 286 case 'y': 287 if (pd) 288 break; 289 for (p = ps, len = psl; --len; ++p) 290 *p = cp->u.y[(int)*p]; 291 break; 292 case ':': 293 case '}': 294 break; 295 case '=': 296 (void)printf("%lu\n", linenum); 297 } 298 cp = cp->next; 299 } /* for all cp */ 300 301 new: if (!nflag && !pd) 302 OUT(ps) 303 flush_appends(); 304 } /* for all lines */ 305 } 306 307 /* 308 * TRUE if the address passed matches the current program state 309 * (lastline, linenumber, ps). 310 */ 311 #define MATCH(a) \ 312 (a)->type == AT_RE ? regexec_e((a)->u.r, ps, 0, 1, psl) : \ 313 (a)->type == AT_LINE ? linenum == (a)->u.l : lastline 314 315 /* 316 * Return TRUE if the command applies to the current line. Sets the inrange 317 * flag to process ranges. Interprets the non-select (``!'') flag. 318 */ 319 static inline int 320 applies(struct s_command *cp) 321 { 322 int r; 323 324 lastaddr = 0; 325 if (cp->a1 == NULL && cp->a2 == NULL) 326 r = 1; 327 else if (cp->a2) { 328 if (cp->inrange) { 329 if (MATCH(cp->a2)) { 330 cp->inrange = 0; 331 lastaddr = 1; 332 } 333 r = 1; 334 } else if (cp->a1 && MATCH(cp->a1)) { 335 /* 336 * If the second address is a number less than or 337 * equal to the line number first selected, only 338 * one line shall be selected. 339 * -- POSIX 1003.2 340 */ 341 if (cp->a2->type == AT_LINE && 342 linenum >= cp->a2->u.l) 343 lastaddr = 1; 344 else 345 cp->inrange = 1; 346 r = 1; 347 } else 348 r = 0; 349 } else 350 r = MATCH(cp->a1); 351 return (cp->nonsel ? ! r : r); 352 } 353 354 /* 355 * substitute -- 356 * Do substitutions in the pattern space. Currently, we build a 357 * copy of the new pattern space in the substitute space structure 358 * and then swap them. 359 */ 360 static int 361 substitute(struct s_command *cp) 362 { 363 SPACE tspace; 364 regex_t *re; 365 size_t re_off, slen; 366 int lastempty, n; 367 char *s; 368 369 s = ps; 370 re = cp->u.s->re; 371 if (re == NULL) { 372 if (defpreg != NULL && (size_t)cp->u.s->maxbref > defpreg->re_nsub) { 373 linenum = cp->u.s->linenum; 374 err(COMPILE, "\\%d not defined in the RE", 375 cp->u.s->maxbref); 376 } 377 } 378 if (!regexec_e(re, s, 0, 0, psl)) 379 return (0); 380 381 SS.len = 0; /* Clean substitute space. */ 382 slen = psl; 383 n = cp->u.s->n; 384 lastempty = 1; 385 386 switch (n) { 387 case 0: /* Global */ 388 do { 389 if (lastempty || match[0].rm_so != match[0].rm_eo) { 390 /* Locate start of replaced string. */ 391 re_off = match[0].rm_so; 392 /* Copy leading retained string. */ 393 cspace(&SS, s, re_off, APPEND); 394 /* Add in regular expression. */ 395 regsub(&SS, s, cp->u.s->new); 396 } 397 398 /* Move past this match. */ 399 if (match[0].rm_so != match[0].rm_eo) { 400 s += match[0].rm_eo; 401 slen -= match[0].rm_eo; 402 lastempty = 0; 403 } else { 404 if (match[0].rm_so == 0) 405 cspace(&SS, 406 s, match[0].rm_so + 1, APPEND); 407 else 408 cspace(&SS, 409 s + match[0].rm_so, 1, APPEND); 410 s += match[0].rm_so + 1; 411 slen -= match[0].rm_so + 1; 412 lastempty = 1; 413 } 414 } while (slen > 0 && regexec_e(re, s, REG_NOTBOL, 0, slen)); 415 /* Copy trailing retained string. */ 416 if (slen > 0) 417 cspace(&SS, s, slen, APPEND); 418 break; 419 default: /* Nth occurrence */ 420 while (--n) { 421 s += match[0].rm_eo; 422 slen -= match[0].rm_eo; 423 if (!regexec_e(re, s, REG_NOTBOL, 0, slen)) 424 return (0); 425 } 426 /* FALLTHROUGH */ 427 case 1: /* 1st occurrence */ 428 /* Locate start of replaced string. */ 429 re_off = match[0].rm_so + (s - ps); 430 /* Copy leading retained string. */ 431 cspace(&SS, ps, re_off, APPEND); 432 /* Add in regular expression. */ 433 regsub(&SS, s, cp->u.s->new); 434 /* Copy trailing retained string. */ 435 s += match[0].rm_eo; 436 slen -= match[0].rm_eo; 437 cspace(&SS, s, slen, APPEND); 438 break; 439 } 440 441 /* 442 * Swap the substitute space and the pattern space, and make sure 443 * that any leftover pointers into stdio memory get lost. 444 */ 445 tspace = PS; 446 PS = SS; 447 SS = tspace; 448 SS.space = SS.back; 449 450 /* Handle the 'p' flag. */ 451 if (cp->u.s->p) 452 OUT(ps) 453 454 /* Handle the 'w' flag. */ 455 if (cp->u.s->wfile && !pd) { 456 if (cp->u.s->wfd == -1 && (cp->u.s->wfd = open(cp->u.s->wfile, 457 O_WRONLY|O_APPEND|O_CREAT|O_TRUNC, DEFFILEMODE)) == -1) 458 err(FATAL, "%s: %s", cp->u.s->wfile, strerror(errno)); 459 if ((size_t)write(cp->u.s->wfd, ps, psl) != psl) 460 err(FATAL, "%s: %s", cp->u.s->wfile, strerror(errno)); 461 } 462 return (1); 463 } 464 465 /* 466 * Flush append requests. Always called before reading a line, 467 * therefore it also resets the substitution done (sdone) flag. 468 */ 469 static void 470 flush_appends(void) 471 { 472 FILE *f; 473 int count, i; 474 char buf[8 * 1024]; 475 476 for (i = 0; i < appendx; i++) 477 switch (appends[i].type) { 478 case AP_STRING: 479 fwrite(appends[i].s, sizeof(char), appends[i].len, 480 stdout); 481 break; 482 case AP_FILE: 483 /* 484 * Read files probably shouldn't be cached. Since 485 * it's not an error to read a non-existent file, 486 * it's possible that another program is interacting 487 * with the sed script through the file system. It 488 * would be truly bizarre, but possible. It's probably 489 * not that big a performance win, anyhow. 490 */ 491 if ((f = fopen(appends[i].s, "r")) == NULL) 492 break; 493 while ((count = 494 fread(buf, sizeof(char), sizeof(buf), f)) > 0) 495 (void)fwrite(buf, sizeof(char), count, stdout); 496 (void)fclose(f); 497 break; 498 } 499 if (ferror(stdout)) 500 err(FATAL, "stdout: %s", strerror(errno ? errno : EIO)); 501 appendx = sdone = 0; 502 } 503 504 static void 505 lputs(char *s) 506 { 507 int count; 508 const char *escapes, *p; 509 #ifndef HAVE_NBTOOL_CONFIG_H 510 struct winsize win; 511 #endif 512 static int termwidth = -1; 513 514 if (termwidth == -1) { 515 if ((p = getenv("COLUMNS")) != NULL) 516 termwidth = atoi(p); 517 #ifndef HAVE_NBTOOL_CONFIG_H 518 else if (ioctl(STDOUT_FILENO, TIOCGWINSZ, &win) == 0 && 519 win.ws_col > 0) 520 termwidth = win.ws_col; 521 #endif 522 else 523 termwidth = 60; 524 } 525 for (count = 0; *s; ++s) { 526 if (count >= termwidth) { 527 (void)printf("\\\n"); 528 count = 0; 529 } 530 if (isascii((unsigned char)*s) && isprint((unsigned char)*s) && 531 *s != '\\') { 532 (void)putchar(*s); 533 count++; 534 } else { 535 escapes = "\\\a\b\f\n\r\t\v"; 536 (void)putchar('\\'); 537 if ((p = strchr(escapes, *s)) != NULL) { 538 (void)putchar("\\abfnrtv"[p - escapes]); 539 count += 2; 540 } else { 541 (void)printf("%03o", *(u_char *)s); 542 count += 4; 543 } 544 } 545 } 546 (void)putchar('$'); 547 (void)putchar('\n'); 548 if (ferror(stdout)) 549 err(FATAL, "stdout: %s", strerror(errno ? errno : EIO)); 550 } 551 552 static inline int 553 regexec_e(regex_t *preg, const char *string, int eflags, int nomatch, size_t slen) 554 { 555 int eval; 556 #ifndef REG_STARTEND 557 char *buf; 558 #endif 559 560 if (preg == NULL) { 561 if (defpreg == NULL) 562 err(FATAL, "first RE may not be empty"); 563 } else 564 defpreg = preg; 565 566 /* Set anchors, discounting trailing newline (if any). */ 567 if (slen > 0 && string[slen - 1] == '\n') 568 slen--; 569 570 #ifndef REG_STARTEND 571 if ((buf = malloc(slen + 1)) == NULL) 572 err(1, NULL); 573 (void)memcpy(buf, string, slen); 574 buf[slen] = '\0'; 575 eval = regexec(defpreg, buf, 576 nomatch ? 0 : maxnsub + 1, match, eflags); 577 free(buf); 578 #else 579 match[0].rm_so = 0; 580 match[0].rm_eo = slen; 581 eval = regexec(defpreg, string, 582 nomatch ? 0 : maxnsub + 1, match, eflags | REG_STARTEND); 583 #endif 584 switch(eval) { 585 case 0: 586 return (1); 587 case REG_NOMATCH: 588 return (0); 589 } 590 err(FATAL, "RE error: %s", strregerror(eval, defpreg)); 591 /* NOTREACHED */ 592 return (0); 593 } 594 595 /* 596 * regsub - perform substitutions after a regexp match 597 * Based on a routine by Henry Spencer 598 */ 599 static void 600 regsub(SPACE *sp, char *string, char *src) 601 { 602 int len, no; 603 char c, *dst; 604 605 #define NEEDSP(reqlen) \ 606 if (sp->len + (reqlen) + 1 >= sp->blen) { \ 607 size_t newlen = sp->blen + (reqlen) + 1024; \ 608 sp->space = sp->back = xrealloc(sp->back, newlen); \ 609 sp->blen = newlen; \ 610 dst = sp->space + sp->len; \ 611 } 612 613 dst = sp->space + sp->len; 614 while ((c = *src++) != '\0') { 615 if (c == '&') 616 no = 0; 617 else if (c == '\\' && isdigit((unsigned char)*src)) 618 no = *src++ - '0'; 619 else 620 no = -1; 621 if (no < 0) { /* Ordinary character. */ 622 if (c == '\\' && (*src == '\\' || *src == '&')) 623 c = *src++; 624 NEEDSP(1); 625 *dst++ = c; 626 ++sp->len; 627 } else if (match[no].rm_so != -1 && match[no].rm_eo != -1) { 628 len = match[no].rm_eo - match[no].rm_so; 629 NEEDSP(len); 630 memmove(dst, string + match[no].rm_so, len); 631 dst += len; 632 sp->len += len; 633 } 634 } 635 NEEDSP(1); 636 *dst = '\0'; 637 } 638 639 /* 640 * aspace -- 641 * Append the source space to the destination space, allocating new 642 * space as necessary. 643 */ 644 void 645 cspace(SPACE *sp, const char *p, size_t len, enum e_spflag spflag) 646 { 647 size_t tlen; 648 649 /* Make sure SPACE has enough memory and ramp up quickly. */ 650 tlen = sp->len + len + 1; 651 if (tlen > sp->blen) { 652 size_t newlen = tlen + 1024; 653 sp->space = sp->back = xrealloc(sp->back, newlen); 654 sp->blen = newlen; 655 } 656 657 if (spflag == REPLACE) 658 sp->len = 0; 659 660 memmove(sp->space + sp->len, p, len); 661 662 sp->space[sp->len += len] = '\0'; 663 } 664 665 /* 666 * Close all cached opened files and report any errors 667 */ 668 void 669 cfclose(struct s_command *cp, struct s_command *end) 670 { 671 672 for (; cp != end; cp = cp->next) 673 switch(cp->code) { 674 case 's': 675 if (cp->u.s->wfd != -1 && close(cp->u.s->wfd)) 676 err(FATAL, 677 "%s: %s", cp->u.s->wfile, strerror(errno)); 678 cp->u.s->wfd = -1; 679 break; 680 case 'w': 681 if (cp->u.fd != -1 && close(cp->u.fd)) 682 err(FATAL, "%s: %s", cp->t, strerror(errno)); 683 cp->u.fd = -1; 684 break; 685 case '{': 686 cfclose(cp->u.c, cp->next); 687 break; 688 } 689 } 690