1 /* $NetBSD: process.c,v 1.38 2009/04/13 07:29:55 lukem Exp $ */ 2 3 /*- 4 * Copyright (c) 1992, 1993, 1994 5 * The Regents of the University of California. All rights reserved. 6 * 7 * This code is derived from software contributed to Berkeley by 8 * Diomidis Spinellis of Imperial College, University of London. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 3. Neither the name of the University nor the names of its contributors 19 * may be used to endorse or promote products derived from this software 20 * without specific prior written permission. 21 * 22 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 23 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 24 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 25 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 26 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 27 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 28 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 29 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 30 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 31 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 32 * SUCH DAMAGE. 33 */ 34 35 /*- 36 * Copyright (c) 1992 Diomidis Spinellis. 37 * 38 * This code is derived from software contributed to Berkeley by 39 * Diomidis Spinellis of Imperial College, University of London. 40 * 41 * Redistribution and use in source and binary forms, with or without 42 * modification, are permitted provided that the following conditions 43 * are met: 44 * 1. Redistributions of source code must retain the above copyright 45 * notice, this list of conditions and the following disclaimer. 46 * 2. Redistributions in binary form must reproduce the above copyright 47 * notice, this list of conditions and the following disclaimer in the 48 * documentation and/or other materials provided with the distribution. 49 * 3. All advertising materials mentioning features or use of this software 50 * must display the following acknowledgement: 51 * This product includes software developed by the University of 52 * California, Berkeley and its contributors. 53 * 4. Neither the name of the University nor the names of its contributors 54 * may be used to endorse or promote products derived from this software 55 * without specific prior written permission. 56 * 57 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 58 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 59 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 60 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 61 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 62 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 63 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 64 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 65 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 66 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 67 * SUCH DAMAGE. 68 */ 69 70 #if HAVE_NBTOOL_CONFIG_H 71 #include "nbtool_config.h" 72 #endif 73 74 #include <sys/cdefs.h> 75 #ifndef lint 76 #if 0 77 static char sccsid[] = "@(#)process.c 8.6 (Berkeley) 4/20/94"; 78 #else 79 __RCSID("$NetBSD: process.c,v 1.38 2009/04/13 07:29:55 lukem Exp $"); 80 #endif 81 #endif /* not lint */ 82 83 #include <sys/types.h> 84 #include <sys/stat.h> 85 #include <sys/ioctl.h> 86 #include <sys/uio.h> 87 #ifdef __minix 88 #include <minix/termios.h> 89 #endif 90 #include <ctype.h> 91 #include <errno.h> 92 #include <fcntl.h> 93 #include <limits.h> 94 #include <regex.h> 95 #include <stdio.h> 96 #include <stdlib.h> 97 #include <string.h> 98 #include <unistd.h> 99 100 #include "defs.h" 101 #include "extern.h" 102 103 static SPACE HS, PS, SS; 104 #define pd PS.deleted 105 #define ps PS.space 106 #define psl PS.len 107 #define hs HS.space 108 #define hsl HS.len 109 110 static inline int applies(struct s_command *); 111 static void flush_appends(void); 112 static void lputs(char *); 113 static inline int regexec_e(regex_t *, const char *, int, int, size_t); 114 static void regsub(SPACE *, char *, char *); 115 static int substitute(struct s_command *); 116 117 struct s_appends *appends; /* Array of pointers to strings to append. */ 118 static int appendx; /* Index into appends array. */ 119 int appendnum; /* Size of appends array. */ 120 121 static int lastaddr; /* Set by applies if last address of a range. */ 122 static int sdone; /* If any substitutes since last line input. */ 123 /* Iov structure for 'w' commands. */ 124 static regex_t *defpreg; 125 size_t maxnsub; 126 regmatch_t *match; 127 128 #define OUT(s) { fwrite(s, sizeof(u_char), psl, stdout); } 129 130 void 131 process(void) 132 { 133 struct s_command *cp; 134 SPACE tspace; 135 size_t len, oldpsl; 136 char *p; 137 138 oldpsl = 0; 139 for (linenum = 0; mf_fgets(&PS, REPLACE);) { 140 pd = 0; 141 top: 142 cp = prog; 143 redirect: 144 while (cp != NULL) { 145 if (!applies(cp)) { 146 cp = cp->next; 147 continue; 148 } 149 switch (cp->code) { 150 case '{': 151 cp = cp->u.c; 152 goto redirect; 153 case 'a': 154 if (appendx >= appendnum) { 155 appends = xrealloc(appends, 156 sizeof(struct s_appends) * 157 (appendnum * 2)); 158 appendnum *= 2; 159 } 160 appends[appendx].type = AP_STRING; 161 appends[appendx].s = cp->t; 162 appends[appendx].len = strlen(cp->t); 163 appendx++; 164 break; 165 case 'b': 166 cp = cp->u.c; 167 goto redirect; 168 case 'c': 169 pd = 1; 170 psl = 0; 171 if (cp->a2 == NULL || lastaddr) 172 (void)printf("%s", cp->t); 173 break; 174 case 'd': 175 pd = 1; 176 goto new; 177 case 'D': 178 if (psl == 0) 179 pd = 1; 180 if (pd) 181 goto new; 182 if ((p = memchr(ps, '\n', psl - 1)) == NULL) { 183 pd = 1; 184 goto new; 185 } else { 186 psl -= (p + 1) - ps; 187 memmove(ps, p + 1, psl); 188 goto top; 189 } 190 case 'g': 191 cspace(&PS, hs, hsl, REPLACE); 192 break; 193 case 'G': 194 if (hs == NULL) 195 cspace(&HS, "\n", 1, REPLACE); 196 cspace(&PS, hs, hsl, 0); 197 break; 198 case 'h': 199 cspace(&HS, ps, psl, REPLACE); 200 break; 201 case 'H': 202 cspace(&HS, ps, psl, 0); 203 break; 204 case 'i': 205 (void)printf("%s", cp->t); 206 break; 207 case 'l': 208 lputs(ps); 209 break; 210 case 'n': 211 if (!nflag && !pd) 212 OUT(ps) 213 flush_appends(); 214 if (!mf_fgets(&PS, REPLACE)) 215 exit(0); 216 pd = 0; 217 break; 218 case 'N': 219 flush_appends(); 220 if (!mf_fgets(&PS, 0)) { 221 if (!nflag && !pd) 222 OUT(ps) 223 exit(0); 224 } 225 break; 226 case 'p': 227 if (pd) 228 break; 229 OUT(ps) 230 break; 231 case 'P': 232 if (pd) 233 break; 234 if ((p = memchr(ps, '\n', psl - 1)) != NULL) { 235 oldpsl = psl; 236 psl = (p + 1) - ps; 237 } 238 OUT(ps) 239 if (p != NULL) 240 psl = oldpsl; 241 break; 242 case 'q': 243 if (!nflag && !pd) 244 OUT(ps) 245 flush_appends(); 246 exit(0); 247 case 'r': 248 if (appendx >= appendnum) { 249 appends = xrealloc(appends, 250 sizeof(struct s_appends) * 251 (appendnum * 2)); 252 appendnum *= 2; 253 } 254 appends[appendx].type = AP_FILE; 255 appends[appendx].s = cp->t; 256 appends[appendx].len = strlen(cp->t); 257 appendx++; 258 break; 259 case 's': 260 sdone |= substitute(cp); 261 break; 262 case 't': 263 if (sdone) { 264 sdone = 0; 265 cp = cp->u.c; 266 goto redirect; 267 } 268 break; 269 case 'w': 270 if (pd) 271 break; 272 if (cp->u.fd == -1 && (cp->u.fd = open(cp->t, 273 O_WRONLY|O_APPEND|O_CREAT|O_TRUNC, 274 DEFFILEMODE)) == -1) 275 err(FATAL, "%s: %s", 276 cp->t, strerror(errno)); 277 if ((size_t)write(cp->u.fd, ps, psl) != psl) 278 err(FATAL, "%s: %s", 279 cp->t, strerror(errno)); 280 break; 281 case 'x': 282 if (hs == NULL) 283 cspace(&HS, "\n", 1, REPLACE); 284 tspace = PS; 285 PS = HS; 286 HS = tspace; 287 break; 288 case 'y': 289 if (pd) 290 break; 291 for (p = ps, len = psl; --len; ++p) 292 *p = cp->u.y[(int)*p]; 293 break; 294 case ':': 295 case '}': 296 break; 297 case '=': 298 (void)printf("%lu\n", linenum); 299 } 300 cp = cp->next; 301 } /* for all cp */ 302 303 new: if (!nflag && !pd) 304 OUT(ps) 305 flush_appends(); 306 } /* for all lines */ 307 } 308 309 /* 310 * TRUE if the address passed matches the current program state 311 * (lastline, linenumber, ps). 312 */ 313 #define MATCH(a) \ 314 (a)->type == AT_RE ? regexec_e((a)->u.r, ps, 0, 1, psl) : \ 315 (a)->type == AT_LINE ? linenum == (a)->u.l : lastline 316 317 /* 318 * Return TRUE if the command applies to the current line. Sets the inrange 319 * flag to process ranges. Interprets the non-select (``!'') flag. 320 */ 321 static inline int 322 applies(struct s_command *cp) 323 { 324 int r; 325 326 lastaddr = 0; 327 if (cp->a1 == NULL && cp->a2 == NULL) 328 r = 1; 329 else if (cp->a2) { 330 if (cp->inrange) { 331 if (MATCH(cp->a2)) { 332 cp->inrange = 0; 333 lastaddr = 1; 334 } 335 r = 1; 336 } else if (cp->a1 && MATCH(cp->a1)) { 337 /* 338 * If the second address is a number less than or 339 * equal to the line number first selected, only 340 * one line shall be selected. 341 * -- POSIX 1003.2 342 */ 343 if (cp->a2->type == AT_LINE && 344 linenum >= cp->a2->u.l) 345 lastaddr = 1; 346 else 347 cp->inrange = 1; 348 r = 1; 349 } else 350 r = 0; 351 } else 352 r = MATCH(cp->a1); 353 return (cp->nonsel ? ! r : r); 354 } 355 356 /* 357 * substitute -- 358 * Do substitutions in the pattern space. Currently, we build a 359 * copy of the new pattern space in the substitute space structure 360 * and then swap them. 361 */ 362 static int 363 substitute(struct s_command *cp) 364 { 365 SPACE tspace; 366 regex_t *re; 367 size_t re_off, slen; 368 int lastempty, n; 369 char *s; 370 371 s = ps; 372 re = cp->u.s->re; 373 if (re == NULL) { 374 if (defpreg != NULL && (size_t)cp->u.s->maxbref > defpreg->re_nsub) { 375 linenum = cp->u.s->linenum; 376 err(COMPILE, "\\%d not defined in the RE", 377 cp->u.s->maxbref); 378 } 379 } 380 if (!regexec_e(re, s, 0, 0, psl)) 381 return (0); 382 383 SS.len = 0; /* Clean substitute space. */ 384 slen = psl; 385 n = cp->u.s->n; 386 lastempty = 1; 387 388 switch (n) { 389 case 0: /* Global */ 390 do { 391 if (lastempty || match[0].rm_so != match[0].rm_eo) { 392 /* Locate start of replaced string. */ 393 re_off = match[0].rm_so; 394 /* Copy leading retained string. */ 395 cspace(&SS, s, re_off, APPEND); 396 /* Add in regular expression. */ 397 regsub(&SS, s, cp->u.s->new); 398 } 399 400 /* Move past this match. */ 401 if (match[0].rm_so != match[0].rm_eo) { 402 s += match[0].rm_eo; 403 slen -= match[0].rm_eo; 404 lastempty = 0; 405 } else { 406 if (match[0].rm_so == 0) 407 cspace(&SS, 408 s, match[0].rm_so + 1, APPEND); 409 else 410 cspace(&SS, 411 s + match[0].rm_so, 1, APPEND); 412 s += match[0].rm_so + 1; 413 slen -= match[0].rm_so + 1; 414 lastempty = 1; 415 } 416 } while (slen > 0 && regexec_e(re, s, REG_NOTBOL, 0, slen)); 417 /* Copy trailing retained string. */ 418 if (slen > 0) 419 cspace(&SS, s, slen, APPEND); 420 break; 421 default: /* Nth occurrence */ 422 while (--n) { 423 s += match[0].rm_eo; 424 slen -= match[0].rm_eo; 425 if (!regexec_e(re, s, REG_NOTBOL, 0, slen)) 426 return (0); 427 } 428 /* FALLTHROUGH */ 429 case 1: /* 1st occurrence */ 430 /* Locate start of replaced string. */ 431 re_off = match[0].rm_so + (s - ps); 432 /* Copy leading retained string. */ 433 cspace(&SS, ps, re_off, APPEND); 434 /* Add in regular expression. */ 435 regsub(&SS, s, cp->u.s->new); 436 /* Copy trailing retained string. */ 437 s += match[0].rm_eo; 438 slen -= match[0].rm_eo; 439 cspace(&SS, s, slen, APPEND); 440 break; 441 } 442 443 /* 444 * Swap the substitute space and the pattern space, and make sure 445 * that any leftover pointers into stdio memory get lost. 446 */ 447 tspace = PS; 448 PS = SS; 449 SS = tspace; 450 SS.space = SS.back; 451 452 /* Handle the 'p' flag. */ 453 if (cp->u.s->p) 454 OUT(ps) 455 456 /* Handle the 'w' flag. */ 457 if (cp->u.s->wfile && !pd) { 458 if (cp->u.s->wfd == -1 && (cp->u.s->wfd = open(cp->u.s->wfile, 459 O_WRONLY|O_APPEND|O_CREAT|O_TRUNC, DEFFILEMODE)) == -1) 460 err(FATAL, "%s: %s", cp->u.s->wfile, strerror(errno)); 461 if ((size_t)write(cp->u.s->wfd, ps, psl) != psl) 462 err(FATAL, "%s: %s", cp->u.s->wfile, strerror(errno)); 463 } 464 return (1); 465 } 466 467 /* 468 * Flush append requests. Always called before reading a line, 469 * therefore it also resets the substitution done (sdone) flag. 470 */ 471 static void 472 flush_appends(void) 473 { 474 FILE *f; 475 int count, i; 476 char buf[8 * 1024]; 477 478 for (i = 0; i < appendx; i++) 479 switch (appends[i].type) { 480 case AP_STRING: 481 fwrite(appends[i].s, sizeof(char), appends[i].len, 482 stdout); 483 break; 484 case AP_FILE: 485 /* 486 * Read files probably shouldn't be cached. Since 487 * it's not an error to read a non-existent file, 488 * it's possible that another program is interacting 489 * with the sed script through the file system. It 490 * would be truly bizarre, but possible. It's probably 491 * not that big a performance win, anyhow. 492 */ 493 if ((f = fopen(appends[i].s, "r")) == NULL) 494 break; 495 while ((count = 496 fread(buf, sizeof(char), sizeof(buf), f)) > 0) 497 (void)fwrite(buf, sizeof(char), count, stdout); 498 (void)fclose(f); 499 break; 500 } 501 if (ferror(stdout)) 502 err(FATAL, "stdout: %s", strerror(errno ? errno : EIO)); 503 appendx = sdone = 0; 504 } 505 506 static void 507 lputs(char *s) 508 { 509 int count; 510 const char *escapes, *p; 511 #ifndef HAVE_NBTOOL_CONFIG_H 512 struct winsize win; 513 #endif 514 static int termwidth = -1; 515 516 if (termwidth == -1) { 517 if ((p = getenv("COLUMNS")) != NULL) 518 termwidth = atoi(p); 519 #ifndef HAVE_NBTOOL_CONFIG_H 520 else if (ioctl(STDOUT_FILENO, TIOCGWINSZ, &win) == 0 && 521 win.ws_col > 0) 522 termwidth = win.ws_col; 523 #endif 524 else 525 termwidth = 60; 526 } 527 for (count = 0; *s; ++s) { 528 if (count >= termwidth) { 529 (void)printf("\\\n"); 530 count = 0; 531 } 532 if (isascii((unsigned char)*s) && isprint((unsigned char)*s) && 533 *s != '\\') { 534 (void)putchar(*s); 535 count++; 536 } else { 537 escapes = "\\\a\b\f\n\r\t\v"; 538 (void)putchar('\\'); 539 if ((p = strchr(escapes, *s)) != NULL) { 540 (void)putchar("\\abfnrtv"[p - escapes]); 541 count += 2; 542 } else { 543 (void)printf("%03o", *(u_char *)s); 544 count += 4; 545 } 546 } 547 } 548 (void)putchar('$'); 549 (void)putchar('\n'); 550 if (ferror(stdout)) 551 err(FATAL, "stdout: %s", strerror(errno ? errno : EIO)); 552 } 553 554 static inline int 555 regexec_e(regex_t *preg, const char *string, int eflags, int nomatch, size_t slen) 556 { 557 int eval; 558 #ifndef REG_STARTEND 559 char *buf; 560 #endif 561 562 if (preg == NULL) { 563 if (defpreg == NULL) 564 err(FATAL, "first RE may not be empty"); 565 } else 566 defpreg = preg; 567 568 /* Set anchors, discounting trailing newline (if any). */ 569 if (slen > 0 && string[slen - 1] == '\n') 570 slen--; 571 572 #ifndef REG_STARTEND 573 if ((buf = malloc(slen + 1)) == NULL) 574 err(1, NULL); 575 (void)memcpy(buf, string, slen); 576 buf[slen] = '\0'; 577 eval = regexec(defpreg, buf, 578 nomatch ? 0 : maxnsub + 1, match, eflags); 579 free(buf); 580 #else 581 match[0].rm_so = 0; 582 match[0].rm_eo = slen; 583 eval = regexec(defpreg, string, 584 nomatch ? 0 : maxnsub + 1, match, eflags | REG_STARTEND); 585 #endif 586 switch(eval) { 587 case 0: 588 return (1); 589 case REG_NOMATCH: 590 return (0); 591 } 592 err(FATAL, "RE error: %s", strregerror(eval, defpreg)); 593 /* NOTREACHED */ 594 return (0); 595 } 596 597 /* 598 * regsub - perform substitutions after a regexp match 599 * Based on a routine by Henry Spencer 600 */ 601 static void 602 regsub(SPACE *sp, char *string, char *src) 603 { 604 int len, no; 605 char c, *dst; 606 607 #define NEEDSP(reqlen) \ 608 if (sp->len + (reqlen) + 1 >= sp->blen) { \ 609 size_t newlen = sp->blen + (reqlen) + 1024; \ 610 sp->space = sp->back = xrealloc(sp->back, newlen); \ 611 sp->blen = newlen; \ 612 dst = sp->space + sp->len; \ 613 } 614 615 dst = sp->space + sp->len; 616 while ((c = *src++) != '\0') { 617 if (c == '&') 618 no = 0; 619 else if (c == '\\' && isdigit((unsigned char)*src)) 620 no = *src++ - '0'; 621 else 622 no = -1; 623 if (no < 0) { /* Ordinary character. */ 624 if (c == '\\' && (*src == '\\' || *src == '&')) 625 c = *src++; 626 NEEDSP(1); 627 *dst++ = c; 628 ++sp->len; 629 } else if (match[no].rm_so != -1 && match[no].rm_eo != -1) { 630 len = match[no].rm_eo - match[no].rm_so; 631 NEEDSP(len); 632 memmove(dst, string + match[no].rm_so, len); 633 dst += len; 634 sp->len += len; 635 } 636 } 637 NEEDSP(1); 638 *dst = '\0'; 639 } 640 641 /* 642 * aspace -- 643 * Append the source space to the destination space, allocating new 644 * space as necessary. 645 */ 646 void 647 cspace(SPACE *sp, const char *p, size_t len, enum e_spflag spflag) 648 { 649 size_t tlen; 650 651 /* Make sure SPACE has enough memory and ramp up quickly. */ 652 tlen = sp->len + len + 1; 653 if (tlen > sp->blen) { 654 size_t newlen = tlen + 1024; 655 sp->space = sp->back = xrealloc(sp->back, newlen); 656 sp->blen = newlen; 657 } 658 659 if (spflag == REPLACE) 660 sp->len = 0; 661 662 memmove(sp->space + sp->len, p, len); 663 664 sp->space[sp->len += len] = '\0'; 665 } 666 667 /* 668 * Close all cached opened files and report any errors 669 */ 670 void 671 cfclose(struct s_command *cp, struct s_command *end) 672 { 673 674 for (; cp != end; cp = cp->next) 675 switch(cp->code) { 676 case 's': 677 if (cp->u.s->wfd != -1 && close(cp->u.s->wfd)) 678 err(FATAL, 679 "%s: %s", cp->u.s->wfile, strerror(errno)); 680 cp->u.s->wfd = -1; 681 break; 682 case 'w': 683 if (cp->u.fd != -1 && close(cp->u.fd)) 684 err(FATAL, "%s: %s", cp->t, strerror(errno)); 685 cp->u.fd = -1; 686 break; 687 case '{': 688 cfclose(cp->u.c, cp->next); 689 break; 690 } 691 } 692