1 /* 2 * sed -- stream editor 3 * 4 * 5 */ 6 #include <u.h> 7 #include <libc.h> 8 #include <bio.h> 9 #include <regexp.h> 10 11 enum { 12 DEPTH = 20, /* max nesting depth of {} */ 13 MAXCMDS = 512, /* max sed commands */ 14 ADDSIZE = 10000, /* size of add & read buffer */ 15 MAXADDS = 20, /* max pending adds and reads */ 16 LBSIZE = 8192, /* input line size */ 17 LABSIZE = 50, /* max label name size */ 18 MAXSUB = 10, /* max number of sub reg exp */ 19 MAXFILES = 120, /* max output files */ 20 }; 21 /* An address is a line #, a R.E., "$", a reference to the last 22 * R.E., or nothing. 23 */ 24 typedef struct { 25 enum { 26 A_NONE, 27 A_DOL, 28 A_LINE, 29 A_RE, 30 A_LAST, 31 }type; 32 union { 33 long line; /* Line # */ 34 Reprog *rp; /* Compiled R.E. */ 35 }; 36 } Addr; 37 38 typedef struct SEDCOM { 39 Addr ad1; /* optional start address */ 40 Addr ad2; /* optional end address */ 41 union { 42 Reprog *re1; /* compiled R.E. */ 43 Rune *text; /* added text or file name */ 44 struct SEDCOM *lb1; /* destination command of branch */ 45 }; 46 Rune *rhs; /* Right-hand side of substitution */ 47 Biobuf* fcode; /* File ID for read and write */ 48 char command; /* command code -see below */ 49 char gfl; /* 'Global' flag for substitutions */ 50 char pfl; /* 'print' flag for substitutions */ 51 char active; /* 1 => data between start and end */ 52 char negfl; /* negation flag */ 53 } SedCom; 54 55 /* Command Codes for field SedCom.command */ 56 #define ACOM 01 57 #define BCOM 020 58 #define CCOM 02 59 #define CDCOM 025 60 #define CNCOM 022 61 #define COCOM 017 62 #define CPCOM 023 63 #define DCOM 03 64 #define ECOM 015 65 #define EQCOM 013 66 #define FCOM 016 67 #define GCOM 027 68 #define CGCOM 030 69 #define HCOM 031 70 #define CHCOM 032 71 #define ICOM 04 72 #define LCOM 05 73 #define NCOM 012 74 #define PCOM 010 75 #define QCOM 011 76 #define RCOM 06 77 #define SCOM 07 78 #define TCOM 021 79 #define WCOM 014 80 #define CWCOM 024 81 #define YCOM 026 82 #define XCOM 033 83 84 85 typedef struct label { /* Label symbol table */ 86 Rune asc[9]; /* Label name */ 87 SedCom *chain; 88 SedCom *address; /* Command associated with label */ 89 } Label; 90 91 typedef struct FILE_CACHE { /* Data file control block */ 92 struct FILE_CACHE *next; /* Forward Link */ 93 char *name; /* Name of file */ 94 } FileCache; 95 96 SedCom pspace[MAXCMDS]; /* Command storage */ 97 SedCom *pend = pspace+MAXCMDS; /* End of command storage */ 98 SedCom *rep = pspace; /* Current fill point */ 99 100 Reprog *lastre = 0; /* Last regular expression */ 101 Resub subexp[MAXSUB]; /* sub-patterns of pattern match*/ 102 103 Rune addspace[ADDSIZE]; /* Buffer for a, c, & i commands */ 104 Rune *addend = addspace+ADDSIZE; 105 106 SedCom *abuf[MAXADDS]; /* Queue of pending adds & reads */ 107 SedCom **aptr = abuf; 108 109 struct { /* Sed program input control block */ 110 enum PTYPE /* Either on command line or in file */ 111 { P_ARG, 112 P_FILE 113 } type; 114 union PCTL { /* Pointer to data */ 115 Biobuf *bp; 116 char *curr; 117 }; 118 } prog; 119 120 Rune genbuf[LBSIZE]; /* Miscellaneous buffer */ 121 122 FileCache *fhead = 0; /* Head of File Cache Chain */ 123 FileCache *ftail = 0; /* Tail of File Cache Chain */ 124 125 Rune *loc1; /* Start of pattern match */ 126 Rune *loc2; /* End of pattern match */ 127 Rune seof; /* Pattern delimiter char */ 128 129 Rune linebuf[LBSIZE+1]; /* Input data buffer */ 130 Rune *lbend = linebuf+LBSIZE; /* End of buffer */ 131 Rune *spend = linebuf; /* End of input data */ 132 Rune *cp; /* Current scan point in linebuf */ 133 134 Rune holdsp[LBSIZE+1]; /* Hold buffer */ 135 Rune *hend = holdsp+LBSIZE; /* End of hold buffer */ 136 Rune *hspend = holdsp; /* End of hold data */ 137 138 int nflag; /* Command line flags */ 139 int gflag; 140 141 int dolflag; /* Set when at true EOF */ 142 int sflag; /* Set when substitution done */ 143 int jflag; /* Set when jump required */ 144 int delflag; /* Delete current line when set */ 145 146 long lnum = 0; /* Input line count */ 147 148 char fname[MAXFILES][40]; /* File name cache */ 149 Biobuf *fcode[MAXFILES]; /* File ID cache */ 150 int nfiles = 0; /* Cache fill point */ 151 152 Biobuf fout; /* Output stream */ 153 Biobuf stdin; /* Default input */ 154 Biobuf* f = 0; /* Input data */ 155 156 Label ltab[LABSIZE]; /* Label name symbol table */ 157 Label *labend = ltab+LABSIZE; /* End of label table */ 158 Label *lab = ltab+1; /* Current Fill point */ 159 160 int depth = 0; /* {} stack pointer */ 161 162 Rune bad; /* Dummy err ptr reference */ 163 Rune *badp = &bad; 164 165 166 char CGMES[] = "Command garbled: %S"; 167 char TMMES[] = "Too much text: %S"; 168 char LTL[] = "Label too long: %S"; 169 char AD0MES[] = "No addresses allowed: %S"; 170 char AD1MES[] = "Only one address allowed: %S"; 171 172 void address(Addr *); 173 void arout(void); 174 int cmp(char *, char *); 175 int rcmp(Rune *, Rune *); 176 void command(SedCom *); 177 Reprog *compile(void); 178 Rune *compsub(Rune *, Rune *); 179 void dechain(void); 180 void dosub(Rune *); 181 int ecmp(Rune *, Rune *, int); 182 void enroll(char *); 183 void errexit(void); 184 int executable(SedCom *); 185 void execute(void); 186 void fcomp(void); 187 long getrune(void); 188 Rune *gline(Rune *); 189 int match(Reprog *, Rune *); 190 void newfile(enum PTYPE, char *); 191 int opendata(void); 192 Biobuf *open_file(char *); 193 Rune *place(Rune *, Rune *, Rune *); 194 void quit(char *, char *); 195 int rline(Rune *, Rune *); 196 Label *search(Label *); 197 int substitute(SedCom *); 198 char *text(char *); 199 Rune *stext(Rune *, Rune *); 200 int ycomp(SedCom *); 201 char * trans(int c); 202 void putline(Biobuf *bp, Rune *buf, int n); 203 204 void 205 main(int argc, char **argv) 206 { 207 int compfl; 208 209 lnum = 0; 210 Binit(&fout, 1, OWRITE); 211 fcode[nfiles++] = &fout; 212 compfl = 0; 213 214 if(argc == 1) 215 exits(0); 216 ARGBEGIN{ 217 case 'n': 218 nflag++; 219 continue; 220 case 'f': 221 if(argc <= 1) 222 quit("no pattern-file", 0); 223 newfile(P_FILE, ARGF()); 224 fcomp(); 225 compfl = 1; 226 continue; 227 case 'e': 228 if (argc <= 1) 229 quit("missing pattern", 0); 230 newfile(P_ARG, ARGF()); 231 fcomp(); 232 compfl = 1; 233 continue; 234 case 'g': 235 gflag++; 236 continue; 237 default: 238 fprint(2, "sed: Unknown flag: %c\n", ARGC()); 239 continue; 240 } ARGEND 241 242 if(compfl == 0) { 243 if (--argc < 0) 244 quit("missing pattern", 0); 245 newfile(P_ARG, *argv++); 246 fcomp(); 247 } 248 249 if(depth) 250 quit("Too many {'s", 0); 251 252 ltab[0].address = rep; 253 254 dechain(); 255 256 if(argc <= 0) 257 enroll(0); /* Add stdin to cache */ 258 else while(--argc >= 0) { 259 enroll(*argv++); 260 } 261 execute(); 262 exits(0); 263 } 264 void 265 fcomp(void) 266 { 267 Rune *tp; 268 SedCom *pt, *pt1; 269 int i; 270 Label *lpt; 271 272 static Rune *p = addspace; 273 static SedCom **cmpend[DEPTH]; /* stack of {} operations */ 274 275 while (rline(linebuf, lbend) >= 0) { 276 cp = linebuf; 277 comploop: 278 while(*cp == ' ' || *cp == '\t') 279 cp++; 280 if(*cp == '\0' || *cp == '#') 281 continue; 282 if(*cp == ';') { 283 cp++; 284 goto comploop; 285 } 286 287 address(&rep->ad1); 288 if (rep->ad1.type != A_NONE) { 289 if (rep->ad1.type == A_LAST) { 290 if (!lastre) 291 quit("First RE may not be null", 0); 292 rep->ad1.type = A_RE; 293 rep->ad1.rp = lastre; 294 } 295 if(*cp == ',' || *cp == ';') { 296 cp++; 297 address(&rep->ad2); 298 if (rep->ad2.type == A_LAST) { 299 rep->ad2.type = A_RE; 300 rep->ad2.rp = lastre; 301 } 302 } else 303 rep->ad2.type = A_NONE; 304 } 305 while(*cp == ' ' || *cp == '\t') 306 cp++; 307 308 swit: 309 switch(*cp++) { 310 311 default: 312 quit("Unrecognized command: %S", (char *)linebuf); 313 314 case '!': 315 rep->negfl = 1; 316 goto swit; 317 318 case '{': 319 rep->command = BCOM; 320 rep->negfl = !(rep->negfl); 321 cmpend[depth++] = &rep->lb1; 322 if(++rep >= pend) 323 quit("Too many commands: %S", (char *) linebuf); 324 if(*cp == '\0') continue; 325 goto comploop; 326 327 case '}': 328 if(rep->ad1.type != A_NONE) 329 quit(AD0MES, (char *) linebuf); 330 if(--depth < 0) 331 quit("Too many }'s", 0); 332 *cmpend[depth] = rep; 333 if(*cp == 0) continue; 334 goto comploop; 335 336 case '=': 337 rep->command = EQCOM; 338 if(rep->ad2.type != A_NONE) 339 quit(AD1MES, (char *) linebuf); 340 break; 341 342 case ':': 343 if(rep->ad1.type != A_NONE) 344 quit(AD0MES, (char *) linebuf); 345 346 while(*cp == ' ') 347 cp++; 348 tp = lab->asc; 349 while (*cp && *cp != ';' && *cp != ' ' && *cp != '\t' && *cp != '#') { 350 *tp++ = *cp++; 351 if(tp >= &(lab->asc[8])) 352 quit(LTL, (char *) linebuf); 353 } 354 *tp = '\0'; 355 356 if(lpt = search(lab)) { 357 if(lpt->address) 358 quit("Duplicate labels: %S", (char *) linebuf); 359 } else { 360 lab->chain = 0; 361 lpt = lab; 362 if(++lab >= labend) 363 quit("Too many labels: %S", (char *) linebuf); 364 } 365 lpt->address = rep; 366 if (*cp == '#') 367 continue; 368 rep--; /* reuse this slot */ 369 break; 370 371 case 'a': 372 rep->command = ACOM; 373 if(rep->ad2.type != A_NONE) 374 quit(AD1MES, (char *) linebuf); 375 if(*cp == '\\') cp++; 376 if(*cp++ != '\n') 377 quit(CGMES, (char *) linebuf); 378 rep->text = p; 379 p = stext(p, addend); 380 break; 381 case 'c': 382 rep->command = CCOM; 383 if(*cp == '\\') cp++; 384 if(*cp++ != '\n') 385 quit(CGMES, (char *) linebuf); 386 rep->text = p; 387 p = stext(p, addend); 388 break; 389 case 'i': 390 rep->command = ICOM; 391 if(rep->ad2.type != A_NONE) 392 quit(AD1MES, (char *) linebuf); 393 if(*cp == '\\') cp++; 394 if(*cp++ != '\n') 395 quit(CGMES, (char *) linebuf); 396 rep->text = p; 397 p = stext(p, addend); 398 break; 399 400 case 'g': 401 rep->command = GCOM; 402 break; 403 404 case 'G': 405 rep->command = CGCOM; 406 break; 407 408 case 'h': 409 rep->command = HCOM; 410 break; 411 412 case 'H': 413 rep->command = CHCOM; 414 break; 415 416 case 't': 417 rep->command = TCOM; 418 goto jtcommon; 419 420 case 'b': 421 rep->command = BCOM; 422 jtcommon: 423 while(*cp == ' ')cp++; 424 if(*cp == '\0') { 425 if(pt = ltab[0].chain) { 426 while(pt1 = pt->lb1) 427 pt = pt1; 428 pt->lb1 = rep; 429 } else 430 ltab[0].chain = rep; 431 break; 432 } 433 tp = lab->asc; 434 while((*tp++ = *cp++)) 435 if(tp >= &(lab->asc[8])) 436 quit(LTL, (char *) linebuf); 437 cp--; 438 tp[-1] = '\0'; 439 440 if(lpt = search(lab)) { 441 if(lpt->address) { 442 rep->lb1 = lpt->address; 443 } else { 444 pt = lpt->chain; 445 while(pt1 = pt->lb1) 446 pt = pt1; 447 pt->lb1 = rep; 448 } 449 } else { 450 lab->chain = rep; 451 lab->address = 0; 452 if(++lab >= labend) 453 quit("Too many labels: %S", 454 (char *) linebuf); 455 } 456 break; 457 458 case 'n': 459 rep->command = NCOM; 460 break; 461 462 case 'N': 463 rep->command = CNCOM; 464 break; 465 466 case 'p': 467 rep->command = PCOM; 468 break; 469 470 case 'P': 471 rep->command = CPCOM; 472 break; 473 474 case 'r': 475 rep->command = RCOM; 476 if(rep->ad2.type != A_NONE) 477 quit(AD1MES, (char *) linebuf); 478 if(*cp++ != ' ') 479 quit(CGMES, (char *) linebuf); 480 rep->text = p; 481 p = stext(p, addend); 482 break; 483 484 case 'd': 485 rep->command = DCOM; 486 break; 487 488 case 'D': 489 rep->command = CDCOM; 490 rep->lb1 = pspace; 491 break; 492 493 case 'q': 494 rep->command = QCOM; 495 if(rep->ad2.type != A_NONE) 496 quit(AD1MES, (char *) linebuf); 497 break; 498 499 case 'l': 500 rep->command = LCOM; 501 break; 502 503 case 's': 504 rep->command = SCOM; 505 seof = *cp++; 506 if ((rep->re1 = compile()) == 0) { 507 if(!lastre) 508 quit("First RE may not be null.", 0); 509 rep->re1 = lastre; 510 } 511 rep->rhs = p; 512 if((p = compsub(p, addend)) == 0) 513 quit(CGMES, (char *) linebuf); 514 if(*cp == 'g') { 515 cp++; 516 rep->gfl++; 517 } else if(gflag) 518 rep->gfl++; 519 520 if(*cp == 'p') { 521 cp++; 522 rep->pfl = 1; 523 } 524 525 if(*cp == 'P') { 526 cp++; 527 rep->pfl = 2; 528 } 529 530 if(*cp == 'w') { 531 cp++; 532 if(*cp++ != ' ') 533 quit(CGMES, (char *) linebuf); 534 text(fname[nfiles]); 535 for(i = nfiles - 1; i >= 0; i--) 536 if(cmp(fname[nfiles],fname[i]) == 0) { 537 rep->fcode = fcode[i]; 538 goto done; 539 } 540 if(nfiles >= MAXFILES) 541 quit("Too many files in w commands 1", 0); 542 rep->fcode = open_file(fname[nfiles]); 543 } 544 break; 545 546 case 'w': 547 rep->command = WCOM; 548 if(*cp++ != ' ') 549 quit(CGMES, (char *) linebuf); 550 text(fname[nfiles]); 551 for(i = nfiles - 1; i >= 0; i--) 552 if(cmp(fname[nfiles], fname[i]) == 0) { 553 rep->fcode = fcode[i]; 554 goto done; 555 } 556 if(nfiles >= MAXFILES){ 557 fprint(2, "sed: Too many files in w commands 2 \n"); 558 fprint(2, "nfiles = %d; MAXF = %d\n", nfiles, MAXFILES); 559 errexit(); 560 } 561 rep->fcode = open_file(fname[nfiles]); 562 break; 563 564 case 'x': 565 rep->command = XCOM; 566 break; 567 568 case 'y': 569 rep->command = YCOM; 570 seof = *cp++; 571 if (ycomp(rep) == 0) 572 quit(CGMES, (char *) linebuf); 573 break; 574 575 } 576 done: 577 if(++rep >= pend) 578 quit("Too many commands, last: %S", (char *) linebuf); 579 580 if(*cp++ != '\0') { 581 if(cp[-1] == ';') 582 goto comploop; 583 quit(CGMES, (char *) linebuf); 584 } 585 586 } 587 } 588 589 Biobuf * 590 open_file(char *name) 591 { 592 Biobuf *bp; 593 int fd; 594 595 if ((bp = malloc(sizeof(Biobuf))) == 0) 596 quit("Out of memory", 0); 597 if ((fd = open(name, OWRITE)) < 0 && 598 (fd = create(name, OWRITE, 0666)) < 0) 599 quit("Cannot create %s", name); 600 Binit(bp, fd, OWRITE); 601 Bseek(bp, 0, 2); 602 fcode[nfiles++] = bp; 603 return bp; 604 } 605 606 Rune * 607 compsub(Rune *rhs, Rune *end) 608 { 609 Rune r; 610 611 while ((r = *cp++) != '\0') { 612 if(r == '\\') { 613 if (rhs < end) 614 *rhs++ = 0xFFFF; 615 else 616 return 0; 617 r = *cp++; 618 if(r == 'n') 619 r = '\n'; 620 } else { 621 if(r == seof) { 622 if (rhs < end) 623 *rhs++ = '\0'; 624 else 625 return 0; 626 return rhs; 627 } 628 } 629 if (rhs < end) 630 *rhs++ = r; 631 else 632 return 0; 633 634 } 635 return 0; 636 } 637 638 Reprog * 639 compile(void) 640 { 641 Rune c; 642 char *ep; 643 char expbuf[512]; 644 645 if((c = *cp++) == seof) /* '//' */ 646 return 0; 647 ep = expbuf; 648 do { 649 if (c == 0 || c == '\n') 650 quit(TMMES, (char *) linebuf); 651 if (c == '\\') { 652 if (ep >= expbuf+sizeof(expbuf)) 653 quit(TMMES, (char *) linebuf); 654 ep += runetochar(ep, &c); 655 if ((c = *cp++) == 'n') 656 c = '\n'; 657 } 658 if (ep >= expbuf+sizeof(expbuf)) 659 quit(TMMES, (char *) linebuf); 660 ep += runetochar(ep, &c); 661 } while ((c = *cp++) != seof); 662 *ep = 0; 663 return lastre = regcomp(expbuf); 664 } 665 666 void 667 regerror(char *s) 668 { 669 USED(s); 670 quit(CGMES, (char *) linebuf); 671 } 672 673 void 674 newfile(enum PTYPE type, char *name) 675 { 676 if (type == P_ARG) 677 prog.curr = name; 678 else if ((prog.bp = Bopen(name, OREAD)) == 0) 679 quit("Cannot open pattern-file: %s\n", name); 680 prog.type = type; 681 } 682 683 int 684 rline(Rune *buf, Rune *end) 685 { 686 long c; 687 Rune r; 688 689 while ((c = getrune()) >= 0) { 690 r = c; 691 if (r == '\\') { 692 if (buf <= end) 693 *buf++ = r; 694 if ((c = getrune()) < 0) 695 break; 696 r = c; 697 } else if (r == '\n') { 698 *buf = '\0'; 699 return(1); 700 } 701 if (buf <= end) 702 *buf++ = r; 703 } 704 *buf = '\0'; 705 return(-1); 706 } 707 708 long 709 getrune(void) 710 { 711 char *p; 712 long c; 713 Rune r; 714 715 if (prog.type == P_ARG) { 716 if ((p = prog.curr) != 0) { 717 if (*p) { 718 prog.curr += chartorune(&r, p); 719 c = r; 720 } else { 721 c = '\n'; /* fake an end-of-line */ 722 prog.curr = 0; 723 } 724 } else 725 c = -1; 726 } else if ((c = Bgetrune(prog.bp)) < 0) 727 Bterm(prog.bp); 728 return c; 729 } 730 731 void 732 address(Addr *ap) 733 { 734 int c; 735 long lno; 736 737 if((c = *cp++) == '$') 738 ap->type = A_DOL; 739 else if(c == '/') { 740 seof = c; 741 if (ap->rp = compile()) 742 ap->type = A_RE; 743 else 744 ap->type = A_LAST; 745 } 746 else if (c >= '0' && c <= '9') { 747 lno = c-'0'; 748 while ((c = *cp) >= '0' && c <= '9') 749 lno = lno*10 + *cp++-'0'; 750 if(!lno) 751 quit("line number 0 is illegal",0); 752 ap->type = A_LINE; 753 ap->line = lno; 754 } 755 else { 756 cp--; 757 ap->type = A_NONE; 758 } 759 } 760 761 cmp(char *a, char *b) /* compare characters */ 762 { 763 while(*a == *b++) 764 if (*a == '\0') 765 return(0); 766 else a++; 767 return(1); 768 } 769 rcmp(Rune *a, Rune *b) /* compare runes */ 770 { 771 while(*a == *b++) 772 if (*a == '\0') 773 return(0); 774 else a++; 775 return(1); 776 } 777 778 char * 779 text(char *p) /* extract character string */ 780 { 781 Rune r; 782 783 while(*cp == '\t' || *cp == ' ') 784 cp++; 785 while (*cp) { 786 if ((r = *cp++) == '\\') 787 if ((r = *cp++) == 0) 788 break;; 789 if (r == '\n') 790 while (*cp == '\t' || *cp == ' ') 791 cp++; 792 p += runetochar(p, &r); 793 } 794 *p++ = '\0'; 795 return p; 796 } 797 798 Rune * 799 stext(Rune *p, Rune *end) /* extract rune string */ 800 { 801 while(*cp == '\t' || *cp == ' ') 802 cp++; 803 while (*cp) { 804 if (*cp == '\\') 805 if (*++cp == 0) 806 break; 807 if (p >= end-1) 808 quit(TMMES, (char *) linebuf); 809 if ((*p++ = *cp++) == '\n') 810 while(*cp == '\t' || *cp == ' ') 811 cp++; 812 } 813 *p++ = 0; 814 return p; 815 } 816 817 818 Label * 819 search (Label *ptr) 820 { 821 Label *rp; 822 823 for (rp = ltab; rp < ptr; rp++) 824 if(rcmp(rp->asc, ptr->asc) == 0) 825 return(rp); 826 return(0); 827 } 828 829 void 830 dechain(void) 831 { 832 Label *lptr; 833 SedCom *rptr, *trptr; 834 835 for(lptr = ltab; lptr < lab; lptr++) { 836 837 if(lptr->address == 0) 838 quit("Undefined label: %S", (char *) lptr->asc); 839 840 if(lptr->chain) { 841 rptr = lptr->chain; 842 while(trptr = rptr->lb1) { 843 rptr->lb1 = lptr->address; 844 rptr = trptr; 845 } 846 rptr->lb1 = lptr->address; 847 } 848 } 849 } 850 851 int 852 ycomp(SedCom *r) 853 { 854 int i; 855 Rune *rp; 856 Rune c, *tsp, highc; 857 Rune *sp; 858 859 highc = 0; 860 for(tsp = cp; *tsp != seof; tsp++) { 861 if(*tsp == '\\') 862 tsp++; 863 if(*tsp == '\n' || *tsp == '\0') 864 return(0); 865 if (*tsp > highc) highc = *tsp; 866 } 867 tsp++; 868 if ((rp = r->text = (Rune *) malloc(sizeof(Rune)*(highc+2))) == 0) 869 quit("Out of memory", 0); 870 *rp++ = highc; /* save upper bound */ 871 for (i = 0; i <= highc; i++) 872 rp[i] = i; 873 sp = cp; 874 while((c = *sp++) != seof) { 875 if(c == '\\' && *sp == 'n') { 876 sp++; 877 c = '\n'; 878 } 879 if((rp[c] = *tsp++) == '\\' && *tsp == 'n') { 880 rp[c] = '\n'; 881 tsp++; 882 } 883 if(rp[c] == seof || rp[c] == '\0') { 884 free(r->re1); 885 r->re1 = 0; 886 return(0); 887 } 888 } 889 if(*tsp != seof) { 890 free(r->re1); 891 r->re1 = 0; 892 return(0); 893 } 894 cp = tsp+1; 895 return(1); 896 } 897 898 void 899 execute(void) 900 { 901 SedCom *ipc; 902 903 while (spend = gline(linebuf)){ 904 for(ipc = pspace; ipc->command; ) { 905 if (!executable(ipc)) { 906 ipc++; 907 continue; 908 } 909 command(ipc); 910 911 if(delflag) 912 break; 913 if(jflag) { 914 jflag = 0; 915 if((ipc = ipc->lb1) == 0) 916 break; 917 } else 918 ipc++; 919 920 } 921 if(!nflag && !delflag) 922 putline(&fout, linebuf, spend-linebuf); 923 if(aptr > abuf) { 924 arout(); 925 } 926 delflag = 0; 927 } 928 } 929 /* determine if a statement should be applied to an input line */ 930 int 931 executable(SedCom *ipc) 932 { 933 if (ipc->active) { /* Addr1 satisfied - accept until Addr2 */ 934 if (ipc->active == 1) /* Second line */ 935 ipc->active = 2; 936 switch(ipc->ad2.type) { 937 case A_NONE: /* No second addr; use first */ 938 ipc->active = 0; 939 break; 940 case A_DOL: /* Accept everything */ 941 return !ipc->negfl; 942 case A_LINE: /* Line at end of range? */ 943 if (lnum <= ipc->ad2.line) { 944 if (ipc->ad2.line == lnum) 945 ipc->active = 0; 946 return !ipc->negfl; 947 } 948 ipc->active = 0; /* out of range */ 949 return ipc->negfl; 950 case A_RE: /* Check for matching R.E. */ 951 if (match(ipc->ad2.rp, linebuf)) 952 ipc->active = 0; 953 return !ipc->negfl; 954 default: /* internal error */ 955 quit("Internal error", 0); 956 } 957 } 958 switch (ipc->ad1.type) { /* Check first address */ 959 case A_NONE: /* Everything matches */ 960 return !ipc->negfl; 961 case A_DOL: /* Only last line */ 962 if (dolflag) 963 return !ipc->negfl; 964 break; 965 case A_LINE: /* Check line number */ 966 if (ipc->ad1.line == lnum) { 967 ipc->active = 1; /* In range */ 968 return !ipc->negfl; 969 } 970 break; 971 case A_RE: /* Check R.E. */ 972 if (match(ipc->ad1.rp, linebuf)) { 973 ipc->active = 1; /* In range */ 974 return !ipc->negfl; 975 } 976 break; 977 default: 978 quit("Internal error", 0); 979 } 980 return ipc->negfl; 981 } 982 match(Reprog *pattern, Rune *buf) 983 { 984 if (!pattern) 985 return 0; 986 subexp[0].rsp = buf; 987 subexp[0].ep = 0; 988 if (rregexec(pattern, linebuf, subexp, MAXSUB)) { 989 loc1 = subexp[0].rsp; 990 loc2 = subexp[0].rep; 991 return 1; 992 } 993 loc1 = loc2 = 0; 994 return 0; 995 } 996 substitute(SedCom *ipc) 997 { 998 int len; 999 1000 if(!match(ipc->re1, linebuf)) 1001 return 0; 1002 1003 /* 1004 * we have at least one match. some patterns, e.g. '$' or '^', can 1005 * produce zero-length matches, so during a global substitute we 1006 * must bump to the character after a zero-length match to keep from looping. 1007 */ 1008 sflag = 1; 1009 if(ipc->gfl == 0) /* single substitution */ 1010 dosub(ipc->rhs); 1011 else 1012 do{ /* global substitution */ 1013 len = loc2-loc1; /* length of match */ 1014 dosub(ipc->rhs); /* dosub moves loc2 */ 1015 if(*loc2 == 0) /* end of string */ 1016 break; 1017 if(len == 0) /* zero-length R.E. match */ 1018 loc2++; /* bump over zero-length match */ 1019 if(*loc2 == 0) /* end of string */ 1020 break; 1021 } while(match(ipc->re1, loc2)); 1022 return 1; 1023 } 1024 1025 void 1026 dosub(Rune *rhsbuf) 1027 { 1028 Rune *lp, *sp; 1029 Rune *rp; 1030 int c, n; 1031 1032 lp = linebuf; 1033 sp = genbuf; 1034 rp = rhsbuf; 1035 while (lp < loc1) 1036 *sp++ = *lp++; 1037 while(c = *rp++) { 1038 if (c == '&') { 1039 sp = place(sp, loc1, loc2); 1040 continue; 1041 } 1042 if (c == 0xFFFF && (c = *rp++) >= '1' && c < MAXSUB+'0') { 1043 n = c-'0'; 1044 if (subexp[n].rsp && subexp[n].rep) { 1045 sp = place(sp, subexp[n].rsp, subexp[n].rep); 1046 continue; 1047 } 1048 else { 1049 fprint(2, "sed: Invalid back reference \\%d\n",n); 1050 errexit(); 1051 } 1052 } 1053 *sp++ = c; 1054 if (sp >= &genbuf[LBSIZE]) 1055 fprint(2, "sed: Output line too long.\n"); 1056 } 1057 lp = loc2; 1058 loc2 = sp - genbuf + linebuf; 1059 while (*sp++ = *lp++) 1060 if (sp >= &genbuf[LBSIZE]) 1061 fprint(2, "sed: Output line too long.\n"); 1062 lp = linebuf; 1063 sp = genbuf; 1064 while (*lp++ = *sp++) 1065 ; 1066 spend = lp-1; 1067 } 1068 1069 Rune * 1070 place(Rune *sp, Rune *l1, Rune *l2) 1071 { 1072 while (l1 < l2) { 1073 *sp++ = *l1++; 1074 if (sp >= &genbuf[LBSIZE]) 1075 fprint(2, "sed: Output line too long.\n"); 1076 } 1077 return(sp); 1078 } 1079 1080 char * 1081 trans(int c) 1082 { 1083 static char buf[] = "\\x0000"; 1084 static char hex[] = "0123456789abcdef"; 1085 1086 switch(c) { 1087 case '\b': 1088 return "\\b"; 1089 case '\n': 1090 return "\\n"; 1091 case '\r': 1092 return "\\r"; 1093 case '\t': 1094 return "\\t"; 1095 case '\\': 1096 return "\\\\"; 1097 } 1098 buf[2] = hex[(c>>12)&0xF]; 1099 buf[3] = hex[(c>>8)&0xF]; 1100 buf[4] = hex[(c>>4)&0xF]; 1101 buf[5] = hex[c&0xF]; 1102 return buf; 1103 } 1104 1105 void 1106 command(SedCom *ipc) 1107 { 1108 int i, c; 1109 Rune *p1, *p2; 1110 char *ucp; 1111 Rune *rp; 1112 Rune *execp; 1113 1114 switch(ipc->command) { 1115 1116 case ACOM: 1117 *aptr++ = ipc; 1118 if(aptr >= abuf+MAXADDS) { 1119 quit("sed: Too many appends after line %ld\n", 1120 (char *) lnum); 1121 } 1122 *aptr = 0; 1123 break; 1124 case CCOM: 1125 delflag = 1; 1126 if(ipc->active == 1) { 1127 for(rp = ipc->text; *rp; rp++) 1128 Bputrune(&fout, *rp); 1129 Bputc(&fout, '\n'); 1130 } 1131 break; 1132 case DCOM: 1133 delflag++; 1134 break; 1135 case CDCOM: 1136 p1 = p2 = linebuf; 1137 while(*p1 != '\n') { 1138 if(*p1++ == 0) { 1139 delflag++; 1140 return; 1141 } 1142 } 1143 p1++; 1144 while(*p2++ = *p1++) 1145 ; 1146 spend = p2-1; 1147 jflag++; 1148 break; 1149 case EQCOM: 1150 Bprint(&fout, "%ld\n", lnum); 1151 break; 1152 case GCOM: 1153 p1 = linebuf; 1154 p2 = holdsp; 1155 while(*p1++ = *p2++) 1156 ; 1157 spend = p1-1; 1158 break; 1159 case CGCOM: 1160 *spend++ = '\n'; 1161 p1 = spend; 1162 p2 = holdsp; 1163 while(*p1++ = *p2++) 1164 if(p1 >= lbend) 1165 break; 1166 spend = p1-1; 1167 break; 1168 case HCOM: 1169 p1 = holdsp; 1170 p2 = linebuf; 1171 while(*p1++ = *p2++); 1172 hspend = p1-1; 1173 break; 1174 case CHCOM: 1175 *hspend++ = '\n'; 1176 p1 = hspend; 1177 p2 = linebuf; 1178 while(*p1++ = *p2++) 1179 if(p1 >= hend) 1180 break; 1181 hspend = p1-1; 1182 break; 1183 case ICOM: 1184 for(rp = ipc->text; *rp; rp++) 1185 Bputrune(&fout, *rp); 1186 Bputc(&fout, '\n'); 1187 break; 1188 case BCOM: 1189 jflag = 1; 1190 break; 1191 case LCOM: 1192 c = 0; 1193 for (i = 0, rp = linebuf; *rp; rp++) { 1194 c = *rp; 1195 if(c >= 0x20 && c < 0x7F && c != '\\') { 1196 Bputc(&fout, c); 1197 if(i++ > 71) { 1198 Bprint(&fout, "\\\n"); 1199 i = 0; 1200 } 1201 } else { 1202 for (ucp = trans(*rp); *ucp; ucp++){ 1203 c = *ucp; 1204 Bputc(&fout, c); 1205 if(i++ > 71) { 1206 Bprint(&fout, "\\\n"); 1207 i = 0; 1208 } 1209 } 1210 } 1211 } 1212 if(c == ' ') 1213 Bprint(&fout, "\\n"); 1214 Bputc(&fout, '\n'); 1215 break; 1216 case NCOM: 1217 if(!nflag) 1218 putline(&fout, linebuf, spend-linebuf); 1219 1220 if(aptr > abuf) 1221 arout(); 1222 if((execp = gline(linebuf)) == 0) { 1223 delflag = 1; 1224 break; 1225 } 1226 spend = execp; 1227 break; 1228 case CNCOM: 1229 if(aptr > abuf) 1230 arout(); 1231 *spend++ = '\n'; 1232 if((execp = gline(spend)) == 0) { 1233 delflag = 1; 1234 break; 1235 } 1236 spend = execp; 1237 break; 1238 case PCOM: 1239 putline(&fout, linebuf, spend-linebuf); 1240 break; 1241 case CPCOM: 1242 cpcom: 1243 for(rp = linebuf; *rp && *rp != '\n'; rp++) 1244 Bputc(&fout, *rp); 1245 Bputc(&fout, '\n'); 1246 break; 1247 case QCOM: 1248 if(!nflag) 1249 putline(&fout, linebuf, spend-linebuf); 1250 if(aptr > abuf) 1251 arout(); 1252 exits(0); 1253 case RCOM: 1254 *aptr++ = ipc; 1255 if(aptr >= &abuf[MAXADDS]) 1256 quit("sed: Too many reads after line %ld\n", 1257 (char *) lnum); 1258 *aptr = 0; 1259 break; 1260 case SCOM: 1261 i = substitute(ipc); 1262 if(i && ipc->pfl) 1263 if(ipc->pfl == 1) 1264 putline(&fout, linebuf, spend-linebuf); 1265 else 1266 goto cpcom; 1267 if(i && ipc->fcode) 1268 goto wcom; 1269 break; 1270 1271 case TCOM: 1272 if(sflag == 0) break; 1273 sflag = 0; 1274 jflag = 1; 1275 break; 1276 1277 wcom: 1278 case WCOM: 1279 putline(ipc->fcode,linebuf, spend-linebuf); 1280 break; 1281 case XCOM: 1282 p1 = linebuf; 1283 p2 = genbuf; 1284 while(*p2++ = *p1++); 1285 p1 = holdsp; 1286 p2 = linebuf; 1287 while(*p2++ = *p1++); 1288 spend = p2 - 1; 1289 p1 = genbuf; 1290 p2 = holdsp; 1291 while(*p2++ = *p1++); 1292 hspend = p2 - 1; 1293 break; 1294 case YCOM: 1295 p1 = linebuf; 1296 p2 = ipc->text; 1297 for (i = *p2++; *p1; p1++){ 1298 if (*p1 <= i) *p1 = p2[*p1]; 1299 } 1300 break; 1301 } 1302 1303 } 1304 1305 void 1306 putline(Biobuf *bp, Rune *buf, int n) 1307 { 1308 while (n--) 1309 Bputrune(bp, *buf++); 1310 Bputc(bp, '\n'); 1311 } 1312 ecmp(Rune *a, Rune *b, int count) 1313 { 1314 while(count--) 1315 if(*a++ != *b++) return(0); 1316 return(1); 1317 } 1318 1319 void 1320 arout(void) 1321 { 1322 Rune *p1; 1323 Biobuf *fi; 1324 int c; 1325 char *s; 1326 char buf[128]; 1327 1328 for (aptr = abuf; *aptr; aptr++) { 1329 if((*aptr)->command == ACOM) { 1330 for(p1 = (*aptr)->text; *p1; p1++ ) 1331 Bputrune(&fout, *p1); 1332 Bputc(&fout, '\n'); 1333 } else { 1334 for(s = buf, p1= (*aptr)->text; *p1; p1++) 1335 s += runetochar(s, p1); 1336 *s = '\0'; 1337 if((fi = Bopen(buf, OREAD)) == 0) 1338 continue; 1339 while((c = Bgetc(fi)) >= 0) 1340 Bputc(&fout, c); 1341 Bterm(fi); 1342 } 1343 } 1344 aptr = abuf; 1345 *aptr = 0; 1346 } 1347 1348 void 1349 errexit(void) 1350 { 1351 exits("error"); 1352 } 1353 1354 void 1355 quit (char *msg, char *arg) 1356 { 1357 fprint(2, "sed: "); 1358 fprint(2, msg, arg); 1359 fprint(2, "\n"); 1360 errexit(); 1361 } 1362 1363 Rune * 1364 gline(Rune *addr) 1365 { 1366 long c; 1367 Rune *p; 1368 1369 static long peekc = 0; 1370 1371 if (f == 0 && opendata() < 0) 1372 return 0; 1373 sflag = 0; 1374 lnum++; 1375 /* Bflush(&fout);********* dumped 4/30/92 - bobf****/ 1376 do { 1377 p = addr; 1378 for (c = (peekc ? peekc : Bgetrune(f)); c >= 0; c = Bgetrune(f)) { 1379 if (c == '\n') { 1380 if ((peekc = Bgetrune(f)) < 0) { 1381 if (fhead == 0) 1382 dolflag = 1; 1383 } 1384 *p = '\0'; 1385 return p; 1386 } 1387 if (c && p < lbend) 1388 *p++ = c; 1389 } 1390 /* return partial final line, adding implicit newline */ 1391 if(p != addr) { 1392 *p = '\0'; 1393 peekc = -1; 1394 if (fhead == 0) 1395 dolflag = 1; 1396 return p; 1397 } 1398 peekc = 0; 1399 Bterm(f); 1400 } while (opendata() > 0); /* Switch to next stream */ 1401 f = 0; 1402 return 0; 1403 } 1404 1405 /* Data file input section - the intent is to transparently 1406 * catenate all data input streams. 1407 */ 1408 void 1409 enroll(char *filename) /* Add a file to the input file cache */ 1410 { 1411 FileCache *fp; 1412 1413 if ((fp = (FileCache *) malloc(sizeof (FileCache))) == 0) 1414 quit("Out of memory", 0); 1415 if (ftail == 0) 1416 fhead = fp; 1417 else 1418 ftail->next = fp; 1419 ftail = fp; 1420 fp->next = 0; 1421 fp->name = filename; /* 0 => stdin */ 1422 } 1423 1424 int 1425 opendata(void) 1426 { 1427 if (fhead == 0) 1428 return -1; 1429 if (fhead->name) { 1430 if ((f = Bopen(fhead->name, OREAD)) == 0) 1431 quit("Can't open %s", fhead->name); 1432 } else { 1433 Binit(&stdin, 0, OREAD); 1434 f = &stdin; 1435 } 1436 fhead = fhead->next; 1437 return 1; 1438 } 1439