1 /* 2 * sed -- stream editor 3 */ 4 #include <lib9.h> 5 #include <bio.h> 6 #include <regexp.h> 7 8 enum { 9 DEPTH = 20, /* max nesting depth of {} */ 10 MAXCMDS = 512, /* max sed commands */ 11 ADDSIZE = 10000, /* size of add & read buffer */ 12 MAXADDS = 20, /* max pending adds and reads */ 13 LBSIZE = 8192, /* input line size */ 14 LABSIZE = 50, /* max number of labels */ 15 MAXSUB = 10, /* max number of sub reg exp */ 16 MAXFILES = 120 /* max output files */ 17 }; 18 19 /* 20 * An address is a line #, a R.E., "$", a reference to the last 21 * R.E., or nothing. 22 */ 23 typedef struct { 24 enum { 25 A_NONE, 26 A_DOL, 27 A_LINE, 28 A_RE, 29 A_LAST 30 }type; 31 /* union { */ 32 long line; /* Line # */ 33 Reprog *rp; /* Compiled R.E. */ 34 /* }; */ 35 } Addr; 36 37 typedef struct SEDCOM { 38 Addr ad1; /* optional start address */ 39 Addr ad2; /* optional end address */ 40 /* union { */ 41 Reprog *re1; /* compiled R.E. */ 42 Rune *text; /* added text or file name */ 43 struct SEDCOM *lb1; /* destination command of branch */ 44 /* }; */ 45 Rune *rhs; /* Right-hand side of substitution */ 46 Biobuf* fcode; /* File ID for read and write */ 47 char command; /* command code -see below */ 48 char gfl; /* 'Global' flag for substitutions */ 49 char pfl; /* 'print' flag for substitutions */ 50 char active; /* 1 => data between start and end */ 51 char negfl; /* negation flag */ 52 } SedCom; 53 54 /* Command Codes for field SedCom.command */ 55 #define ACOM 01 56 #define BCOM 020 57 #define CCOM 02 58 #define CDCOM 025 59 #define CNCOM 022 60 #define COCOM 017 61 #define CPCOM 023 62 #define DCOM 03 63 #define ECOM 015 64 #define EQCOM 013 65 #define FCOM 016 66 #define GCOM 027 67 #define CGCOM 030 68 #define HCOM 031 69 #define CHCOM 032 70 #define ICOM 04 71 #define LCOM 05 72 #define NCOM 012 73 #define PCOM 010 74 #define QCOM 011 75 #define RCOM 06 76 #define SCOM 07 77 #define TCOM 021 78 #define WCOM 014 79 #define CWCOM 024 80 #define YCOM 026 81 #define XCOM 033 82 83 typedef struct label { /* Label symbol table */ 84 Rune uninm[9]; /* Label name */ 85 SedCom *chain; 86 SedCom *address; /* Command associated with label */ 87 } Label; 88 89 typedef struct FILE_CACHE { /* Data file control block */ 90 struct FILE_CACHE *next; /* Forward Link */ 91 char *name; /* Name of file */ 92 } FileCache; 93 94 SedCom pspace[MAXCMDS]; /* Command storage */ 95 SedCom *pend = pspace+MAXCMDS; /* End of command storage */ 96 SedCom *rep = pspace; /* Current fill point */ 97 98 Reprog *lastre = 0; /* Last regular expression */ 99 Resub subexp[MAXSUB]; /* sub-patterns of pattern match*/ 100 101 Rune addspace[ADDSIZE]; /* Buffer for a, c, & i commands */ 102 Rune *addend = addspace+ADDSIZE; 103 104 SedCom *abuf[MAXADDS]; /* Queue of pending adds & reads */ 105 SedCom **aptr = abuf; 106 107 struct { /* Sed program input control block */ 108 enum PTYPE { /* Either on command line or in file */ 109 P_ARG, 110 P_FILE 111 } type; 112 /* union PCTL { */ /* Pointer to data */ 113 Biobuf *bp; 114 char *curr; 115 /* }; */ 116 } prog; 117 118 Rune genbuf[LBSIZE]; /* Miscellaneous buffer */ 119 120 FileCache *fhead = 0; /* Head of File Cache Chain */ 121 FileCache *ftail = 0; /* Tail of File Cache Chain */ 122 123 Rune *loc1; /* Start of pattern match */ 124 Rune *loc2; /* End of pattern match */ 125 Rune seof; /* Pattern delimiter char */ 126 127 Rune linebuf[LBSIZE+1]; /* Input data buffer */ 128 Rune *lbend = linebuf+LBSIZE; /* End of buffer */ 129 Rune *spend = linebuf; /* End of input data */ 130 Rune *cp; /* Current scan point in linebuf */ 131 132 Rune holdsp[LBSIZE+1]; /* Hold buffer */ 133 Rune *hend = holdsp+LBSIZE; /* End of hold buffer */ 134 Rune *hspend = holdsp; /* End of hold data */ 135 136 int nflag; /* Command line flags */ 137 int gflag; 138 139 int dolflag; /* Set when at true EOF */ 140 int sflag; /* Set when substitution done */ 141 int jflag; /* Set when jump required */ 142 int delflag; /* Delete current line when set */ 143 144 long lnum = 0; /* Input line count */ 145 146 char fname[MAXFILES][40]; /* File name cache */ 147 Biobuf *fcode[MAXFILES]; /* File ID cache */ 148 int nfiles = 0; /* Cache fill point */ 149 150 Biobuf fout; /* Output stream */ 151 Biobuf bstdin; /* Default input */ 152 Biobuf* f = 0; /* Input data */ 153 154 Label ltab[LABSIZE]; /* Label name symbol table */ 155 Label *labend = ltab+LABSIZE; /* End of label table */ 156 Label *lab = ltab+1; /* Current Fill point */ 157 158 int depth = 0; /* {} stack pointer */ 159 160 Rune bad; /* Dummy err ptr reference */ 161 Rune *badp = &bad; 162 163 164 char CGMES[] = "command garbled: %S"; 165 char TMMES[] = "Too much text: %S"; 166 char LTL[] = "Label too long: %S"; 167 char AD0MES[] = "No addresses allowed: %S"; 168 char AD1MES[] = "Only one address allowed: %S"; 169 170 void address(Addr *); 171 void arout(void); 172 int cmp(char *, char *); 173 int rcmp(Rune *, Rune *); 174 void command(SedCom *); 175 Reprog *compile(void); 176 Rune *compsub(Rune *, Rune *); 177 void dechain(void); 178 void dosub(Rune *); 179 int ecmp(Rune *, Rune *, int); 180 void enroll(char *); 181 void errexit(void); 182 int executable(SedCom *); 183 void execute(void); 184 void fcomp(void); 185 long getrune(void); 186 Rune *gline(Rune *); 187 int match(Reprog *, Rune *); 188 void newfile(enum PTYPE, char *); 189 int opendata(void); 190 Biobuf *open_file(char *); 191 Rune *place(Rune *, Rune *, Rune *); 192 void quit(char *, ...); 193 int rline(Rune *, Rune *); 194 Label *search(Label *); 195 int substitute(SedCom *); 196 char *text(char *); 197 Rune *stext(Rune *, Rune *); 198 int ycomp(SedCom *); 199 char * trans(int c); 200 void putline(Biobuf *bp, Rune *buf, int n); 201 202 void 203 main(int argc, char **argv) 204 { 205 int compfl; 206 207 lnum = 0; 208 Binit(&fout, 1, OWRITE); 209 fcode[nfiles++] = &fout; 210 compfl = 0; 211 212 if(argc == 1) 213 exits(0); 214 ARGBEGIN{ 215 case 'e': 216 if (argc <= 1) 217 quit("missing pattern"); 218 newfile(P_ARG, ARGF()); 219 fcomp(); 220 compfl = 1; 221 continue; 222 case 'f': 223 if(argc <= 1) 224 quit("no pattern-file"); 225 newfile(P_FILE, ARGF()); 226 fcomp(); 227 compfl = 1; 228 continue; 229 case 'g': 230 gflag++; 231 continue; 232 case 'n': 233 nflag++; 234 continue; 235 default: 236 fprint(2, "sed: Unknown flag: %c\n", ARGC()); 237 continue; 238 } ARGEND 239 240 if(compfl == 0) { 241 if (--argc < 0) 242 quit("missing pattern"); 243 newfile(P_ARG, *argv++); 244 fcomp(); 245 } 246 247 if(depth) 248 quit("Too many {'s"); 249 250 ltab[0].address = rep; 251 252 dechain(); 253 254 if(argc <= 0) 255 enroll(0); /* Add stdin to cache */ 256 else 257 while(--argc >= 0) 258 enroll(*argv++); 259 execute(); 260 exits(0); 261 } 262 263 void 264 fcomp(void) 265 { 266 int i; 267 Label *lpt; 268 Rune *tp; 269 SedCom *pt, *pt1; 270 static Rune *p = addspace; 271 static SedCom **cmpend[DEPTH]; /* stack of {} operations */ 272 273 while (rline(linebuf, lbend) >= 0) { 274 cp = linebuf; 275 comploop: 276 while(*cp == ' ' || *cp == '\t') 277 cp++; 278 if(*cp == '\0' || *cp == '#') 279 continue; 280 if(*cp == ';') { 281 cp++; 282 goto comploop; 283 } 284 285 address(&rep->ad1); 286 if (rep->ad1.type != A_NONE) { 287 if (rep->ad1.type == A_LAST) { 288 if (!lastre) 289 quit("First RE may not be null"); 290 rep->ad1.type = A_RE; 291 rep->ad1.rp = lastre; 292 } 293 if(*cp == ',' || *cp == ';') { 294 cp++; 295 address(&rep->ad2); 296 if (rep->ad2.type == A_LAST) { 297 rep->ad2.type = A_RE; 298 rep->ad2.rp = lastre; 299 } 300 } else 301 rep->ad2.type = A_NONE; 302 } 303 while(*cp == ' ' || *cp == '\t') 304 cp++; 305 306 swit: 307 switch(*cp++) { 308 default: 309 quit("Unrecognized command: %S", linebuf); 310 311 case '!': 312 rep->negfl = 1; 313 goto swit; 314 315 case '{': 316 rep->command = BCOM; 317 rep->negfl = !rep->negfl; 318 cmpend[depth++] = &rep->lb1; 319 if(++rep >= pend) 320 quit("Too many commands: %S", linebuf); 321 if(*cp == '\0') 322 continue; 323 goto comploop; 324 325 case '}': 326 if(rep->ad1.type != A_NONE) 327 quit(AD0MES, linebuf); 328 if(--depth < 0) 329 quit("Too many }'s"); 330 *cmpend[depth] = rep; 331 if(*cp == 0) 332 continue; 333 goto comploop; 334 335 case '=': 336 rep->command = EQCOM; 337 if(rep->ad2.type != A_NONE) 338 quit(AD1MES, linebuf); 339 break; 340 341 case ':': 342 if(rep->ad1.type != A_NONE) 343 quit(AD0MES, linebuf); 344 345 while(*cp == ' ') 346 cp++; 347 tp = lab->uninm; 348 while (*cp && *cp != ';' && *cp != ' ' && 349 *cp != '\t' && *cp != '#') { 350 *tp++ = *cp++; 351 if(tp >= &lab->uninm[8]) 352 quit(LTL, linebuf); 353 } 354 *tp = '\0'; 355 356 if (*lab->uninm == '\0') /* no label? */ 357 quit(CGMES, linebuf); 358 if(lpt = search(lab)) { 359 if(lpt->address) 360 quit("Duplicate labels: %S", linebuf); 361 } else { 362 lab->chain = 0; 363 lpt = lab; 364 if(++lab >= labend) 365 quit("Too many labels: %S", linebuf); 366 } 367 lpt->address = rep; 368 if (*cp == '#') 369 continue; 370 rep--; /* reuse this slot */ 371 break; 372 373 case 'a': 374 rep->command = ACOM; 375 if(rep->ad2.type != A_NONE) 376 quit(AD1MES, linebuf); 377 if(*cp == '\\') 378 cp++; 379 if(*cp++ != '\n') 380 quit(CGMES, linebuf); 381 rep->text = p; 382 p = stext(p, addend); 383 break; 384 case 'c': 385 rep->command = CCOM; 386 if(*cp == '\\') 387 cp++; 388 if(*cp++ != '\n') 389 quit(CGMES, linebuf); 390 rep->text = p; 391 p = stext(p, addend); 392 break; 393 case 'i': 394 rep->command = ICOM; 395 if(rep->ad2.type != A_NONE) 396 quit(AD1MES, linebuf); 397 if(*cp == '\\') 398 cp++; 399 if(*cp++ != '\n') 400 quit(CGMES, linebuf); 401 rep->text = p; 402 p = stext(p, addend); 403 break; 404 405 case 'g': 406 rep->command = GCOM; 407 break; 408 409 case 'G': 410 rep->command = CGCOM; 411 break; 412 413 case 'h': 414 rep->command = HCOM; 415 break; 416 417 case 'H': 418 rep->command = CHCOM; 419 break; 420 421 case 't': 422 rep->command = TCOM; 423 goto jtcommon; 424 425 case 'b': 426 rep->command = BCOM; 427 jtcommon: 428 while(*cp == ' ') 429 cp++; 430 if(*cp == '\0' || *cp == ';') { 431 /* no label; jump to end */ 432 if(pt = ltab[0].chain) { 433 while((pt1 = pt->lb1) != nil) 434 pt = pt1; 435 pt->lb1 = rep; 436 } else 437 ltab[0].chain = rep; 438 break; 439 } 440 441 /* copy label into lab->uninm */ 442 tp = lab->uninm; 443 while((*tp = *cp++) != '\0' && *tp != ';') 444 if(++tp >= &lab->uninm[8]) 445 quit(LTL, linebuf); 446 cp--; 447 *tp = '\0'; 448 449 if (*lab->uninm == '\0') 450 /* shouldn't get here */ 451 quit(CGMES, linebuf); 452 if((lpt = search(lab)) != nil) { 453 if(lpt->address) 454 rep->lb1 = lpt->address; 455 else { 456 for(pt = lpt->chain; pt != nil && 457 (pt1 = pt->lb1) != nil; pt = pt1) 458 ; 459 if (pt) 460 pt->lb1 = rep; 461 } 462 } else { /* add new label */ 463 lab->chain = rep; 464 lab->address = 0; 465 if(++lab >= labend) 466 quit("Too many labels: %S", linebuf); 467 } 468 break; 469 470 case 'n': 471 rep->command = NCOM; 472 break; 473 474 case 'N': 475 rep->command = CNCOM; 476 break; 477 478 case 'p': 479 rep->command = PCOM; 480 break; 481 482 case 'P': 483 rep->command = CPCOM; 484 break; 485 486 case 'r': 487 rep->command = RCOM; 488 if(rep->ad2.type != A_NONE) 489 quit(AD1MES, linebuf); 490 if(*cp++ != ' ') 491 quit(CGMES, linebuf); 492 rep->text = p; 493 p = stext(p, addend); 494 break; 495 496 case 'd': 497 rep->command = DCOM; 498 break; 499 500 case 'D': 501 rep->command = CDCOM; 502 rep->lb1 = pspace; 503 break; 504 505 case 'q': 506 rep->command = QCOM; 507 if(rep->ad2.type != A_NONE) 508 quit(AD1MES, linebuf); 509 break; 510 511 case 'l': 512 rep->command = LCOM; 513 break; 514 515 case 's': 516 rep->command = SCOM; 517 seof = *cp++; 518 if ((rep->re1 = compile()) == 0) { 519 if(!lastre) 520 quit("First RE may not be null."); 521 rep->re1 = lastre; 522 } 523 rep->rhs = p; 524 if((p = compsub(p, addend)) == 0) 525 quit(CGMES, linebuf); 526 if(*cp == 'g') { 527 cp++; 528 rep->gfl++; 529 } else if(gflag) 530 rep->gfl++; 531 532 if(*cp == 'p') { 533 cp++; 534 rep->pfl = 1; 535 } 536 537 if(*cp == 'P') { 538 cp++; 539 rep->pfl = 2; 540 } 541 542 if(*cp == 'w') { 543 cp++; 544 if(*cp++ != ' ') 545 quit(CGMES, linebuf); 546 text(fname[nfiles]); 547 for(i = nfiles - 1; i >= 0; i--) 548 if(cmp(fname[nfiles], fname[i]) == 0) { 549 rep->fcode = fcode[i]; 550 goto done; 551 } 552 if(nfiles >= MAXFILES) 553 quit("Too many files in w commands 1"); 554 rep->fcode = open_file(fname[nfiles]); 555 } 556 break; 557 558 case 'w': 559 rep->command = WCOM; 560 if(*cp++ != ' ') 561 quit(CGMES, linebuf); 562 text(fname[nfiles]); 563 for(i = nfiles - 1; i >= 0; i--) 564 if(cmp(fname[nfiles], fname[i]) == 0) { 565 rep->fcode = fcode[i]; 566 goto done; 567 } 568 if(nfiles >= MAXFILES){ 569 fprint(2, "sed: Too many files in w commands 2 \n"); 570 fprint(2, "nfiles = %d; MAXF = %d\n", 571 nfiles, MAXFILES); 572 errexit(); 573 } 574 rep->fcode = open_file(fname[nfiles]); 575 break; 576 577 case 'x': 578 rep->command = XCOM; 579 break; 580 581 case 'y': 582 rep->command = YCOM; 583 seof = *cp++; 584 if (ycomp(rep) == 0) 585 quit(CGMES, linebuf); 586 break; 587 588 } 589 done: 590 if(++rep >= pend) 591 quit("Too many commands, last: %S", linebuf); 592 if(*cp++ != '\0') { 593 if(cp[-1] == ';') 594 goto comploop; 595 quit(CGMES, linebuf); 596 } 597 } 598 } 599 600 Biobuf * 601 open_file(char *name) 602 { 603 int fd; 604 Biobuf *bp; 605 606 if ((bp = malloc(sizeof(Biobuf))) == 0) 607 quit("Out of memory"); 608 if ((fd = open(name, OWRITE)) < 0 && 609 (fd = create(name, OWRITE, 0666)) < 0) 610 quit("Cannot create %s", name); 611 Binit(bp, fd, OWRITE); 612 Bseek(bp, 0, 2); 613 fcode[nfiles++] = bp; 614 return bp; 615 } 616 617 Rune * 618 compsub(Rune *rhs, Rune *end) 619 { 620 Rune r; 621 622 while ((r = *cp++) != '\0') { 623 if(r == '\\') { 624 if (rhs < end) 625 *rhs++ = 0xFFFF; 626 else 627 return 0; 628 r = *cp++; 629 if(r == 'n') 630 r = '\n'; 631 } else { 632 if(r == seof) { 633 if (rhs < end) 634 *rhs++ = '\0'; 635 else 636 return 0; 637 return rhs; 638 } 639 } 640 if (rhs < end) 641 *rhs++ = r; 642 else 643 return 0; 644 } 645 return 0; 646 } 647 648 Reprog * 649 compile(void) 650 { 651 Rune c; 652 char *ep; 653 char expbuf[512]; 654 655 if((c = *cp++) == seof) /* '//' */ 656 return 0; 657 ep = expbuf; 658 do { 659 if (c == '\0' || c == '\n') 660 quit(TMMES, linebuf); 661 if (c == '\\') { 662 if (ep >= expbuf+sizeof(expbuf)) 663 quit(TMMES, linebuf); 664 ep += runetochar(ep, &c); 665 if ((c = *cp++) == 'n') 666 c = '\n'; 667 } 668 if (ep >= expbuf + sizeof(expbuf)) 669 quit(TMMES, linebuf); 670 ep += runetochar(ep, &c); 671 } while ((c = *cp++) != seof); 672 *ep = 0; 673 return lastre = regcomp(expbuf); 674 } 675 676 void 677 regerror(char *s) 678 { 679 USED(s); 680 quit(CGMES, linebuf); 681 } 682 683 void 684 newfile(enum PTYPE type, char *name) 685 { 686 if (type == P_ARG) 687 prog.curr = name; 688 else if ((prog.bp = Bopen(name, OREAD)) == 0) 689 quit("Cannot open pattern-file: %s\n", name); 690 prog.type = type; 691 } 692 693 int 694 rline(Rune *buf, Rune *end) 695 { 696 long c; 697 Rune r; 698 699 while ((c = getrune()) >= 0) { 700 if(c == '\r') /* consume CRs on win95 */ 701 continue; 702 r = c; 703 if (r == '\\') { 704 if (buf <= end) 705 *buf++ = r; 706 c = getrune(); 707 if (c == '\r') 708 c = getrune(); 709 if (c < 0) 710 break; 711 r = c; 712 } else if (r == '\n') { 713 *buf = '\0'; 714 return 1; 715 } 716 if (buf <= end) 717 *buf++ = r; 718 } 719 *buf = '\0'; 720 return -1; 721 } 722 723 long 724 getrune(void) 725 { 726 long c; 727 Rune r; 728 char *p; 729 730 if (prog.type == P_ARG) { 731 if ((p = prog.curr) != 0) { 732 if (*p) { 733 prog.curr += chartorune(&r, p); 734 c = r; 735 } else { 736 c = '\n'; /* fake an end-of-line */ 737 prog.curr = 0; 738 } 739 } else 740 c = -1; 741 } else if ((c = Bgetrune(prog.bp)) < 0) 742 Bterm(prog.bp); 743 return c; 744 } 745 746 void 747 address(Addr *ap) 748 { 749 int c; 750 long lno; 751 752 if((c = *cp++) == '$') 753 ap->type = A_DOL; 754 else if(c == '/') { 755 seof = c; 756 if (ap->rp = compile()) 757 ap->type = A_RE; 758 else 759 ap->type = A_LAST; 760 } 761 else if (c >= '0' && c <= '9') { 762 lno = c - '0'; 763 while ((c = *cp) >= '0' && c <= '9') 764 lno = lno*10 + *cp++ - '0'; 765 if(!lno) 766 quit("line number 0 is illegal",0); 767 ap->type = A_LINE; 768 ap->line = lno; 769 } 770 else { 771 cp--; 772 ap->type = A_NONE; 773 } 774 } 775 776 cmp(char *a, char *b) /* compare characters */ 777 { 778 while(*a == *b++) 779 if (*a == '\0') 780 return 0; 781 else 782 a++; 783 return 1; 784 } 785 rcmp(Rune *a, Rune *b) /* compare runes */ 786 { 787 while(*a == *b++) 788 if (*a == '\0') 789 return 0; 790 else 791 a++; 792 return 1; 793 } 794 795 char * 796 text(char *p) /* extract character string */ 797 { 798 Rune r; 799 800 while(*cp == ' ' || *cp == '\t') 801 cp++; 802 while (*cp) { 803 if ((r = *cp++) == '\\' && (r = *cp++) == '\0') 804 break; 805 if (r == '\n') 806 while (*cp == ' ' || *cp == '\t') 807 cp++; 808 p += runetochar(p, &r); 809 } 810 *p++ = '\0'; 811 return p; 812 } 813 814 Rune * 815 stext(Rune *p, Rune *end) /* extract rune string */ 816 { 817 while(*cp == ' ' || *cp == '\t') 818 cp++; 819 while (*cp) { 820 if (*cp == '\\' && *++cp == '\0') 821 break; 822 if (p >= end-1) 823 quit(TMMES, linebuf); 824 if ((*p++ = *cp++) == '\n') 825 while(*cp == ' ' || *cp == '\t') 826 cp++; 827 } 828 *p++ = 0; 829 return p; 830 } 831 832 833 Label * 834 search(Label *ptr) 835 { 836 Label *rp; 837 838 for (rp = ltab; rp < ptr; rp++) 839 if(rcmp(rp->uninm, ptr->uninm) == 0) 840 return(rp); 841 return(0); 842 } 843 844 void 845 dechain(void) 846 { 847 Label *lptr; 848 SedCom *rptr, *trptr; 849 850 for(lptr = ltab; lptr < lab; lptr++) { 851 if(lptr->address == 0) 852 quit("Undefined label: %S", lptr->uninm); 853 if(lptr->chain) { 854 rptr = lptr->chain; 855 while((trptr = rptr->lb1) != nil) { 856 rptr->lb1 = lptr->address; 857 rptr = trptr; 858 } 859 rptr->lb1 = lptr->address; 860 } 861 } 862 } 863 864 int 865 ycomp(SedCom *r) 866 { 867 int i; 868 Rune *rp, *sp, *tsp; 869 Rune c, highc; 870 871 highc = 0; 872 for(tsp = cp; *tsp != seof; tsp++) { 873 if(*tsp == '\\') 874 tsp++; 875 if(*tsp == '\n' || *tsp == '\0') 876 return 0; 877 if (*tsp > highc) 878 highc = *tsp; 879 } 880 tsp++; 881 if ((rp = r->text = (Rune *)malloc(sizeof(Rune) * (highc+2))) == nil) 882 quit("Out of memory"); 883 *rp++ = highc; /* save upper bound */ 884 for (i = 0; i <= highc; i++) 885 rp[i] = i; 886 sp = cp; 887 while((c = *sp++) != seof) { 888 if(c == '\\' && *sp == 'n') { 889 sp++; 890 c = '\n'; 891 } 892 if((rp[c] = *tsp++) == '\\' && *tsp == 'n') { 893 rp[c] = '\n'; 894 tsp++; 895 } 896 if(rp[c] == seof || rp[c] == '\0') { 897 free(r->text); 898 r->text = nil; 899 return 0; 900 } 901 } 902 if(*tsp != seof) { 903 free(r->text); 904 r->text = nil; 905 return 0; 906 } 907 cp = tsp+1; 908 return 1; 909 } 910 911 void 912 execute(void) 913 { 914 SedCom *ipc; 915 916 while (spend = gline(linebuf)){ 917 for(ipc = pspace; ipc->command; ) { 918 if (!executable(ipc)) { 919 ipc++; 920 continue; 921 } 922 command(ipc); 923 924 if(delflag) 925 break; 926 if(jflag) { 927 jflag = 0; 928 if((ipc = ipc->lb1) == 0) 929 break; 930 } else 931 ipc++; 932 } 933 if(!nflag && !delflag) 934 putline(&fout, linebuf, spend - linebuf); 935 if(aptr > abuf) 936 arout(); 937 delflag = 0; 938 } 939 } 940 941 /* determine if a statement should be applied to an input line */ 942 int 943 executable(SedCom *ipc) 944 { 945 if (ipc->active) { /* Addr1 satisfied - accept until Addr2 */ 946 if (ipc->active == 1) /* Second line */ 947 ipc->active = 2; 948 switch(ipc->ad2.type) { 949 case A_NONE: /* No second addr; use first */ 950 ipc->active = 0; 951 break; 952 case A_DOL: /* Accept everything */ 953 return !ipc->negfl; 954 case A_LINE: /* Line at end of range? */ 955 if (lnum <= ipc->ad2.line) { 956 if (ipc->ad2.line == lnum) 957 ipc->active = 0; 958 return !ipc->negfl; 959 } 960 ipc->active = 0; /* out of range */ 961 return ipc->negfl; 962 case A_RE: /* Check for matching R.E. */ 963 if (match(ipc->ad2.rp, linebuf)) 964 ipc->active = 0; 965 return !ipc->negfl; 966 default: 967 quit("Internal error"); 968 } 969 } 970 switch (ipc->ad1.type) { /* Check first address */ 971 case A_NONE: /* Everything matches */ 972 return !ipc->negfl; 973 case A_DOL: /* Only last line */ 974 if (dolflag) 975 return !ipc->negfl; 976 break; 977 case A_LINE: /* Check line number */ 978 if (ipc->ad1.line == lnum) { 979 ipc->active = 1; /* In range */ 980 return !ipc->negfl; 981 } 982 break; 983 case A_RE: /* Check R.E. */ 984 if (match(ipc->ad1.rp, linebuf)) { 985 ipc->active = 1; /* In range */ 986 return !ipc->negfl; 987 } 988 break; 989 default: 990 quit("Internal error"); 991 } 992 return ipc->negfl; 993 } 994 995 int 996 match(Reprog *pattern, Rune *buf) 997 { 998 if (!pattern) 999 return 0; 1000 subexp[0].s.rsp = buf; 1001 subexp[0].e.ep = 0; 1002 if (rregexec(pattern, linebuf, subexp, MAXSUB)) { 1003 loc1 = subexp[0].s.rsp; 1004 loc2 = subexp[0].e.rep; 1005 return 1; 1006 } 1007 loc1 = loc2 = 0; 1008 return 0; 1009 } 1010 1011 int 1012 substitute(SedCom *ipc) 1013 { 1014 int len; 1015 1016 if(!match(ipc->re1, linebuf)) 1017 return 0; 1018 1019 /* 1020 * we have at least one match. some patterns, e.g. '$' or '^', can 1021 * produce 0-length matches, so during a global substitute we must 1022 * bump to the character after a 0-length match to keep from looping. 1023 */ 1024 sflag = 1; 1025 if(ipc->gfl == 0) /* single substitution */ 1026 dosub(ipc->rhs); 1027 else 1028 do{ /* global substitution */ 1029 len = loc2 - loc1; /* length of match */ 1030 dosub(ipc->rhs); /* dosub moves loc2 */ 1031 if(*loc2 == 0) /* end of string */ 1032 break; 1033 if(len == 0) /* zero-length R.E. match */ 1034 loc2++; /* bump over 0-length match */ 1035 if(*loc2 == 0) /* end of string */ 1036 break; 1037 } while(match(ipc->re1, loc2)); 1038 return 1; 1039 } 1040 1041 void 1042 dosub(Rune *rhsbuf) 1043 { 1044 int c, n; 1045 Rune *lp, *sp, *rp; 1046 1047 lp = linebuf; 1048 sp = genbuf; 1049 rp = rhsbuf; 1050 while (lp < loc1) 1051 *sp++ = *lp++; 1052 while(c = *rp++) { 1053 if (c == '&') { 1054 sp = place(sp, loc1, loc2); 1055 continue; 1056 } 1057 if (c == 0xFFFF && (c = *rp++) >= '1' && c < MAXSUB + '0') { 1058 n = c-'0'; 1059 if (subexp[n].s.rsp && subexp[n].e.rep) { 1060 sp = place(sp, subexp[n].s.rsp, subexp[n].e.rep); 1061 continue; 1062 } 1063 else { 1064 fprint(2, "sed: Invalid back reference \\%d\n",n); 1065 errexit(); 1066 } 1067 } 1068 *sp++ = c; 1069 if (sp >= &genbuf[LBSIZE]) 1070 fprint(2, "sed: Output line too long.\n"); 1071 } 1072 lp = loc2; 1073 loc2 = sp - genbuf + linebuf; 1074 while (*sp++ = *lp++) 1075 if (sp >= &genbuf[LBSIZE]) 1076 fprint(2, "sed: Output line too long.\n"); 1077 lp = linebuf; 1078 sp = genbuf; 1079 while (*lp++ = *sp++) 1080 ; 1081 spend = lp - 1; 1082 } 1083 1084 Rune * 1085 place(Rune *sp, Rune *l1, Rune *l2) 1086 { 1087 while (l1 < l2) { 1088 *sp++ = *l1++; 1089 if (sp >= &genbuf[LBSIZE]) 1090 fprint(2, "sed: Output line too long.\n"); 1091 } 1092 return sp; 1093 } 1094 1095 char * 1096 trans(int c) 1097 { 1098 static char buf[] = "\\x0000"; 1099 static char hex[] = "0123456789abcdef"; 1100 1101 switch(c) { 1102 case '\b': 1103 return "\\b"; 1104 case '\n': 1105 return "\\n"; 1106 case '\r': 1107 return "\\r"; 1108 case '\t': 1109 return "\\t"; 1110 case '\\': 1111 return "\\\\"; 1112 } 1113 buf[2] = hex[(c>>12)&0xF]; 1114 buf[3] = hex[(c>>8)&0xF]; 1115 buf[4] = hex[(c>>4)&0xF]; 1116 buf[5] = hex[c&0xF]; 1117 return buf; 1118 } 1119 1120 void 1121 command(SedCom *ipc) 1122 { 1123 int i, c; 1124 char *ucp; 1125 Rune *execp, *p1, *p2, *rp; 1126 1127 switch(ipc->command) { 1128 case ACOM: 1129 *aptr++ = ipc; 1130 if(aptr >= abuf+MAXADDS) 1131 quit("sed: Too many appends after line %ld\n", 1132 (char *)lnum); 1133 *aptr = 0; 1134 break; 1135 case CCOM: 1136 delflag = 1; 1137 if(ipc->active == 1) { 1138 for(rp = ipc->text; *rp; rp++) 1139 Bputrune(&fout, *rp); 1140 Bputc(&fout, '\n'); 1141 } 1142 break; 1143 case DCOM: 1144 delflag++; 1145 break; 1146 case CDCOM: 1147 p1 = p2 = linebuf; 1148 while(*p1 != '\n') { 1149 if(*p1++ == 0) { 1150 delflag++; 1151 return; 1152 } 1153 } 1154 p1++; 1155 while(*p2++ = *p1++) 1156 ; 1157 spend = p2 - 1; 1158 jflag++; 1159 break; 1160 case EQCOM: 1161 Bprint(&fout, "%ld\n", lnum); 1162 break; 1163 case GCOM: 1164 p1 = linebuf; 1165 p2 = holdsp; 1166 while(*p1++ = *p2++) 1167 ; 1168 spend = p1 - 1; 1169 break; 1170 case CGCOM: 1171 *spend++ = '\n'; 1172 p1 = spend; 1173 p2 = holdsp; 1174 while(*p1++ = *p2++) 1175 if(p1 >= lbend) 1176 break; 1177 spend = p1 - 1; 1178 break; 1179 case HCOM: 1180 p1 = holdsp; 1181 p2 = linebuf; 1182 while(*p1++ = *p2++); 1183 hspend = p1 - 1; 1184 break; 1185 case CHCOM: 1186 *hspend++ = '\n'; 1187 p1 = hspend; 1188 p2 = linebuf; 1189 while(*p1++ = *p2++) 1190 if(p1 >= hend) 1191 break; 1192 hspend = p1 - 1; 1193 break; 1194 case ICOM: 1195 for(rp = ipc->text; *rp; rp++) 1196 Bputrune(&fout, *rp); 1197 Bputc(&fout, '\n'); 1198 break; 1199 case BCOM: 1200 jflag = 1; 1201 break; 1202 case LCOM: 1203 c = 0; 1204 for (i = 0, rp = linebuf; *rp; rp++) { 1205 c = *rp; 1206 if(c >= 0x20 && c < 0x7F && c != '\\') { 1207 Bputc(&fout, c); 1208 if(i++ > 71) { 1209 Bprint(&fout, "\\\n"); 1210 i = 0; 1211 } 1212 } else { 1213 for (ucp = trans(*rp); *ucp; ucp++){ 1214 c = *ucp; 1215 Bputc(&fout, c); 1216 if(i++ > 71) { 1217 Bprint(&fout, "\\\n"); 1218 i = 0; 1219 } 1220 } 1221 } 1222 } 1223 if(c == ' ') 1224 Bprint(&fout, "\\n"); 1225 Bputc(&fout, '\n'); 1226 break; 1227 case NCOM: 1228 if(!nflag) 1229 putline(&fout, linebuf, spend-linebuf); 1230 1231 if(aptr > abuf) 1232 arout(); 1233 if((execp = gline(linebuf)) == 0) { 1234 delflag = 1; 1235 break; 1236 } 1237 spend = execp; 1238 break; 1239 case CNCOM: 1240 if(aptr > abuf) 1241 arout(); 1242 *spend++ = '\n'; 1243 if((execp = gline(spend)) == 0) { 1244 delflag = 1; 1245 break; 1246 } 1247 spend = execp; 1248 break; 1249 case PCOM: 1250 putline(&fout, linebuf, spend-linebuf); 1251 break; 1252 case CPCOM: 1253 cpcom: 1254 for(rp = linebuf; *rp && *rp != '\n'; rp++) 1255 Bputc(&fout, *rp); 1256 Bputc(&fout, '\n'); 1257 break; 1258 case QCOM: 1259 if(!nflag) 1260 putline(&fout, linebuf, spend-linebuf); 1261 if(aptr > abuf) 1262 arout(); 1263 exits(0); 1264 case RCOM: 1265 *aptr++ = ipc; 1266 if(aptr >= &abuf[MAXADDS]) 1267 quit("sed: Too many reads after line %ld\n", 1268 (char *)lnum); 1269 *aptr = 0; 1270 break; 1271 case SCOM: 1272 i = substitute(ipc); 1273 if(i && ipc->pfl) 1274 if(ipc->pfl == 1) 1275 putline(&fout, linebuf, spend-linebuf); 1276 else 1277 goto cpcom; 1278 if(i && ipc->fcode) 1279 goto wcom; 1280 break; 1281 1282 case TCOM: 1283 if(sflag) { 1284 sflag = 0; 1285 jflag = 1; 1286 } 1287 break; 1288 1289 case WCOM: 1290 wcom: 1291 putline(ipc->fcode,linebuf, spend - linebuf); 1292 break; 1293 case XCOM: 1294 p1 = linebuf; 1295 p2 = genbuf; 1296 while(*p2++ = *p1++) 1297 ; 1298 p1 = holdsp; 1299 p2 = linebuf; 1300 while(*p2++ = *p1++) 1301 ; 1302 spend = p2 - 1; 1303 p1 = genbuf; 1304 p2 = holdsp; 1305 while(*p2++ = *p1++) 1306 ; 1307 hspend = p2 - 1; 1308 break; 1309 case YCOM: 1310 p1 = linebuf; 1311 p2 = ipc->text; 1312 for (i = *p2++; *p1; p1++) 1313 if (*p1 <= i) 1314 *p1 = p2[*p1]; 1315 break; 1316 } 1317 } 1318 1319 void 1320 putline(Biobuf *bp, Rune *buf, int n) 1321 { 1322 while (n--) 1323 Bputrune(bp, *buf++); 1324 Bputc(bp, '\n'); 1325 } 1326 ecmp(Rune *a, Rune *b, int count) 1327 { 1328 while(count--) 1329 if(*a++ != *b++) 1330 return 0; 1331 return 1; 1332 } 1333 1334 void 1335 arout(void) 1336 { 1337 int c; 1338 char *s; 1339 char buf[128]; 1340 Rune *p1; 1341 Biobuf *fi; 1342 1343 for (aptr = abuf; *aptr; aptr++) { 1344 if((*aptr)->command == ACOM) { 1345 for(p1 = (*aptr)->text; *p1; p1++ ) 1346 Bputrune(&fout, *p1); 1347 Bputc(&fout, '\n'); 1348 } else { 1349 for(s = buf, p1 = (*aptr)->text; *p1; p1++) 1350 s += runetochar(s, p1); 1351 *s = '\0'; 1352 if((fi = Bopen(buf, OREAD)) == 0) 1353 continue; 1354 while((c = Bgetc(fi)) >= 0) 1355 Bputc(&fout, c); 1356 Bterm(fi); 1357 } 1358 } 1359 aptr = abuf; 1360 *aptr = 0; 1361 } 1362 1363 void 1364 errexit(void) 1365 { 1366 exits("error"); 1367 } 1368 1369 void 1370 quit(char *fmt, ...) 1371 { 1372 char *p, *ep; 1373 char msg[256]; 1374 va_list arg; 1375 1376 ep = msg + sizeof msg; 1377 p = seprint(msg, ep, "sed: "); 1378 va_start(arg, fmt); 1379 p = vseprint(p, ep, fmt, arg); 1380 va_end(arg); 1381 p = seprint(p, ep, "\n"); 1382 write(2, msg, p - msg); 1383 errexit(); 1384 } 1385 1386 Rune * 1387 gline(Rune *addr) 1388 { 1389 long c; 1390 Rune *p; 1391 static long peekc = 0; 1392 1393 if (f == 0 && opendata() < 0) 1394 return 0; 1395 sflag = 0; 1396 lnum++; 1397 /* Bflush(&fout);********* dumped 4/30/92 - bobf****/ 1398 do { 1399 p = addr; 1400 for (c = (peekc? peekc: Bgetrune(f)); c >= 0; c = Bgetrune(f)) { 1401 if (c == '\n') { 1402 if ((peekc = Bgetrune(f)) < 0 && fhead == 0) 1403 dolflag = 1; 1404 *p = '\0'; 1405 return p; 1406 } 1407 if (c && p < lbend) 1408 *p++ = c; 1409 } 1410 /* return partial final line, adding implicit newline */ 1411 if(p != addr) { 1412 *p = '\0'; 1413 peekc = -1; 1414 if (fhead == 0) 1415 dolflag = 1; 1416 return p; 1417 } 1418 peekc = 0; 1419 Bterm(f); 1420 } while (opendata() > 0); /* Switch to next stream */ 1421 f = 0; 1422 return 0; 1423 } 1424 1425 /* 1426 * Data file input section - the intent is to transparently 1427 * catenate all data input streams. 1428 */ 1429 void 1430 enroll(char *filename) /* Add a file to the input file cache */ 1431 { 1432 FileCache *fp; 1433 1434 if ((fp = (FileCache *)malloc(sizeof (FileCache))) == nil) 1435 quit("Out of memory"); 1436 if (ftail == nil) 1437 fhead = fp; 1438 else 1439 ftail->next = fp; 1440 ftail = fp; 1441 fp->next = nil; 1442 fp->name = filename; /* 0 => stdin */ 1443 } 1444 1445 int 1446 opendata(void) 1447 { 1448 if (fhead == nil) 1449 return -1; 1450 if (fhead->name) { 1451 if ((f = Bopen(fhead->name, OREAD)) == nil) 1452 quit("Can't open %s", fhead->name); 1453 } else { 1454 Binit(&bstdin, 0, OREAD); 1455 f = &bstdin; 1456 } 1457 fhead = fhead->next; 1458 return 1; 1459 } 1460