1 /* 2 * sed -- stream editor 3 */ 4 #include <u.h> 5 #include <libc.h> 6 #include <bio.h> 7 #include <regexp.h> 8 9 enum { 10 DEPTH = 20, /* max nesting depth of {} */ 11 MAXCMDS = 512, /* max sed commands */ 12 ADDSIZE = 10000, /* size of add & read buffer */ 13 MAXADDS = 20, /* max pending adds and reads */ 14 LBSIZE = 8192, /* input line size */ 15 LABSIZE = 50, /* max number of labels */ 16 MAXSUB = 10, /* max number of sub reg exp */ 17 MAXFILES = 120, /* max output files */ 18 }; 19 20 /* 21 * An address is a line #, a R.E., "$", a reference to the last 22 * R.E., or nothing. 23 */ 24 typedef struct { 25 enum { 26 A_NONE, 27 A_DOL, 28 A_LINE, 29 A_RE, 30 A_LAST, 31 }type; 32 union { 33 long line; /* Line # */ 34 Reprog *rp; /* Compiled R.E. */ 35 }; 36 } Addr; 37 38 typedef struct SEDCOM { 39 Addr ad1; /* optional start address */ 40 Addr ad2; /* optional end address */ 41 union { 42 Reprog *re1; /* compiled R.E. */ 43 Rune *text; /* added text or file name */ 44 struct SEDCOM *lb1; /* destination command of branch */ 45 }; 46 Rune *rhs; /* Right-hand side of substitution */ 47 Biobuf* fcode; /* File ID for read and write */ 48 char command; /* command code -see below */ 49 char gfl; /* 'Global' flag for substitutions */ 50 char pfl; /* 'print' flag for substitutions */ 51 char active; /* 1 => data between start and end */ 52 char negfl; /* negation flag */ 53 } SedCom; 54 55 /* Command Codes for field SedCom.command */ 56 #define ACOM 01 57 #define BCOM 020 58 #define CCOM 02 59 #define CDCOM 025 60 #define CNCOM 022 61 #define COCOM 017 62 #define CPCOM 023 63 #define DCOM 03 64 #define ECOM 015 65 #define EQCOM 013 66 #define FCOM 016 67 #define GCOM 027 68 #define CGCOM 030 69 #define HCOM 031 70 #define CHCOM 032 71 #define ICOM 04 72 #define LCOM 05 73 #define NCOM 012 74 #define PCOM 010 75 #define QCOM 011 76 #define RCOM 06 77 #define SCOM 07 78 #define TCOM 021 79 #define WCOM 014 80 #define CWCOM 024 81 #define YCOM 026 82 #define XCOM 033 83 84 typedef struct label { /* Label symbol table */ 85 Rune uninm[9]; /* Label name */ 86 SedCom *chain; 87 SedCom *address; /* Command associated with label */ 88 } Label; 89 90 typedef struct FILE_CACHE { /* Data file control block */ 91 struct FILE_CACHE *next; /* Forward Link */ 92 char *name; /* Name of file */ 93 } FileCache; 94 95 SedCom pspace[MAXCMDS]; /* Command storage */ 96 SedCom *pend = pspace+MAXCMDS; /* End of command storage */ 97 SedCom *rep = pspace; /* Current fill point */ 98 99 Reprog *lastre = 0; /* Last regular expression */ 100 Resub subexp[MAXSUB]; /* sub-patterns of pattern match*/ 101 102 Rune addspace[ADDSIZE]; /* Buffer for a, c, & i commands */ 103 Rune *addend = addspace+ADDSIZE; 104 105 SedCom *abuf[MAXADDS]; /* Queue of pending adds & reads */ 106 SedCom **aptr = abuf; 107 108 struct { /* Sed program input control block */ 109 enum PTYPE { /* Either on command line or in file */ 110 P_ARG, 111 P_FILE, 112 } type; 113 union PCTL { /* Pointer to data */ 114 Biobuf *bp; 115 char *curr; 116 }; 117 } prog; 118 119 Rune genbuf[LBSIZE]; /* Miscellaneous buffer */ 120 121 FileCache *fhead = 0; /* Head of File Cache Chain */ 122 FileCache *ftail = 0; /* Tail of File Cache Chain */ 123 124 Rune *loc1; /* Start of pattern match */ 125 Rune *loc2; /* End of pattern match */ 126 Rune seof; /* Pattern delimiter char */ 127 128 Rune linebuf[LBSIZE+1]; /* Input data buffer */ 129 Rune *lbend = linebuf+LBSIZE; /* End of buffer */ 130 Rune *spend = linebuf; /* End of input data */ 131 Rune *cp; /* Current scan point in linebuf */ 132 133 Rune holdsp[LBSIZE+1]; /* Hold buffer */ 134 Rune *hend = holdsp+LBSIZE; /* End of hold buffer */ 135 Rune *hspend = holdsp; /* End of hold data */ 136 137 int nflag; /* Command line flags */ 138 int gflag; 139 140 int dolflag; /* Set when at true EOF */ 141 int sflag; /* Set when substitution done */ 142 int jflag; /* Set when jump required */ 143 int delflag; /* Delete current line when set */ 144 145 long lnum = 0; /* Input line count */ 146 147 char fname[MAXFILES][40]; /* File name cache */ 148 Biobuf *fcode[MAXFILES]; /* File ID cache */ 149 int nfiles = 0; /* Cache fill point */ 150 151 Biobuf fout; /* Output stream */ 152 Biobuf stdin; /* Default input */ 153 Biobuf* f = 0; /* Input data */ 154 155 Label ltab[LABSIZE]; /* Label name symbol table */ 156 Label *labend = ltab+LABSIZE; /* End of label table */ 157 Label *lab = ltab+1; /* Current Fill point */ 158 159 int depth = 0; /* {} stack pointer */ 160 161 Rune bad; /* Dummy err ptr reference */ 162 Rune *badp = &bad; 163 164 165 char CGMES[] = "%S command garbled: %S"; 166 char TMMES[] = "Too much text: %S"; 167 char LTL[] = "Label too long: %S"; 168 char AD0MES[] = "No addresses allowed: %S"; 169 char AD1MES[] = "Only one address allowed: %S"; 170 171 void address(Addr *); 172 void arout(void); 173 int cmp(char *, char *); 174 int rcmp(Rune *, Rune *); 175 void command(SedCom *); 176 Reprog *compile(void); 177 Rune *compsub(Rune *, Rune *); 178 void dechain(void); 179 void dosub(Rune *); 180 int ecmp(Rune *, Rune *, int); 181 void enroll(char *); 182 void errexit(void); 183 int executable(SedCom *); 184 void execute(void); 185 void fcomp(void); 186 long getrune(void); 187 Rune *gline(Rune *); 188 int match(Reprog *, Rune *); 189 void newfile(enum PTYPE, char *); 190 int opendata(void); 191 Biobuf *open_file(char *); 192 Rune *place(Rune *, Rune *, Rune *); 193 void quit(char *, ...); 194 int rline(Rune *, Rune *); 195 Label *search(Label *); 196 int substitute(SedCom *); 197 char *text(char *); 198 Rune *stext(Rune *, Rune *); 199 int ycomp(SedCom *); 200 char * trans(int c); 201 void putline(Biobuf *bp, Rune *buf, int n); 202 203 void 204 main(int argc, char **argv) 205 { 206 int compfl; 207 208 lnum = 0; 209 Binit(&fout, 1, OWRITE); 210 fcode[nfiles++] = &fout; 211 compfl = 0; 212 213 if(argc == 1) 214 exits(0); 215 ARGBEGIN{ 216 case 'e': 217 if (argc <= 1) 218 quit("missing pattern"); 219 newfile(P_ARG, ARGF()); 220 fcomp(); 221 compfl = 1; 222 continue; 223 case 'f': 224 if(argc <= 1) 225 quit("no pattern-file"); 226 newfile(P_FILE, ARGF()); 227 fcomp(); 228 compfl = 1; 229 continue; 230 case 'g': 231 gflag++; 232 continue; 233 case 'n': 234 nflag++; 235 continue; 236 default: 237 fprint(2, "sed: Unknown flag: %c\n", ARGC()); 238 continue; 239 } ARGEND 240 241 if(compfl == 0) { 242 if (--argc < 0) 243 quit("missing pattern"); 244 newfile(P_ARG, *argv++); 245 fcomp(); 246 } 247 248 if(depth) 249 quit("Too many {'s"); 250 251 ltab[0].address = rep; 252 253 dechain(); 254 255 if(argc <= 0) 256 enroll(0); /* Add stdin to cache */ 257 else 258 while(--argc >= 0) 259 enroll(*argv++); 260 execute(); 261 exits(0); 262 } 263 264 void 265 fcomp(void) 266 { 267 int i; 268 Label *lpt; 269 Rune *tp; 270 SedCom *pt, *pt1; 271 static Rune *p = addspace; 272 static SedCom **cmpend[DEPTH]; /* stack of {} operations */ 273 274 while (rline(linebuf, lbend) >= 0) { 275 cp = linebuf; 276 comploop: 277 while(*cp == L' ' || *cp == L'\t') 278 cp++; 279 if(*cp == L'\0' || *cp == L'#') 280 continue; 281 if(*cp == L';') { 282 cp++; 283 goto comploop; 284 } 285 286 address(&rep->ad1); 287 if (rep->ad1.type != A_NONE) { 288 if (rep->ad1.type == A_LAST) { 289 if (!lastre) 290 quit("First RE may not be null"); 291 rep->ad1.type = A_RE; 292 rep->ad1.rp = lastre; 293 } 294 if(*cp == L',' || *cp == L';') { 295 cp++; 296 address(&rep->ad2); 297 if (rep->ad2.type == A_LAST) { 298 rep->ad2.type = A_RE; 299 rep->ad2.rp = lastre; 300 } 301 } else 302 rep->ad2.type = A_NONE; 303 } 304 while(*cp == L' ' || *cp == L'\t') 305 cp++; 306 307 swit: 308 switch(*cp++) { 309 default: 310 quit("Unrecognized command: %S", linebuf); 311 312 case '!': 313 rep->negfl = 1; 314 goto swit; 315 316 case '{': 317 rep->command = BCOM; 318 rep->negfl = !rep->negfl; 319 cmpend[depth++] = &rep->lb1; 320 if(++rep >= pend) 321 quit("Too many commands: %S", linebuf); 322 if(*cp == '\0') 323 continue; 324 goto comploop; 325 326 case '}': 327 if(rep->ad1.type != A_NONE) 328 quit(AD0MES, linebuf); 329 if(--depth < 0) 330 quit("Too many }'s"); 331 *cmpend[depth] = rep; 332 if(*cp == 0) 333 continue; 334 goto comploop; 335 336 case '=': 337 rep->command = EQCOM; 338 if(rep->ad2.type != A_NONE) 339 quit(AD1MES, linebuf); 340 break; 341 342 case ':': 343 if(rep->ad1.type != A_NONE) 344 quit(AD0MES, linebuf); 345 346 while(*cp == L' ') 347 cp++; 348 tp = lab->uninm; 349 while (*cp && *cp != L';' && *cp != L' ' && 350 *cp != L'\t' && *cp != L'#') { 351 *tp++ = *cp++; 352 if(tp >= &lab->uninm[8]) 353 quit(LTL, linebuf); 354 } 355 *tp = L'\0'; 356 357 if (*lab->uninm == L'\0') /* no label? */ 358 quit(CGMES, L":", linebuf); 359 if(lpt = search(lab)) { 360 if(lpt->address) 361 quit("Duplicate labels: %S", linebuf); 362 } else { 363 lab->chain = 0; 364 lpt = lab; 365 if(++lab >= labend) 366 quit("Too many labels: %S", linebuf); 367 } 368 lpt->address = rep; 369 if (*cp == L'#') 370 continue; 371 rep--; /* reuse this slot */ 372 break; 373 374 case 'a': 375 rep->command = ACOM; 376 if(rep->ad2.type != A_NONE) 377 quit(AD1MES, linebuf); 378 if(*cp == L'\\') 379 cp++; 380 if(*cp++ != L'\n') 381 quit(CGMES, L"a", linebuf); 382 rep->text = p; 383 p = stext(p, addend); 384 break; 385 case 'c': 386 rep->command = CCOM; 387 if(*cp == L'\\') 388 cp++; 389 if(*cp++ != L'\n') 390 quit(CGMES, L"c", linebuf); 391 rep->text = p; 392 p = stext(p, addend); 393 break; 394 case 'i': 395 rep->command = ICOM; 396 if(rep->ad2.type != A_NONE) 397 quit(AD1MES, linebuf); 398 if(*cp == L'\\') 399 cp++; 400 if(*cp++ != L'\n') 401 quit(CGMES, L"i", linebuf); 402 rep->text = p; 403 p = stext(p, addend); 404 break; 405 406 case 'g': 407 rep->command = GCOM; 408 break; 409 410 case 'G': 411 rep->command = CGCOM; 412 break; 413 414 case 'h': 415 rep->command = HCOM; 416 break; 417 418 case 'H': 419 rep->command = CHCOM; 420 break; 421 422 case 't': 423 rep->command = TCOM; 424 goto jtcommon; 425 426 case 'b': 427 rep->command = BCOM; 428 jtcommon: 429 while(*cp == L' ') 430 cp++; 431 if(*cp == L'\0' || *cp == L';') { 432 /* no label; jump to end */ 433 if(pt = ltab[0].chain) { 434 while((pt1 = pt->lb1) != nil) 435 pt = pt1; 436 pt->lb1 = rep; 437 } else 438 ltab[0].chain = rep; 439 break; 440 } 441 442 /* copy label into lab->uninm */ 443 tp = lab->uninm; 444 while((*tp = *cp++) != L'\0' && *tp != L';') 445 if(++tp >= &lab->uninm[8]) 446 quit(LTL, linebuf); 447 cp--; 448 *tp = L'\0'; 449 450 if (*lab->uninm == L'\0') 451 /* shouldn't get here */ 452 quit(CGMES, L"b or t", linebuf); 453 if((lpt = search(lab)) != nil) { 454 if(lpt->address) 455 rep->lb1 = lpt->address; 456 else { 457 for(pt = lpt->chain; pt != nil && 458 (pt1 = pt->lb1) != nil; pt = pt1) 459 ; 460 if (pt) 461 pt->lb1 = rep; 462 } 463 } else { /* add new label */ 464 lab->chain = rep; 465 lab->address = 0; 466 if(++lab >= labend) 467 quit("Too many labels: %S", linebuf); 468 } 469 break; 470 471 case 'n': 472 rep->command = NCOM; 473 break; 474 475 case 'N': 476 rep->command = CNCOM; 477 break; 478 479 case 'p': 480 rep->command = PCOM; 481 break; 482 483 case 'P': 484 rep->command = CPCOM; 485 break; 486 487 case 'r': 488 rep->command = RCOM; 489 if(rep->ad2.type != A_NONE) 490 quit(AD1MES, linebuf); 491 if(*cp++ != L' ') 492 quit(CGMES, L"r", linebuf); 493 rep->text = p; 494 p = stext(p, addend); 495 break; 496 497 case 'd': 498 rep->command = DCOM; 499 break; 500 501 case 'D': 502 rep->command = CDCOM; 503 rep->lb1 = pspace; 504 break; 505 506 case 'q': 507 rep->command = QCOM; 508 if(rep->ad2.type != A_NONE) 509 quit(AD1MES, linebuf); 510 break; 511 512 case 'l': 513 rep->command = LCOM; 514 break; 515 516 case 's': 517 rep->command = SCOM; 518 seof = *cp++; 519 if ((rep->re1 = compile()) == 0) { 520 if(!lastre) 521 quit("First RE may not be null."); 522 rep->re1 = lastre; 523 } 524 rep->rhs = p; 525 if((p = compsub(p, addend)) == 0) 526 quit(CGMES, L"s", linebuf); 527 if(*cp == L'g') { 528 cp++; 529 rep->gfl++; 530 } else if(gflag) 531 rep->gfl++; 532 533 if(*cp == L'p') { 534 cp++; 535 rep->pfl = 1; 536 } 537 538 if(*cp == L'P') { 539 cp++; 540 rep->pfl = 2; 541 } 542 543 if(*cp == L'w') { 544 cp++; 545 if(*cp++ != L' ') 546 quit(CGMES, L"s", linebuf); 547 text(fname[nfiles]); 548 for(i = nfiles - 1; i >= 0; i--) 549 if(cmp(fname[nfiles], fname[i]) == 0) { 550 rep->fcode = fcode[i]; 551 goto done; 552 } 553 if(nfiles >= MAXFILES) 554 quit("Too many files in w commands 1"); 555 rep->fcode = open_file(fname[nfiles]); 556 } 557 break; 558 559 case 'w': 560 rep->command = WCOM; 561 if(*cp++ != L' ') 562 quit(CGMES, L"w", linebuf); 563 text(fname[nfiles]); 564 for(i = nfiles - 1; i >= 0; i--) 565 if(cmp(fname[nfiles], fname[i]) == 0) { 566 rep->fcode = fcode[i]; 567 goto done; 568 } 569 if(nfiles >= MAXFILES){ 570 fprint(2, "sed: Too many files in w commands 2 \n"); 571 fprint(2, "nfiles = %d; MAXF = %d\n", 572 nfiles, MAXFILES); 573 errexit(); 574 } 575 rep->fcode = open_file(fname[nfiles]); 576 break; 577 578 case 'x': 579 rep->command = XCOM; 580 break; 581 582 case 'y': 583 rep->command = YCOM; 584 seof = *cp++; 585 if (ycomp(rep) == 0) 586 quit(CGMES, L"y", linebuf); 587 break; 588 589 } 590 done: 591 if(++rep >= pend) 592 quit("Too many commands, last: %S", linebuf); 593 if(*cp++ != L'\0') { 594 if(cp[-1] == L';') 595 goto comploop; 596 quit(CGMES, cp - 1, linebuf); 597 } 598 } 599 } 600 601 Biobuf * 602 open_file(char *name) 603 { 604 int fd; 605 Biobuf *bp; 606 607 if ((bp = malloc(sizeof(Biobuf))) == 0) 608 quit("Out of memory"); 609 if ((fd = open(name, OWRITE)) < 0 && 610 (fd = create(name, OWRITE, 0666)) < 0) 611 quit("Cannot create %s", name); 612 Binit(bp, fd, OWRITE); 613 Bseek(bp, 0, 2); 614 fcode[nfiles++] = bp; 615 return bp; 616 } 617 618 Rune * 619 compsub(Rune *rhs, Rune *end) 620 { 621 Rune r; 622 623 while ((r = *cp++) != '\0') { 624 if(r == '\\') { 625 if (rhs < end) 626 *rhs++ = Runemax; 627 else 628 return 0; 629 r = *cp++; 630 if(r == 'n') 631 r = '\n'; 632 } else { 633 if(r == seof) { 634 if (rhs < end) 635 *rhs++ = '\0'; 636 else 637 return 0; 638 return rhs; 639 } 640 } 641 if (rhs < end) 642 *rhs++ = r; 643 else 644 return 0; 645 } 646 return 0; 647 } 648 649 Reprog * 650 compile(void) 651 { 652 Rune c; 653 char *ep; 654 char expbuf[512]; 655 656 if((c = *cp++) == seof) /* L'//' */ 657 return 0; 658 ep = expbuf; 659 do { 660 if (c == L'\0' || c == L'\n') 661 quit(TMMES, linebuf); 662 if (c == L'\\') { 663 if (ep >= expbuf+sizeof(expbuf)) 664 quit(TMMES, linebuf); 665 ep += runetochar(ep, &c); 666 if ((c = *cp++) == L'n') 667 c = L'\n'; 668 } 669 if (ep >= expbuf + sizeof(expbuf)) 670 quit(TMMES, linebuf); 671 ep += runetochar(ep, &c); 672 } while ((c = *cp++) != seof); 673 *ep = 0; 674 return lastre = regcomp(expbuf); 675 } 676 677 void 678 regerror(char *s) 679 { 680 USED(s); 681 quit(CGMES, L"r.e.-using", linebuf); 682 } 683 684 void 685 newfile(enum PTYPE type, char *name) 686 { 687 if (type == P_ARG) 688 prog.curr = name; 689 else if ((prog.bp = Bopen(name, OREAD)) == 0) 690 quit("Cannot open pattern-file: %s\n", name); 691 prog.type = type; 692 } 693 694 int 695 rline(Rune *buf, Rune *end) 696 { 697 long c; 698 Rune r; 699 700 while ((c = getrune()) >= 0) { 701 r = c; 702 if (r == '\\') { 703 if (buf <= end) 704 *buf++ = r; 705 if ((c = getrune()) < 0) 706 break; 707 r = c; 708 } else if (r == '\n') { 709 *buf = '\0'; 710 return 1; 711 } 712 if (buf <= end) 713 *buf++ = r; 714 } 715 *buf = '\0'; 716 return -1; 717 } 718 719 long 720 getrune(void) 721 { 722 long c; 723 Rune r; 724 char *p; 725 726 if (prog.type == P_ARG) { 727 if ((p = prog.curr) != 0) { 728 if (*p) { 729 prog.curr += chartorune(&r, p); 730 c = r; 731 } else { 732 c = '\n'; /* fake an end-of-line */ 733 prog.curr = 0; 734 } 735 } else 736 c = -1; 737 } else if ((c = Bgetrune(prog.bp)) < 0) 738 Bterm(prog.bp); 739 return c; 740 } 741 742 void 743 address(Addr *ap) 744 { 745 int c; 746 long lno; 747 748 if((c = *cp++) == '$') 749 ap->type = A_DOL; 750 else if(c == '/') { 751 seof = c; 752 if (ap->rp = compile()) 753 ap->type = A_RE; 754 else 755 ap->type = A_LAST; 756 } 757 else if (c >= '0' && c <= '9') { 758 lno = c - '0'; 759 while ((c = *cp) >= '0' && c <= '9') 760 lno = lno*10 + *cp++ - '0'; 761 if(!lno) 762 quit("line number 0 is illegal",0); 763 ap->type = A_LINE; 764 ap->line = lno; 765 } 766 else { 767 cp--; 768 ap->type = A_NONE; 769 } 770 } 771 772 cmp(char *a, char *b) /* compare characters */ 773 { 774 while(*a == *b++) 775 if (*a == '\0') 776 return 0; 777 else 778 a++; 779 return 1; 780 } 781 rcmp(Rune *a, Rune *b) /* compare runes */ 782 { 783 while(*a == *b++) 784 if (*a == '\0') 785 return 0; 786 else 787 a++; 788 return 1; 789 } 790 791 char * 792 text(char *p) /* extract character string */ 793 { 794 Rune r; 795 796 while(*cp == ' ' || *cp == '\t') 797 cp++; 798 while (*cp) { 799 if ((r = *cp++) == '\\' && (r = *cp++) == '\0') 800 break; 801 if (r == '\n') 802 while (*cp == ' ' || *cp == '\t') 803 cp++; 804 p += runetochar(p, &r); 805 } 806 *p++ = '\0'; 807 return p; 808 } 809 810 Rune * 811 stext(Rune *p, Rune *end) /* extract rune string */ 812 { 813 while(*cp == L' ' || *cp == L'\t') 814 cp++; 815 while (*cp) { 816 if (*cp == L'\\' && *++cp == L'\0') 817 break; 818 if (p >= end-1) 819 quit(TMMES, linebuf); 820 if ((*p++ = *cp++) == L'\n') 821 while(*cp == L' ' || *cp == L'\t') 822 cp++; 823 } 824 *p++ = 0; 825 return p; 826 } 827 828 829 Label * 830 search(Label *ptr) 831 { 832 Label *rp; 833 834 for (rp = ltab; rp < ptr; rp++) 835 if(rcmp(rp->uninm, ptr->uninm) == 0) 836 return(rp); 837 return(0); 838 } 839 840 void 841 dechain(void) 842 { 843 Label *lptr; 844 SedCom *rptr, *trptr; 845 846 for(lptr = ltab; lptr < lab; lptr++) { 847 if(lptr->address == 0) 848 quit("Undefined label: %S", lptr->uninm); 849 if(lptr->chain) { 850 rptr = lptr->chain; 851 while((trptr = rptr->lb1) != nil) { 852 rptr->lb1 = lptr->address; 853 rptr = trptr; 854 } 855 rptr->lb1 = lptr->address; 856 } 857 } 858 } 859 860 int 861 ycomp(SedCom *r) 862 { 863 int i; 864 Rune *rp, *sp, *tsp; 865 Rune c, highc; 866 867 highc = 0; 868 for(tsp = cp; *tsp != seof; tsp++) { 869 if(*tsp == L'\\') 870 tsp++; 871 if(*tsp == L'\n' || *tsp == L'\0') 872 return 0; 873 if (*tsp > highc) 874 highc = *tsp; 875 } 876 tsp++; 877 if ((rp = r->text = (Rune *)malloc(sizeof(Rune) * (highc+2))) == nil) 878 quit("Out of memory"); 879 *rp++ = highc; /* save upper bound */ 880 for (i = 0; i <= highc; i++) 881 rp[i] = i; 882 sp = cp; 883 while((c = *sp++) != seof) { 884 if(c == L'\\' && *sp == L'n') { 885 sp++; 886 c = L'\n'; 887 } 888 if((rp[c] = *tsp++) == L'\\' && *tsp == L'n') { 889 rp[c] = L'\n'; 890 tsp++; 891 } 892 if(rp[c] == seof || rp[c] == L'\0') { 893 free(r->re1); 894 r->re1 = nil; 895 return 0; 896 } 897 } 898 if(*tsp != seof) { 899 free(r->re1); 900 r->re1 = nil; 901 return 0; 902 } 903 cp = tsp+1; 904 return 1; 905 } 906 907 void 908 execute(void) 909 { 910 SedCom *ipc; 911 912 while (spend = gline(linebuf)){ 913 for(ipc = pspace; ipc->command; ) { 914 if (!executable(ipc)) { 915 ipc++; 916 continue; 917 } 918 command(ipc); 919 920 if(delflag) 921 break; 922 if(jflag) { 923 jflag = 0; 924 if((ipc = ipc->lb1) == 0) 925 break; 926 } else 927 ipc++; 928 } 929 if(!nflag && !delflag) 930 putline(&fout, linebuf, spend - linebuf); 931 if(aptr > abuf) 932 arout(); 933 delflag = 0; 934 } 935 } 936 937 /* determine if a statement should be applied to an input line */ 938 int 939 executable(SedCom *ipc) 940 { 941 if (ipc->active) { /* Addr1 satisfied - accept until Addr2 */ 942 if (ipc->active == 1) /* Second line */ 943 ipc->active = 2; 944 switch(ipc->ad2.type) { 945 case A_NONE: /* No second addr; use first */ 946 ipc->active = 0; 947 break; 948 case A_DOL: /* Accept everything */ 949 return !ipc->negfl; 950 case A_LINE: /* Line at end of range? */ 951 if (lnum <= ipc->ad2.line) { 952 if (ipc->ad2.line == lnum) 953 ipc->active = 0; 954 return !ipc->negfl; 955 } 956 ipc->active = 0; /* out of range */ 957 return ipc->negfl; 958 case A_RE: /* Check for matching R.E. */ 959 if (match(ipc->ad2.rp, linebuf)) 960 ipc->active = 0; 961 return !ipc->negfl; 962 default: 963 quit("Internal error"); 964 } 965 } 966 switch (ipc->ad1.type) { /* Check first address */ 967 case A_NONE: /* Everything matches */ 968 return !ipc->negfl; 969 case A_DOL: /* Only last line */ 970 if (dolflag) 971 return !ipc->negfl; 972 break; 973 case A_LINE: /* Check line number */ 974 if (ipc->ad1.line == lnum) { 975 ipc->active = 1; /* In range */ 976 return !ipc->negfl; 977 } 978 break; 979 case A_RE: /* Check R.E. */ 980 if (match(ipc->ad1.rp, linebuf)) { 981 ipc->active = 1; /* In range */ 982 return !ipc->negfl; 983 } 984 break; 985 default: 986 quit("Internal error"); 987 } 988 return ipc->negfl; 989 } 990 991 int 992 match(Reprog *pattern, Rune *buf) 993 { 994 if (!pattern) 995 return 0; 996 subexp[0].rsp = buf; 997 subexp[0].ep = 0; 998 if (rregexec(pattern, linebuf, subexp, MAXSUB) > 0) { 999 loc1 = subexp[0].rsp; 1000 loc2 = subexp[0].rep; 1001 return 1; 1002 } 1003 loc1 = loc2 = 0; 1004 return 0; 1005 } 1006 1007 int 1008 substitute(SedCom *ipc) 1009 { 1010 int len; 1011 1012 if(!match(ipc->re1, linebuf)) 1013 return 0; 1014 1015 /* 1016 * we have at least one match. some patterns, e.g. '$' or '^', can 1017 * produce 0-length matches, so during a global substitute we must 1018 * bump to the character after a 0-length match to keep from looping. 1019 */ 1020 sflag = 1; 1021 if(ipc->gfl == 0) /* single substitution */ 1022 dosub(ipc->rhs); 1023 else 1024 do{ /* global substitution */ 1025 len = loc2 - loc1; /* length of match */ 1026 dosub(ipc->rhs); /* dosub moves loc2 */ 1027 if(*loc2 == 0) /* end of string */ 1028 break; 1029 if(len == 0) /* zero-length R.E. match */ 1030 loc2++; /* bump over 0-length match */ 1031 if(*loc2 == 0) /* end of string */ 1032 break; 1033 } while(match(ipc->re1, loc2)); 1034 return 1; 1035 } 1036 1037 void 1038 dosub(Rune *rhsbuf) 1039 { 1040 int c, n; 1041 Rune *lp, *sp, *rp; 1042 1043 lp = linebuf; 1044 sp = genbuf; 1045 rp = rhsbuf; 1046 while (lp < loc1) 1047 *sp++ = *lp++; 1048 while(c = *rp++) { 1049 if (c == '&') { 1050 sp = place(sp, loc1, loc2); 1051 continue; 1052 } 1053 if (c == Runemax && (c = *rp++) >= '1' && c < MAXSUB + '0') { 1054 n = c-'0'; 1055 if (subexp[n].rsp && subexp[n].rep) { 1056 sp = place(sp, subexp[n].rsp, subexp[n].rep); 1057 continue; 1058 } 1059 else { 1060 fprint(2, "sed: Invalid back reference \\%d\n",n); 1061 errexit(); 1062 } 1063 } 1064 *sp++ = c; 1065 if (sp >= &genbuf[LBSIZE]) 1066 fprint(2, "sed: Output line too long.\n"); 1067 } 1068 lp = loc2; 1069 loc2 = sp - genbuf + linebuf; 1070 while (*sp++ = *lp++) 1071 if (sp >= &genbuf[LBSIZE]) 1072 fprint(2, "sed: Output line too long.\n"); 1073 lp = linebuf; 1074 sp = genbuf; 1075 while (*lp++ = *sp++) 1076 ; 1077 spend = lp - 1; 1078 } 1079 1080 Rune * 1081 place(Rune *sp, Rune *l1, Rune *l2) 1082 { 1083 while (l1 < l2) { 1084 *sp++ = *l1++; 1085 if (sp >= &genbuf[LBSIZE]) 1086 fprint(2, "sed: Output line too long.\n"); 1087 } 1088 return sp; 1089 } 1090 1091 char * 1092 trans(int c) 1093 { 1094 static char buf[] = "\\x0000"; 1095 static char hex[] = "0123456789abcdef"; 1096 1097 switch(c) { 1098 case '\b': 1099 return "\\b"; 1100 case '\n': 1101 return "\\n"; 1102 case '\r': 1103 return "\\r"; 1104 case '\t': 1105 return "\\t"; 1106 case '\\': 1107 return "\\\\"; 1108 } 1109 buf[2] = hex[(c>>12)&0xF]; 1110 buf[3] = hex[(c>>8)&0xF]; 1111 buf[4] = hex[(c>>4)&0xF]; 1112 buf[5] = hex[c&0xF]; 1113 return buf; 1114 } 1115 1116 void 1117 command(SedCom *ipc) 1118 { 1119 int i, c; 1120 char *ucp; 1121 Rune *execp, *p1, *p2, *rp; 1122 1123 switch(ipc->command) { 1124 case ACOM: 1125 *aptr++ = ipc; 1126 if(aptr >= abuf+MAXADDS) 1127 quit("sed: Too many appends after line %ld\n", 1128 (char *)lnum); 1129 *aptr = 0; 1130 break; 1131 case CCOM: 1132 delflag = 1; 1133 if(ipc->active == 1) { 1134 for(rp = ipc->text; *rp; rp++) 1135 Bputrune(&fout, *rp); 1136 Bputc(&fout, '\n'); 1137 } 1138 break; 1139 case DCOM: 1140 delflag++; 1141 break; 1142 case CDCOM: 1143 p1 = p2 = linebuf; 1144 while(*p1 != '\n') { 1145 if(*p1++ == 0) { 1146 delflag++; 1147 return; 1148 } 1149 } 1150 p1++; 1151 while(*p2++ = *p1++) 1152 ; 1153 spend = p2 - 1; 1154 jflag++; 1155 break; 1156 case EQCOM: 1157 Bprint(&fout, "%ld\n", lnum); 1158 break; 1159 case GCOM: 1160 p1 = linebuf; 1161 p2 = holdsp; 1162 while(*p1++ = *p2++) 1163 ; 1164 spend = p1 - 1; 1165 break; 1166 case CGCOM: 1167 *spend++ = '\n'; 1168 p1 = spend; 1169 p2 = holdsp; 1170 while(*p1++ = *p2++) 1171 if(p1 >= lbend) 1172 break; 1173 spend = p1 - 1; 1174 break; 1175 case HCOM: 1176 p1 = holdsp; 1177 p2 = linebuf; 1178 while(*p1++ = *p2++); 1179 hspend = p1 - 1; 1180 break; 1181 case CHCOM: 1182 *hspend++ = '\n'; 1183 p1 = hspend; 1184 p2 = linebuf; 1185 while(*p1++ = *p2++) 1186 if(p1 >= hend) 1187 break; 1188 hspend = p1 - 1; 1189 break; 1190 case ICOM: 1191 for(rp = ipc->text; *rp; rp++) 1192 Bputrune(&fout, *rp); 1193 Bputc(&fout, '\n'); 1194 break; 1195 case BCOM: 1196 jflag = 1; 1197 break; 1198 case LCOM: 1199 c = 0; 1200 for (i = 0, rp = linebuf; *rp; rp++) { 1201 c = *rp; 1202 if(c >= 0x20 && c < 0x7F && c != '\\') { 1203 Bputc(&fout, c); 1204 if(i++ > 71) { 1205 Bprint(&fout, "\\\n"); 1206 i = 0; 1207 } 1208 } else { 1209 for (ucp = trans(*rp); *ucp; ucp++){ 1210 c = *ucp; 1211 Bputc(&fout, c); 1212 if(i++ > 71) { 1213 Bprint(&fout, "\\\n"); 1214 i = 0; 1215 } 1216 } 1217 } 1218 } 1219 if(c == ' ') 1220 Bprint(&fout, "\\n"); 1221 Bputc(&fout, '\n'); 1222 break; 1223 case NCOM: 1224 if(!nflag) 1225 putline(&fout, linebuf, spend-linebuf); 1226 1227 if(aptr > abuf) 1228 arout(); 1229 if((execp = gline(linebuf)) == 0) { 1230 delflag = 1; 1231 break; 1232 } 1233 spend = execp; 1234 break; 1235 case CNCOM: 1236 if(aptr > abuf) 1237 arout(); 1238 *spend++ = '\n'; 1239 if((execp = gline(spend)) == 0) { 1240 delflag = 1; 1241 break; 1242 } 1243 spend = execp; 1244 break; 1245 case PCOM: 1246 putline(&fout, linebuf, spend-linebuf); 1247 break; 1248 case CPCOM: 1249 cpcom: 1250 for(rp = linebuf; *rp && *rp != '\n'; rp++) 1251 Bputc(&fout, *rp); 1252 Bputc(&fout, '\n'); 1253 break; 1254 case QCOM: 1255 if(!nflag) 1256 putline(&fout, linebuf, spend-linebuf); 1257 if(aptr > abuf) 1258 arout(); 1259 exits(0); 1260 case RCOM: 1261 *aptr++ = ipc; 1262 if(aptr >= &abuf[MAXADDS]) 1263 quit("sed: Too many reads after line %ld\n", 1264 (char *)lnum); 1265 *aptr = 0; 1266 break; 1267 case SCOM: 1268 i = substitute(ipc); 1269 if(i && ipc->pfl) 1270 if(ipc->pfl == 1) 1271 putline(&fout, linebuf, spend-linebuf); 1272 else 1273 goto cpcom; 1274 if(i && ipc->fcode) 1275 goto wcom; 1276 break; 1277 1278 case TCOM: 1279 if(sflag) { 1280 sflag = 0; 1281 jflag = 1; 1282 } 1283 break; 1284 1285 case WCOM: 1286 wcom: 1287 putline(ipc->fcode,linebuf, spend - linebuf); 1288 break; 1289 case XCOM: 1290 p1 = linebuf; 1291 p2 = genbuf; 1292 while(*p2++ = *p1++) 1293 ; 1294 p1 = holdsp; 1295 p2 = linebuf; 1296 while(*p2++ = *p1++) 1297 ; 1298 spend = p2 - 1; 1299 p1 = genbuf; 1300 p2 = holdsp; 1301 while(*p2++ = *p1++) 1302 ; 1303 hspend = p2 - 1; 1304 break; 1305 case YCOM: 1306 p1 = linebuf; 1307 p2 = ipc->text; 1308 for (i = *p2++; *p1; p1++) 1309 if (*p1 <= i) 1310 *p1 = p2[*p1]; 1311 break; 1312 } 1313 } 1314 1315 void 1316 putline(Biobuf *bp, Rune *buf, int n) 1317 { 1318 while (n--) 1319 Bputrune(bp, *buf++); 1320 Bputc(bp, '\n'); 1321 } 1322 ecmp(Rune *a, Rune *b, int count) 1323 { 1324 while(count--) 1325 if(*a++ != *b++) 1326 return 0; 1327 return 1; 1328 } 1329 1330 void 1331 arout(void) 1332 { 1333 int c; 1334 char *s; 1335 char buf[128]; 1336 Rune *p1; 1337 Biobuf *fi; 1338 1339 for (aptr = abuf; *aptr; aptr++) { 1340 if((*aptr)->command == ACOM) { 1341 for(p1 = (*aptr)->text; *p1; p1++ ) 1342 Bputrune(&fout, *p1); 1343 Bputc(&fout, '\n'); 1344 } else { 1345 for(s = buf, p1 = (*aptr)->text; *p1; p1++) 1346 s += runetochar(s, p1); 1347 *s = '\0'; 1348 if((fi = Bopen(buf, OREAD)) == 0) 1349 continue; 1350 while((c = Bgetc(fi)) >= 0) 1351 Bputc(&fout, c); 1352 Bterm(fi); 1353 } 1354 } 1355 aptr = abuf; 1356 *aptr = 0; 1357 } 1358 1359 void 1360 errexit(void) 1361 { 1362 exits("error"); 1363 } 1364 1365 void 1366 quit(char *fmt, ...) 1367 { 1368 char *p, *ep; 1369 char msg[256]; 1370 va_list arg; 1371 1372 ep = msg + sizeof msg; 1373 p = seprint(msg, ep, "sed: "); 1374 va_start(arg, fmt); 1375 p = vseprint(p, ep, fmt, arg); 1376 va_end(arg); 1377 p = seprint(p, ep, "\n"); 1378 write(2, msg, p - msg); 1379 errexit(); 1380 } 1381 1382 Rune * 1383 gline(Rune *addr) 1384 { 1385 long c; 1386 Rune *p; 1387 static long peekc = 0; 1388 1389 if (f == 0 && opendata() < 0) 1390 return 0; 1391 sflag = 0; 1392 lnum++; 1393 /* Bflush(&fout);********* dumped 4/30/92 - bobf****/ 1394 do { 1395 p = addr; 1396 for (c = (peekc? peekc: Bgetrune(f)); c >= 0; c = Bgetrune(f)) { 1397 if (c == '\n') { 1398 if ((peekc = Bgetrune(f)) < 0 && fhead == 0) 1399 dolflag = 1; 1400 *p = '\0'; 1401 return p; 1402 } 1403 if (c && p < lbend) 1404 *p++ = c; 1405 } 1406 /* return partial final line, adding implicit newline */ 1407 if(p != addr) { 1408 *p = '\0'; 1409 peekc = -1; 1410 if (fhead == 0) 1411 dolflag = 1; 1412 return p; 1413 } 1414 peekc = 0; 1415 Bterm(f); 1416 } while (opendata() > 0); /* Switch to next stream */ 1417 f = 0; 1418 return 0; 1419 } 1420 1421 /* 1422 * Data file input section - the intent is to transparently 1423 * catenate all data input streams. 1424 */ 1425 void 1426 enroll(char *filename) /* Add a file to the input file cache */ 1427 { 1428 FileCache *fp; 1429 1430 if ((fp = (FileCache *)malloc(sizeof (FileCache))) == nil) 1431 quit("Out of memory"); 1432 if (ftail == nil) 1433 fhead = fp; 1434 else 1435 ftail->next = fp; 1436 ftail = fp; 1437 fp->next = nil; 1438 fp->name = filename; /* 0 => stdin */ 1439 } 1440 1441 int 1442 opendata(void) 1443 { 1444 if (fhead == nil) 1445 return -1; 1446 if (fhead->name) { 1447 if ((f = Bopen(fhead->name, OREAD)) == nil) 1448 quit("Can't open %s", fhead->name); 1449 } else { 1450 Binit(&stdin, 0, OREAD); 1451 f = &stdin; 1452 } 1453 fhead = fhead->next; 1454 return 1; 1455 } 1456