1 /* 2 * sed -- stream editor 3 * 4 * 5 */ 6 #include <u.h> 7 #include <libc.h> 8 #include <bio.h> 9 #include <regexp.h> 10 11 enum { 12 DEPTH = 20, /* max nesting depth of {} */ 13 MAXCMDS = 512, /* max sed commands */ 14 ADDSIZE = 10000, /* size of add & read buffer */ 15 MAXADDS = 20, /* max pending adds and reads */ 16 LBSIZE = 8192, /* input line size */ 17 LABSIZE = 50, /* max label name size */ 18 MAXSUB = 10, /* max number of sub reg exp */ 19 MAXFILES = 120, /* max output files */ 20 }; 21 /* An address is a line #, a R.E., "$", a reference to the last 22 * R.E., or nothing. 23 */ 24 typedef struct { 25 enum { 26 A_NONE, 27 A_DOL, 28 A_LINE, 29 A_RE, 30 A_LAST, 31 }type; 32 union { 33 long line; /* Line # */ 34 Reprog *rp; /* Compiled R.E. */ 35 }; 36 } Addr; 37 38 typedef struct SEDCOM { 39 Addr ad1; /* optional start address */ 40 Addr ad2; /* optional end address */ 41 union { 42 Reprog *re1; /* compiled R.E. */ 43 Rune *text; /* added text or file name */ 44 struct SEDCOM *lb1; /* destination command of branch */ 45 }; 46 Rune *rhs; /* Right-hand side of substitution */ 47 Biobuf* fcode; /* File ID for read and write */ 48 char command; /* command code -see below */ 49 char gfl; /* 'Global' flag for substitutions */ 50 char pfl; /* 'print' flag for substitutions */ 51 char active; /* 1 => data between start and end */ 52 char negfl; /* negation flag */ 53 } SedCom; 54 55 /* Command Codes for field SedCom.command */ 56 #define ACOM 01 57 #define BCOM 020 58 #define CCOM 02 59 #define CDCOM 025 60 #define CNCOM 022 61 #define COCOM 017 62 #define CPCOM 023 63 #define DCOM 03 64 #define ECOM 015 65 #define EQCOM 013 66 #define FCOM 016 67 #define GCOM 027 68 #define CGCOM 030 69 #define HCOM 031 70 #define CHCOM 032 71 #define ICOM 04 72 #define LCOM 05 73 #define NCOM 012 74 #define PCOM 010 75 #define QCOM 011 76 #define RCOM 06 77 #define SCOM 07 78 #define TCOM 021 79 #define WCOM 014 80 #define CWCOM 024 81 #define YCOM 026 82 #define XCOM 033 83 84 85 typedef struct label { /* Label symbol table */ 86 Rune asc[9]; /* Label name */ 87 SedCom *chain; 88 SedCom *address; /* Command associated with label */ 89 } Label; 90 91 typedef struct FILE_CACHE { /* Data file control block */ 92 struct FILE_CACHE *next; /* Forward Link */ 93 char *name; /* Name of file */ 94 } FileCache; 95 96 SedCom pspace[MAXCMDS]; /* Command storage */ 97 SedCom *pend = pspace+MAXCMDS; /* End of command storage */ 98 SedCom *rep = pspace; /* Current fill point */ 99 100 Reprog *lastre = 0; /* Last regular expression */ 101 Resub subexp[MAXSUB]; /* sub-patterns of pattern match*/ 102 103 Rune addspace[ADDSIZE]; /* Buffer for a, c, & i commands */ 104 Rune *addend = addspace+ADDSIZE; 105 106 SedCom *abuf[MAXADDS]; /* Queue of pending adds & reads */ 107 SedCom **aptr = abuf; 108 109 struct { /* Sed program input control block */ 110 enum PTYPE /* Either on command line or in file */ 111 { P_ARG, 112 P_FILE 113 } type; 114 union PCTL { /* Pointer to data */ 115 Biobuf *bp; 116 char *curr; 117 }; 118 } prog; 119 120 Rune genbuf[LBSIZE]; /* Miscellaneous buffer */ 121 122 FileCache *fhead = 0; /* Head of File Cache Chain */ 123 FileCache *ftail = 0; /* Tail of File Cache Chain */ 124 125 Rune *loc1; /* Start of pattern match */ 126 Rune *loc2; /* End of pattern match */ 127 Rune seof; /* Pattern delimiter char */ 128 129 Rune linebuf[LBSIZE+1]; /* Input data buffer */ 130 Rune *lbend = linebuf+LBSIZE; /* End of buffer */ 131 Rune *spend = linebuf; /* End of input data */ 132 Rune *cp; /* Current scan point in linebuf */ 133 134 Rune holdsp[LBSIZE+1]; /* Hold buffer */ 135 Rune *hend = holdsp+LBSIZE; /* End of hold buffer */ 136 Rune *hspend = holdsp; /* End of hold data */ 137 138 int nflag; /* Command line flags */ 139 int gflag; 140 141 int dolflag; /* Set when at true EOF */ 142 int sflag; /* Set when substitution done */ 143 int jflag; /* Set when jump required */ 144 int delflag; /* Delete current line when set */ 145 146 long lnum = 0; /* Input line count */ 147 148 char fname[MAXFILES][40]; /* File name cache */ 149 Biobuf *fcode[MAXFILES]; /* File ID cache */ 150 int nfiles = 0; /* Cache fill point */ 151 152 Biobuf fout; /* Output stream */ 153 Biobuf stdin; /* Default input */ 154 Biobuf* f = 0; /* Input data */ 155 156 Label ltab[LABSIZE]; /* Label name symbol table */ 157 Label *labend = ltab+LABSIZE; /* End of label table */ 158 Label *lab = ltab+1; /* Current Fill point */ 159 160 int depth = 0; /* {} stack pointer */ 161 162 Rune bad; /* Dummy err ptr reference */ 163 Rune *badp = &bad; 164 165 166 char CGMES[] = "Command garbled: %S"; 167 char TMMES[] = "Too much text: %S"; 168 char LTL[] = "Label too long: %S"; 169 char AD0MES[] = "No addresses allowed: %S"; 170 char AD1MES[] = "Only one address allowed: %S"; 171 172 void address(Addr *); 173 void arout(void); 174 int cmp(char *, char *); 175 int rcmp(Rune *, Rune *); 176 void command(SedCom *); 177 Reprog *compile(void); 178 Rune *compsub(Rune *, Rune *); 179 void dechain(void); 180 void dosub(Rune *); 181 int ecmp(Rune *, Rune *, int); 182 void enroll(char *); 183 void errexit(void); 184 int executable(SedCom *); 185 void execute(void); 186 void fcomp(void); 187 long getrune(void); 188 Rune *gline(Rune *); 189 int match(Reprog *, Rune *); 190 void newfile(enum PTYPE, char *); 191 int opendata(void); 192 Biobuf *open_file(char *); 193 Rune *place(Rune *, Rune *, Rune *); 194 void quit(char *, char *); 195 int rline(Rune *, Rune *); 196 Label *search(Label *); 197 int substitute(SedCom *); 198 char *text(char *); 199 Rune *stext(Rune *, Rune *); 200 int ycomp(SedCom *); 201 char * trans(int c); 202 void putline(Biobuf *bp, Rune *buf, int n); 203 204 void 205 main(int argc, char **argv) 206 { 207 int compfl; 208 209 lnum = 0; 210 Binit(&fout, 1, OWRITE); 211 fcode[nfiles++] = &fout; 212 compfl = 0; 213 214 if(argc == 1) 215 exits(0); 216 ARGBEGIN{ 217 case 'n': 218 nflag++; 219 continue; 220 case 'f': 221 if(argc <= 1) 222 quit("no pattern-file", 0); 223 newfile(P_FILE, ARGF()); 224 fcomp(); 225 compfl = 1; 226 continue; 227 case 'e': 228 if (argc <= 1) 229 quit("missing pattern", 0); 230 newfile(P_ARG, ARGF()); 231 fcomp(); 232 compfl = 1; 233 continue; 234 case 'g': 235 gflag++; 236 continue; 237 default: 238 fprint(2, "sed: Unknown flag: %c\n", ARGC()); 239 continue; 240 } ARGEND 241 242 if(compfl == 0) { 243 if (--argc < 0) 244 quit("missing pattern", 0); 245 newfile(P_ARG, *argv++); 246 fcomp(); 247 } 248 249 if(depth) 250 quit("Too many {'s", 0); 251 252 ltab[0].address = rep; 253 254 dechain(); 255 256 if(argc <= 0) 257 enroll(0); /* Add stdin to cache */ 258 else while(--argc >= 0) { 259 enroll(*argv++); 260 } 261 execute(); 262 exits(0); 263 } 264 void 265 fcomp(void) 266 { 267 Rune *tp; 268 SedCom *pt, *pt1; 269 int i; 270 Label *lpt; 271 272 static Rune *p = addspace; 273 static SedCom **cmpend[DEPTH]; /* stack of {} operations */ 274 275 while (rline(linebuf, lbend) >= 0) { 276 cp = linebuf; 277 comploop: 278 while(*cp == ' ' || *cp == '\t') 279 cp++; 280 if(*cp == '\0' || *cp == '#') 281 continue; 282 if(*cp == ';') { 283 cp++; 284 goto comploop; 285 } 286 287 address(&rep->ad1); 288 if (rep->ad1.type != A_NONE) { 289 if (rep->ad1.type == A_LAST) { 290 if (!lastre) 291 quit("First RE may not be null", 0); 292 rep->ad1.type = A_RE; 293 rep->ad1.rp = lastre; 294 } 295 if(*cp == ',' || *cp == ';') { 296 cp++; 297 address(&rep->ad2); 298 if (rep->ad2.type == A_LAST) { 299 rep->ad1.type = A_RE; 300 rep->ad2.rp = lastre; 301 } 302 } else 303 rep->ad2.type = A_NONE; 304 } 305 while(*cp == ' ' || *cp == '\t') 306 cp++; 307 308 swit: 309 switch(*cp++) { 310 311 default: 312 quit("Unrecognized command: %S", (char *)linebuf); 313 314 case '!': 315 rep->negfl = 1; 316 goto swit; 317 318 case '{': 319 rep->command = BCOM; 320 rep->negfl = !(rep->negfl); 321 cmpend[depth++] = &rep->lb1; 322 if(++rep >= pend) 323 quit("Too many commands: %S", (char *) linebuf); 324 if(*cp == '\0') continue; 325 goto comploop; 326 327 case '}': 328 if(rep->ad1.type != A_NONE) 329 quit(AD0MES, (char *) linebuf); 330 if(--depth < 0) 331 quit("Too many }'s", 0); 332 *cmpend[depth] = rep; 333 if(*cp == 0) continue; 334 goto comploop; 335 336 case '=': 337 rep->command = EQCOM; 338 if(rep->ad2.type != A_NONE) 339 quit(AD1MES, (char *) linebuf); 340 break; 341 342 case ':': 343 if(rep->ad1.type != A_NONE) 344 quit(AD0MES, (char *) linebuf); 345 346 while(*cp == ' ') 347 cp++; 348 tp = lab->asc; 349 while (*cp && *cp != ';' && *cp != ' ' && *cp != '\t' && *cp != '#') { 350 *tp++ = *cp++; 351 if(tp >= &(lab->asc[8])) 352 quit(LTL, (char *) linebuf); 353 } 354 *tp = '\0'; 355 356 if(lpt = search(lab)) { 357 if(lpt->address) 358 quit("Duplicate labels: %S", (char *) linebuf); 359 } else { 360 lab->chain = 0; 361 lpt = lab; 362 if(++lab >= labend) 363 quit("Too many labels: %S", (char *) linebuf); 364 } 365 lpt->address = rep; 366 if (*cp == '#') 367 continue; 368 rep--; /* reuse this slot */ 369 break; 370 371 case 'a': 372 rep->command = ACOM; 373 if(rep->ad2.type != A_NONE) 374 quit(AD1MES, (char *) linebuf); 375 if(*cp == '\\') cp++; 376 if(*cp++ != '\n') 377 quit(CGMES, (char *) linebuf); 378 rep->text = p; 379 p = stext(p, addend); 380 break; 381 case 'c': 382 rep->command = CCOM; 383 if(*cp == '\\') cp++; 384 if(*cp++ != '\n') 385 quit(CGMES, (char *) linebuf); 386 rep->text = p; 387 p = stext(p, addend); 388 break; 389 case 'i': 390 rep->command = ICOM; 391 if(rep->ad2.type != A_NONE) 392 quit(AD1MES, (char *) linebuf); 393 if(*cp == '\\') cp++; 394 if(*cp++ != '\n') 395 quit(CGMES, (char *) linebuf); 396 rep->text = p; 397 p = stext(p, addend); 398 break; 399 400 case 'g': 401 rep->command = GCOM; 402 break; 403 404 case 'G': 405 rep->command = CGCOM; 406 break; 407 408 case 'h': 409 rep->command = HCOM; 410 break; 411 412 case 'H': 413 rep->command = CHCOM; 414 break; 415 416 case 't': 417 rep->command = TCOM; 418 goto jtcommon; 419 420 case 'b': 421 rep->command = BCOM; 422 jtcommon: 423 while(*cp == ' ')cp++; 424 if(*cp == '\0') { 425 if(pt = ltab[0].chain) { 426 while(pt1 = pt->lb1) 427 pt = pt1; 428 pt->lb1 = rep; 429 } else 430 ltab[0].chain = rep; 431 break; 432 } 433 tp = lab->asc; 434 while((*tp++ = *cp++)) 435 if(tp >= &(lab->asc[8])) 436 quit(LTL, (char *) linebuf); 437 cp--; 438 tp[-1] = '\0'; 439 440 if(lpt = search(lab)) { 441 if(lpt->address) { 442 rep->lb1 = lpt->address; 443 } else { 444 pt = lpt->chain; 445 while(pt1 = pt->lb1) 446 pt = pt1; 447 pt->lb1 = rep; 448 } 449 } else { 450 lab->chain = rep; 451 lab->address = 0; 452 if(++lab >= labend) 453 quit("Too many labels: %S", 454 (char *) linebuf); 455 } 456 break; 457 458 case 'n': 459 rep->command = NCOM; 460 break; 461 462 case 'N': 463 rep->command = CNCOM; 464 break; 465 466 case 'p': 467 rep->command = PCOM; 468 break; 469 470 case 'P': 471 rep->command = CPCOM; 472 break; 473 474 case 'r': 475 rep->command = RCOM; 476 if(rep->ad2.type != A_NONE) 477 quit(AD1MES, (char *) linebuf); 478 if(*cp++ != ' ') 479 quit(CGMES, (char *) linebuf); 480 rep->text = p; 481 p = stext(p, addend); 482 break; 483 484 case 'd': 485 rep->command = DCOM; 486 break; 487 488 case 'D': 489 rep->command = CDCOM; 490 rep->lb1 = pspace; 491 break; 492 493 case 'q': 494 rep->command = QCOM; 495 if(rep->ad2.type != A_NONE) 496 quit(AD1MES, (char *) linebuf); 497 break; 498 499 case 'l': 500 rep->command = LCOM; 501 break; 502 503 case 's': 504 rep->command = SCOM; 505 seof = *cp++; 506 if ((rep->re1 = compile()) == 0) { 507 if(!lastre) 508 quit("First RE may not be null.", 0); 509 rep->re1 = lastre; 510 } 511 rep->rhs = p; 512 if((p = compsub(p, addend)) == 0) 513 quit(CGMES, (char *) linebuf); 514 if(*cp == 'g') { 515 cp++; 516 rep->gfl++; 517 } else if(gflag) 518 rep->gfl++; 519 520 if(*cp == 'p') { 521 cp++; 522 rep->pfl = 1; 523 } 524 525 if(*cp == 'P') { 526 cp++; 527 rep->pfl = 2; 528 } 529 530 if(*cp == 'w') { 531 cp++; 532 if(*cp++ != ' ') 533 quit(CGMES, (char *) linebuf); 534 text(fname[nfiles]); 535 for(i = nfiles - 1; i >= 0; i--) 536 if(cmp(fname[nfiles],fname[i]) == 0) { 537 rep->fcode = fcode[i]; 538 goto done; 539 } 540 if(nfiles >= MAXFILES) 541 quit("Too many files in w commands 1", 0); 542 rep->fcode = open_file(fname[nfiles]); 543 } 544 break; 545 546 case 'w': 547 rep->command = WCOM; 548 if(*cp++ != ' ') 549 quit(CGMES, (char *) linebuf); 550 text(fname[nfiles]); 551 for(i = nfiles - 1; i >= 0; i--) 552 if(cmp(fname[nfiles], fname[i]) == 0) { 553 rep->fcode = fcode[i]; 554 goto done; 555 } 556 if(nfiles >= MAXFILES){ 557 fprint(2, "sed: Too many files in w commands 2 \n"); 558 fprint(2, "nfiles = %d; MAXF = %d\n", nfiles, MAXFILES); 559 errexit(); 560 } 561 rep->fcode = open_file(fname[nfiles]); 562 break; 563 564 case 'x': 565 rep->command = XCOM; 566 break; 567 568 case 'y': 569 rep->command = YCOM; 570 seof = *cp++; 571 if (ycomp(rep) == 0) 572 quit(CGMES, (char *) linebuf); 573 break; 574 575 } 576 done: 577 if(++rep >= pend) 578 quit("Too many commands, last: %S", (char *) linebuf); 579 580 if(*cp++ != '\0') { 581 if(cp[-1] == ';') 582 goto comploop; 583 quit(CGMES, (char *) linebuf); 584 } 585 586 } 587 } 588 589 Biobuf * 590 open_file(char *name) 591 { 592 Biobuf *bp; 593 int fd; 594 595 if ((bp = malloc(sizeof(Biobuf))) == 0) 596 quit("Out of memory", 0); 597 if ((fd = open(name, OWRITE)) < 0 && 598 (fd = create(name, OWRITE, 0666)) < 0) 599 quit("Cannot create %s", name); 600 Binit(bp, fd, OWRITE); 601 Bseek(bp, 0, 2); 602 fcode[nfiles++] = bp; 603 return bp; 604 } 605 606 Rune * 607 compsub(Rune *rhs, Rune *end) 608 { 609 Rune r; 610 611 while ((r = *cp++) != '\0') { 612 if(r == '\\') { 613 if (rhs < end) 614 *rhs++ = 0xFFFF; 615 else 616 return 0; 617 r = *cp++; 618 if(r == 'n') 619 r = '\n'; 620 } else { 621 if(r == seof) { 622 if (rhs < end) 623 *rhs++ = '\0'; 624 else 625 return 0; 626 return rhs; 627 } 628 } 629 if (rhs < end) 630 *rhs++ = r; 631 else 632 return 0; 633 634 } 635 return 0; 636 } 637 638 Reprog * 639 compile(void) 640 { 641 Rune c; 642 char *ep; 643 char expbuf[512]; 644 645 if((c = *cp++) == seof) /* '//' */ 646 return 0; 647 ep = expbuf; 648 do { 649 if (c == 0 || c == '\n') 650 quit(TMMES, (char *) linebuf); 651 if (c == '\\') { 652 if (ep >= expbuf+sizeof(expbuf)) 653 quit(TMMES, (char *) linebuf); 654 ep += runetochar(ep, &c); 655 if ((c = *cp++) == 'n') 656 c = '\n'; 657 } 658 if (ep >= expbuf+sizeof(expbuf)) 659 quit(TMMES, (char *) linebuf); 660 ep += runetochar(ep, &c); 661 } while ((c = *cp++) != seof); 662 *ep = 0; 663 return lastre = regcomp(expbuf); 664 } 665 666 void 667 regerror(char *s) 668 { 669 USED(s); 670 quit(CGMES, (char *) linebuf); 671 } 672 673 void 674 newfile(enum PTYPE type, char *name) 675 { 676 if (type == P_ARG) 677 prog.curr = name; 678 else if ((prog.bp = Bopen(name, OREAD)) == 0) 679 quit("Cannot open pattern-file: %s\n", name); 680 prog.type = type; 681 } 682 683 int 684 rline(Rune *buf, Rune *end) 685 { 686 long c; 687 Rune r; 688 689 while ((c = getrune()) >= 0) { 690 r = c; 691 if (r == '\\') { 692 if (buf <= end) 693 *buf++ = r; 694 if ((c = getrune()) < 0) 695 break; 696 r = c; 697 } else if (r == '\n') { 698 *buf = '\0'; 699 return(1); 700 } 701 if (buf <= end) 702 *buf++ = r; 703 } 704 *buf = '\0'; 705 return(-1); 706 } 707 708 long 709 getrune(void) 710 { 711 char *p; 712 long c; 713 Rune r; 714 715 if (prog.type == P_ARG) { 716 if ((p = prog.curr) != 0) { 717 if (*p) { 718 prog.curr += chartorune(&r, p); 719 c = r; 720 } else { 721 c = '\n'; /* fake an end-of-line */ 722 prog.curr = 0; 723 } 724 } else 725 c = -1; 726 } else if ((c = Bgetrune(prog.bp)) < 0) 727 Bterm(prog.bp); 728 return c; 729 } 730 731 void 732 address(Addr *ap) 733 { 734 int c; 735 long lno; 736 737 if((c = *cp++) == '$') 738 ap->type = A_DOL; 739 else if(c == '/') { 740 seof = c; 741 if (ap->rp = compile()) 742 ap->type = A_RE; 743 else 744 ap->type = A_LAST; 745 } 746 else if (c >= '0' && c <= '9') { 747 lno = c-'0'; 748 while ((c = *cp) >= '0' && c <= '9') 749 lno = lno*10 + *cp++-'0'; 750 if(!lno) 751 quit("line number 0 is illegal",0); 752 ap->type = A_LINE; 753 ap->line = lno; 754 } 755 else { 756 cp--; 757 ap->type = A_NONE; 758 } 759 } 760 761 cmp(char *a, char *b) /* compare characters */ 762 { 763 while(*a == *b++) 764 if (*a == '\0') 765 return(0); 766 else a++; 767 return(1); 768 } 769 rcmp(Rune *a, Rune *b) /* compare runes */ 770 { 771 while(*a == *b++) 772 if (*a == '\0') 773 return(0); 774 else a++; 775 return(1); 776 } 777 778 char * 779 text(char *p) /* extract character string */ 780 { 781 Rune r; 782 783 while(*cp == '\t' || *cp == ' ') 784 cp++; 785 while (*cp) { 786 if ((r = *cp++) == '\\') 787 if ((r = *cp++) == 0) 788 break;; 789 if (r == '\n') 790 while (*cp == '\t' || *cp == ' ') 791 cp++; 792 p += runetochar(p, &r); 793 } 794 *p++ = '\0'; 795 return p; 796 } 797 798 Rune * 799 stext(Rune *p, Rune *end) /* extract rune string */ 800 { 801 while(*cp == '\t' || *cp == ' ') 802 cp++; 803 while (*cp) { 804 if (*cp == '\\') 805 if (*++cp == 0) 806 break; 807 if (p >= end-1) 808 quit(TMMES, (char *) linebuf); 809 if ((*p++ = *cp++) == '\n') 810 while(*cp == '\t' || *cp == ' ') 811 cp++; 812 } 813 *p++ = 0; 814 return p; 815 } 816 817 818 Label * 819 search (Label *ptr) 820 { 821 Label *rp; 822 823 for (rp = ltab; rp < ptr; rp++) 824 if(rcmp(rp->asc, ptr->asc) == 0) 825 return(rp); 826 return(0); 827 } 828 829 void 830 dechain(void) 831 { 832 Label *lptr; 833 SedCom *rptr, *trptr; 834 835 for(lptr = ltab; lptr < lab; lptr++) { 836 837 if(lptr->address == 0) 838 quit("Undefined label: %S", (char *) lptr->asc); 839 840 if(lptr->chain) { 841 rptr = lptr->chain; 842 while(trptr = rptr->lb1) { 843 rptr->lb1 = lptr->address; 844 rptr = trptr; 845 } 846 rptr->lb1 = lptr->address; 847 } 848 } 849 } 850 851 int 852 ycomp(SedCom *r) 853 { 854 int i; 855 Rune *rp; 856 Rune c, *tsp, highc; 857 Rune *sp; 858 859 highc = 0; 860 for(tsp = cp; *tsp != seof; tsp++) { 861 if(*tsp == '\\') 862 tsp++; 863 if(*tsp == '\n' || *tsp == '\0') 864 return(0); 865 if (*tsp > highc) highc = *tsp; 866 } 867 tsp++; 868 if ((rp = r->text = (Rune *) malloc(sizeof(Rune)*(highc+2))) == 0) 869 quit("Out of memory", 0); 870 *rp++ = highc; /* save upper bound */ 871 for (i = 0; i <= highc; i++) 872 rp[i] = i; 873 sp = cp; 874 while((c = *sp++) != seof) { 875 if(c == '\\' && *sp == 'n') { 876 sp++; 877 c = '\n'; 878 } 879 if((rp[c] = *tsp++) == '\\' && *tsp == 'n') { 880 rp[c] = '\n'; 881 tsp++; 882 } 883 if(rp[c] == seof || rp[c] == '\0') { 884 free(r->re1); 885 r->re1 = 0; 886 return(0); 887 } 888 } 889 if(*tsp != seof) { 890 free(r->re1); 891 r->re1 = 0; 892 return(0); 893 } 894 cp = tsp+1; 895 return(1); 896 } 897 898 void 899 execute(void) 900 { 901 SedCom *ipc; 902 903 while (spend = gline(linebuf)){ 904 for(ipc = pspace; ipc->command; ) { 905 if (!executable(ipc)) { 906 ipc++; 907 continue; 908 } 909 command(ipc); 910 911 if(ipc->ad2.type == A_RE && match(ipc->ad2.rp, linebuf)) 912 ipc->active = 0; 913 914 if(delflag) 915 break; 916 917 if(jflag) { 918 jflag = 0; 919 if((ipc = ipc->lb1) == 0) 920 break; 921 } else 922 ipc++; 923 924 } 925 if(!nflag && !delflag) 926 putline(&fout, linebuf, spend-linebuf); 927 if(aptr > abuf) { 928 arout(); 929 } 930 delflag = 0; 931 } 932 } 933 /* determine if a statement should be applied to an input line */ 934 int 935 executable(SedCom *ipc) 936 { 937 if (ipc->active) { /* Addr1 satisfied - accept until Addr2 */ 938 if (ipc->active == 1) /* Second line */ 939 ipc->active = 2; 940 switch(ipc->ad2.type) { 941 case A_NONE: /* No second addr; use first */ 942 ipc->active = 0; 943 break; 944 case A_DOL: /* Accept everything */ 945 return !ipc->negfl; 946 case A_LINE: /* Line at end of range? */ 947 if (lnum <= ipc->ad2.line) { 948 if (ipc->ad2.line == lnum) 949 ipc->active = 0; 950 return !ipc->negfl; 951 } 952 ipc->active = 0; /* out of range */ 953 return ipc->negfl; 954 case A_RE: /* Check for matching R.E. */ 955 if (match(ipc->ad2.rp, linebuf)) 956 ipc->active = 0; 957 return !ipc->negfl; 958 default: /* internal error */ 959 quit("Internal error", 0); 960 } 961 } 962 switch (ipc->ad1.type) { /* Check first address */ 963 case A_NONE: /* Everything matches */ 964 return !ipc->negfl; 965 case A_DOL: /* Only last line */ 966 if (dolflag) 967 return !ipc->negfl; 968 break; 969 case A_LINE: /* Check line number */ 970 if (ipc->ad1.line == lnum) { 971 ipc->active = 1; /* In range */ 972 return !ipc->negfl; 973 } 974 break; 975 case A_RE: /* Check R.E. */ 976 if (match(ipc->ad1.rp, linebuf)) { 977 ipc->active = 1; /* In range */ 978 return !ipc->negfl; 979 } 980 break; 981 default: 982 quit("Internal error", 0); 983 } 984 return ipc->negfl; 985 } 986 match(Reprog *pattern, Rune *buf) 987 { 988 if (!pattern) 989 return 0; 990 subexp[0].rsp = buf; 991 subexp[0].ep = 0; 992 if (rregexec(pattern, linebuf, subexp, MAXSUB)) { 993 loc1 = subexp[0].rsp; 994 loc2 = subexp[0].rep; 995 return 1; 996 } 997 loc1 = loc2 = 0; 998 return 0; 999 } 1000 substitute(SedCom *ipc) 1001 { 1002 if(match(ipc->re1, linebuf)) { 1003 sflag = 1; 1004 dosub(ipc->rhs); 1005 if(!ipc->gfl) 1006 return 1; 1007 while (match(ipc->re1, loc2)) { 1008 if (loc2-loc1 == 0){ /* NULL R.E. match */ 1009 if (*loc2++ == 0) 1010 break; 1011 } else { 1012 dosub(ipc->rhs); 1013 if(*loc2 == 0) 1014 break; 1015 } 1016 } 1017 return 1; 1018 } 1019 return 0; 1020 } 1021 1022 void 1023 dosub(Rune *rhsbuf) 1024 { 1025 Rune *lp, *sp; 1026 Rune *rp; 1027 int c, n; 1028 1029 lp = linebuf; 1030 sp = genbuf; 1031 rp = rhsbuf; 1032 while (lp < loc1) 1033 *sp++ = *lp++; 1034 while(c = *rp++) { 1035 if (c == '&') { 1036 sp = place(sp, loc1, loc2); 1037 continue; 1038 } 1039 if (c == 0xFFFF && (c = *rp++) >= '1' && c <= MAXSUB+'0') { 1040 n = c-'0'; 1041 if (subexp[n].rsp && subexp[n].rep) { 1042 sp = place(sp, subexp[n].rsp, subexp[n].rep); 1043 continue; 1044 } 1045 else { 1046 fprint(2, "sed: Invalid back reference \\%d\n",n); 1047 errexit(); 1048 } 1049 } 1050 *sp++ = c; 1051 if (sp >= &genbuf[LBSIZE]) 1052 fprint(2, "sed: Output line too long.\n"); 1053 } 1054 lp = loc2; 1055 loc2 = sp - genbuf + linebuf; 1056 while (*sp++ = *lp++) 1057 if (sp >= &genbuf[LBSIZE]) 1058 fprint(2, "sed: Output line too long.\n"); 1059 lp = linebuf; 1060 sp = genbuf; 1061 while (*lp++ = *sp++) 1062 ; 1063 spend = lp-1; 1064 } 1065 1066 Rune * 1067 place(Rune *sp, Rune *l1, Rune *l2) 1068 { 1069 while (l1 < l2) { 1070 *sp++ = *l1++; 1071 if (sp >= &genbuf[LBSIZE]) 1072 fprint(2, "sed: Output line too long.\n"); 1073 } 1074 return(sp); 1075 } 1076 1077 char * 1078 trans(int c) 1079 { 1080 static char buf[] = "\\x0000"; 1081 static char hex[] = "0123456789abcdef"; 1082 1083 switch(c) { 1084 case '\b': 1085 return "\\b"; 1086 case '\n': 1087 return "\\n"; 1088 case '\r': 1089 return "\\r"; 1090 case '\t': 1091 return "\\t"; 1092 case '\\': 1093 return "\\\\"; 1094 } 1095 buf[2] = hex[(c>>12)&0xF]; 1096 buf[3] = hex[(c>>8)&0xF]; 1097 buf[4] = hex[(c>>4)&0xF]; 1098 buf[5] = hex[c&0xF]; 1099 return buf; 1100 } 1101 1102 void 1103 command(SedCom *ipc) 1104 { 1105 int i, c; 1106 Rune *p1, *p2; 1107 char *ucp; 1108 Rune *rp; 1109 Rune *execp; 1110 1111 switch(ipc->command) { 1112 1113 case ACOM: 1114 *aptr++ = ipc; 1115 if(aptr >= abuf+MAXADDS) { 1116 quit("sed: Too many appends after line %ld\n", 1117 (char *) lnum); 1118 } 1119 *aptr = 0; 1120 break; 1121 case CCOM: 1122 delflag = 1; 1123 if(ipc->active == 1) { 1124 for(rp = ipc->text; *rp; rp++) 1125 Bputrune(&fout, *rp); 1126 Bputc(&fout, '\n'); 1127 } 1128 break; 1129 case DCOM: 1130 delflag++; 1131 break; 1132 case CDCOM: 1133 p1 = p2 = linebuf; 1134 while(*p1 != '\n') { 1135 if(*p1++ == 0) { 1136 delflag++; 1137 return; 1138 } 1139 } 1140 p1++; 1141 while(*p2++ = *p1++) 1142 ; 1143 spend = p2-1; 1144 jflag++; 1145 break; 1146 case EQCOM: 1147 Bprint(&fout, "%ld\n", lnum); 1148 break; 1149 case GCOM: 1150 p1 = linebuf; 1151 p2 = holdsp; 1152 while(*p1++ = *p2++) 1153 ; 1154 spend = p1-1; 1155 break; 1156 case CGCOM: 1157 *spend++ = '\n'; 1158 p1 = spend; 1159 p2 = holdsp; 1160 while(*p1++ = *p2++) 1161 if(p1 >= lbend) 1162 break; 1163 spend = p1-1; 1164 break; 1165 case HCOM: 1166 p1 = holdsp; 1167 p2 = linebuf; 1168 while(*p1++ = *p2++); 1169 hspend = p1-1; 1170 break; 1171 case CHCOM: 1172 *hspend++ = '\n'; 1173 p1 = hspend; 1174 p2 = linebuf; 1175 while(*p1++ = *p2++) 1176 if(p1 >= hend) 1177 break; 1178 hspend = p1-1; 1179 break; 1180 case ICOM: 1181 for(rp = ipc->text; *rp; rp++) 1182 Bputrune(&fout, *rp); 1183 Bputc(&fout, '\n'); 1184 break; 1185 case BCOM: 1186 jflag = 1; 1187 break; 1188 case LCOM: 1189 c = 0; 1190 for (i = 0, rp = linebuf; *rp; rp++) { 1191 c = *rp; 1192 if(c >= 0x20 && c < 0x7F && c != '\\') { 1193 Bputc(&fout, c); 1194 if(i++ > 71) { 1195 Bprint(&fout, "\\\n"); 1196 i = 0; 1197 } 1198 } else { 1199 for (ucp = trans(*rp); *ucp; ucp++){ 1200 c = *ucp; 1201 Bputc(&fout, c); 1202 if(i++ > 71) { 1203 Bprint(&fout, "\\\n"); 1204 i = 0; 1205 } 1206 } 1207 } 1208 } 1209 if(c == ' ') 1210 Bprint(&fout, "\\n"); 1211 Bputc(&fout, '\n'); 1212 break; 1213 case NCOM: 1214 if(!nflag) 1215 putline(&fout, linebuf, spend-linebuf); 1216 1217 if(aptr > abuf) 1218 arout(); 1219 if((execp = gline(linebuf)) == 0) { 1220 delflag = 1; 1221 break; 1222 } 1223 spend = execp; 1224 break; 1225 case CNCOM: 1226 if(aptr > abuf) 1227 arout(); 1228 *spend++ = '\n'; 1229 if((execp = gline(spend)) == 0) { 1230 delflag = 1; 1231 break; 1232 } 1233 spend = execp; 1234 break; 1235 case PCOM: 1236 putline(&fout, linebuf, spend-linebuf); 1237 break; 1238 case CPCOM: 1239 cpcom: 1240 for(rp = linebuf; *rp && *rp != '\n'; rp++) 1241 Bputc(&fout, *rp); 1242 Bputc(&fout, '\n'); 1243 break; 1244 case QCOM: 1245 if(!nflag) 1246 putline(&fout, linebuf, spend-linebuf); 1247 if(aptr > abuf) 1248 arout(); 1249 exits(0); 1250 case RCOM: 1251 *aptr++ = ipc; 1252 if(aptr >= &abuf[MAXADDS]) 1253 quit("sed: Too many reads after line %ld\n", 1254 (char *) lnum); 1255 *aptr = 0; 1256 break; 1257 case SCOM: 1258 i = substitute(ipc); 1259 if(i && ipc->pfl) 1260 if(ipc->pfl == 1) 1261 putline(&fout, linebuf, spend-linebuf); 1262 else 1263 goto cpcom; 1264 if(i && ipc->fcode) 1265 goto wcom; 1266 break; 1267 1268 case TCOM: 1269 if(sflag == 0) break; 1270 sflag = 0; 1271 jflag = 1; 1272 break; 1273 1274 wcom: 1275 case WCOM: 1276 putline(ipc->fcode,linebuf, spend-linebuf); 1277 break; 1278 case XCOM: 1279 p1 = linebuf; 1280 p2 = genbuf; 1281 while(*p2++ = *p1++); 1282 p1 = holdsp; 1283 p2 = linebuf; 1284 while(*p2++ = *p1++); 1285 spend = p2 - 1; 1286 p1 = genbuf; 1287 p2 = holdsp; 1288 while(*p2++ = *p1++); 1289 hspend = p2 - 1; 1290 break; 1291 case YCOM: 1292 p1 = linebuf; 1293 p2 = ipc->text; 1294 for (i = *p2++; *p1; p1++){ 1295 if (*p1 <= i) *p1 = p2[*p1]; 1296 } 1297 break; 1298 } 1299 1300 } 1301 1302 void 1303 putline(Biobuf *bp, Rune *buf, int n) 1304 { 1305 while (n--) 1306 Bputrune(bp, *buf++); 1307 Bputc(bp, '\n'); 1308 } 1309 ecmp(Rune *a, Rune *b, int count) 1310 { 1311 while(count--) 1312 if(*a++ != *b++) return(0); 1313 return(1); 1314 } 1315 1316 void 1317 arout(void) 1318 { 1319 Rune *p1; 1320 Biobuf *fi; 1321 int c; 1322 char *s; 1323 char buf[128]; 1324 1325 for (aptr = abuf; *aptr; aptr++) { 1326 if((*aptr)->command == ACOM) { 1327 for(p1 = (*aptr)->text; *p1; p1++ ) 1328 Bputrune(&fout, *p1); 1329 Bputc(&fout, '\n'); 1330 } else { 1331 for(s = buf, p1= (*aptr)->text; *p1; p1++) 1332 s += runetochar(s, p1); 1333 *s = '\0'; 1334 if((fi = Bopen(buf, OREAD)) == 0) 1335 continue; 1336 while((c = Bgetc(fi)) >= 0) 1337 Bputc(&fout, c); 1338 Bterm(fi); 1339 } 1340 } 1341 aptr = abuf; 1342 *aptr = 0; 1343 } 1344 1345 void 1346 errexit(void) 1347 { 1348 exits("error"); 1349 } 1350 1351 void 1352 quit (char *msg, char *arg) 1353 { 1354 fprint(2, "sed: "); 1355 fprint(2, msg, arg); 1356 fprint(2, "\n"); 1357 errexit(); 1358 } 1359 1360 Rune * 1361 gline(Rune *addr) 1362 { 1363 long c; 1364 Rune *p; 1365 1366 static long peekc = 0; 1367 1368 if (f == 0 && opendata() < 0) 1369 return 0; 1370 sflag = 0; 1371 lnum++; 1372 /* Bflush(&fout);********* dumped 4/30/92 - bobf****/ 1373 do { 1374 p = addr; 1375 for (c = (peekc ? peekc : Bgetrune(f)); c >= 0; c = Bgetrune(f)) { 1376 if (c == '\n') { 1377 if ((peekc = Bgetrune(f)) < 0) { 1378 Bterm(f); 1379 if (fhead == 0) 1380 dolflag = 1; 1381 } 1382 *p = '\0'; 1383 return p; 1384 } 1385 if (c && p < lbend) 1386 *p++ = c; 1387 } 1388 peekc = 0; 1389 } while (opendata() > 0); /* Switch to next stream */ 1390 return 0; 1391 } 1392 1393 /* Data file input section - the intent is to transparently 1394 * catenate all data input streams. 1395 */ 1396 void 1397 enroll(char *filename) /* Add a file to the input file cache */ 1398 { 1399 FileCache *fp; 1400 1401 if ((fp = (FileCache *) malloc(sizeof (FileCache))) == 0) 1402 quit("Out of memory", 0); 1403 if (ftail == 0) 1404 fhead = fp; 1405 else 1406 ftail->next = fp; 1407 ftail = fp; 1408 fp->next = 0; 1409 fp->name = filename; /* 0 => stdin */ 1410 } 1411 1412 int 1413 opendata(void) 1414 { 1415 if (fhead == 0) 1416 return -1; 1417 if (fhead->name) { 1418 if ((f = Bopen(fhead->name, OREAD)) == 0) 1419 quit("Can't open %s", fhead->name); 1420 } else { 1421 Binit(&stdin, 0, OREAD); 1422 f = &stdin; 1423 } 1424 fhead = fhead->next; 1425 return 1; 1426 } 1427