1 /*
2 * sed -- stream editor
3 */
4 #include <u.h>
5 #include <libc.h>
6 #include <bio.h>
7 #include <regexp.h>
8
9 enum {
10 DEPTH = 20, /* max nesting depth of {} */
11 MAXCMDS = 512, /* max sed commands */
12 ADDSIZE = 10000, /* size of add & read buffer */
13 MAXADDS = 20, /* max pending adds and reads */
14 LBSIZE = 8192, /* input line size */
15 LABSIZE = 50, /* max number of labels */
16 MAXSUB = 10, /* max number of sub reg exp */
17 MAXFILES = 120, /* max output files */
18 };
19
20 /*
21 * An address is a line #, a R.E., "$", a reference to the last
22 * R.E., or nothing.
23 */
24 typedef struct {
25 enum {
26 A_NONE,
27 A_DOL,
28 A_LINE,
29 A_RE,
30 A_LAST,
31 }type;
32 union {
33 long line; /* Line # */
34 Reprog *rp; /* Compiled R.E. */
35 };
36 } Addr;
37
38 typedef struct SEDCOM {
39 Addr ad1; /* optional start address */
40 Addr ad2; /* optional end address */
41 union {
42 Reprog *re1; /* compiled R.E. */
43 Rune *text; /* added text or file name */
44 struct SEDCOM *lb1; /* destination command of branch */
45 };
46 Rune *rhs; /* Right-hand side of substitution */
47 Biobuf* fcode; /* File ID for read and write */
48 char command; /* command code -see below */
49 char gfl; /* 'Global' flag for substitutions */
50 char pfl; /* 'print' flag for substitutions */
51 char active; /* 1 => data between start and end */
52 char negfl; /* negation flag */
53 } SedCom;
54
55 /* Command Codes for field SedCom.command */
56 #define ACOM 01
57 #define BCOM 020
58 #define CCOM 02
59 #define CDCOM 025
60 #define CNCOM 022
61 #define COCOM 017
62 #define CPCOM 023
63 #define DCOM 03
64 #define ECOM 015
65 #define EQCOM 013
66 #define FCOM 016
67 #define GCOM 027
68 #define CGCOM 030
69 #define HCOM 031
70 #define CHCOM 032
71 #define ICOM 04
72 #define LCOM 05
73 #define NCOM 012
74 #define PCOM 010
75 #define QCOM 011
76 #define RCOM 06
77 #define SCOM 07
78 #define TCOM 021
79 #define WCOM 014
80 #define CWCOM 024
81 #define YCOM 026
82 #define XCOM 033
83
84 typedef struct label { /* Label symbol table */
85 Rune uninm[9]; /* Label name */
86 SedCom *chain;
87 SedCom *address; /* Command associated with label */
88 } Label;
89
90 typedef struct FILE_CACHE { /* Data file control block */
91 struct FILE_CACHE *next; /* Forward Link */
92 char *name; /* Name of file */
93 } FileCache;
94
95 SedCom pspace[MAXCMDS]; /* Command storage */
96 SedCom *pend = pspace+MAXCMDS; /* End of command storage */
97 SedCom *rep = pspace; /* Current fill point */
98
99 Reprog *lastre = 0; /* Last regular expression */
100 Resub subexp[MAXSUB]; /* sub-patterns of pattern match*/
101
102 Rune addspace[ADDSIZE]; /* Buffer for a, c, & i commands */
103 Rune *addend = addspace+ADDSIZE;
104
105 SedCom *abuf[MAXADDS]; /* Queue of pending adds & reads */
106 SedCom **aptr = abuf;
107
108 struct { /* Sed program input control block */
109 enum PTYPE { /* Either on command line or in file */
110 P_ARG,
111 P_FILE,
112 } type;
113 union PCTL { /* Pointer to data */
114 Biobuf *bp;
115 char *curr;
116 };
117 } prog;
118
119 Rune genbuf[LBSIZE]; /* Miscellaneous buffer */
120
121 FileCache *fhead = 0; /* Head of File Cache Chain */
122 FileCache *ftail = 0; /* Tail of File Cache Chain */
123
124 Rune *loc1; /* Start of pattern match */
125 Rune *loc2; /* End of pattern match */
126 Rune seof; /* Pattern delimiter char */
127
128 Rune linebuf[LBSIZE+1]; /* Input data buffer */
129 Rune *lbend = linebuf+LBSIZE; /* End of buffer */
130 Rune *spend = linebuf; /* End of input data */
131 Rune *cp; /* Current scan point in linebuf */
132
133 Rune holdsp[LBSIZE+1]; /* Hold buffer */
134 Rune *hend = holdsp+LBSIZE; /* End of hold buffer */
135 Rune *hspend = holdsp; /* End of hold data */
136
137 int nflag; /* Command line flags */
138 int gflag;
139
140 int dolflag; /* Set when at true EOF */
141 int sflag; /* Set when substitution done */
142 int jflag; /* Set when jump required */
143 int delflag; /* Delete current line when set */
144
145 long lnum = 0; /* Input line count */
146
147 char fname[MAXFILES][40]; /* File name cache */
148 Biobuf *fcode[MAXFILES]; /* File ID cache */
149 int nfiles = 0; /* Cache fill point */
150
151 Biobuf fout; /* Output stream */
152 Biobuf stdin; /* Default input */
153 Biobuf* f = 0; /* Input data */
154
155 Label ltab[LABSIZE]; /* Label name symbol table */
156 Label *labend = ltab+LABSIZE; /* End of label table */
157 Label *lab = ltab+1; /* Current Fill point */
158
159 int depth = 0; /* {} stack pointer */
160
161 Rune bad; /* Dummy err ptr reference */
162 Rune *badp = &bad;
163
164
165 char CGMES[] = "%S command garbled: %S";
166 char TMMES[] = "Too much text: %S";
167 char LTL[] = "Label too long: %S";
168 char AD0MES[] = "No addresses allowed: %S";
169 char AD1MES[] = "Only one address allowed: %S";
170
171 void address(Addr *);
172 void arout(void);
173 int cmp(char *, char *);
174 int rcmp(Rune *, Rune *);
175 void command(SedCom *);
176 Reprog *compile(void);
177 Rune *compsub(Rune *, Rune *);
178 void dechain(void);
179 void dosub(Rune *);
180 int ecmp(Rune *, Rune *, int);
181 void enroll(char *);
182 void errexit(void);
183 int executable(SedCom *);
184 void execute(void);
185 void fcomp(void);
186 long getrune(void);
187 Rune *gline(Rune *);
188 int match(Reprog *, Rune *);
189 void newfile(enum PTYPE, char *);
190 int opendata(void);
191 Biobuf *open_file(char *);
192 Rune *place(Rune *, Rune *, Rune *);
193 void quit(char *, ...);
194 int rline(Rune *, Rune *);
195 Label *search(Label *);
196 int substitute(SedCom *);
197 char *text(char *);
198 Rune *stext(Rune *, Rune *);
199 int ycomp(SedCom *);
200 char * trans(int c);
201 void putline(Biobuf *bp, Rune *buf, int n);
202
203 void
main(int argc,char ** argv)204 main(int argc, char **argv)
205 {
206 int compfl;
207
208 lnum = 0;
209 Binit(&fout, 1, OWRITE);
210 fcode[nfiles++] = &fout;
211 compfl = 0;
212
213 if(argc == 1)
214 exits(0);
215 ARGBEGIN{
216 case 'e':
217 if (argc <= 1)
218 quit("missing pattern");
219 newfile(P_ARG, ARGF());
220 fcomp();
221 compfl = 1;
222 continue;
223 case 'f':
224 if(argc <= 1)
225 quit("no pattern-file");
226 newfile(P_FILE, ARGF());
227 fcomp();
228 compfl = 1;
229 continue;
230 case 'g':
231 gflag++;
232 continue;
233 case 'n':
234 nflag++;
235 continue;
236 default:
237 fprint(2, "sed: Unknown flag: %c\n", ARGC());
238 continue;
239 } ARGEND
240
241 if(compfl == 0) {
242 if (--argc < 0)
243 quit("missing pattern");
244 newfile(P_ARG, *argv++);
245 fcomp();
246 }
247
248 if(depth)
249 quit("Too many {'s");
250
251 ltab[0].address = rep;
252
253 dechain();
254
255 if(argc <= 0)
256 enroll(0); /* Add stdin to cache */
257 else
258 while(--argc >= 0)
259 enroll(*argv++);
260 execute();
261 exits(0);
262 }
263
264 void
fcomp(void)265 fcomp(void)
266 {
267 int i;
268 Label *lpt;
269 Rune *tp;
270 SedCom *pt, *pt1;
271 static Rune *p = addspace;
272 static SedCom **cmpend[DEPTH]; /* stack of {} operations */
273
274 while (rline(linebuf, lbend) >= 0) {
275 cp = linebuf;
276 comploop:
277 while(*cp == L' ' || *cp == L'\t')
278 cp++;
279 if(*cp == L'\0' || *cp == L'#')
280 continue;
281 if(*cp == L';') {
282 cp++;
283 goto comploop;
284 }
285
286 address(&rep->ad1);
287 if (rep->ad1.type != A_NONE) {
288 if (rep->ad1.type == A_LAST) {
289 if (!lastre)
290 quit("First RE may not be null");
291 rep->ad1.type = A_RE;
292 rep->ad1.rp = lastre;
293 }
294 if(*cp == L',' || *cp == L';') {
295 cp++;
296 address(&rep->ad2);
297 if (rep->ad2.type == A_LAST) {
298 rep->ad2.type = A_RE;
299 rep->ad2.rp = lastre;
300 }
301 } else
302 rep->ad2.type = A_NONE;
303 }
304 while(*cp == L' ' || *cp == L'\t')
305 cp++;
306
307 swit:
308 switch(*cp++) {
309 default:
310 quit("Unrecognized command: %S", linebuf);
311
312 case '!':
313 rep->negfl = 1;
314 goto swit;
315
316 case '{':
317 rep->command = BCOM;
318 rep->negfl = !rep->negfl;
319 cmpend[depth++] = &rep->lb1;
320 if(++rep >= pend)
321 quit("Too many commands: %S", linebuf);
322 if(*cp == '\0')
323 continue;
324 goto comploop;
325
326 case '}':
327 if(rep->ad1.type != A_NONE)
328 quit(AD0MES, linebuf);
329 if(--depth < 0)
330 quit("Too many }'s");
331 *cmpend[depth] = rep;
332 if(*cp == 0)
333 continue;
334 goto comploop;
335
336 case '=':
337 rep->command = EQCOM;
338 if(rep->ad2.type != A_NONE)
339 quit(AD1MES, linebuf);
340 break;
341
342 case ':':
343 if(rep->ad1.type != A_NONE)
344 quit(AD0MES, linebuf);
345
346 while(*cp == L' ')
347 cp++;
348 tp = lab->uninm;
349 while (*cp && *cp != L';' && *cp != L' ' &&
350 *cp != L'\t' && *cp != L'#') {
351 *tp++ = *cp++;
352 if(tp >= &lab->uninm[8])
353 quit(LTL, linebuf);
354 }
355 *tp = L'\0';
356
357 if (*lab->uninm == L'\0') /* no label? */
358 quit(CGMES, L":", linebuf);
359 if(lpt = search(lab)) {
360 if(lpt->address)
361 quit("Duplicate labels: %S", linebuf);
362 } else {
363 lab->chain = 0;
364 lpt = lab;
365 if(++lab >= labend)
366 quit("Too many labels: %S", linebuf);
367 }
368 lpt->address = rep;
369 if (*cp == L'#')
370 continue;
371 rep--; /* reuse this slot */
372 break;
373
374 case 'a':
375 rep->command = ACOM;
376 if(rep->ad2.type != A_NONE)
377 quit(AD1MES, linebuf);
378 if(*cp == L'\\')
379 cp++;
380 if(*cp++ != L'\n')
381 quit(CGMES, L"a", linebuf);
382 rep->text = p;
383 p = stext(p, addend);
384 break;
385 case 'c':
386 rep->command = CCOM;
387 if(*cp == L'\\')
388 cp++;
389 if(*cp++ != L'\n')
390 quit(CGMES, L"c", linebuf);
391 rep->text = p;
392 p = stext(p, addend);
393 break;
394 case 'i':
395 rep->command = ICOM;
396 if(rep->ad2.type != A_NONE)
397 quit(AD1MES, linebuf);
398 if(*cp == L'\\')
399 cp++;
400 if(*cp++ != L'\n')
401 quit(CGMES, L"i", linebuf);
402 rep->text = p;
403 p = stext(p, addend);
404 break;
405
406 case 'g':
407 rep->command = GCOM;
408 break;
409
410 case 'G':
411 rep->command = CGCOM;
412 break;
413
414 case 'h':
415 rep->command = HCOM;
416 break;
417
418 case 'H':
419 rep->command = CHCOM;
420 break;
421
422 case 't':
423 rep->command = TCOM;
424 goto jtcommon;
425
426 case 'b':
427 rep->command = BCOM;
428 jtcommon:
429 while(*cp == L' ')
430 cp++;
431 if(*cp == L'\0' || *cp == L';') {
432 /* no label; jump to end */
433 if(pt = ltab[0].chain) {
434 while((pt1 = pt->lb1) != nil)
435 pt = pt1;
436 pt->lb1 = rep;
437 } else
438 ltab[0].chain = rep;
439 break;
440 }
441
442 /* copy label into lab->uninm */
443 tp = lab->uninm;
444 while((*tp = *cp++) != L'\0' && *tp != L';')
445 if(++tp >= &lab->uninm[8])
446 quit(LTL, linebuf);
447 cp--;
448 *tp = L'\0';
449
450 if (*lab->uninm == L'\0')
451 /* shouldn't get here */
452 quit(CGMES, L"b or t", linebuf);
453 if((lpt = search(lab)) != nil) {
454 if(lpt->address)
455 rep->lb1 = lpt->address;
456 else {
457 for(pt = lpt->chain; pt != nil &&
458 (pt1 = pt->lb1) != nil; pt = pt1)
459 ;
460 if (pt)
461 pt->lb1 = rep;
462 }
463 } else { /* add new label */
464 lab->chain = rep;
465 lab->address = 0;
466 if(++lab >= labend)
467 quit("Too many labels: %S", linebuf);
468 }
469 break;
470
471 case 'n':
472 rep->command = NCOM;
473 break;
474
475 case 'N':
476 rep->command = CNCOM;
477 break;
478
479 case 'p':
480 rep->command = PCOM;
481 break;
482
483 case 'P':
484 rep->command = CPCOM;
485 break;
486
487 case 'r':
488 rep->command = RCOM;
489 if(rep->ad2.type != A_NONE)
490 quit(AD1MES, linebuf);
491 if(*cp++ != L' ')
492 quit(CGMES, L"r", linebuf);
493 rep->text = p;
494 p = stext(p, addend);
495 break;
496
497 case 'd':
498 rep->command = DCOM;
499 break;
500
501 case 'D':
502 rep->command = CDCOM;
503 rep->lb1 = pspace;
504 break;
505
506 case 'q':
507 rep->command = QCOM;
508 if(rep->ad2.type != A_NONE)
509 quit(AD1MES, linebuf);
510 break;
511
512 case 'l':
513 rep->command = LCOM;
514 break;
515
516 case 's':
517 rep->command = SCOM;
518 seof = *cp++;
519 if ((rep->re1 = compile()) == 0) {
520 if(!lastre)
521 quit("First RE may not be null.");
522 rep->re1 = lastre;
523 }
524 rep->rhs = p;
525 if((p = compsub(p, addend)) == 0)
526 quit(CGMES, L"s", linebuf);
527 if(*cp == L'g') {
528 cp++;
529 rep->gfl++;
530 } else if(gflag)
531 rep->gfl++;
532
533 if(*cp == L'p') {
534 cp++;
535 rep->pfl = 1;
536 }
537
538 if(*cp == L'P') {
539 cp++;
540 rep->pfl = 2;
541 }
542
543 if(*cp == L'w') {
544 cp++;
545 if(*cp++ != L' ')
546 quit(CGMES, L"s", linebuf);
547 text(fname[nfiles]);
548 for(i = nfiles - 1; i >= 0; i--)
549 if(cmp(fname[nfiles], fname[i]) == 0) {
550 rep->fcode = fcode[i];
551 goto done;
552 }
553 if(nfiles >= MAXFILES)
554 quit("Too many files in w commands 1");
555 rep->fcode = open_file(fname[nfiles]);
556 }
557 break;
558
559 case 'w':
560 rep->command = WCOM;
561 if(*cp++ != L' ')
562 quit(CGMES, L"w", linebuf);
563 text(fname[nfiles]);
564 for(i = nfiles - 1; i >= 0; i--)
565 if(cmp(fname[nfiles], fname[i]) == 0) {
566 rep->fcode = fcode[i];
567 goto done;
568 }
569 if(nfiles >= MAXFILES){
570 fprint(2, "sed: Too many files in w commands 2 \n");
571 fprint(2, "nfiles = %d; MAXF = %d\n",
572 nfiles, MAXFILES);
573 errexit();
574 }
575 rep->fcode = open_file(fname[nfiles]);
576 break;
577
578 case 'x':
579 rep->command = XCOM;
580 break;
581
582 case 'y':
583 rep->command = YCOM;
584 seof = *cp++;
585 if (ycomp(rep) == 0)
586 quit(CGMES, L"y", linebuf);
587 break;
588
589 }
590 done:
591 if(++rep >= pend)
592 quit("Too many commands, last: %S", linebuf);
593 if(*cp++ != L'\0') {
594 if(cp[-1] == L';')
595 goto comploop;
596 quit(CGMES, cp - 1, linebuf);
597 }
598 }
599 }
600
601 Biobuf *
open_file(char * name)602 open_file(char *name)
603 {
604 int fd;
605 Biobuf *bp;
606
607 if ((bp = malloc(sizeof(Biobuf))) == 0)
608 quit("Out of memory");
609 if ((fd = open(name, OWRITE)) < 0 &&
610 (fd = create(name, OWRITE, 0666)) < 0)
611 quit("Cannot create %s", name);
612 Binit(bp, fd, OWRITE);
613 Bseek(bp, 0, 2);
614 fcode[nfiles++] = bp;
615 return bp;
616 }
617
618 Rune *
compsub(Rune * rhs,Rune * end)619 compsub(Rune *rhs, Rune *end)
620 {
621 Rune r;
622
623 while ((r = *cp++) != '\0') {
624 if(r == '\\') {
625 if (rhs < end)
626 *rhs++ = Runemax;
627 else
628 return 0;
629 r = *cp++;
630 if(r == 'n')
631 r = '\n';
632 } else {
633 if(r == seof) {
634 if (rhs < end)
635 *rhs++ = '\0';
636 else
637 return 0;
638 return rhs;
639 }
640 }
641 if (rhs < end)
642 *rhs++ = r;
643 else
644 return 0;
645 }
646 return 0;
647 }
648
649 Reprog *
compile(void)650 compile(void)
651 {
652 Rune c;
653 char *ep;
654 char expbuf[512];
655
656 if((c = *cp++) == seof) /* L'//' */
657 return 0;
658 ep = expbuf;
659 do {
660 if (c == L'\0' || c == L'\n')
661 quit(TMMES, linebuf);
662 if (c == L'\\') {
663 if (ep >= expbuf+sizeof(expbuf))
664 quit(TMMES, linebuf);
665 ep += runetochar(ep, &c);
666 if ((c = *cp++) == L'n')
667 c = L'\n';
668 }
669 if (ep >= expbuf + sizeof(expbuf))
670 quit(TMMES, linebuf);
671 ep += runetochar(ep, &c);
672 } while ((c = *cp++) != seof);
673 *ep = 0;
674 return lastre = regcomp(expbuf);
675 }
676
677 void
regerror(char * s)678 regerror(char *s)
679 {
680 USED(s);
681 quit(CGMES, L"r.e.-using", linebuf);
682 }
683
684 void
newfile(enum PTYPE type,char * name)685 newfile(enum PTYPE type, char *name)
686 {
687 if (type == P_ARG)
688 prog.curr = name;
689 else if ((prog.bp = Bopen(name, OREAD)) == 0)
690 quit("Cannot open pattern-file: %s\n", name);
691 prog.type = type;
692 }
693
694 int
rline(Rune * buf,Rune * end)695 rline(Rune *buf, Rune *end)
696 {
697 long c;
698 Rune r;
699
700 while ((c = getrune()) >= 0) {
701 r = c;
702 if (r == '\\') {
703 if (buf <= end)
704 *buf++ = r;
705 if ((c = getrune()) < 0)
706 break;
707 r = c;
708 } else if (r == '\n') {
709 *buf = '\0';
710 return 1;
711 }
712 if (buf <= end)
713 *buf++ = r;
714 }
715 *buf = '\0';
716 return -1;
717 }
718
719 long
getrune(void)720 getrune(void)
721 {
722 long c;
723 Rune r;
724 char *p;
725
726 if (prog.type == P_ARG) {
727 if ((p = prog.curr) != 0) {
728 if (*p) {
729 prog.curr += chartorune(&r, p);
730 c = r;
731 } else {
732 c = '\n'; /* fake an end-of-line */
733 prog.curr = 0;
734 }
735 } else
736 c = -1;
737 } else if ((c = Bgetrune(prog.bp)) < 0)
738 Bterm(prog.bp);
739 return c;
740 }
741
742 void
address(Addr * ap)743 address(Addr *ap)
744 {
745 int c;
746 long lno;
747
748 if((c = *cp++) == '$')
749 ap->type = A_DOL;
750 else if(c == '/') {
751 seof = c;
752 if (ap->rp = compile())
753 ap->type = A_RE;
754 else
755 ap->type = A_LAST;
756 }
757 else if (c >= '0' && c <= '9') {
758 lno = c - '0';
759 while ((c = *cp) >= '0' && c <= '9')
760 lno = lno*10 + *cp++ - '0';
761 if(!lno)
762 quit("line number 0 is illegal",0);
763 ap->type = A_LINE;
764 ap->line = lno;
765 }
766 else {
767 cp--;
768 ap->type = A_NONE;
769 }
770 }
771
cmp(char * a,char * b)772 cmp(char *a, char *b) /* compare characters */
773 {
774 while(*a == *b++)
775 if (*a == '\0')
776 return 0;
777 else
778 a++;
779 return 1;
780 }
rcmp(Rune * a,Rune * b)781 rcmp(Rune *a, Rune *b) /* compare runes */
782 {
783 while(*a == *b++)
784 if (*a == '\0')
785 return 0;
786 else
787 a++;
788 return 1;
789 }
790
791 char *
text(char * p)792 text(char *p) /* extract character string */
793 {
794 Rune r;
795
796 while(*cp == ' ' || *cp == '\t')
797 cp++;
798 while (*cp) {
799 if ((r = *cp++) == '\\' && (r = *cp++) == '\0')
800 break;
801 if (r == '\n')
802 while (*cp == ' ' || *cp == '\t')
803 cp++;
804 p += runetochar(p, &r);
805 }
806 *p++ = '\0';
807 return p;
808 }
809
810 Rune *
stext(Rune * p,Rune * end)811 stext(Rune *p, Rune *end) /* extract rune string */
812 {
813 while(*cp == L' ' || *cp == L'\t')
814 cp++;
815 while (*cp) {
816 if (*cp == L'\\' && *++cp == L'\0')
817 break;
818 if (p >= end-1)
819 quit(TMMES, linebuf);
820 if ((*p++ = *cp++) == L'\n')
821 while(*cp == L' ' || *cp == L'\t')
822 cp++;
823 }
824 *p++ = 0;
825 return p;
826 }
827
828
829 Label *
search(Label * ptr)830 search(Label *ptr)
831 {
832 Label *rp;
833
834 for (rp = ltab; rp < ptr; rp++)
835 if(rcmp(rp->uninm, ptr->uninm) == 0)
836 return(rp);
837 return(0);
838 }
839
840 void
dechain(void)841 dechain(void)
842 {
843 Label *lptr;
844 SedCom *rptr, *trptr;
845
846 for(lptr = ltab; lptr < lab; lptr++) {
847 if(lptr->address == 0)
848 quit("Undefined label: %S", lptr->uninm);
849 if(lptr->chain) {
850 rptr = lptr->chain;
851 while((trptr = rptr->lb1) != nil) {
852 rptr->lb1 = lptr->address;
853 rptr = trptr;
854 }
855 rptr->lb1 = lptr->address;
856 }
857 }
858 }
859
860 int
ycomp(SedCom * r)861 ycomp(SedCom *r)
862 {
863 int i;
864 Rune *rp, *sp, *tsp;
865 Rune c, highc;
866
867 highc = 0;
868 for(tsp = cp; *tsp != seof; tsp++) {
869 if(*tsp == L'\\')
870 tsp++;
871 if(*tsp == L'\n' || *tsp == L'\0')
872 return 0;
873 if (*tsp > highc)
874 highc = *tsp;
875 }
876 tsp++;
877 if ((rp = r->text = (Rune *)malloc(sizeof(Rune) * (highc+2))) == nil)
878 quit("Out of memory");
879 *rp++ = highc; /* save upper bound */
880 for (i = 0; i <= highc; i++)
881 rp[i] = i;
882 sp = cp;
883 while((c = *sp++) != seof) {
884 if(c == L'\\' && *sp == L'n') {
885 sp++;
886 c = L'\n';
887 }
888 if((rp[c] = *tsp++) == L'\\' && *tsp == L'n') {
889 rp[c] = L'\n';
890 tsp++;
891 }
892 if(rp[c] == seof || rp[c] == L'\0') {
893 free(r->re1);
894 r->re1 = nil;
895 return 0;
896 }
897 }
898 if(*tsp != seof) {
899 free(r->re1);
900 r->re1 = nil;
901 return 0;
902 }
903 cp = tsp+1;
904 return 1;
905 }
906
907 void
execute(void)908 execute(void)
909 {
910 SedCom *ipc;
911
912 while (spend = gline(linebuf)){
913 for(ipc = pspace; ipc->command; ) {
914 if (!executable(ipc)) {
915 ipc++;
916 continue;
917 }
918 command(ipc);
919
920 if(delflag)
921 break;
922 if(jflag) {
923 jflag = 0;
924 if((ipc = ipc->lb1) == 0)
925 break;
926 } else
927 ipc++;
928 }
929 if(!nflag && !delflag)
930 putline(&fout, linebuf, spend - linebuf);
931 if(aptr > abuf)
932 arout();
933 delflag = 0;
934 }
935 }
936
937 /* determine if a statement should be applied to an input line */
938 int
executable(SedCom * ipc)939 executable(SedCom *ipc)
940 {
941 if (ipc->active) { /* Addr1 satisfied - accept until Addr2 */
942 if (ipc->active == 1) /* Second line */
943 ipc->active = 2;
944 switch(ipc->ad2.type) {
945 case A_NONE: /* No second addr; use first */
946 ipc->active = 0;
947 break;
948 case A_DOL: /* Accept everything */
949 return !ipc->negfl;
950 case A_LINE: /* Line at end of range? */
951 if (lnum <= ipc->ad2.line) {
952 if (ipc->ad2.line == lnum)
953 ipc->active = 0;
954 return !ipc->negfl;
955 }
956 ipc->active = 0; /* out of range */
957 return ipc->negfl;
958 case A_RE: /* Check for matching R.E. */
959 if (match(ipc->ad2.rp, linebuf))
960 ipc->active = 0;
961 return !ipc->negfl;
962 default:
963 quit("Internal error");
964 }
965 }
966 switch (ipc->ad1.type) { /* Check first address */
967 case A_NONE: /* Everything matches */
968 return !ipc->negfl;
969 case A_DOL: /* Only last line */
970 if (dolflag)
971 return !ipc->negfl;
972 break;
973 case A_LINE: /* Check line number */
974 if (ipc->ad1.line == lnum) {
975 ipc->active = 1; /* In range */
976 return !ipc->negfl;
977 }
978 break;
979 case A_RE: /* Check R.E. */
980 if (match(ipc->ad1.rp, linebuf)) {
981 ipc->active = 1; /* In range */
982 return !ipc->negfl;
983 }
984 break;
985 default:
986 quit("Internal error");
987 }
988 return ipc->negfl;
989 }
990
991 int
match(Reprog * pattern,Rune * buf)992 match(Reprog *pattern, Rune *buf)
993 {
994 if (!pattern)
995 return 0;
996 subexp[0].rsp = buf;
997 subexp[0].ep = 0;
998 if (rregexec(pattern, linebuf, subexp, MAXSUB) > 0) {
999 loc1 = subexp[0].rsp;
1000 loc2 = subexp[0].rep;
1001 return 1;
1002 }
1003 loc1 = loc2 = 0;
1004 return 0;
1005 }
1006
1007 int
substitute(SedCom * ipc)1008 substitute(SedCom *ipc)
1009 {
1010 int len;
1011
1012 if(!match(ipc->re1, linebuf))
1013 return 0;
1014
1015 /*
1016 * we have at least one match. some patterns, e.g. '$' or '^', can
1017 * produce 0-length matches, so during a global substitute we must
1018 * bump to the character after a 0-length match to keep from looping.
1019 */
1020 sflag = 1;
1021 if(ipc->gfl == 0) /* single substitution */
1022 dosub(ipc->rhs);
1023 else
1024 do{ /* global substitution */
1025 len = loc2 - loc1; /* length of match */
1026 dosub(ipc->rhs); /* dosub moves loc2 */
1027 if(*loc2 == 0) /* end of string */
1028 break;
1029 if(len == 0) /* zero-length R.E. match */
1030 loc2++; /* bump over 0-length match */
1031 if(*loc2 == 0) /* end of string */
1032 break;
1033 } while(match(ipc->re1, loc2));
1034 return 1;
1035 }
1036
1037 void
dosub(Rune * rhsbuf)1038 dosub(Rune *rhsbuf)
1039 {
1040 int c, n;
1041 Rune *lp, *sp, *rp;
1042
1043 lp = linebuf;
1044 sp = genbuf;
1045 rp = rhsbuf;
1046 while (lp < loc1)
1047 *sp++ = *lp++;
1048 while(c = *rp++) {
1049 if (c == '&') {
1050 sp = place(sp, loc1, loc2);
1051 continue;
1052 }
1053 if (c == Runemax && (c = *rp++) >= '1' && c < MAXSUB + '0') {
1054 n = c-'0';
1055 if (subexp[n].rsp && subexp[n].rep) {
1056 sp = place(sp, subexp[n].rsp, subexp[n].rep);
1057 continue;
1058 }
1059 else {
1060 fprint(2, "sed: Invalid back reference \\%d\n",n);
1061 errexit();
1062 }
1063 }
1064 *sp++ = c;
1065 if (sp >= &genbuf[LBSIZE])
1066 fprint(2, "sed: Output line too long.\n");
1067 }
1068 lp = loc2;
1069 loc2 = sp - genbuf + linebuf;
1070 while (*sp++ = *lp++)
1071 if (sp >= &genbuf[LBSIZE])
1072 fprint(2, "sed: Output line too long.\n");
1073 lp = linebuf;
1074 sp = genbuf;
1075 while (*lp++ = *sp++)
1076 ;
1077 spend = lp - 1;
1078 }
1079
1080 Rune *
place(Rune * sp,Rune * l1,Rune * l2)1081 place(Rune *sp, Rune *l1, Rune *l2)
1082 {
1083 while (l1 < l2) {
1084 *sp++ = *l1++;
1085 if (sp >= &genbuf[LBSIZE])
1086 fprint(2, "sed: Output line too long.\n");
1087 }
1088 return sp;
1089 }
1090
1091 char *
trans(int c)1092 trans(int c)
1093 {
1094 static char buf[] = "\\x0000";
1095 static char hex[] = "0123456789abcdef";
1096
1097 switch(c) {
1098 case '\b':
1099 return "\\b";
1100 case '\n':
1101 return "\\n";
1102 case '\r':
1103 return "\\r";
1104 case '\t':
1105 return "\\t";
1106 case '\\':
1107 return "\\\\";
1108 }
1109 buf[2] = hex[(c>>12)&0xF];
1110 buf[3] = hex[(c>>8)&0xF];
1111 buf[4] = hex[(c>>4)&0xF];
1112 buf[5] = hex[c&0xF];
1113 return buf;
1114 }
1115
1116 void
command(SedCom * ipc)1117 command(SedCom *ipc)
1118 {
1119 int i, c;
1120 char *ucp;
1121 Rune *execp, *p1, *p2, *rp;
1122
1123 switch(ipc->command) {
1124 case ACOM:
1125 *aptr++ = ipc;
1126 if(aptr >= abuf+MAXADDS)
1127 quit("sed: Too many appends after line %ld\n",
1128 (char *)lnum);
1129 *aptr = 0;
1130 break;
1131 case CCOM:
1132 delflag = 1;
1133 if(ipc->active == 1) {
1134 for(rp = ipc->text; *rp; rp++)
1135 Bputrune(&fout, *rp);
1136 Bputc(&fout, '\n');
1137 }
1138 break;
1139 case DCOM:
1140 delflag++;
1141 break;
1142 case CDCOM:
1143 p1 = p2 = linebuf;
1144 while(*p1 != '\n') {
1145 if(*p1++ == 0) {
1146 delflag++;
1147 return;
1148 }
1149 }
1150 p1++;
1151 while(*p2++ = *p1++)
1152 ;
1153 spend = p2 - 1;
1154 jflag++;
1155 break;
1156 case EQCOM:
1157 Bprint(&fout, "%ld\n", lnum);
1158 break;
1159 case GCOM:
1160 p1 = linebuf;
1161 p2 = holdsp;
1162 while(*p1++ = *p2++)
1163 ;
1164 spend = p1 - 1;
1165 break;
1166 case CGCOM:
1167 *spend++ = '\n';
1168 p1 = spend;
1169 p2 = holdsp;
1170 while(*p1++ = *p2++)
1171 if(p1 >= lbend)
1172 break;
1173 spend = p1 - 1;
1174 break;
1175 case HCOM:
1176 p1 = holdsp;
1177 p2 = linebuf;
1178 while(*p1++ = *p2++);
1179 hspend = p1 - 1;
1180 break;
1181 case CHCOM:
1182 *hspend++ = '\n';
1183 p1 = hspend;
1184 p2 = linebuf;
1185 while(*p1++ = *p2++)
1186 if(p1 >= hend)
1187 break;
1188 hspend = p1 - 1;
1189 break;
1190 case ICOM:
1191 for(rp = ipc->text; *rp; rp++)
1192 Bputrune(&fout, *rp);
1193 Bputc(&fout, '\n');
1194 break;
1195 case BCOM:
1196 jflag = 1;
1197 break;
1198 case LCOM:
1199 c = 0;
1200 for (i = 0, rp = linebuf; *rp; rp++) {
1201 c = *rp;
1202 if(c >= 0x20 && c < 0x7F && c != '\\') {
1203 Bputc(&fout, c);
1204 if(i++ > 71) {
1205 Bprint(&fout, "\\\n");
1206 i = 0;
1207 }
1208 } else {
1209 for (ucp = trans(*rp); *ucp; ucp++){
1210 c = *ucp;
1211 Bputc(&fout, c);
1212 if(i++ > 71) {
1213 Bprint(&fout, "\\\n");
1214 i = 0;
1215 }
1216 }
1217 }
1218 }
1219 if(c == ' ')
1220 Bprint(&fout, "\\n");
1221 Bputc(&fout, '\n');
1222 break;
1223 case NCOM:
1224 if(!nflag)
1225 putline(&fout, linebuf, spend-linebuf);
1226
1227 if(aptr > abuf)
1228 arout();
1229 if((execp = gline(linebuf)) == 0) {
1230 delflag = 1;
1231 break;
1232 }
1233 spend = execp;
1234 break;
1235 case CNCOM:
1236 if(aptr > abuf)
1237 arout();
1238 *spend++ = '\n';
1239 if((execp = gline(spend)) == 0) {
1240 delflag = 1;
1241 break;
1242 }
1243 spend = execp;
1244 break;
1245 case PCOM:
1246 putline(&fout, linebuf, spend-linebuf);
1247 break;
1248 case CPCOM:
1249 cpcom:
1250 for(rp = linebuf; *rp && *rp != '\n'; rp++)
1251 Bputc(&fout, *rp);
1252 Bputc(&fout, '\n');
1253 break;
1254 case QCOM:
1255 if(!nflag)
1256 putline(&fout, linebuf, spend-linebuf);
1257 if(aptr > abuf)
1258 arout();
1259 exits(0);
1260 case RCOM:
1261 *aptr++ = ipc;
1262 if(aptr >= &abuf[MAXADDS])
1263 quit("sed: Too many reads after line %ld\n",
1264 (char *)lnum);
1265 *aptr = 0;
1266 break;
1267 case SCOM:
1268 i = substitute(ipc);
1269 if(i && ipc->pfl)
1270 if(ipc->pfl == 1)
1271 putline(&fout, linebuf, spend-linebuf);
1272 else
1273 goto cpcom;
1274 if(i && ipc->fcode)
1275 goto wcom;
1276 break;
1277
1278 case TCOM:
1279 if(sflag) {
1280 sflag = 0;
1281 jflag = 1;
1282 }
1283 break;
1284
1285 case WCOM:
1286 wcom:
1287 putline(ipc->fcode,linebuf, spend - linebuf);
1288 break;
1289 case XCOM:
1290 p1 = linebuf;
1291 p2 = genbuf;
1292 while(*p2++ = *p1++)
1293 ;
1294 p1 = holdsp;
1295 p2 = linebuf;
1296 while(*p2++ = *p1++)
1297 ;
1298 spend = p2 - 1;
1299 p1 = genbuf;
1300 p2 = holdsp;
1301 while(*p2++ = *p1++)
1302 ;
1303 hspend = p2 - 1;
1304 break;
1305 case YCOM:
1306 p1 = linebuf;
1307 p2 = ipc->text;
1308 for (i = *p2++; *p1; p1++)
1309 if (*p1 <= i)
1310 *p1 = p2[*p1];
1311 break;
1312 }
1313 }
1314
1315 void
putline(Biobuf * bp,Rune * buf,int n)1316 putline(Biobuf *bp, Rune *buf, int n)
1317 {
1318 while (n--)
1319 Bputrune(bp, *buf++);
1320 Bputc(bp, '\n');
1321 }
ecmp(Rune * a,Rune * b,int count)1322 ecmp(Rune *a, Rune *b, int count)
1323 {
1324 while(count--)
1325 if(*a++ != *b++)
1326 return 0;
1327 return 1;
1328 }
1329
1330 void
arout(void)1331 arout(void)
1332 {
1333 int c;
1334 char *s;
1335 char buf[128];
1336 Rune *p1;
1337 Biobuf *fi;
1338
1339 for (aptr = abuf; *aptr; aptr++) {
1340 if((*aptr)->command == ACOM) {
1341 for(p1 = (*aptr)->text; *p1; p1++ )
1342 Bputrune(&fout, *p1);
1343 Bputc(&fout, '\n');
1344 } else {
1345 for(s = buf, p1 = (*aptr)->text; *p1; p1++)
1346 s += runetochar(s, p1);
1347 *s = '\0';
1348 if((fi = Bopen(buf, OREAD)) == 0)
1349 continue;
1350 while((c = Bgetc(fi)) >= 0)
1351 Bputc(&fout, c);
1352 Bterm(fi);
1353 }
1354 }
1355 aptr = abuf;
1356 *aptr = 0;
1357 }
1358
1359 void
errexit(void)1360 errexit(void)
1361 {
1362 exits("error");
1363 }
1364
1365 void
quit(char * fmt,...)1366 quit(char *fmt, ...)
1367 {
1368 char *p, *ep;
1369 char msg[256];
1370 va_list arg;
1371
1372 ep = msg + sizeof msg;
1373 p = seprint(msg, ep, "sed: ");
1374 va_start(arg, fmt);
1375 p = vseprint(p, ep, fmt, arg);
1376 va_end(arg);
1377 p = seprint(p, ep, "\n");
1378 write(2, msg, p - msg);
1379 errexit();
1380 }
1381
1382 Rune *
gline(Rune * addr)1383 gline(Rune *addr)
1384 {
1385 long c;
1386 Rune *p;
1387 static long peekc = 0;
1388
1389 if (f == 0 && opendata() < 0)
1390 return 0;
1391 sflag = 0;
1392 lnum++;
1393 /* Bflush(&fout);********* dumped 4/30/92 - bobf****/
1394 do {
1395 p = addr;
1396 for (c = (peekc? peekc: Bgetrune(f)); c >= 0; c = Bgetrune(f)) {
1397 if (c == '\n') {
1398 if ((peekc = Bgetrune(f)) < 0 && fhead == 0)
1399 dolflag = 1;
1400 *p = '\0';
1401 return p;
1402 }
1403 if (c && p < lbend)
1404 *p++ = c;
1405 }
1406 /* return partial final line, adding implicit newline */
1407 if(p != addr) {
1408 *p = '\0';
1409 peekc = -1;
1410 if (fhead == 0)
1411 dolflag = 1;
1412 return p;
1413 }
1414 peekc = 0;
1415 Bterm(f);
1416 } while (opendata() > 0); /* Switch to next stream */
1417 f = 0;
1418 return 0;
1419 }
1420
1421 /*
1422 * Data file input section - the intent is to transparently
1423 * catenate all data input streams.
1424 */
1425 void
enroll(char * filename)1426 enroll(char *filename) /* Add a file to the input file cache */
1427 {
1428 FileCache *fp;
1429
1430 if ((fp = (FileCache *)malloc(sizeof (FileCache))) == nil)
1431 quit("Out of memory");
1432 if (ftail == nil)
1433 fhead = fp;
1434 else
1435 ftail->next = fp;
1436 ftail = fp;
1437 fp->next = nil;
1438 fp->name = filename; /* 0 => stdin */
1439 }
1440
1441 int
opendata(void)1442 opendata(void)
1443 {
1444 if (fhead == nil)
1445 return -1;
1446 if (fhead->name) {
1447 if ((f = Bopen(fhead->name, OREAD)) == nil)
1448 quit("Can't open %s", fhead->name);
1449 } else {
1450 Binit(&stdin, 0, OREAD);
1451 f = &stdin;
1452 }
1453 fhead = fhead->next;
1454 return 1;
1455 }
1456