xref: /plan9/sys/src/cmd/sed.c (revision 82726826a7b3d40fb66339b4b0e95b60314f98b9)
1 /*
2  * sed -- stream editor
3  */
4 #include <u.h>
5 #include <libc.h>
6 #include <bio.h>
7 #include <regexp.h>
8 
9 enum {
10 	DEPTH		= 20,		/* max nesting depth of {} */
11 	MAXCMDS		= 512,		/* max sed commands */
12 	ADDSIZE		= 10000,	/* size of add & read buffer */
13 	MAXADDS		= 20,		/* max pending adds and reads */
14 	LBSIZE		= 8192,		/* input line size */
15 	LABSIZE		= 50,		/* max number of labels */
16 	MAXSUB		= 10,		/* max number of sub reg exp */
17 	MAXFILES	= 120,		/* max output files */
18 };
19 
20 /*
21  * An address is a line #, a R.E., "$", a reference to the last
22  * R.E., or nothing.
23  */
24 typedef struct {
25 	enum {
26 		A_NONE,
27 		A_DOL,
28 		A_LINE,
29 		A_RE,
30 		A_LAST,
31 	}type;
32 	union {
33 		long	line;		/* Line # */
34 		Reprog	*rp;		/* Compiled R.E. */
35 	};
36 } Addr;
37 
38 typedef struct	SEDCOM {
39 	Addr	ad1;			/* optional start address */
40 	Addr	ad2;			/* optional end address */
41 	union {
42 		Reprog	*re1;		/* compiled R.E. */
43 		Rune	*text;		/* added text or file name */
44 		struct	SEDCOM	*lb1;	/* destination command of branch */
45 	};
46 	Rune	*rhs;			/* Right-hand side of substitution */
47 	Biobuf*	fcode;			/* File ID for read and write */
48 	char	command;		/* command code -see below */
49 	char	gfl;			/* 'Global' flag for substitutions */
50 	char	pfl;			/* 'print' flag for substitutions */
51 	char	active;			/* 1 => data between start and end */
52 	char	negfl;			/* negation flag */
53 } SedCom;
54 
55 /* Command Codes for field SedCom.command */
56 #define ACOM	01
57 #define BCOM	020
58 #define CCOM	02
59 #define	CDCOM	025
60 #define	CNCOM	022
61 #define COCOM	017
62 #define	CPCOM	023
63 #define DCOM	03
64 #define ECOM	015
65 #define EQCOM	013
66 #define FCOM	016
67 #define GCOM	027
68 #define CGCOM	030
69 #define HCOM	031
70 #define CHCOM	032
71 #define ICOM	04
72 #define LCOM	05
73 #define NCOM	012
74 #define PCOM	010
75 #define QCOM	011
76 #define RCOM	06
77 #define SCOM	07
78 #define TCOM	021
79 #define WCOM	014
80 #define	CWCOM	024
81 #define	YCOM	026
82 #define XCOM	033
83 
84 typedef struct label {			/* Label symbol table */
85 	Rune	uninm[9];		/* Label name */
86 	SedCom	*chain;
87 	SedCom	*address;		/* Command associated with label */
88 } Label;
89 
90 typedef	struct	FILE_CACHE {		/* Data file control block */
91 	struct FILE_CACHE *next;	/* Forward Link */
92 	char	*name;			/* Name of file */
93 } FileCache;
94 
95 SedCom pspace[MAXCMDS];			/* Command storage */
96 SedCom *pend = pspace+MAXCMDS;		/* End of command storage */
97 SedCom *rep = pspace;			/* Current fill point */
98 
99 Reprog	*lastre = 0;			/* Last regular expression */
100 Resub	subexp[MAXSUB];			/* sub-patterns of pattern match*/
101 
102 Rune	addspace[ADDSIZE];		/* Buffer for a, c, & i commands */
103 Rune	*addend = addspace+ADDSIZE;
104 
105 SedCom	*abuf[MAXADDS];			/* Queue of pending adds & reads */
106 SedCom	**aptr = abuf;
107 
108 struct {				/* Sed program input control block */
109 	enum PTYPE { 			/* Either on command line or in file */
110 		P_ARG,
111 		P_FILE,
112 	} type;
113 	union PCTL {			/* Pointer to data */
114 		Biobuf	*bp;
115 		char	*curr;
116 	};
117 } prog;
118 
119 Rune	genbuf[LBSIZE];			/* Miscellaneous buffer */
120 
121 FileCache	*fhead = 0;		/* Head of File Cache Chain */
122 FileCache	*ftail = 0;		/* Tail of File Cache Chain */
123 
124 Rune	*loc1;				/* Start of pattern match */
125 Rune	*loc2;				/* End of pattern match */
126 Rune	seof;				/* Pattern delimiter char */
127 
128 Rune	linebuf[LBSIZE+1];		/* Input data buffer */
129 Rune	*lbend = linebuf+LBSIZE;	/* End of buffer */
130 Rune	*spend = linebuf;		/* End of input data */
131 Rune	*cp;				/* Current scan point in linebuf */
132 
133 Rune	holdsp[LBSIZE+1];		/* Hold buffer */
134 Rune	*hend = holdsp+LBSIZE;		/* End of hold buffer */
135 Rune	*hspend = holdsp;		/* End of hold data */
136 
137 int	nflag;				/* Command line flags */
138 int	gflag;
139 
140 int	dolflag;			/* Set when at true EOF */
141 int	sflag;				/* Set when substitution done */
142 int	jflag;				/* Set when jump required */
143 int	delflag;			/* Delete current line when set */
144 
145 long	lnum = 0;			/* Input line count */
146 
147 char	fname[MAXFILES][40];		/* File name cache */
148 Biobuf	*fcode[MAXFILES];		/* File ID cache */
149 int	nfiles = 0;			/* Cache fill point */
150 
151 Biobuf	fout;				/* Output stream */
152 Biobuf	stdin;				/* Default input */
153 Biobuf*	f = 0;				/* Input data */
154 
155 Label	ltab[LABSIZE];			/* Label name symbol table */
156 Label	*labend = ltab+LABSIZE;		/* End of label table */
157 Label	*lab = ltab+1;			/* Current Fill point */
158 
159 int	depth = 0;			/* {} stack pointer */
160 
161 Rune	bad;				/* Dummy err ptr reference */
162 Rune	*badp = &bad;
163 
164 
165 char	CGMES[]	 = 	"%S command garbled: %S";
166 char	TMMES[]	 = 	"Too much text: %S";
167 char	LTL[]	 = 	"Label too long: %S";
168 char	AD0MES[] =	"No addresses allowed: %S";
169 char	AD1MES[] =	"Only one address allowed: %S";
170 
171 void	address(Addr *);
172 void	arout(void);
173 int	cmp(char *, char *);
174 int	rcmp(Rune *, Rune *);
175 void	command(SedCom *);
176 Reprog	*compile(void);
177 Rune	*compsub(Rune *, Rune *);
178 void	dechain(void);
179 void	dosub(Rune *);
180 int	ecmp(Rune *, Rune *, int);
181 void	enroll(char *);
182 void	errexit(void);
183 int	executable(SedCom *);
184 void	execute(void);
185 void	fcomp(void);
186 long	getrune(void);
187 Rune	*gline(Rune *);
188 int	match(Reprog *, Rune *);
189 void	newfile(enum PTYPE, char *);
190 int 	opendata(void);
191 Biobuf	*open_file(char *);
192 Rune	*place(Rune *, Rune *, Rune *);
193 void	quit(char *, ...);
194 int	rline(Rune *, Rune *);
195 Label	*search(Label *);
196 int	substitute(SedCom *);
197 char	*text(char *);
198 Rune	*stext(Rune *, Rune *);
199 int	ycomp(SedCom *);
200 char *	trans(int c);
201 void	putline(Biobuf *bp, Rune *buf, int n);
202 
203 void
main(int argc,char ** argv)204 main(int argc, char **argv)
205 {
206 	int compfl;
207 
208 	lnum = 0;
209 	Binit(&fout, 1, OWRITE);
210 	fcode[nfiles++] = &fout;
211 	compfl = 0;
212 
213 	if(argc == 1)
214 		exits(0);
215 	ARGBEGIN{
216 	case 'e':
217 		if (argc <= 1)
218 			quit("missing pattern");
219 		newfile(P_ARG, ARGF());
220 		fcomp();
221 		compfl = 1;
222 		continue;
223 	case 'f':
224 		if(argc <= 1)
225 			quit("no pattern-file");
226 		newfile(P_FILE, ARGF());
227 		fcomp();
228 		compfl = 1;
229 		continue;
230 	case 'g':
231 		gflag++;
232 		continue;
233 	case 'n':
234 		nflag++;
235 		continue;
236 	default:
237 		fprint(2, "sed: Unknown flag: %c\n", ARGC());
238 		continue;
239 	} ARGEND
240 
241 	if(compfl == 0) {
242 		if (--argc < 0)
243 			quit("missing pattern");
244 		newfile(P_ARG, *argv++);
245 		fcomp();
246 	}
247 
248 	if(depth)
249 		quit("Too many {'s");
250 
251 	ltab[0].address = rep;
252 
253 	dechain();
254 
255 	if(argc <= 0)
256 		enroll(0);		/* Add stdin to cache */
257 	else
258 		while(--argc >= 0)
259 			enroll(*argv++);
260 	execute();
261 	exits(0);
262 }
263 
264 void
fcomp(void)265 fcomp(void)
266 {
267 	int	i;
268 	Label	*lpt;
269 	Rune	*tp;
270 	SedCom	*pt, *pt1;
271 	static Rune	*p = addspace;
272 	static SedCom	**cmpend[DEPTH];	/* stack of {} operations */
273 
274 	while (rline(linebuf, lbend) >= 0) {
275 		cp = linebuf;
276 comploop:
277 		while(*cp == L' ' || *cp == L'\t')
278 			cp++;
279 		if(*cp == L'\0' || *cp == L'#')
280 			continue;
281 		if(*cp == L';') {
282 			cp++;
283 			goto comploop;
284 		}
285 
286 		address(&rep->ad1);
287 		if (rep->ad1.type != A_NONE) {
288 			if (rep->ad1.type == A_LAST) {
289 				if (!lastre)
290 					quit("First RE may not be null");
291 				rep->ad1.type = A_RE;
292 				rep->ad1.rp = lastre;
293 			}
294 			if(*cp == L',' || *cp == L';') {
295 				cp++;
296 				address(&rep->ad2);
297 				if (rep->ad2.type == A_LAST) {
298 					rep->ad2.type = A_RE;
299 					rep->ad2.rp = lastre;
300 				}
301 			} else
302 				rep->ad2.type = A_NONE;
303 		}
304 		while(*cp == L' ' || *cp == L'\t')
305 			cp++;
306 
307 swit:
308 		switch(*cp++) {
309 		default:
310 			quit("Unrecognized command: %S", linebuf);
311 
312 		case '!':
313 			rep->negfl = 1;
314 			goto swit;
315 
316 		case '{':
317 			rep->command = BCOM;
318 			rep->negfl = !rep->negfl;
319 			cmpend[depth++] = &rep->lb1;
320 			if(++rep >= pend)
321 				quit("Too many commands: %S", linebuf);
322 			if(*cp == '\0')
323 				continue;
324 			goto comploop;
325 
326 		case '}':
327 			if(rep->ad1.type != A_NONE)
328 				quit(AD0MES, linebuf);
329 			if(--depth < 0)
330 				quit("Too many }'s");
331 			*cmpend[depth] = rep;
332 			if(*cp == 0)
333 				continue;
334 			goto comploop;
335 
336 		case '=':
337 			rep->command = EQCOM;
338 			if(rep->ad2.type != A_NONE)
339 				quit(AD1MES, linebuf);
340 			break;
341 
342 		case ':':
343 			if(rep->ad1.type != A_NONE)
344 				quit(AD0MES, linebuf);
345 
346 			while(*cp == L' ')
347 				cp++;
348 			tp = lab->uninm;
349 			while (*cp && *cp != L';' && *cp != L' ' &&
350 			    *cp != L'\t' && *cp != L'#') {
351 				*tp++ = *cp++;
352 				if(tp >= &lab->uninm[8])
353 					quit(LTL, linebuf);
354 			}
355 			*tp = L'\0';
356 
357 			if (*lab->uninm == L'\0')		/* no label? */
358 				quit(CGMES, L":", linebuf);
359 			if(lpt = search(lab)) {
360 				if(lpt->address)
361 					quit("Duplicate labels: %S", linebuf);
362 			} else {
363 				lab->chain = 0;
364 				lpt = lab;
365 				if(++lab >= labend)
366 					quit("Too many labels: %S", linebuf);
367 			}
368 			lpt->address = rep;
369 			if (*cp == L'#')
370 				continue;
371 			rep--;			/* reuse this slot */
372 			break;
373 
374 		case 'a':
375 			rep->command = ACOM;
376 			if(rep->ad2.type != A_NONE)
377 				quit(AD1MES, linebuf);
378 			if(*cp == L'\\')
379 				cp++;
380 			if(*cp++ != L'\n')
381 				quit(CGMES, L"a", linebuf);
382 			rep->text = p;
383 			p = stext(p, addend);
384 			break;
385 		case 'c':
386 			rep->command = CCOM;
387 			if(*cp == L'\\')
388 				cp++;
389 			if(*cp++ != L'\n')
390 				quit(CGMES, L"c", linebuf);
391 			rep->text = p;
392 			p = stext(p, addend);
393 			break;
394 		case 'i':
395 			rep->command = ICOM;
396 			if(rep->ad2.type != A_NONE)
397 				quit(AD1MES, linebuf);
398 			if(*cp == L'\\')
399 				cp++;
400 			if(*cp++ != L'\n')
401 				quit(CGMES, L"i", linebuf);
402 			rep->text = p;
403 			p = stext(p, addend);
404 			break;
405 
406 		case 'g':
407 			rep->command = GCOM;
408 			break;
409 
410 		case 'G':
411 			rep->command = CGCOM;
412 			break;
413 
414 		case 'h':
415 			rep->command = HCOM;
416 			break;
417 
418 		case 'H':
419 			rep->command = CHCOM;
420 			break;
421 
422 		case 't':
423 			rep->command = TCOM;
424 			goto jtcommon;
425 
426 		case 'b':
427 			rep->command = BCOM;
428 jtcommon:
429 			while(*cp == L' ')
430 				cp++;
431 			if(*cp == L'\0' || *cp == L';') {
432 				/* no label; jump to end */
433 				if(pt = ltab[0].chain) {
434 					while((pt1 = pt->lb1) != nil)
435 						pt = pt1;
436 					pt->lb1 = rep;
437 				} else
438 					ltab[0].chain = rep;
439 				break;
440 			}
441 
442 			/* copy label into lab->uninm */
443 			tp = lab->uninm;
444 			while((*tp = *cp++) != L'\0' && *tp != L';')
445 				if(++tp >= &lab->uninm[8])
446 					quit(LTL, linebuf);
447 			cp--;
448 			*tp = L'\0';
449 
450 			if (*lab->uninm == L'\0')
451 				/* shouldn't get here */
452 				quit(CGMES, L"b or t", linebuf);
453 			if((lpt = search(lab)) != nil) {
454 				if(lpt->address)
455 					rep->lb1 = lpt->address;
456 				else {
457 					for(pt = lpt->chain; pt != nil &&
458 					    (pt1 = pt->lb1) != nil; pt = pt1)
459 						;
460 					if (pt)
461 						pt->lb1 = rep;
462 				}
463 			} else {			/* add new label */
464 				lab->chain = rep;
465 				lab->address = 0;
466 				if(++lab >= labend)
467 					quit("Too many labels: %S", linebuf);
468 			}
469 			break;
470 
471 		case 'n':
472 			rep->command = NCOM;
473 			break;
474 
475 		case 'N':
476 			rep->command = CNCOM;
477 			break;
478 
479 		case 'p':
480 			rep->command = PCOM;
481 			break;
482 
483 		case 'P':
484 			rep->command = CPCOM;
485 			break;
486 
487 		case 'r':
488 			rep->command = RCOM;
489 			if(rep->ad2.type != A_NONE)
490 				quit(AD1MES, linebuf);
491 			if(*cp++ != L' ')
492 				quit(CGMES, L"r", linebuf);
493 			rep->text = p;
494 			p = stext(p, addend);
495 			break;
496 
497 		case 'd':
498 			rep->command = DCOM;
499 			break;
500 
501 		case 'D':
502 			rep->command = CDCOM;
503 			rep->lb1 = pspace;
504 			break;
505 
506 		case 'q':
507 			rep->command = QCOM;
508 			if(rep->ad2.type != A_NONE)
509 				quit(AD1MES, linebuf);
510 			break;
511 
512 		case 'l':
513 			rep->command = LCOM;
514 			break;
515 
516 		case 's':
517 			rep->command = SCOM;
518 			seof = *cp++;
519 			if ((rep->re1 = compile()) == 0) {
520 				if(!lastre)
521 					quit("First RE may not be null.");
522 				rep->re1 = lastre;
523 			}
524 			rep->rhs = p;
525 			if((p = compsub(p, addend)) == 0)
526 				quit(CGMES, L"s", linebuf);
527 			if(*cp == L'g') {
528 				cp++;
529 				rep->gfl++;
530 			} else if(gflag)
531 				rep->gfl++;
532 
533 			if(*cp == L'p') {
534 				cp++;
535 				rep->pfl = 1;
536 			}
537 
538 			if(*cp == L'P') {
539 				cp++;
540 				rep->pfl = 2;
541 			}
542 
543 			if(*cp == L'w') {
544 				cp++;
545 				if(*cp++ !=  L' ')
546 					quit(CGMES, L"s", linebuf);
547 				text(fname[nfiles]);
548 				for(i = nfiles - 1; i >= 0; i--)
549 					if(cmp(fname[nfiles], fname[i]) == 0) {
550 						rep->fcode = fcode[i];
551 						goto done;
552 					}
553 				if(nfiles >= MAXFILES)
554 					quit("Too many files in w commands 1");
555 				rep->fcode = open_file(fname[nfiles]);
556 			}
557 			break;
558 
559 		case 'w':
560 			rep->command = WCOM;
561 			if(*cp++ != L' ')
562 				quit(CGMES, L"w", linebuf);
563 			text(fname[nfiles]);
564 			for(i = nfiles - 1; i >= 0; i--)
565 				if(cmp(fname[nfiles], fname[i]) == 0) {
566 					rep->fcode = fcode[i];
567 					goto done;
568 				}
569 			if(nfiles >= MAXFILES){
570 				fprint(2, "sed: Too many files in w commands 2 \n");
571 				fprint(2, "nfiles = %d; MAXF = %d\n",
572 					nfiles, MAXFILES);
573 				errexit();
574 			}
575 			rep->fcode = open_file(fname[nfiles]);
576 			break;
577 
578 		case 'x':
579 			rep->command = XCOM;
580 			break;
581 
582 		case 'y':
583 			rep->command = YCOM;
584 			seof = *cp++;
585 			if (ycomp(rep) == 0)
586 				quit(CGMES, L"y", linebuf);
587 			break;
588 
589 		}
590 done:
591 		if(++rep >= pend)
592 			quit("Too many commands, last: %S", linebuf);
593 		if(*cp++ != L'\0') {
594 			if(cp[-1] == L';')
595 				goto comploop;
596 			quit(CGMES, cp - 1, linebuf);
597 		}
598 	}
599 }
600 
601 Biobuf *
open_file(char * name)602 open_file(char *name)
603 {
604 	int fd;
605 	Biobuf *bp;
606 
607 	if ((bp = malloc(sizeof(Biobuf))) == 0)
608 		quit("Out of memory");
609 	if ((fd = open(name, OWRITE)) < 0 &&
610 	    (fd = create(name, OWRITE, 0666)) < 0)
611 		quit("Cannot create %s", name);
612 	Binit(bp, fd, OWRITE);
613 	Bseek(bp, 0, 2);
614 	fcode[nfiles++] = bp;
615 	return bp;
616 }
617 
618 Rune *
compsub(Rune * rhs,Rune * end)619 compsub(Rune *rhs, Rune *end)
620 {
621 	Rune r;
622 
623 	while ((r = *cp++) != '\0') {
624 		if(r == '\\') {
625 			if (rhs < end)
626 				*rhs++ = Runemax;
627 			else
628 				return 0;
629 			r = *cp++;
630 			if(r == 'n')
631 				r = '\n';
632 		} else {
633 			if(r == seof) {
634 				if (rhs < end)
635 					*rhs++ = '\0';
636 				else
637 					return 0;
638 				return rhs;
639 			}
640 		}
641 		if (rhs < end)
642 			*rhs++ = r;
643 		else
644 			return 0;
645 	}
646 	return 0;
647 }
648 
649 Reprog *
compile(void)650 compile(void)
651 {
652 	Rune c;
653 	char *ep;
654 	char expbuf[512];
655 
656 	if((c = *cp++) == seof)		/* L'//' */
657 		return 0;
658 	ep = expbuf;
659 	do {
660 		if (c == L'\0' || c == L'\n')
661 			quit(TMMES, linebuf);
662 		if (c == L'\\') {
663 			if (ep >= expbuf+sizeof(expbuf))
664 				quit(TMMES, linebuf);
665 			ep += runetochar(ep, &c);
666 			if ((c = *cp++) == L'n')
667 				c = L'\n';
668 		}
669 		if (ep >= expbuf + sizeof(expbuf))
670 			quit(TMMES, linebuf);
671 		ep += runetochar(ep, &c);
672 	} while ((c = *cp++) != seof);
673 	*ep = 0;
674 	return lastre = regcomp(expbuf);
675 }
676 
677 void
regerror(char * s)678 regerror(char *s)
679 {
680 	USED(s);
681 	quit(CGMES, L"r.e.-using", linebuf);
682 }
683 
684 void
newfile(enum PTYPE type,char * name)685 newfile(enum PTYPE type, char *name)
686 {
687 	if (type == P_ARG)
688 		prog.curr = name;
689 	else if ((prog.bp = Bopen(name, OREAD)) == 0)
690 		quit("Cannot open pattern-file: %s\n", name);
691 	prog.type = type;
692 }
693 
694 int
rline(Rune * buf,Rune * end)695 rline(Rune *buf, Rune *end)
696 {
697 	long c;
698 	Rune r;
699 
700 	while ((c = getrune()) >= 0) {
701 		r = c;
702 		if (r == '\\') {
703 			if (buf <= end)
704 				*buf++ = r;
705 			if ((c = getrune()) < 0)
706 				break;
707 			r = c;
708 		} else if (r == '\n') {
709 			*buf = '\0';
710 			return 1;
711 		}
712 		if (buf <= end)
713 			*buf++ = r;
714 	}
715 	*buf = '\0';
716 	return -1;
717 }
718 
719 long
getrune(void)720 getrune(void)
721 {
722 	long c;
723 	Rune r;
724 	char *p;
725 
726 	if (prog.type == P_ARG) {
727 		if ((p = prog.curr) != 0) {
728 			if (*p) {
729 				prog.curr += chartorune(&r, p);
730 				c = r;
731 			} else {
732 				c = '\n';	/* fake an end-of-line */
733 				prog.curr = 0;
734 			}
735 		} else
736 			c = -1;
737 	} else if ((c = Bgetrune(prog.bp)) < 0)
738 		Bterm(prog.bp);
739 	return c;
740 }
741 
742 void
address(Addr * ap)743 address(Addr *ap)
744 {
745 	int c;
746 	long lno;
747 
748 	if((c = *cp++) == '$')
749 		ap->type = A_DOL;
750 	else if(c == '/') {
751 		seof = c;
752 		if (ap->rp = compile())
753 			ap->type = A_RE;
754 		else
755 			ap->type = A_LAST;
756 	}
757 	else if (c >= '0' && c <= '9') {
758 		lno = c - '0';
759 		while ((c = *cp) >= '0' && c <= '9')
760 			lno = lno*10 + *cp++ - '0';
761 		if(!lno)
762 			quit("line number 0 is illegal",0);
763 		ap->type = A_LINE;
764 		ap->line = lno;
765 	}
766 	else {
767 		cp--;
768 		ap->type = A_NONE;
769 	}
770 }
771 
cmp(char * a,char * b)772 cmp(char *a, char *b)		/* compare characters */
773 {
774 	while(*a == *b++)
775 		if (*a == '\0')
776 			return 0;
777 		else
778 			a++;
779 	return 1;
780 }
rcmp(Rune * a,Rune * b)781 rcmp(Rune *a, Rune *b)		/* compare runes */
782 {
783 	while(*a == *b++)
784 		if (*a == '\0')
785 			return 0;
786 		else
787 			a++;
788 	return 1;
789 }
790 
791 char *
text(char * p)792 text(char *p)		/* extract character string */
793 {
794 	Rune r;
795 
796 	while(*cp == ' ' || *cp == '\t')
797 		cp++;
798 	while (*cp) {
799 		if ((r = *cp++) == '\\' && (r = *cp++) == '\0')
800 			break;
801 		if (r == '\n')
802 			while (*cp == ' ' || *cp == '\t')
803 				cp++;
804 		p += runetochar(p, &r);
805 	}
806 	*p++ = '\0';
807 	return p;
808 }
809 
810 Rune *
stext(Rune * p,Rune * end)811 stext(Rune *p, Rune *end)		/* extract rune string */
812 {
813 	while(*cp == L' ' || *cp == L'\t')
814 		cp++;
815 	while (*cp) {
816 		if (*cp == L'\\' && *++cp == L'\0')
817 			break;
818 		if (p >= end-1)
819 			quit(TMMES, linebuf);
820 		if ((*p++ = *cp++) == L'\n')
821 			while(*cp == L' ' || *cp == L'\t')
822 				cp++;
823 	}
824 	*p++ = 0;
825 	return p;
826 }
827 
828 
829 Label *
search(Label * ptr)830 search(Label *ptr)
831 {
832 	Label	*rp;
833 
834 	for (rp = ltab; rp < ptr; rp++)
835 		if(rcmp(rp->uninm, ptr->uninm) == 0)
836 			return(rp);
837 	return(0);
838 }
839 
840 void
dechain(void)841 dechain(void)
842 {
843 	Label	*lptr;
844 	SedCom	*rptr, *trptr;
845 
846 	for(lptr = ltab; lptr < lab; lptr++) {
847 		if(lptr->address == 0)
848 			quit("Undefined label: %S", lptr->uninm);
849 		if(lptr->chain) {
850 			rptr = lptr->chain;
851 			while((trptr = rptr->lb1) != nil) {
852 				rptr->lb1 = lptr->address;
853 				rptr = trptr;
854 			}
855 			rptr->lb1 = lptr->address;
856 		}
857 	}
858 }
859 
860 int
ycomp(SedCom * r)861 ycomp(SedCom *r)
862 {
863 	int i;
864 	Rune *rp, *sp, *tsp;
865 	Rune c, highc;
866 
867 	highc = 0;
868 	for(tsp = cp; *tsp != seof; tsp++) {
869 		if(*tsp == L'\\')
870 			tsp++;
871 		if(*tsp == L'\n' || *tsp == L'\0')
872 			return 0;
873 		if (*tsp > highc)
874 			highc = *tsp;
875 	}
876 	tsp++;
877 	if ((rp = r->text = (Rune *)malloc(sizeof(Rune) * (highc+2))) == nil)
878 		quit("Out of memory");
879 	*rp++ = highc;				/* save upper bound */
880 	for (i = 0; i <= highc; i++)
881 		rp[i] = i;
882 	sp = cp;
883 	while((c = *sp++) != seof) {
884 		if(c == L'\\' && *sp == L'n') {
885 			sp++;
886 			c = L'\n';
887 		}
888 		if((rp[c] = *tsp++) == L'\\' && *tsp == L'n') {
889 			rp[c] = L'\n';
890 			tsp++;
891 		}
892 		if(rp[c] == seof || rp[c] == L'\0') {
893 			free(r->re1);
894 			r->re1 = nil;
895 			return 0;
896 		}
897 	}
898 	if(*tsp != seof) {
899 		free(r->re1);
900 		r->re1 = nil;
901 		return 0;
902 	}
903 	cp = tsp+1;
904 	return 1;
905 }
906 
907 void
execute(void)908 execute(void)
909 {
910 	SedCom	*ipc;
911 
912 	while (spend = gline(linebuf)){
913 		for(ipc = pspace; ipc->command; ) {
914 			if (!executable(ipc)) {
915 				ipc++;
916 				continue;
917 			}
918 			command(ipc);
919 
920 			if(delflag)
921 				break;
922 			if(jflag) {
923 				jflag = 0;
924 				if((ipc = ipc->lb1) == 0)
925 					break;
926 			} else
927 				ipc++;
928 		}
929 		if(!nflag && !delflag)
930 			putline(&fout, linebuf, spend - linebuf);
931 		if(aptr > abuf)
932 			arout();
933 		delflag = 0;
934 	}
935 }
936 
937 /* determine if a statement should be applied to an input line */
938 int
executable(SedCom * ipc)939 executable(SedCom *ipc)
940 {
941 	if (ipc->active) {	/* Addr1 satisfied - accept until Addr2 */
942 		if (ipc->active == 1)		/* Second line */
943 			ipc->active = 2;
944 		switch(ipc->ad2.type) {
945 		case A_NONE:		/* No second addr; use first */
946 			ipc->active = 0;
947 			break;
948 		case A_DOL:		/* Accept everything */
949 			return !ipc->negfl;
950 		case A_LINE:		/* Line at end of range? */
951 			if (lnum <= ipc->ad2.line) {
952 				if (ipc->ad2.line == lnum)
953 					ipc->active = 0;
954 				return !ipc->negfl;
955 			}
956 			ipc->active = 0;	/* out of range */
957 			return ipc->negfl;
958 		case A_RE:		/* Check for matching R.E. */
959 			if (match(ipc->ad2.rp, linebuf))
960 				ipc->active = 0;
961 			return !ipc->negfl;
962 		default:
963 			quit("Internal error");
964 		}
965 	}
966 	switch (ipc->ad1.type) {	/* Check first address */
967 	case A_NONE:			/* Everything matches */
968 		return !ipc->negfl;
969 	case A_DOL:			/* Only last line */
970 		if (dolflag)
971 			return !ipc->negfl;
972 		break;
973 	case A_LINE:			/* Check line number */
974 		if (ipc->ad1.line == lnum) {
975 			ipc->active = 1;	/* In range */
976 			return !ipc->negfl;
977 		}
978 		break;
979 	case A_RE:			/* Check R.E. */
980 		if (match(ipc->ad1.rp, linebuf)) {
981 			ipc->active = 1;	/* In range */
982 			return !ipc->negfl;
983 		}
984 		break;
985 	default:
986 		quit("Internal error");
987 	}
988 	return ipc->negfl;
989 }
990 
991 int
match(Reprog * pattern,Rune * buf)992 match(Reprog *pattern, Rune *buf)
993 {
994 	if (!pattern)
995 		return 0;
996 	subexp[0].rsp = buf;
997 	subexp[0].ep = 0;
998 	if (rregexec(pattern, linebuf, subexp, MAXSUB) > 0) {
999 		loc1 = subexp[0].rsp;
1000 		loc2 = subexp[0].rep;
1001 		return 1;
1002 	}
1003 	loc1 = loc2 = 0;
1004 	return 0;
1005 }
1006 
1007 int
substitute(SedCom * ipc)1008 substitute(SedCom *ipc)
1009 {
1010 	int len;
1011 
1012 	if(!match(ipc->re1, linebuf))
1013 		return 0;
1014 
1015 	/*
1016 	 * we have at least one match.  some patterns, e.g. '$' or '^', can
1017 	 * produce 0-length matches, so during a global substitute we must
1018 	 * bump to the character after a 0-length match to keep from looping.
1019 	 */
1020 	sflag = 1;
1021 	if(ipc->gfl == 0)			/* single substitution */
1022 		dosub(ipc->rhs);
1023 	else
1024 		do{				/* global substitution */
1025 			len = loc2 - loc1;	/* length of match */
1026 			dosub(ipc->rhs);	/* dosub moves loc2 */
1027 			if(*loc2 == 0)		/* end of string */
1028 				break;
1029 			if(len == 0)		/* zero-length R.E. match */
1030 				loc2++;		/* bump over 0-length match */
1031 			if(*loc2 == 0)		/* end of string */
1032 				break;
1033 		} while(match(ipc->re1, loc2));
1034 	return 1;
1035 }
1036 
1037 void
dosub(Rune * rhsbuf)1038 dosub(Rune *rhsbuf)
1039 {
1040 	int c, n;
1041 	Rune *lp, *sp, *rp;
1042 
1043 	lp = linebuf;
1044 	sp = genbuf;
1045 	rp = rhsbuf;
1046 	while (lp < loc1)
1047 		*sp++ = *lp++;
1048 	while(c = *rp++) {
1049 		if (c == '&') {
1050 			sp = place(sp, loc1, loc2);
1051 			continue;
1052 		}
1053 		if (c == Runemax && (c = *rp++) >= '1' && c < MAXSUB + '0') {
1054 			n = c-'0';
1055 			if (subexp[n].rsp && subexp[n].rep) {
1056 				sp = place(sp, subexp[n].rsp, subexp[n].rep);
1057 				continue;
1058 			}
1059 			else {
1060 				fprint(2, "sed: Invalid back reference \\%d\n",n);
1061 				errexit();
1062 			}
1063 		}
1064 		*sp++ = c;
1065 		if (sp >= &genbuf[LBSIZE])
1066 			fprint(2, "sed: Output line too long.\n");
1067 	}
1068 	lp = loc2;
1069 	loc2 = sp - genbuf + linebuf;
1070 	while (*sp++ = *lp++)
1071 		if (sp >= &genbuf[LBSIZE])
1072 			fprint(2, "sed: Output line too long.\n");
1073 	lp = linebuf;
1074 	sp = genbuf;
1075 	while (*lp++ = *sp++)
1076 		;
1077 	spend = lp - 1;
1078 }
1079 
1080 Rune *
place(Rune * sp,Rune * l1,Rune * l2)1081 place(Rune *sp, Rune *l1, Rune *l2)
1082 {
1083 	while (l1 < l2) {
1084 		*sp++ = *l1++;
1085 		if (sp >= &genbuf[LBSIZE])
1086 			fprint(2, "sed: Output line too long.\n");
1087 	}
1088 	return sp;
1089 }
1090 
1091 char *
trans(int c)1092 trans(int c)
1093 {
1094 	static char buf[] = "\\x0000";
1095 	static char hex[] = "0123456789abcdef";
1096 
1097 	switch(c) {
1098 	case '\b':
1099 		return "\\b";
1100 	case '\n':
1101 		return "\\n";
1102 	case '\r':
1103 		return "\\r";
1104 	case '\t':
1105 		return "\\t";
1106 	case '\\':
1107 		return "\\\\";
1108 	}
1109 	buf[2] = hex[(c>>12)&0xF];
1110 	buf[3] = hex[(c>>8)&0xF];
1111 	buf[4] = hex[(c>>4)&0xF];
1112 	buf[5] = hex[c&0xF];
1113 	return buf;
1114 }
1115 
1116 void
command(SedCom * ipc)1117 command(SedCom *ipc)
1118 {
1119 	int i, c;
1120 	char *ucp;
1121 	Rune *execp, *p1, *p2, *rp;
1122 
1123 	switch(ipc->command) {
1124 	case ACOM:
1125 		*aptr++ = ipc;
1126 		if(aptr >= abuf+MAXADDS)
1127 			quit("sed: Too many appends after line %ld\n",
1128 				(char *)lnum);
1129 		*aptr = 0;
1130 		break;
1131 	case CCOM:
1132 		delflag = 1;
1133 		if(ipc->active == 1) {
1134 			for(rp = ipc->text; *rp; rp++)
1135 				Bputrune(&fout, *rp);
1136 			Bputc(&fout, '\n');
1137 		}
1138 		break;
1139 	case DCOM:
1140 		delflag++;
1141 		break;
1142 	case CDCOM:
1143 		p1 = p2 = linebuf;
1144 		while(*p1 != '\n') {
1145 			if(*p1++ == 0) {
1146 				delflag++;
1147 				return;
1148 			}
1149 		}
1150 		p1++;
1151 		while(*p2++ = *p1++)
1152 			;
1153 		spend = p2 - 1;
1154 		jflag++;
1155 		break;
1156 	case EQCOM:
1157 		Bprint(&fout, "%ld\n", lnum);
1158 		break;
1159 	case GCOM:
1160 		p1 = linebuf;
1161 		p2 = holdsp;
1162 		while(*p1++ = *p2++)
1163 			;
1164 		spend = p1 - 1;
1165 		break;
1166 	case CGCOM:
1167 		*spend++ = '\n';
1168 		p1 = spend;
1169 		p2 = holdsp;
1170 		while(*p1++ = *p2++)
1171 			if(p1 >= lbend)
1172 				break;
1173 		spend = p1 - 1;
1174 		break;
1175 	case HCOM:
1176 		p1 = holdsp;
1177 		p2 = linebuf;
1178 		while(*p1++ = *p2++);
1179 		hspend = p1 - 1;
1180 		break;
1181 	case CHCOM:
1182 		*hspend++ = '\n';
1183 		p1 = hspend;
1184 		p2 = linebuf;
1185 		while(*p1++ = *p2++)
1186 			if(p1 >= hend)
1187 				break;
1188 		hspend = p1 - 1;
1189 		break;
1190 	case ICOM:
1191 		for(rp = ipc->text; *rp; rp++)
1192 			Bputrune(&fout, *rp);
1193 		Bputc(&fout, '\n');
1194 		break;
1195 	case BCOM:
1196 		jflag = 1;
1197 		break;
1198 	case LCOM:
1199 		c = 0;
1200 		for (i = 0, rp = linebuf; *rp; rp++) {
1201 			c = *rp;
1202 			if(c >= 0x20 && c < 0x7F && c != '\\') {
1203 				Bputc(&fout, c);
1204 				if(i++ > 71) {
1205 					Bprint(&fout, "\\\n");
1206 					i = 0;
1207 				}
1208 			} else {
1209 				for (ucp = trans(*rp); *ucp; ucp++){
1210 					c = *ucp;
1211 					Bputc(&fout, c);
1212 					if(i++ > 71) {
1213 						Bprint(&fout, "\\\n");
1214 						i = 0;
1215 					}
1216 				}
1217 			}
1218 		}
1219 		if(c == ' ')
1220 			Bprint(&fout, "\\n");
1221 		Bputc(&fout, '\n');
1222 		break;
1223 	case NCOM:
1224 		if(!nflag)
1225 			putline(&fout, linebuf, spend-linebuf);
1226 
1227 		if(aptr > abuf)
1228 			arout();
1229 		if((execp = gline(linebuf)) == 0) {
1230 			delflag = 1;
1231 			break;
1232 		}
1233 		spend = execp;
1234 		break;
1235 	case CNCOM:
1236 		if(aptr > abuf)
1237 			arout();
1238 		*spend++ = '\n';
1239 		if((execp = gline(spend)) == 0) {
1240 			delflag = 1;
1241 			break;
1242 		}
1243 		spend = execp;
1244 		break;
1245 	case PCOM:
1246 		putline(&fout, linebuf, spend-linebuf);
1247 		break;
1248 	case CPCOM:
1249 cpcom:
1250 		for(rp = linebuf; *rp && *rp != '\n'; rp++)
1251 			Bputc(&fout, *rp);
1252 		Bputc(&fout, '\n');
1253 		break;
1254 	case QCOM:
1255 		if(!nflag)
1256 			putline(&fout, linebuf, spend-linebuf);
1257 		if(aptr > abuf)
1258 			arout();
1259 		exits(0);
1260 	case RCOM:
1261 		*aptr++ = ipc;
1262 		if(aptr >= &abuf[MAXADDS])
1263 			quit("sed: Too many reads after line %ld\n",
1264 				(char *)lnum);
1265 		*aptr = 0;
1266 		break;
1267 	case SCOM:
1268 		i = substitute(ipc);
1269 		if(i && ipc->pfl)
1270 			if(ipc->pfl == 1)
1271 				putline(&fout, linebuf, spend-linebuf);
1272 			else
1273 				goto cpcom;
1274 		if(i && ipc->fcode)
1275 			goto wcom;
1276 		break;
1277 
1278 	case TCOM:
1279 		if(sflag) {
1280 			sflag = 0;
1281 			jflag = 1;
1282 		}
1283 		break;
1284 
1285 	case WCOM:
1286 wcom:
1287 		putline(ipc->fcode,linebuf, spend - linebuf);
1288 		break;
1289 	case XCOM:
1290 		p1 = linebuf;
1291 		p2 = genbuf;
1292 		while(*p2++ = *p1++)
1293 			;
1294 		p1 = holdsp;
1295 		p2 = linebuf;
1296 		while(*p2++ = *p1++)
1297 			;
1298 		spend = p2 - 1;
1299 		p1 = genbuf;
1300 		p2 = holdsp;
1301 		while(*p2++ = *p1++)
1302 			;
1303 		hspend = p2 - 1;
1304 		break;
1305 	case YCOM:
1306 		p1 = linebuf;
1307 		p2 = ipc->text;
1308 		for (i = *p2++;	*p1; p1++)
1309 			if (*p1 <= i)
1310 				*p1 = p2[*p1];
1311 		break;
1312 	}
1313 }
1314 
1315 void
putline(Biobuf * bp,Rune * buf,int n)1316 putline(Biobuf *bp, Rune *buf, int n)
1317 {
1318 	while (n--)
1319 		Bputrune(bp, *buf++);
1320 	Bputc(bp, '\n');
1321 }
ecmp(Rune * a,Rune * b,int count)1322 ecmp(Rune *a, Rune *b, int count)
1323 {
1324 	while(count--)
1325 		if(*a++ != *b++)
1326 			return 0;
1327 	return 1;
1328 }
1329 
1330 void
arout(void)1331 arout(void)
1332 {
1333 	int	c;
1334 	char	*s;
1335 	char	buf[128];
1336 	Rune	*p1;
1337 	Biobuf	*fi;
1338 
1339 	for (aptr = abuf; *aptr; aptr++) {
1340 		if((*aptr)->command == ACOM) {
1341 			for(p1 = (*aptr)->text; *p1; p1++ )
1342 				Bputrune(&fout, *p1);
1343 			Bputc(&fout, '\n');
1344 		} else {
1345 			for(s = buf, p1 = (*aptr)->text; *p1; p1++)
1346 				s += runetochar(s, p1);
1347 			*s = '\0';
1348 			if((fi = Bopen(buf, OREAD)) == 0)
1349 				continue;
1350 			while((c = Bgetc(fi)) >= 0)
1351 				Bputc(&fout, c);
1352 			Bterm(fi);
1353 		}
1354 	}
1355 	aptr = abuf;
1356 	*aptr = 0;
1357 }
1358 
1359 void
errexit(void)1360 errexit(void)
1361 {
1362 	exits("error");
1363 }
1364 
1365 void
quit(char * fmt,...)1366 quit(char *fmt, ...)
1367 {
1368 	char *p, *ep;
1369 	char msg[256];
1370 	va_list arg;
1371 
1372 	ep = msg + sizeof msg;
1373 	p = seprint(msg, ep, "sed: ");
1374 	va_start(arg, fmt);
1375 	p = vseprint(p, ep, fmt, arg);
1376 	va_end(arg);
1377 	p = seprint(p, ep, "\n");
1378 	write(2, msg, p - msg);
1379 	errexit();
1380 }
1381 
1382 Rune *
gline(Rune * addr)1383 gline(Rune *addr)
1384 {
1385 	long c;
1386 	Rune *p;
1387 	static long peekc = 0;
1388 
1389 	if (f == 0 && opendata() < 0)
1390 		return 0;
1391 	sflag = 0;
1392 	lnum++;
1393 /*	Bflush(&fout);********* dumped 4/30/92 - bobf****/
1394 	do {
1395 		p = addr;
1396 		for (c = (peekc? peekc: Bgetrune(f)); c >= 0; c = Bgetrune(f)) {
1397 			if (c == '\n') {
1398 				if ((peekc = Bgetrune(f)) < 0 && fhead == 0)
1399 					dolflag = 1;
1400 				*p = '\0';
1401 				return p;
1402 			}
1403 			if (c && p < lbend)
1404 				*p++ = c;
1405 		}
1406 		/* return partial final line, adding implicit newline */
1407 		if(p != addr) {
1408 			*p = '\0';
1409 			peekc = -1;
1410 			if (fhead == 0)
1411 				dolflag = 1;
1412 			return p;
1413 		}
1414 		peekc = 0;
1415 		Bterm(f);
1416 	} while (opendata() > 0);		/* Switch to next stream */
1417 	f = 0;
1418 	return 0;
1419 }
1420 
1421 /*
1422  * Data file input section - the intent is to transparently
1423  *	catenate all data input streams.
1424  */
1425 void
enroll(char * filename)1426 enroll(char *filename)		/* Add a file to the input file cache */
1427 {
1428 	FileCache *fp;
1429 
1430 	if ((fp = (FileCache *)malloc(sizeof (FileCache))) == nil)
1431 		quit("Out of memory");
1432 	if (ftail == nil)
1433 		fhead = fp;
1434 	else
1435 		ftail->next = fp;
1436 	ftail = fp;
1437 	fp->next = nil;
1438 	fp->name = filename;		/* 0 => stdin */
1439 }
1440 
1441 int
opendata(void)1442 opendata(void)
1443 {
1444 	if (fhead == nil)
1445 		return -1;
1446 	if (fhead->name) {
1447 		if ((f = Bopen(fhead->name, OREAD)) == nil)
1448 			quit("Can't open %s", fhead->name);
1449 	} else {
1450 		Binit(&stdin, 0, OREAD);
1451 		f = &stdin;
1452 	}
1453 	fhead = fhead->next;
1454 	return 1;
1455 }
1456