xref: /inferno-os/utils/sed/sed.c (revision 06155dbb53eb0585800b239acd6e45b946c6e0cf)
1 /*
2  * sed -- stream editor
3  */
4 #include <lib9.h>
5 #include <bio.h>
6 #include <regexp.h>
7 
8 enum {
9 	DEPTH		= 20,		/* max nesting depth of {} */
10 	MAXCMDS		= 512,		/* max sed commands */
11 	ADDSIZE		= 10000,	/* size of add & read buffer */
12 	MAXADDS		= 20,		/* max pending adds and reads */
13 	LBSIZE		= 8192,		/* input line size */
14 	LABSIZE		= 50,		/* max number of labels */
15 	MAXSUB		= 10,		/* max number of sub reg exp */
16 	MAXFILES	= 120		/* max output files */
17 };
18 
19 /*
20  * An address is a line #, a R.E., "$", a reference to the last
21  * R.E., or nothing.
22  */
23 typedef struct {
24 	enum {
25 		A_NONE,
26 		A_DOL,
27 		A_LINE,
28 		A_RE,
29 		A_LAST
30 	}type;
31 /*	union { */
32 		long	line;		/* Line # */
33 		Reprog	*rp;		/* Compiled R.E. */
34 /*	}; */
35 } Addr;
36 
37 typedef struct	SEDCOM {
38 	Addr	ad1;			/* optional start address */
39 	Addr	ad2;			/* optional end address */
40 /*	union { */
41 		Reprog	*re1;		/* compiled R.E. */
42 		Rune	*text;		/* added text or file name */
43 		struct	SEDCOM	*lb1;	/* destination command of branch */
44 /*	}; */
45 	Rune	*rhs;			/* Right-hand side of substitution */
46 	Biobuf*	fcode;			/* File ID for read and write */
47 	char	command;		/* command code -see below */
48 	char	gfl;			/* 'Global' flag for substitutions */
49 	char	pfl;			/* 'print' flag for substitutions */
50 	char	active;			/* 1 => data between start and end */
51 	char	negfl;			/* negation flag */
52 } SedCom;
53 
54 /* Command Codes for field SedCom.command */
55 #define ACOM	01
56 #define BCOM	020
57 #define CCOM	02
58 #define	CDCOM	025
59 #define	CNCOM	022
60 #define COCOM	017
61 #define	CPCOM	023
62 #define DCOM	03
63 #define ECOM	015
64 #define EQCOM	013
65 #define FCOM	016
66 #define GCOM	027
67 #define CGCOM	030
68 #define HCOM	031
69 #define CHCOM	032
70 #define ICOM	04
71 #define LCOM	05
72 #define NCOM	012
73 #define PCOM	010
74 #define QCOM	011
75 #define RCOM	06
76 #define SCOM	07
77 #define TCOM	021
78 #define WCOM	014
79 #define	CWCOM	024
80 #define	YCOM	026
81 #define XCOM	033
82 
83 typedef struct label {			/* Label symbol table */
84 	Rune	uninm[9];		/* Label name */
85 	SedCom	*chain;
86 	SedCom	*address;		/* Command associated with label */
87 } Label;
88 
89 typedef	struct	FILE_CACHE {		/* Data file control block */
90 	struct FILE_CACHE *next;	/* Forward Link */
91 	char	*name;			/* Name of file */
92 } FileCache;
93 
94 SedCom pspace[MAXCMDS];			/* Command storage */
95 SedCom *pend = pspace+MAXCMDS;		/* End of command storage */
96 SedCom *rep = pspace;			/* Current fill point */
97 
98 Reprog	*lastre = 0;			/* Last regular expression */
99 Resub	subexp[MAXSUB];			/* sub-patterns of pattern match*/
100 
101 Rune	addspace[ADDSIZE];		/* Buffer for a, c, & i commands */
102 Rune	*addend = addspace+ADDSIZE;
103 
104 SedCom	*abuf[MAXADDS];			/* Queue of pending adds & reads */
105 SedCom	**aptr = abuf;
106 
107 struct {				/* Sed program input control block */
108 	enum PTYPE { 			/* Either on command line or in file */
109 		P_ARG,
110 		P_FILE
111 	} type;
112 /*	union PCTL { */			/* Pointer to data */
113 		Biobuf	*bp;
114 		char	*curr;
115 /*	}; */
116 } prog;
117 
118 Rune	genbuf[LBSIZE];			/* Miscellaneous buffer */
119 
120 FileCache	*fhead = 0;		/* Head of File Cache Chain */
121 FileCache	*ftail = 0;		/* Tail of File Cache Chain */
122 
123 Rune	*loc1;				/* Start of pattern match */
124 Rune	*loc2;				/* End of pattern match */
125 Rune	seof;				/* Pattern delimiter char */
126 
127 Rune	linebuf[LBSIZE+1];		/* Input data buffer */
128 Rune	*lbend = linebuf+LBSIZE;	/* End of buffer */
129 Rune	*spend = linebuf;		/* End of input data */
130 Rune	*cp;				/* Current scan point in linebuf */
131 
132 Rune	holdsp[LBSIZE+1];		/* Hold buffer */
133 Rune	*hend = holdsp+LBSIZE;		/* End of hold buffer */
134 Rune	*hspend = holdsp;		/* End of hold data */
135 
136 int	nflag;				/* Command line flags */
137 int	gflag;
138 
139 int	dolflag;			/* Set when at true EOF */
140 int	sflag;				/* Set when substitution done */
141 int	jflag;				/* Set when jump required */
142 int	delflag;			/* Delete current line when set */
143 
144 long	lnum = 0;			/* Input line count */
145 
146 char	fname[MAXFILES][40];		/* File name cache */
147 Biobuf	*fcode[MAXFILES];		/* File ID cache */
148 int	nfiles = 0;			/* Cache fill point */
149 
150 Biobuf	fout;				/* Output stream */
151 Biobuf	bstdin;				/* Default input */
152 Biobuf*	f = 0;				/* Input data */
153 
154 Label	ltab[LABSIZE];			/* Label name symbol table */
155 Label	*labend = ltab+LABSIZE;		/* End of label table */
156 Label	*lab = ltab+1;			/* Current Fill point */
157 
158 int	depth = 0;			/* {} stack pointer */
159 
160 Rune	bad;				/* Dummy err ptr reference */
161 Rune	*badp = &bad;
162 
163 
164 char	CGMES[]	 = 	"command garbled: %S";
165 char	TMMES[]	 = 	"Too much text: %S";
166 char	LTL[]	 = 	"Label too long: %S";
167 char	AD0MES[] =	"No addresses allowed: %S";
168 char	AD1MES[] =	"Only one address allowed: %S";
169 
170 void	address(Addr *);
171 void	arout(void);
172 int	cmp(char *, char *);
173 int	rcmp(Rune *, Rune *);
174 void	command(SedCom *);
175 Reprog	*compile(void);
176 Rune	*compsub(Rune *, Rune *);
177 void	dechain(void);
178 void	dosub(Rune *);
179 int	ecmp(Rune *, Rune *, int);
180 void	enroll(char *);
181 void	errexit(void);
182 int	executable(SedCom *);
183 void	execute(void);
184 void	fcomp(void);
185 long	getrune(void);
186 Rune	*gline(Rune *);
187 int	match(Reprog *, Rune *);
188 void	newfile(enum PTYPE, char *);
189 int 	opendata(void);
190 Biobuf	*open_file(char *);
191 Rune	*place(Rune *, Rune *, Rune *);
192 void	quit(char *, ...);
193 int	rline(Rune *, Rune *);
194 Label	*search(Label *);
195 int	substitute(SedCom *);
196 char	*text(char *);
197 Rune	*stext(Rune *, Rune *);
198 int	ycomp(SedCom *);
199 char *	trans(int c);
200 void	putline(Biobuf *bp, Rune *buf, int n);
201 
202 void
203 main(int argc, char **argv)
204 {
205 	int compfl;
206 
207 	lnum = 0;
208 	Binit(&fout, 1, OWRITE);
209 	fcode[nfiles++] = &fout;
210 	compfl = 0;
211 
212 	if(argc == 1)
213 		exits(0);
214 	ARGBEGIN{
215 	case 'e':
216 		if (argc <= 1)
217 			quit("missing pattern");
218 		newfile(P_ARG, ARGF());
219 		fcomp();
220 		compfl = 1;
221 		continue;
222 	case 'f':
223 		if(argc <= 1)
224 			quit("no pattern-file");
225 		newfile(P_FILE, ARGF());
226 		fcomp();
227 		compfl = 1;
228 		continue;
229 	case 'g':
230 		gflag++;
231 		continue;
232 	case 'n':
233 		nflag++;
234 		continue;
235 	default:
236 		fprint(2, "sed: Unknown flag: %c\n", ARGC());
237 		continue;
238 	} ARGEND
239 
240 	if(compfl == 0) {
241 		if (--argc < 0)
242 			quit("missing pattern");
243 		newfile(P_ARG, *argv++);
244 		fcomp();
245 	}
246 
247 	if(depth)
248 		quit("Too many {'s");
249 
250 	ltab[0].address = rep;
251 
252 	dechain();
253 
254 	if(argc <= 0)
255 		enroll(0);		/* Add stdin to cache */
256 	else
257 		while(--argc >= 0)
258 			enroll(*argv++);
259 	execute();
260 	exits(0);
261 }
262 
263 void
264 fcomp(void)
265 {
266 	int	i;
267 	Label	*lpt;
268 	Rune	*tp;
269 	SedCom	*pt, *pt1;
270 	static Rune	*p = addspace;
271 	static SedCom	**cmpend[DEPTH];	/* stack of {} operations */
272 
273 	while (rline(linebuf, lbend) >= 0) {
274 		cp = linebuf;
275 comploop:
276 		while(*cp == ' ' || *cp == '\t')
277 			cp++;
278 		if(*cp == '\0' || *cp == '#')
279 			continue;
280 		if(*cp == ';') {
281 			cp++;
282 			goto comploop;
283 		}
284 
285 		address(&rep->ad1);
286 		if (rep->ad1.type != A_NONE) {
287 			if (rep->ad1.type == A_LAST) {
288 				if (!lastre)
289 					quit("First RE may not be null");
290 				rep->ad1.type = A_RE;
291 				rep->ad1.rp = lastre;
292 			}
293 			if(*cp == ',' || *cp == ';') {
294 				cp++;
295 				address(&rep->ad2);
296 				if (rep->ad2.type == A_LAST) {
297 					rep->ad2.type = A_RE;
298 					rep->ad2.rp = lastre;
299 				}
300 			} else
301 				rep->ad2.type = A_NONE;
302 		}
303 		while(*cp == ' ' || *cp == '\t')
304 			cp++;
305 
306 swit:
307 		switch(*cp++) {
308 		default:
309 			quit("Unrecognized command: %S", linebuf);
310 
311 		case '!':
312 			rep->negfl = 1;
313 			goto swit;
314 
315 		case '{':
316 			rep->command = BCOM;
317 			rep->negfl = !rep->negfl;
318 			cmpend[depth++] = &rep->lb1;
319 			if(++rep >= pend)
320 				quit("Too many commands: %S", linebuf);
321 			if(*cp == '\0')
322 				continue;
323 			goto comploop;
324 
325 		case '}':
326 			if(rep->ad1.type != A_NONE)
327 				quit(AD0MES, linebuf);
328 			if(--depth < 0)
329 				quit("Too many }'s");
330 			*cmpend[depth] = rep;
331 			if(*cp == 0)
332 				continue;
333 			goto comploop;
334 
335 		case '=':
336 			rep->command = EQCOM;
337 			if(rep->ad2.type != A_NONE)
338 				quit(AD1MES, linebuf);
339 			break;
340 
341 		case ':':
342 			if(rep->ad1.type != A_NONE)
343 				quit(AD0MES, linebuf);
344 
345 			while(*cp == ' ')
346 				cp++;
347 			tp = lab->uninm;
348 			while (*cp && *cp != ';' && *cp != ' ' &&
349 			    *cp != '\t' && *cp != '#') {
350 				*tp++ = *cp++;
351 				if(tp >= &lab->uninm[8])
352 					quit(LTL, linebuf);
353 			}
354 			*tp = '\0';
355 
356 			if (*lab->uninm == '\0')		/* no label? */
357 				quit(CGMES, linebuf);
358 			if(lpt = search(lab)) {
359 				if(lpt->address)
360 					quit("Duplicate labels: %S", linebuf);
361 			} else {
362 				lab->chain = 0;
363 				lpt = lab;
364 				if(++lab >= labend)
365 					quit("Too many labels: %S", linebuf);
366 			}
367 			lpt->address = rep;
368 			if (*cp == '#')
369 				continue;
370 			rep--;			/* reuse this slot */
371 			break;
372 
373 		case 'a':
374 			rep->command = ACOM;
375 			if(rep->ad2.type != A_NONE)
376 				quit(AD1MES, linebuf);
377 			if(*cp == '\\')
378 				cp++;
379 			if(*cp++ != '\n')
380 				quit(CGMES, linebuf);
381 			rep->text = p;
382 			p = stext(p, addend);
383 			break;
384 		case 'c':
385 			rep->command = CCOM;
386 			if(*cp == '\\')
387 				cp++;
388 			if(*cp++ != '\n')
389 				quit(CGMES, linebuf);
390 			rep->text = p;
391 			p = stext(p, addend);
392 			break;
393 		case 'i':
394 			rep->command = ICOM;
395 			if(rep->ad2.type != A_NONE)
396 				quit(AD1MES, linebuf);
397 			if(*cp == '\\')
398 				cp++;
399 			if(*cp++ != '\n')
400 				quit(CGMES, linebuf);
401 			rep->text = p;
402 			p = stext(p, addend);
403 			break;
404 
405 		case 'g':
406 			rep->command = GCOM;
407 			break;
408 
409 		case 'G':
410 			rep->command = CGCOM;
411 			break;
412 
413 		case 'h':
414 			rep->command = HCOM;
415 			break;
416 
417 		case 'H':
418 			rep->command = CHCOM;
419 			break;
420 
421 		case 't':
422 			rep->command = TCOM;
423 			goto jtcommon;
424 
425 		case 'b':
426 			rep->command = BCOM;
427 jtcommon:
428 			while(*cp == ' ')
429 				cp++;
430 			if(*cp == '\0' || *cp == ';') {
431 				/* no label; jump to end */
432 				if(pt = ltab[0].chain) {
433 					while((pt1 = pt->lb1) != nil)
434 						pt = pt1;
435 					pt->lb1 = rep;
436 				} else
437 					ltab[0].chain = rep;
438 				break;
439 			}
440 
441 			/* copy label into lab->uninm */
442 			tp = lab->uninm;
443 			while((*tp = *cp++) != '\0' && *tp != ';')
444 				if(++tp >= &lab->uninm[8])
445 					quit(LTL, linebuf);
446 			cp--;
447 			*tp = '\0';
448 
449 			if (*lab->uninm == '\0')
450 				/* shouldn't get here */
451 				quit(CGMES, linebuf);
452 			if((lpt = search(lab)) != nil) {
453 				if(lpt->address)
454 					rep->lb1 = lpt->address;
455 				else {
456 					for(pt = lpt->chain; pt != nil &&
457 					    (pt1 = pt->lb1) != nil; pt = pt1)
458 						;
459 					if (pt)
460 						pt->lb1 = rep;
461 				}
462 			} else {			/* add new label */
463 				lab->chain = rep;
464 				lab->address = 0;
465 				if(++lab >= labend)
466 					quit("Too many labels: %S", linebuf);
467 			}
468 			break;
469 
470 		case 'n':
471 			rep->command = NCOM;
472 			break;
473 
474 		case 'N':
475 			rep->command = CNCOM;
476 			break;
477 
478 		case 'p':
479 			rep->command = PCOM;
480 			break;
481 
482 		case 'P':
483 			rep->command = CPCOM;
484 			break;
485 
486 		case 'r':
487 			rep->command = RCOM;
488 			if(rep->ad2.type != A_NONE)
489 				quit(AD1MES, linebuf);
490 			if(*cp++ != ' ')
491 				quit(CGMES, linebuf);
492 			rep->text = p;
493 			p = stext(p, addend);
494 			break;
495 
496 		case 'd':
497 			rep->command = DCOM;
498 			break;
499 
500 		case 'D':
501 			rep->command = CDCOM;
502 			rep->lb1 = pspace;
503 			break;
504 
505 		case 'q':
506 			rep->command = QCOM;
507 			if(rep->ad2.type != A_NONE)
508 				quit(AD1MES, linebuf);
509 			break;
510 
511 		case 'l':
512 			rep->command = LCOM;
513 			break;
514 
515 		case 's':
516 			rep->command = SCOM;
517 			seof = *cp++;
518 			if ((rep->re1 = compile()) == 0) {
519 				if(!lastre)
520 					quit("First RE may not be null.");
521 				rep->re1 = lastre;
522 			}
523 			rep->rhs = p;
524 			if((p = compsub(p, addend)) == 0)
525 				quit(CGMES, linebuf);
526 			if(*cp == 'g') {
527 				cp++;
528 				rep->gfl++;
529 			} else if(gflag)
530 				rep->gfl++;
531 
532 			if(*cp == 'p') {
533 				cp++;
534 				rep->pfl = 1;
535 			}
536 
537 			if(*cp == 'P') {
538 				cp++;
539 				rep->pfl = 2;
540 			}
541 
542 			if(*cp == 'w') {
543 				cp++;
544 				if(*cp++ !=  ' ')
545 					quit(CGMES, linebuf);
546 				text(fname[nfiles]);
547 				for(i = nfiles - 1; i >= 0; i--)
548 					if(cmp(fname[nfiles], fname[i]) == 0) {
549 						rep->fcode = fcode[i];
550 						goto done;
551 					}
552 				if(nfiles >= MAXFILES)
553 					quit("Too many files in w commands 1");
554 				rep->fcode = open_file(fname[nfiles]);
555 			}
556 			break;
557 
558 		case 'w':
559 			rep->command = WCOM;
560 			if(*cp++ != ' ')
561 				quit(CGMES, linebuf);
562 			text(fname[nfiles]);
563 			for(i = nfiles - 1; i >= 0; i--)
564 				if(cmp(fname[nfiles], fname[i]) == 0) {
565 					rep->fcode = fcode[i];
566 					goto done;
567 				}
568 			if(nfiles >= MAXFILES){
569 				fprint(2, "sed: Too many files in w commands 2 \n");
570 				fprint(2, "nfiles = %d; MAXF = %d\n",
571 					nfiles, MAXFILES);
572 				errexit();
573 			}
574 			rep->fcode = open_file(fname[nfiles]);
575 			break;
576 
577 		case 'x':
578 			rep->command = XCOM;
579 			break;
580 
581 		case 'y':
582 			rep->command = YCOM;
583 			seof = *cp++;
584 			if (ycomp(rep) == 0)
585 				quit(CGMES, linebuf);
586 			break;
587 
588 		}
589 done:
590 		if(++rep >= pend)
591 			quit("Too many commands, last: %S", linebuf);
592 		if(*cp++ != '\0') {
593 			if(cp[-1] == ';')
594 				goto comploop;
595 			quit(CGMES, linebuf);
596 		}
597 	}
598 }
599 
600 Biobuf *
601 open_file(char *name)
602 {
603 	int fd;
604 	Biobuf *bp;
605 
606 	if ((bp = malloc(sizeof(Biobuf))) == 0)
607 		quit("Out of memory");
608 	if ((fd = open(name, OWRITE)) < 0 &&
609 	    (fd = create(name, OWRITE, 0666)) < 0)
610 		quit("Cannot create %s", name);
611 	Binit(bp, fd, OWRITE);
612 	Bseek(bp, 0, 2);
613 	fcode[nfiles++] = bp;
614 	return bp;
615 }
616 
617 Rune *
618 compsub(Rune *rhs, Rune *end)
619 {
620 	Rune r;
621 
622 	while ((r = *cp++) != '\0') {
623 		if(r == '\\') {
624 			if (rhs < end)
625 				*rhs++ = 0xFFFF;
626 			else
627 				return 0;
628 			r = *cp++;
629 			if(r == 'n')
630 				r = '\n';
631 		} else {
632 			if(r == seof) {
633 				if (rhs < end)
634 					*rhs++ = '\0';
635 				else
636 					return 0;
637 				return rhs;
638 			}
639 		}
640 		if (rhs < end)
641 			*rhs++ = r;
642 		else
643 			return 0;
644 	}
645 	return 0;
646 }
647 
648 Reprog *
649 compile(void)
650 {
651 	Rune c;
652 	char *ep;
653 	char expbuf[512];
654 
655 	if((c = *cp++) == seof)		/* '//' */
656 		return 0;
657 	ep = expbuf;
658 	do {
659 		if (c == '\0' || c == '\n')
660 			quit(TMMES, linebuf);
661 		if (c == '\\') {
662 			if (ep >= expbuf+sizeof(expbuf))
663 				quit(TMMES, linebuf);
664 			ep += runetochar(ep, &c);
665 			if ((c = *cp++) == 'n')
666 				c = '\n';
667 		}
668 		if (ep >= expbuf + sizeof(expbuf))
669 			quit(TMMES, linebuf);
670 		ep += runetochar(ep, &c);
671 	} while ((c = *cp++) != seof);
672 	*ep = 0;
673 	return lastre = regcomp(expbuf);
674 }
675 
676 void
677 regerror(char *s)
678 {
679 	USED(s);
680 	quit(CGMES, linebuf);
681 }
682 
683 void
684 newfile(enum PTYPE type, char *name)
685 {
686 	if (type == P_ARG)
687 		prog.curr = name;
688 	else if ((prog.bp = Bopen(name, OREAD)) == 0)
689 		quit("Cannot open pattern-file: %s\n", name);
690 	prog.type = type;
691 }
692 
693 int
694 rline(Rune *buf, Rune *end)
695 {
696 	long c;
697 	Rune r;
698 
699 	while ((c = getrune()) >= 0) {
700 		if(c == '\r')			/* consume CRs on win95 */
701 			continue;
702 		r = c;
703 		if (r == '\\') {
704 			if (buf <= end)
705 				*buf++ = r;
706 			c = getrune();
707 			if (c == '\r')
708 				c = getrune();
709 			if (c < 0)
710 				break;
711 			r = c;
712 		} else if (r == '\n') {
713 			*buf = '\0';
714 			return 1;
715 		}
716 		if (buf <= end)
717 			*buf++ = r;
718 	}
719 	*buf = '\0';
720 	return -1;
721 }
722 
723 long
724 getrune(void)
725 {
726 	long c;
727 	Rune r;
728 	char *p;
729 
730 	if (prog.type == P_ARG) {
731 		if ((p = prog.curr) != 0) {
732 			if (*p) {
733 				prog.curr += chartorune(&r, p);
734 				c = r;
735 			} else {
736 				c = '\n';	/* fake an end-of-line */
737 				prog.curr = 0;
738 			}
739 		} else
740 			c = -1;
741 	} else if ((c = Bgetrune(prog.bp)) < 0)
742 		Bterm(prog.bp);
743 	return c;
744 }
745 
746 void
747 address(Addr *ap)
748 {
749 	int c;
750 	long lno;
751 
752 	if((c = *cp++) == '$')
753 		ap->type = A_DOL;
754 	else if(c == '/') {
755 		seof = c;
756 		if (ap->rp = compile())
757 			ap->type = A_RE;
758 		else
759 			ap->type = A_LAST;
760 	}
761 	else if (c >= '0' && c <= '9') {
762 		lno = c - '0';
763 		while ((c = *cp) >= '0' && c <= '9')
764 			lno = lno*10 + *cp++ - '0';
765 		if(!lno)
766 			quit("line number 0 is illegal",0);
767 		ap->type = A_LINE;
768 		ap->line = lno;
769 	}
770 	else {
771 		cp--;
772 		ap->type = A_NONE;
773 	}
774 }
775 
776 cmp(char *a, char *b)		/* compare characters */
777 {
778 	while(*a == *b++)
779 		if (*a == '\0')
780 			return 0;
781 		else
782 			a++;
783 	return 1;
784 }
785 rcmp(Rune *a, Rune *b)		/* compare runes */
786 {
787 	while(*a == *b++)
788 		if (*a == '\0')
789 			return 0;
790 		else
791 			a++;
792 	return 1;
793 }
794 
795 char *
796 text(char *p)		/* extract character string */
797 {
798 	Rune r;
799 
800 	while(*cp == ' ' || *cp == '\t')
801 		cp++;
802 	while (*cp) {
803 		if ((r = *cp++) == '\\' && (r = *cp++) == '\0')
804 			break;
805 		if (r == '\n')
806 			while (*cp == ' ' || *cp == '\t')
807 				cp++;
808 		p += runetochar(p, &r);
809 	}
810 	*p++ = '\0';
811 	return p;
812 }
813 
814 Rune *
815 stext(Rune *p, Rune *end)		/* extract rune string */
816 {
817 	while(*cp == ' ' || *cp == '\t')
818 		cp++;
819 	while (*cp) {
820 		if (*cp == '\\' && *++cp == '\0')
821 			break;
822 		if (p >= end-1)
823 			quit(TMMES, linebuf);
824 		if ((*p++ = *cp++) == '\n')
825 			while(*cp == ' ' || *cp == '\t')
826 				cp++;
827 	}
828 	*p++ = 0;
829 	return p;
830 }
831 
832 
833 Label *
834 search(Label *ptr)
835 {
836 	Label	*rp;
837 
838 	for (rp = ltab; rp < ptr; rp++)
839 		if(rcmp(rp->uninm, ptr->uninm) == 0)
840 			return(rp);
841 	return(0);
842 }
843 
844 void
845 dechain(void)
846 {
847 	Label	*lptr;
848 	SedCom	*rptr, *trptr;
849 
850 	for(lptr = ltab; lptr < lab; lptr++) {
851 		if(lptr->address == 0)
852 			quit("Undefined label: %S", lptr->uninm);
853 		if(lptr->chain) {
854 			rptr = lptr->chain;
855 			while((trptr = rptr->lb1) != nil) {
856 				rptr->lb1 = lptr->address;
857 				rptr = trptr;
858 			}
859 			rptr->lb1 = lptr->address;
860 		}
861 	}
862 }
863 
864 int
865 ycomp(SedCom *r)
866 {
867 	int i;
868 	Rune *rp, *sp, *tsp;
869 	Rune c, highc;
870 
871 	highc = 0;
872 	for(tsp = cp; *tsp != seof; tsp++) {
873 		if(*tsp == '\\')
874 			tsp++;
875 		if(*tsp == '\n' || *tsp == '\0')
876 			return 0;
877 		if (*tsp > highc)
878 			highc = *tsp;
879 	}
880 	tsp++;
881 	if ((rp = r->text = (Rune *)malloc(sizeof(Rune) * (highc+2))) == nil)
882 		quit("Out of memory");
883 	*rp++ = highc;				/* save upper bound */
884 	for (i = 0; i <= highc; i++)
885 		rp[i] = i;
886 	sp = cp;
887 	while((c = *sp++) != seof) {
888 		if(c == '\\' && *sp == 'n') {
889 			sp++;
890 			c = '\n';
891 		}
892 		if((rp[c] = *tsp++) == '\\' && *tsp == 'n') {
893 			rp[c] = '\n';
894 			tsp++;
895 		}
896 		if(rp[c] == seof || rp[c] == '\0') {
897 			free(r->text);
898 			r->text = nil;
899 			return 0;
900 		}
901 	}
902 	if(*tsp != seof) {
903 		free(r->text);
904 		r->text = nil;
905 		return 0;
906 	}
907 	cp = tsp+1;
908 	return 1;
909 }
910 
911 void
912 execute(void)
913 {
914 	SedCom	*ipc;
915 
916 	while (spend = gline(linebuf)){
917 		for(ipc = pspace; ipc->command; ) {
918 			if (!executable(ipc)) {
919 				ipc++;
920 				continue;
921 			}
922 			command(ipc);
923 
924 			if(delflag)
925 				break;
926 			if(jflag) {
927 				jflag = 0;
928 				if((ipc = ipc->lb1) == 0)
929 					break;
930 			} else
931 				ipc++;
932 		}
933 		if(!nflag && !delflag)
934 			putline(&fout, linebuf, spend - linebuf);
935 		if(aptr > abuf)
936 			arout();
937 		delflag = 0;
938 	}
939 }
940 
941 /* determine if a statement should be applied to an input line */
942 int
943 executable(SedCom *ipc)
944 {
945 	if (ipc->active) {	/* Addr1 satisfied - accept until Addr2 */
946 		if (ipc->active == 1)		/* Second line */
947 			ipc->active = 2;
948 		switch(ipc->ad2.type) {
949 		case A_NONE:		/* No second addr; use first */
950 			ipc->active = 0;
951 			break;
952 		case A_DOL:		/* Accept everything */
953 			return !ipc->negfl;
954 		case A_LINE:		/* Line at end of range? */
955 			if (lnum <= ipc->ad2.line) {
956 				if (ipc->ad2.line == lnum)
957 					ipc->active = 0;
958 				return !ipc->negfl;
959 			}
960 			ipc->active = 0;	/* out of range */
961 			return ipc->negfl;
962 		case A_RE:		/* Check for matching R.E. */
963 			if (match(ipc->ad2.rp, linebuf))
964 				ipc->active = 0;
965 			return !ipc->negfl;
966 		default:
967 			quit("Internal error");
968 		}
969 	}
970 	switch (ipc->ad1.type) {	/* Check first address */
971 	case A_NONE:			/* Everything matches */
972 		return !ipc->negfl;
973 	case A_DOL:			/* Only last line */
974 		if (dolflag)
975 			return !ipc->negfl;
976 		break;
977 	case A_LINE:			/* Check line number */
978 		if (ipc->ad1.line == lnum) {
979 			ipc->active = 1;	/* In range */
980 			return !ipc->negfl;
981 		}
982 		break;
983 	case A_RE:			/* Check R.E. */
984 		if (match(ipc->ad1.rp, linebuf)) {
985 			ipc->active = 1;	/* In range */
986 			return !ipc->negfl;
987 		}
988 		break;
989 	default:
990 		quit("Internal error");
991 	}
992 	return ipc->negfl;
993 }
994 
995 int
996 match(Reprog *pattern, Rune *buf)
997 {
998 	if (!pattern)
999 		return 0;
1000 	subexp[0].s.rsp = buf;
1001 	subexp[0].e.ep = 0;
1002 	if (rregexec(pattern, linebuf, subexp, MAXSUB)) {
1003 		loc1 = subexp[0].s.rsp;
1004 		loc2 = subexp[0].e.rep;
1005 		return 1;
1006 	}
1007 	loc1 = loc2 = 0;
1008 	return 0;
1009 }
1010 
1011 int
1012 substitute(SedCom *ipc)
1013 {
1014 	int len;
1015 
1016 	if(!match(ipc->re1, linebuf))
1017 		return 0;
1018 
1019 	/*
1020 	 * we have at least one match.  some patterns, e.g. '$' or '^', can
1021 	 * produce 0-length matches, so during a global substitute we must
1022 	 * bump to the character after a 0-length match to keep from looping.
1023 	 */
1024 	sflag = 1;
1025 	if(ipc->gfl == 0)			/* single substitution */
1026 		dosub(ipc->rhs);
1027 	else
1028 		do{				/* global substitution */
1029 			len = loc2 - loc1;	/* length of match */
1030 			dosub(ipc->rhs);	/* dosub moves loc2 */
1031 			if(*loc2 == 0)		/* end of string */
1032 				break;
1033 			if(len == 0)		/* zero-length R.E. match */
1034 				loc2++;		/* bump over 0-length match */
1035 			if(*loc2 == 0)		/* end of string */
1036 				break;
1037 		} while(match(ipc->re1, loc2));
1038 	return 1;
1039 }
1040 
1041 void
1042 dosub(Rune *rhsbuf)
1043 {
1044 	int c, n;
1045 	Rune *lp, *sp, *rp;
1046 
1047 	lp = linebuf;
1048 	sp = genbuf;
1049 	rp = rhsbuf;
1050 	while (lp < loc1)
1051 		*sp++ = *lp++;
1052 	while(c = *rp++) {
1053 		if (c == '&') {
1054 			sp = place(sp, loc1, loc2);
1055 			continue;
1056 		}
1057 		if (c == 0xFFFF && (c = *rp++) >= '1' && c < MAXSUB + '0') {
1058 			n = c-'0';
1059 			if (subexp[n].s.rsp && subexp[n].e.rep) {
1060 				sp = place(sp, subexp[n].s.rsp, subexp[n].e.rep);
1061 				continue;
1062 			}
1063 			else {
1064 				fprint(2, "sed: Invalid back reference \\%d\n",n);
1065 				errexit();
1066 			}
1067 		}
1068 		*sp++ = c;
1069 		if (sp >= &genbuf[LBSIZE])
1070 			fprint(2, "sed: Output line too long.\n");
1071 	}
1072 	lp = loc2;
1073 	loc2 = sp - genbuf + linebuf;
1074 	while (*sp++ = *lp++)
1075 		if (sp >= &genbuf[LBSIZE])
1076 			fprint(2, "sed: Output line too long.\n");
1077 	lp = linebuf;
1078 	sp = genbuf;
1079 	while (*lp++ = *sp++)
1080 		;
1081 	spend = lp - 1;
1082 }
1083 
1084 Rune *
1085 place(Rune *sp, Rune *l1, Rune *l2)
1086 {
1087 	while (l1 < l2) {
1088 		*sp++ = *l1++;
1089 		if (sp >= &genbuf[LBSIZE])
1090 			fprint(2, "sed: Output line too long.\n");
1091 	}
1092 	return sp;
1093 }
1094 
1095 char *
1096 trans(int c)
1097 {
1098 	static char buf[] = "\\x0000";
1099 	static char hex[] = "0123456789abcdef";
1100 
1101 	switch(c) {
1102 	case '\b':
1103 		return "\\b";
1104 	case '\n':
1105 		return "\\n";
1106 	case '\r':
1107 		return "\\r";
1108 	case '\t':
1109 		return "\\t";
1110 	case '\\':
1111 		return "\\\\";
1112 	}
1113 	buf[2] = hex[(c>>12)&0xF];
1114 	buf[3] = hex[(c>>8)&0xF];
1115 	buf[4] = hex[(c>>4)&0xF];
1116 	buf[5] = hex[c&0xF];
1117 	return buf;
1118 }
1119 
1120 void
1121 command(SedCom *ipc)
1122 {
1123 	int i, c;
1124 	char *ucp;
1125 	Rune *execp, *p1, *p2, *rp;
1126 
1127 	switch(ipc->command) {
1128 	case ACOM:
1129 		*aptr++ = ipc;
1130 		if(aptr >= abuf+MAXADDS)
1131 			quit("sed: Too many appends after line %ld\n",
1132 				(char *)lnum);
1133 		*aptr = 0;
1134 		break;
1135 	case CCOM:
1136 		delflag = 1;
1137 		if(ipc->active == 1) {
1138 			for(rp = ipc->text; *rp; rp++)
1139 				Bputrune(&fout, *rp);
1140 			Bputc(&fout, '\n');
1141 		}
1142 		break;
1143 	case DCOM:
1144 		delflag++;
1145 		break;
1146 	case CDCOM:
1147 		p1 = p2 = linebuf;
1148 		while(*p1 != '\n') {
1149 			if(*p1++ == 0) {
1150 				delflag++;
1151 				return;
1152 			}
1153 		}
1154 		p1++;
1155 		while(*p2++ = *p1++)
1156 			;
1157 		spend = p2 - 1;
1158 		jflag++;
1159 		break;
1160 	case EQCOM:
1161 		Bprint(&fout, "%ld\n", lnum);
1162 		break;
1163 	case GCOM:
1164 		p1 = linebuf;
1165 		p2 = holdsp;
1166 		while(*p1++ = *p2++)
1167 			;
1168 		spend = p1 - 1;
1169 		break;
1170 	case CGCOM:
1171 		*spend++ = '\n';
1172 		p1 = spend;
1173 		p2 = holdsp;
1174 		while(*p1++ = *p2++)
1175 			if(p1 >= lbend)
1176 				break;
1177 		spend = p1 - 1;
1178 		break;
1179 	case HCOM:
1180 		p1 = holdsp;
1181 		p2 = linebuf;
1182 		while(*p1++ = *p2++);
1183 		hspend = p1 - 1;
1184 		break;
1185 	case CHCOM:
1186 		*hspend++ = '\n';
1187 		p1 = hspend;
1188 		p2 = linebuf;
1189 		while(*p1++ = *p2++)
1190 			if(p1 >= hend)
1191 				break;
1192 		hspend = p1 - 1;
1193 		break;
1194 	case ICOM:
1195 		for(rp = ipc->text; *rp; rp++)
1196 			Bputrune(&fout, *rp);
1197 		Bputc(&fout, '\n');
1198 		break;
1199 	case BCOM:
1200 		jflag = 1;
1201 		break;
1202 	case LCOM:
1203 		c = 0;
1204 		for (i = 0, rp = linebuf; *rp; rp++) {
1205 			c = *rp;
1206 			if(c >= 0x20 && c < 0x7F && c != '\\') {
1207 				Bputc(&fout, c);
1208 				if(i++ > 71) {
1209 					Bprint(&fout, "\\\n");
1210 					i = 0;
1211 				}
1212 			} else {
1213 				for (ucp = trans(*rp); *ucp; ucp++){
1214 					c = *ucp;
1215 					Bputc(&fout, c);
1216 					if(i++ > 71) {
1217 						Bprint(&fout, "\\\n");
1218 						i = 0;
1219 					}
1220 				}
1221 			}
1222 		}
1223 		if(c == ' ')
1224 			Bprint(&fout, "\\n");
1225 		Bputc(&fout, '\n');
1226 		break;
1227 	case NCOM:
1228 		if(!nflag)
1229 			putline(&fout, linebuf, spend-linebuf);
1230 
1231 		if(aptr > abuf)
1232 			arout();
1233 		if((execp = gline(linebuf)) == 0) {
1234 			delflag = 1;
1235 			break;
1236 		}
1237 		spend = execp;
1238 		break;
1239 	case CNCOM:
1240 		if(aptr > abuf)
1241 			arout();
1242 		*spend++ = '\n';
1243 		if((execp = gline(spend)) == 0) {
1244 			delflag = 1;
1245 			break;
1246 		}
1247 		spend = execp;
1248 		break;
1249 	case PCOM:
1250 		putline(&fout, linebuf, spend-linebuf);
1251 		break;
1252 	case CPCOM:
1253 cpcom:
1254 		for(rp = linebuf; *rp && *rp != '\n'; rp++)
1255 			Bputc(&fout, *rp);
1256 		Bputc(&fout, '\n');
1257 		break;
1258 	case QCOM:
1259 		if(!nflag)
1260 			putline(&fout, linebuf, spend-linebuf);
1261 		if(aptr > abuf)
1262 			arout();
1263 		exits(0);
1264 	case RCOM:
1265 		*aptr++ = ipc;
1266 		if(aptr >= &abuf[MAXADDS])
1267 			quit("sed: Too many reads after line %ld\n",
1268 				(char *)lnum);
1269 		*aptr = 0;
1270 		break;
1271 	case SCOM:
1272 		i = substitute(ipc);
1273 		if(i && ipc->pfl)
1274 			if(ipc->pfl == 1)
1275 				putline(&fout, linebuf, spend-linebuf);
1276 			else
1277 				goto cpcom;
1278 		if(i && ipc->fcode)
1279 			goto wcom;
1280 		break;
1281 
1282 	case TCOM:
1283 		if(sflag) {
1284 			sflag = 0;
1285 			jflag = 1;
1286 		}
1287 		break;
1288 
1289 	case WCOM:
1290 wcom:
1291 		putline(ipc->fcode,linebuf, spend - linebuf);
1292 		break;
1293 	case XCOM:
1294 		p1 = linebuf;
1295 		p2 = genbuf;
1296 		while(*p2++ = *p1++)
1297 			;
1298 		p1 = holdsp;
1299 		p2 = linebuf;
1300 		while(*p2++ = *p1++)
1301 			;
1302 		spend = p2 - 1;
1303 		p1 = genbuf;
1304 		p2 = holdsp;
1305 		while(*p2++ = *p1++)
1306 			;
1307 		hspend = p2 - 1;
1308 		break;
1309 	case YCOM:
1310 		p1 = linebuf;
1311 		p2 = ipc->text;
1312 		for (i = *p2++;	*p1; p1++)
1313 			if (*p1 <= i)
1314 				*p1 = p2[*p1];
1315 		break;
1316 	}
1317 }
1318 
1319 void
1320 putline(Biobuf *bp, Rune *buf, int n)
1321 {
1322 	while (n--)
1323 		Bputrune(bp, *buf++);
1324 	Bputc(bp, '\n');
1325 }
1326 ecmp(Rune *a, Rune *b, int count)
1327 {
1328 	while(count--)
1329 		if(*a++ != *b++)
1330 			return 0;
1331 	return 1;
1332 }
1333 
1334 void
1335 arout(void)
1336 {
1337 	int	c;
1338 	char	*s;
1339 	char	buf[128];
1340 	Rune	*p1;
1341 	Biobuf	*fi;
1342 
1343 	for (aptr = abuf; *aptr; aptr++) {
1344 		if((*aptr)->command == ACOM) {
1345 			for(p1 = (*aptr)->text; *p1; p1++ )
1346 				Bputrune(&fout, *p1);
1347 			Bputc(&fout, '\n');
1348 		} else {
1349 			for(s = buf, p1 = (*aptr)->text; *p1; p1++)
1350 				s += runetochar(s, p1);
1351 			*s = '\0';
1352 			if((fi = Bopen(buf, OREAD)) == 0)
1353 				continue;
1354 			while((c = Bgetc(fi)) >= 0)
1355 				Bputc(&fout, c);
1356 			Bterm(fi);
1357 		}
1358 	}
1359 	aptr = abuf;
1360 	*aptr = 0;
1361 }
1362 
1363 void
1364 errexit(void)
1365 {
1366 	exits("error");
1367 }
1368 
1369 void
1370 quit(char *fmt, ...)
1371 {
1372 	char *p, *ep;
1373 	char msg[256];
1374 	va_list arg;
1375 
1376 	ep = msg + sizeof msg;
1377 	p = seprint(msg, ep, "sed: ");
1378 	va_start(arg, fmt);
1379 	p = vseprint(p, ep, fmt, arg);
1380 	va_end(arg);
1381 	p = seprint(p, ep, "\n");
1382 	write(2, msg, p - msg);
1383 	errexit();
1384 }
1385 
1386 Rune *
1387 gline(Rune *addr)
1388 {
1389 	long c;
1390 	Rune *p;
1391 	static long peekc = 0;
1392 
1393 	if (f == 0 && opendata() < 0)
1394 		return 0;
1395 	sflag = 0;
1396 	lnum++;
1397 /*	Bflush(&fout);********* dumped 4/30/92 - bobf****/
1398 	do {
1399 		p = addr;
1400 		for (c = (peekc? peekc: Bgetrune(f)); c >= 0; c = Bgetrune(f)) {
1401 			if (c == '\n') {
1402 				if ((peekc = Bgetrune(f)) < 0 && fhead == 0)
1403 					dolflag = 1;
1404 				*p = '\0';
1405 				return p;
1406 			}
1407 			if (c && p < lbend)
1408 				*p++ = c;
1409 		}
1410 		/* return partial final line, adding implicit newline */
1411 		if(p != addr) {
1412 			*p = '\0';
1413 			peekc = -1;
1414 			if (fhead == 0)
1415 				dolflag = 1;
1416 			return p;
1417 		}
1418 		peekc = 0;
1419 		Bterm(f);
1420 	} while (opendata() > 0);		/* Switch to next stream */
1421 	f = 0;
1422 	return 0;
1423 }
1424 
1425 /*
1426  * Data file input section - the intent is to transparently
1427  *	catenate all data input streams.
1428  */
1429 void
1430 enroll(char *filename)		/* Add a file to the input file cache */
1431 {
1432 	FileCache *fp;
1433 
1434 	if ((fp = (FileCache *)malloc(sizeof (FileCache))) == nil)
1435 		quit("Out of memory");
1436 	if (ftail == nil)
1437 		fhead = fp;
1438 	else
1439 		ftail->next = fp;
1440 	ftail = fp;
1441 	fp->next = nil;
1442 	fp->name = filename;		/* 0 => stdin */
1443 }
1444 
1445 int
1446 opendata(void)
1447 {
1448 	if (fhead == nil)
1449 		return -1;
1450 	if (fhead->name) {
1451 		if ((f = Bopen(fhead->name, OREAD)) == nil)
1452 			quit("Can't open %s", fhead->name);
1453 	} else {
1454 		Binit(&bstdin, 0, OREAD);
1455 		f = &bstdin;
1456 	}
1457 	fhead = fhead->next;
1458 	return 1;
1459 }
1460