xref: /plan9-contrib/sys/src/cmd/sed.c (revision 219b2ee8daee37f4aad58d63f21287faa8e4ffdc)
1 /*
2  * sed -- stream  editor
3  *
4  *
5  */
6 #include <u.h>
7 #include <libc.h>
8 #include <bio.h>
9 #include <regexp.h>
10 
11 enum {
12 	DEPTH		= 20,		/* max nesting depth of {} */
13 	MAXCMDS		= 512,		/* max sed commands */
14 	ADDSIZE		= 10000,	/* size of add & read buffer */
15 	MAXADDS		= 20,		/* max pending adds and reads */
16 	LBSIZE		= 8192,		/* input line size */
17 	LABSIZE		= 50,		/* max label name size */
18 	MAXSUB		= 10,		/* max number of sub reg exp */
19 	MAXFILES	= 120,		/* max output files */
20 };
21 	/* An address is a line #, a R.E., "$", a reference to the last
22 	 * R.E., or nothing.
23 	 */
24 typedef struct	{
25 	enum {
26 		A_NONE,
27 		A_DOL,
28 		A_LINE,
29 		A_RE,
30 		A_LAST,
31 	}type;
32 	union {
33 		long line;		/* Line # */
34 		Reprog *rp;		/* Compiled R.E. */
35 	};
36 } Addr;
37 
38 typedef struct	SEDCOM {
39 	Addr	ad1;			/* optional start address */
40 	Addr	ad2;			/* optional end address */
41 	union {
42 		Reprog	*re1;		/* compiled R.E. */
43 		Rune	*text;		/* added text or file name */
44 		struct	SEDCOM	*lb1;	/* destination command of branch */
45 	};
46 	Rune	*rhs;			/* Right-hand side of substitution */
47 	Biobuf*	fcode;			/* File ID for read and write */
48 	char	command;		/* command code -see below */
49 	char	gfl;			/* 'Global' flag for substitutions */
50 	char	pfl;			/* 'print' flag for substitutions */
51 	char	active;			/* 1 => data between start and end */
52 	char	negfl;			/* negation flag */
53 } SedCom;
54 
55 	/* Command Codes for field SedCom.command */
56 #define ACOM	01
57 #define BCOM	020
58 #define CCOM	02
59 #define	CDCOM	025
60 #define	CNCOM	022
61 #define COCOM	017
62 #define	CPCOM	023
63 #define DCOM	03
64 #define ECOM	015
65 #define EQCOM	013
66 #define FCOM	016
67 #define GCOM	027
68 #define CGCOM	030
69 #define HCOM	031
70 #define CHCOM	032
71 #define ICOM	04
72 #define LCOM	05
73 #define NCOM	012
74 #define PCOM	010
75 #define QCOM	011
76 #define RCOM	06
77 #define SCOM	07
78 #define TCOM	021
79 #define WCOM	014
80 #define	CWCOM	024
81 #define	YCOM	026
82 #define XCOM	033
83 
84 
85 typedef struct label {			/* Label symbol table */
86 	Rune	asc[9];			/* Label name */
87 	SedCom	*chain;
88 	SedCom	*address;		/* Command associated with label */
89 } Label;
90 
91 typedef	struct	FILE_CACHE {		/* Data file control block */
92 	struct FILE_CACHE *next;	/* Forward Link */
93 	char 		  *name;	/* Name of file */
94 } FileCache;
95 
96 SedCom pspace[MAXCMDS];			/* Command storage */
97 SedCom *pend = pspace+MAXCMDS;		/* End of command storage */
98 SedCom *rep = pspace;			/* Current fill point */
99 
100 Reprog	*lastre = 0;			/* Last regular expression */
101 Resub	subexp[MAXSUB];			/* sub-patterns of pattern match*/
102 
103 Rune	addspace[ADDSIZE];		/* Buffer for a, c, & i commands */
104 Rune	*addend = addspace+ADDSIZE;
105 
106 SedCom	*abuf[MAXADDS];			/* Queue of pending adds & reads */
107 SedCom	**aptr = abuf;
108 
109 struct {				/* Sed program input control block */
110 	enum PTYPE 			/* Either on command line or in file */
111 		{ P_ARG,
112 		  P_FILE
113 		} type;
114 	union PCTL {			/* Pointer to data */
115 		Biobuf	*bp;
116 		char	*curr;
117 	};
118 } prog;
119 
120 Rune	genbuf[LBSIZE];			/* Miscellaneous buffer */
121 
122 FileCache	*fhead = 0;		/* Head of File Cache Chain */
123 FileCache	*ftail = 0;		/* Tail of File Cache Chain */
124 
125 Rune	*loc1;				/* Start of pattern match */
126 Rune	*loc2;				/* End of pattern match */
127 Rune	seof;				/* Pattern delimiter char */
128 
129 Rune	linebuf[LBSIZE+1];		/* Input data buffer */
130 Rune	*lbend = linebuf+LBSIZE;	/* End of buffer */
131 Rune	*spend = linebuf;			/* End of input data */
132 Rune	*cp;				/* Current scan point in linebuf */
133 
134 Rune	holdsp[LBSIZE+1];		/* Hold buffer */
135 Rune	*hend = holdsp+LBSIZE;		/* End of hold buffer */
136 Rune	*hspend = holdsp;		/* End of hold data */
137 
138 int	nflag;				/* Command line flags */
139 int	gflag;
140 
141 int	dolflag;			/* Set when at true EOF */
142 int	sflag;				/* Set when substitution done */
143 int	jflag;				/* Set when jump required */
144 int	delflag;			/* Delete current line when set */
145 
146 long	lnum = 0;			/* Input line count */
147 
148 char	fname[MAXFILES][40];		/* File name cache */
149 Biobuf	*fcode[MAXFILES];		/* File ID cache */
150 int	nfiles = 0;			/* Cache fill point */
151 
152 Biobuf	fout;				/* Output stream */
153 Biobuf	stdin;				/* Default input */
154 Biobuf*	f = 0;				/* Input data */
155 
156 Label	ltab[LABSIZE];			/* Label name symbol table */
157 Label	*labend = ltab+LABSIZE;		/* End of label table */
158 Label	*lab = ltab+1;			/* Current Fill point */
159 
160 int	depth = 0;			/* {} stack pointer */
161 
162 Rune	bad;				/* Dummy err ptr reference */
163 Rune	*badp = &bad;
164 
165 
166 char	CGMES[]	 = 	"Command garbled: %S";
167 char	TMMES[]	 = 	"Too much text: %S";
168 char	LTL[]	 = 	"Label too long: %S";
169 char	AD0MES[] =	"No addresses allowed: %S";
170 char	AD1MES[] =	"Only one address allowed: %S";
171 
172 void	address(Addr *);
173 void	arout(void);
174 int	cmp(char *, char *);
175 int	rcmp(Rune *, Rune *);
176 void	command(SedCom *);
177 Reprog	*compile(void);
178 Rune	*compsub(Rune *, Rune *);
179 void	dechain(void);
180 void	dosub(Rune *);
181 int	ecmp(Rune *, Rune *, int);
182 void	enroll(char *);
183 void	errexit(void);
184 int	executable(SedCom *);
185 void	execute(void);
186 void	fcomp(void);
187 long	getrune(void);
188 Rune	*gline(Rune *);
189 int	match(Reprog *, Rune *);
190 void	newfile(enum PTYPE, char *);
191 int 	opendata(void);
192 Biobuf	*open_file(char *);
193 Rune	*place(Rune *, Rune *, Rune *);
194 void	quit(char *, char *);
195 int	rline(Rune *, Rune *);
196 Label	*search(Label *);
197 int	substitute(SedCom *);
198 char	*text(char *);
199 Rune	*stext(Rune *, Rune *);
200 int	ycomp(SedCom *);
201 char *	trans(int c);
202 void	putline(Biobuf *bp, Rune *buf, int n);
203 
204 void
205 main(int argc, char **argv)
206 {
207 	int	compfl;
208 
209 	lnum = 0;
210 	Binit(&fout, 1, OWRITE);
211 	fcode[nfiles++] = &fout;
212 	compfl = 0;
213 
214 	if(argc == 1)
215 		exits(0);
216 	ARGBEGIN{
217 		case 'n':
218 			nflag++;
219 			continue;
220 		case 'f':
221 			if(argc <= 1)
222 				quit("no pattern-file", 0);
223 			newfile(P_FILE, ARGF());
224 			fcomp();
225 			compfl = 1;
226 			continue;
227 		case 'e':
228 			if (argc <= 1)
229 				quit("missing pattern", 0);
230 			newfile(P_ARG, ARGF());
231 			fcomp();
232 			compfl = 1;
233 			continue;
234 		case 'g':
235 			gflag++;
236 			continue;
237 		default:
238 			fprint(2, "sed: Unknown flag: %c\n", ARGC());
239 			continue;
240 	} ARGEND
241 
242 	if(compfl == 0) {
243 		if (--argc < 0)
244 			quit("missing pattern", 0);
245 		newfile(P_ARG, *argv++);
246 		fcomp();
247 	}
248 
249 	if(depth)
250 		quit("Too many {'s", 0);
251 
252 	ltab[0].address = rep;
253 
254 	dechain();
255 
256 	if(argc <= 0)
257 		enroll(0);		/* Add stdin to cache */
258 	else while(--argc >= 0) {
259 		enroll(*argv++);
260 	}
261 	execute();
262 	exits(0);
263 }
264 void
265 fcomp(void)
266 {
267 	Rune	*tp;
268 	SedCom	*pt, *pt1;
269 	int	i;
270 	Label	*lpt;
271 
272 	static Rune	*p = addspace;
273 	static SedCom	**cmpend[DEPTH];	/* stack of {} operations */
274 
275 	while (rline(linebuf, lbend) >= 0) {
276 		cp = linebuf;
277 comploop:
278 		while(*cp == ' ' || *cp == '\t')
279 			cp++;
280 		if(*cp == '\0' || *cp == '#')
281 			continue;
282 		if(*cp == ';') {
283 			cp++;
284 			goto comploop;
285 		}
286 
287 		address(&rep->ad1);
288 		if (rep->ad1.type != A_NONE) {
289 			if (rep->ad1.type == A_LAST) {
290 				if (!lastre)
291 					quit("First RE may not be null", 0);
292 				rep->ad1.type = A_RE;
293 				rep->ad1.rp = lastre;
294 			}
295 			if(*cp == ',' || *cp == ';') {
296 				cp++;
297 				address(&rep->ad2);
298 				if (rep->ad2.type == A_LAST) {
299 					rep->ad1.type = A_RE;
300 					rep->ad2.rp = lastre;
301 				}
302 			} else
303 				rep->ad2.type = A_NONE;
304 		}
305 		while(*cp == ' ' || *cp == '\t')
306 			cp++;
307 
308 swit:
309 		switch(*cp++) {
310 
311 			default:
312 				quit("Unrecognized command: %S", (char *)linebuf);
313 
314 			case '!':
315 				rep->negfl = 1;
316 				goto swit;
317 
318 			case '{':
319 				rep->command = BCOM;
320 				rep->negfl = !(rep->negfl);
321 				cmpend[depth++] = &rep->lb1;
322 				if(++rep >= pend)
323 					quit("Too many commands: %S", (char *) linebuf);
324 				if(*cp == '\0')	continue;
325 				goto comploop;
326 
327 			case '}':
328 				if(rep->ad1.type != A_NONE)
329 					quit(AD0MES, (char *) linebuf);
330 				if(--depth < 0)
331 					quit("Too many }'s", 0);
332 				*cmpend[depth] = rep;
333 				if(*cp == 0)	continue;
334 				goto comploop;
335 
336 			case '=':
337 				rep->command = EQCOM;
338 				if(rep->ad2.type != A_NONE)
339 					quit(AD1MES, (char *) linebuf);
340 				break;
341 
342 			case ':':
343 				if(rep->ad1.type != A_NONE)
344 					quit(AD0MES, (char *) linebuf);
345 
346 				while(*cp == ' ')
347 					cp++;
348 				tp = lab->asc;
349 				while (*cp && *cp != ';' && *cp != ' ' && *cp != '\t' && *cp != '#') {
350 					*tp++ = *cp++;
351 					if(tp >= &(lab->asc[8]))
352 						quit(LTL, (char *) linebuf);
353 				}
354 				*tp = '\0';
355 
356 				if(lpt = search(lab)) {
357 					if(lpt->address)
358 						quit("Duplicate labels: %S", (char *) linebuf);
359 				} else {
360 					lab->chain = 0;
361 					lpt = lab;
362 					if(++lab >= labend)
363 						quit("Too many labels: %S", (char *) linebuf);
364 				}
365 				lpt->address = rep;
366 				if (*cp == '#')
367 					continue;
368 				rep--;			/* reuse this slot */
369 				break;
370 
371 			case 'a':
372 				rep->command = ACOM;
373 				if(rep->ad2.type != A_NONE)
374 					quit(AD1MES, (char *) linebuf);
375 				if(*cp == '\\')	cp++;
376 				if(*cp++ != '\n')
377 					quit(CGMES, (char *) linebuf);
378 				rep->text = p;
379 				p = stext(p, addend);
380 				break;
381 			case 'c':
382 				rep->command = CCOM;
383 				if(*cp == '\\')	cp++;
384 				if(*cp++ != '\n')
385 					quit(CGMES, (char *) linebuf);
386 				rep->text = p;
387 				p = stext(p, addend);
388 				break;
389 			case 'i':
390 				rep->command = ICOM;
391 				if(rep->ad2.type != A_NONE)
392 					quit(AD1MES, (char *) linebuf);
393 				if(*cp == '\\')	cp++;
394 				if(*cp++ != '\n')
395 					quit(CGMES, (char *) linebuf);
396 				rep->text = p;
397 				p = stext(p, addend);
398 				break;
399 
400 			case 'g':
401 				rep->command = GCOM;
402 				break;
403 
404 			case 'G':
405 				rep->command = CGCOM;
406 				break;
407 
408 			case 'h':
409 				rep->command = HCOM;
410 				break;
411 
412 			case 'H':
413 				rep->command = CHCOM;
414 				break;
415 
416 			case 't':
417 				rep->command = TCOM;
418 				goto jtcommon;
419 
420 			case 'b':
421 				rep->command = BCOM;
422 jtcommon:
423 				while(*cp == ' ')cp++;
424 				if(*cp == '\0') {
425 					if(pt = ltab[0].chain) {
426 						while(pt1 = pt->lb1)
427 							pt = pt1;
428 						pt->lb1 = rep;
429 					} else
430 						ltab[0].chain = rep;
431 					break;
432 				}
433 				tp = lab->asc;
434 				while((*tp++ = *cp++))
435 					if(tp >= &(lab->asc[8]))
436 						quit(LTL, (char *) linebuf);
437 				cp--;
438 				tp[-1] = '\0';
439 
440 				if(lpt = search(lab)) {
441 					if(lpt->address) {
442 						rep->lb1 = lpt->address;
443 					} else {
444 						pt = lpt->chain;
445 						while(pt1 = pt->lb1)
446 							pt = pt1;
447 						pt->lb1 = rep;
448 					}
449 				} else {
450 					lab->chain = rep;
451 					lab->address = 0;
452 					if(++lab >= labend)
453 						quit("Too many labels: %S",
454 							(char *) linebuf);
455 				}
456 				break;
457 
458 			case 'n':
459 				rep->command = NCOM;
460 				break;
461 
462 			case 'N':
463 				rep->command = CNCOM;
464 				break;
465 
466 			case 'p':
467 				rep->command = PCOM;
468 				break;
469 
470 			case 'P':
471 				rep->command = CPCOM;
472 				break;
473 
474 			case 'r':
475 				rep->command = RCOM;
476 				if(rep->ad2.type != A_NONE)
477 					quit(AD1MES, (char *) linebuf);
478 				if(*cp++ != ' ')
479 					quit(CGMES, (char *) linebuf);
480 				rep->text = p;
481 				p = stext(p, addend);
482 				break;
483 
484 			case 'd':
485 				rep->command = DCOM;
486 				break;
487 
488 			case 'D':
489 				rep->command = CDCOM;
490 				rep->lb1 = pspace;
491 				break;
492 
493 			case 'q':
494 				rep->command = QCOM;
495 				if(rep->ad2.type != A_NONE)
496 					quit(AD1MES, (char *) linebuf);
497 				break;
498 
499 			case 'l':
500 				rep->command = LCOM;
501 				break;
502 
503 			case 's':
504 				rep->command = SCOM;
505 				seof = *cp++;
506 				if ((rep->re1 = compile()) == 0) {
507 					if(!lastre)
508 						quit("First RE may not be null.", 0);
509 					rep->re1 = lastre;
510 				}
511 				rep->rhs = p;
512 				if((p = compsub(p, addend)) == 0)
513 					quit(CGMES, (char *) linebuf);
514 				if(*cp == 'g') {
515 					cp++;
516 					rep->gfl++;
517 				} else if(gflag)
518 					rep->gfl++;
519 
520 				if(*cp == 'p') {
521 					cp++;
522 					rep->pfl = 1;
523 				}
524 
525 				if(*cp == 'P') {
526 					cp++;
527 					rep->pfl = 2;
528 				}
529 
530 				if(*cp == 'w') {
531 					cp++;
532 					if(*cp++ !=  ' ')
533 						quit(CGMES, (char *) linebuf);
534 					text(fname[nfiles]);
535 					for(i = nfiles - 1; i >= 0; i--)
536 						if(cmp(fname[nfiles],fname[i]) == 0) {
537 							rep->fcode = fcode[i];
538 							goto done;
539 						}
540 					if(nfiles >= MAXFILES)
541 						quit("Too many files in w commands 1", 0);
542 					rep->fcode = open_file(fname[nfiles]);
543 				}
544 				break;
545 
546 			case 'w':
547 				rep->command = WCOM;
548 				if(*cp++ != ' ')
549 					quit(CGMES, (char *) linebuf);
550 				text(fname[nfiles]);
551 				for(i = nfiles - 1; i >= 0; i--)
552 					if(cmp(fname[nfiles], fname[i]) == 0) {
553 						rep->fcode = fcode[i];
554 						goto done;
555 					}
556 				if(nfiles >= MAXFILES){
557 					fprint(2, "sed: Too many files in w commands 2 \n");
558 					fprint(2, "nfiles = %d; MAXF = %d\n", nfiles, MAXFILES);
559 					errexit();
560 				}
561 				rep->fcode = open_file(fname[nfiles]);
562 				break;
563 
564 			case 'x':
565 				rep->command = XCOM;
566 				break;
567 
568 			case 'y':
569 				rep->command = YCOM;
570 				seof = *cp++;
571 				if (ycomp(rep) == 0)
572 					quit(CGMES, (char *) linebuf);
573 				break;
574 
575 		}
576 done:
577 		if(++rep >= pend)
578 			quit("Too many commands, last: %S", (char *) linebuf);
579 
580 		if(*cp++ != '\0') {
581 			if(cp[-1] == ';')
582 				goto comploop;
583 			quit(CGMES, (char *) linebuf);
584 		}
585 
586 	}
587 }
588 
589 Biobuf *
590 open_file(char *name)
591 {
592 	Biobuf *bp;
593 	int fd;
594 
595 	if ((bp = malloc(sizeof(Biobuf))) == 0)
596 		quit("Out of memory", 0);
597 	if ((fd = open(name, OWRITE)) < 0 &&
598 		(fd = create(name, OWRITE, 0666)) < 0)
599 			quit("Cannot create %s", name);
600 	Binit(bp, fd, OWRITE);
601 	Bseek(bp, 0, 2);
602 	fcode[nfiles++] = bp;
603 	return bp;
604 }
605 
606 Rune	*
607 compsub(Rune *rhs, Rune *end)
608 {
609 	Rune	r;
610 
611 	while ((r = *cp++) != '\0') {
612 		if(r == '\\') {
613 			if (rhs < end)
614 				*rhs++ = 0xFFFF;
615 			else
616 				return 0;
617 			r = *cp++;
618 			if(r == 'n')
619 				r = '\n';
620 		} else {
621 			if(r == seof) {
622 				if (rhs < end)
623 					*rhs++ = '\0';
624 				else
625 					return 0;
626 				return rhs;
627 			}
628 		}
629 		if (rhs < end)
630 			*rhs++ = r;
631 		else
632 			return 0;
633 
634 	}
635 	return 0;
636 }
637 
638 Reprog *
639 compile(void)
640 {
641 	Rune c;
642 	char *ep;
643 	char expbuf[512];
644 
645 	if((c = *cp++) == seof)		/* '//' */
646 		return 0;
647 	ep = expbuf;
648 	do {
649 		if (c == 0 || c == '\n')
650 			quit(TMMES, (char *) linebuf);
651 		if (c == '\\') {
652 			if (ep >= expbuf+sizeof(expbuf))
653 				quit(TMMES, (char *) linebuf);
654 			ep += runetochar(ep, &c);
655 			if ((c = *cp++) == 'n')
656 				c = '\n';
657 		}
658 		if (ep >= expbuf+sizeof(expbuf))
659 			quit(TMMES, (char *) linebuf);
660 		ep += runetochar(ep, &c);
661 	} while ((c = *cp++) != seof);
662 	*ep = 0;
663 	return lastre = regcomp(expbuf);
664 }
665 
666 void
667 regerror(char *s)
668 {
669 	USED(s);
670 	quit(CGMES, (char *) linebuf);
671 }
672 
673 void
674 newfile(enum PTYPE type, char *name)
675 {
676 	if (type == P_ARG)
677 		prog.curr = name;
678 	else if ((prog.bp = Bopen(name, OREAD)) == 0)
679 		quit("Cannot open pattern-file: %s\n", name);
680 	prog.type = type;
681 }
682 
683 int
684 rline(Rune *buf, Rune *end)
685 {
686 	long c;
687 	Rune r;
688 
689 	while ((c = getrune()) >= 0) {
690 		r = c;
691 		if (r == '\\') {
692 			if (buf <= end)
693 				*buf++ = r;
694 			if ((c = getrune()) < 0)
695 				break;
696 			r = c;
697 		} else if (r == '\n') {
698 			*buf = '\0';
699 			return(1);
700 		}
701 		if (buf <= end)
702 			*buf++ = r;
703 	}
704 	*buf = '\0';
705 	return(-1);
706 }
707 
708 long
709 getrune(void)
710 {
711 	char *p;
712 	long c;
713 	Rune r;
714 
715 	if (prog.type == P_ARG) {
716 		if ((p = prog.curr) != 0) {
717 			if (*p) {
718 				prog.curr += chartorune(&r, p);
719 				c = r;
720 			} else {
721 				c = '\n';	/* fake an end-of-line */
722 				prog.curr = 0;
723 			}
724 		} else
725 			c = -1;
726 	} else if ((c = Bgetrune(prog.bp)) < 0)
727 			Bterm(prog.bp);
728 	return c;
729 }
730 
731 void
732 address(Addr *ap)
733 {
734 	int c;
735 	long	lno;
736 
737 	if((c = *cp++) == '$')
738 		ap->type = A_DOL;
739 	else if(c == '/') {
740 		seof = c;
741 		if (ap->rp = compile())
742 			ap->type = A_RE;
743 		else
744 			ap->type = A_LAST;
745 	}
746 	else if (c >= '0' && c <= '9') {
747 		lno = c-'0';
748 		while ((c = *cp) >= '0' && c <= '9')
749 			lno = lno*10 + *cp++-'0';
750 		if(!lno)
751 			quit("line number 0 is illegal",0);
752 		ap->type = A_LINE;
753 		ap->line = lno;
754 	}
755 	else {
756 		cp--;
757 		ap->type = A_NONE;
758 	}
759 }
760 
761 cmp(char *a, char *b)		/* compare characters */
762 {
763 	while(*a == *b++)
764 		if (*a == '\0')
765 			return(0);
766 		else a++;
767 	return(1);
768 }
769 rcmp(Rune *a, Rune *b)		/* compare runes */
770 {
771 	while(*a == *b++)
772 		if (*a == '\0')
773 			return(0);
774 		else a++;
775 	return(1);
776 }
777 
778 char *
779 text(char *p)		/* extract character string */
780 {
781 	Rune	r;
782 
783 	while(*cp == '\t' || *cp == ' ')
784 			cp++;
785 	while (*cp) {
786 		if ((r = *cp++) == '\\')
787 			if ((r = *cp++) == 0)
788 				break;;
789 		if (r == '\n')
790 			while (*cp == '\t' || *cp == ' ')
791 					cp++;
792 		p += runetochar(p, &r);
793 	}
794 	*p++ = '\0';
795 	return p;
796 }
797 
798 Rune *
799 stext(Rune *p, Rune *end)		/* extract rune string */
800 {
801 	while(*cp == '\t' || *cp == ' ')
802 		cp++;
803 	while (*cp) {
804 		if (*cp == '\\')
805 			if (*++cp == 0)
806 				break;
807 		if (p >= end-1)
808 			quit(TMMES, (char *) linebuf);
809 		if ((*p++ = *cp++) == '\n')
810 			while(*cp == '\t' || *cp == ' ')
811 					cp++;
812 	}
813 	*p++ = 0;
814 	return p;
815 }
816 
817 
818 Label *
819 search (Label *ptr)
820 {
821 	Label	*rp;
822 
823 	for (rp = ltab; rp < ptr; rp++)
824 		if(rcmp(rp->asc, ptr->asc) == 0)
825 			return(rp);
826 	return(0);
827 }
828 
829 void
830 dechain(void)
831 {
832 	Label	*lptr;
833 	SedCom	*rptr, *trptr;
834 
835 	for(lptr = ltab; lptr < lab; lptr++) {
836 
837 		if(lptr->address == 0)
838 			quit("Undefined label: %S", (char *) lptr->asc);
839 
840 		if(lptr->chain) {
841 			rptr = lptr->chain;
842 			while(trptr = rptr->lb1) {
843 				rptr->lb1 = lptr->address;
844 				rptr = trptr;
845 			}
846 			rptr->lb1 = lptr->address;
847 		}
848 	}
849 }
850 
851 int
852 ycomp(SedCom *r)
853 {
854 	int 	i;
855 	Rune	*rp;
856 	Rune	c, *tsp, highc;
857 	Rune	*sp;
858 
859 	highc = 0;
860 	for(tsp = cp; *tsp != seof; tsp++) {
861 		if(*tsp == '\\')
862 			tsp++;
863 		if(*tsp == '\n' || *tsp == '\0')
864 			return(0);
865 		if (*tsp > highc) highc = *tsp;
866 	}
867 	tsp++;
868 	if ((rp = r->text = (Rune *) malloc(sizeof(Rune)*(highc+2))) == 0)
869 		quit("Out of memory", 0);
870 	*rp++ = highc;				/* save upper bound */
871 	for (i = 0; i <= highc; i++)
872 		rp[i] = i;
873 	sp = cp;
874 	while((c = *sp++) != seof) {
875 		if(c == '\\' && *sp == 'n') {
876 			sp++;
877 			c = '\n';
878 		}
879 		if((rp[c] = *tsp++) == '\\' && *tsp == 'n') {
880 			rp[c] = '\n';
881 			tsp++;
882 		}
883 		if(rp[c] == seof || rp[c] == '\0') {
884 			free(r->re1);
885 			r->re1 = 0;
886 			return(0);
887 		}
888 	}
889 	if(*tsp != seof) {
890 		free(r->re1);
891 		r->re1 = 0;
892 		return(0);
893 	}
894 	cp = tsp+1;
895 	return(1);
896 }
897 
898 void
899 execute(void)
900 {
901 	SedCom	*ipc;
902 
903 	while (spend = gline(linebuf)){
904 		for(ipc = pspace; ipc->command; ) {
905 			if (!executable(ipc)) {
906 				ipc++;
907 				continue;
908 			}
909 			command(ipc);
910 
911 			if(ipc->ad2.type == A_RE && match(ipc->ad2.rp, linebuf))
912 				ipc->active = 0;
913 
914 			if(delflag)
915 				break;
916 
917 			if(jflag) {
918 				jflag = 0;
919 				if((ipc = ipc->lb1) == 0)
920 					break;
921 			} else
922 				ipc++;
923 
924 		}
925 		if(!nflag && !delflag)
926 			putline(&fout, linebuf, spend-linebuf);
927 		if(aptr > abuf) {
928 			arout();
929 		}
930 		delflag = 0;
931 	}
932 }
933 	/* determine if a statement should be applied to an input line */
934 int
935 executable(SedCom *ipc)
936 {
937 	if (ipc->active) {	/* Addr1 satisfied - accept until Addr2 */
938 		if (ipc->active == 1)		/* Second line */
939 			ipc->active = 2;
940 		switch(ipc->ad2.type) {
941 			case A_NONE:	/* No second addr; use first */
942 				ipc->active = 0;
943 				break;
944 			case A_DOL:	/* Accept everything */
945 				return !ipc->negfl;
946 			case A_LINE:	/* Line at end of range? */
947 				if (lnum <= ipc->ad2.line) {
948 					if (ipc->ad2.line == lnum)
949 						ipc->active = 0;
950 					return !ipc->negfl;
951 				}
952 				ipc->active = 0;	/* out of range */
953 				return ipc->negfl;
954 			case A_RE:	/* Check for matching R.E. */
955 				if (match(ipc->ad2.rp, linebuf))
956 					ipc->active = 0;
957 				return !ipc->negfl;
958 			default:		/* internal error */
959 				quit("Internal error", 0);
960 		}
961 	}
962 	switch (ipc->ad1.type) {	/* Check first address */
963 		case A_NONE:			/* Everything matches */
964 			return !ipc->negfl;
965 		case A_DOL:			/* Only last line */
966 			if (dolflag)
967 				return !ipc->negfl;
968 			break;
969 		case A_LINE:			/* Check line number */
970 			if (ipc->ad1.line == lnum) {
971 				ipc->active = 1;	/* In range */
972 				return !ipc->negfl;
973 			}
974 			break;
975 		case A_RE:			/* Check R.E. */
976 			if (match(ipc->ad1.rp, linebuf)) {
977 				ipc->active = 1;	/* In range */
978 				return !ipc->negfl;
979 			}
980 			break;
981 		default:
982 			quit("Internal error", 0);
983 	}
984 	return ipc->negfl;
985 }
986 match(Reprog *pattern, Rune *buf)
987 {
988 	if (!pattern)
989 		return 0;
990 	subexp[0].rsp = buf;
991 	subexp[0].ep = 0;
992 	if (rregexec(pattern, linebuf, subexp, MAXSUB)) {
993 		loc1 = subexp[0].rsp;
994 		loc2 = subexp[0].rep;
995 		return 1;
996 	}
997 	loc1 = loc2 = 0;
998 	return 0;
999 }
1000 substitute(SedCom *ipc)
1001 {
1002 	if(match(ipc->re1, linebuf)) {
1003 		sflag = 1;
1004 		dosub(ipc->rhs);
1005 		if(!ipc->gfl)
1006 			return 1;
1007 		while (match(ipc->re1, loc2)) {
1008 			if (loc2-loc1 == 0){	/* NULL R.E. match */
1009 				if (*loc2++ == 0)
1010 					break;
1011 			} else {
1012 				dosub(ipc->rhs);
1013 				if(*loc2 == 0)
1014 					break;
1015 			}
1016 		}
1017 		return 1;
1018 	}
1019 	return 0;
1020 }
1021 
1022 void
1023 dosub(Rune *rhsbuf)
1024 {
1025 	Rune *lp, *sp;
1026 	Rune *rp;
1027 	int c, n;
1028 
1029 	lp = linebuf;
1030 	sp = genbuf;
1031 	rp = rhsbuf;
1032 	while (lp < loc1)
1033 		*sp++ = *lp++;
1034 	while(c = *rp++) {
1035 		if (c == '&') {
1036 			sp = place(sp, loc1, loc2);
1037 			continue;
1038 		}
1039 		if (c == 0xFFFF && (c = *rp++) >= '1' && c <= MAXSUB+'0') {
1040 			n = c-'0';
1041 			if (subexp[n].rsp && subexp[n].rep) {
1042 				sp = place(sp, subexp[n].rsp, subexp[n].rep);
1043 				continue;
1044 			}
1045 			else {
1046 				fprint(2, "sed: Invalid back reference \\%d\n",n);
1047 				errexit();
1048 			}
1049 		}
1050 		*sp++ = c;
1051 		if (sp >= &genbuf[LBSIZE])
1052 			fprint(2, "sed: Output line too long.\n");
1053 	}
1054 	lp = loc2;
1055 	loc2 = sp - genbuf + linebuf;
1056 	while (*sp++ = *lp++)
1057 		if (sp >= &genbuf[LBSIZE])
1058 			fprint(2, "sed: Output line too long.\n");
1059 	lp = linebuf;
1060 	sp = genbuf;
1061 	while (*lp++ = *sp++)
1062 		;
1063 	spend = lp-1;
1064 }
1065 
1066 Rune *
1067 place(Rune *sp, Rune *l1, Rune *l2)
1068 {
1069 	while (l1 < l2) {
1070 		*sp++ = *l1++;
1071 		if (sp >= &genbuf[LBSIZE])
1072 			fprint(2, "sed: Output line too long.\n");
1073 	}
1074 	return(sp);
1075 }
1076 
1077 char *
1078 trans(int c)
1079 {
1080 	static char buf[] = "\\x0000";
1081 	static char hex[] = "0123456789abcdef";
1082 
1083 	switch(c) {
1084 		case '\b':
1085 			return "\\b";
1086 		case '\n':
1087 			return "\\n";
1088 		case '\r':
1089 			return "\\r";
1090 		case '\t':
1091 			return "\\t";
1092 		case '\\':
1093 			return "\\\\";
1094 	}
1095 	buf[2] = hex[(c>>12)&0xF];
1096 	buf[3] = hex[(c>>8)&0xF];
1097 	buf[4] = hex[(c>>4)&0xF];
1098 	buf[5] = hex[c&0xF];
1099 	return buf;
1100 }
1101 
1102 void
1103 command(SedCom *ipc)
1104 {
1105 	int	i, c;
1106 	Rune	*p1, *p2;
1107 	char	*ucp;
1108 	Rune	*rp;
1109 	Rune	*execp;
1110 
1111 	switch(ipc->command) {
1112 
1113 		case ACOM:
1114 			*aptr++ = ipc;
1115 			if(aptr >= abuf+MAXADDS) {
1116 				quit("sed: Too many appends after line %ld\n",
1117 					(char *) lnum);
1118 			}
1119 			*aptr = 0;
1120 			break;
1121 		case CCOM:
1122 			delflag = 1;
1123 			if(ipc->active == 1) {
1124 				for(rp = ipc->text; *rp; rp++)
1125 					Bputrune(&fout, *rp);
1126 				Bputc(&fout, '\n');
1127 			}
1128 			break;
1129 		case DCOM:
1130 			delflag++;
1131 			break;
1132 		case CDCOM:
1133 			p1 = p2 = linebuf;
1134 			while(*p1 != '\n') {
1135 				if(*p1++ == 0) {
1136 					delflag++;
1137 					return;
1138 				}
1139 			}
1140 			p1++;
1141 			while(*p2++ = *p1++)
1142 				;
1143 			spend = p2-1;
1144 			jflag++;
1145 			break;
1146 		case EQCOM:
1147 			Bprint(&fout, "%ld\n", lnum);
1148 			break;
1149 		case GCOM:
1150 			p1 = linebuf;
1151 			p2 = holdsp;
1152 			while(*p1++ = *p2++)
1153 				;
1154 			spend = p1-1;
1155 			break;
1156 		case CGCOM:
1157 			*spend++ = '\n';
1158 			p1 = spend;
1159 			p2 = holdsp;
1160 			while(*p1++ = *p2++)
1161 				if(p1 >= lbend)
1162 					break;
1163 			spend = p1-1;
1164 			break;
1165 		case HCOM:
1166 			p1 = holdsp;
1167 			p2 = linebuf;
1168 			while(*p1++ = *p2++);
1169 			hspend = p1-1;
1170 			break;
1171 		case CHCOM:
1172 			*hspend++ = '\n';
1173 			p1 = hspend;
1174 			p2 = linebuf;
1175 			while(*p1++ = *p2++)
1176 				if(p1 >= hend)
1177 					break;
1178 			hspend = p1-1;
1179 			break;
1180 		case ICOM:
1181 			for(rp = ipc->text; *rp; rp++)
1182 				Bputrune(&fout, *rp);
1183 			Bputc(&fout, '\n');
1184 			break;
1185 		case BCOM:
1186 			jflag = 1;
1187 			break;
1188 		case LCOM:
1189 			c = 0;
1190 			for (i = 0, rp = linebuf; *rp; rp++) {
1191 				c = *rp;
1192 				if(c >= 0x20 && c < 0x7F && c != '\\') {
1193 					Bputc(&fout, c);
1194 					if(i++ > 71) {
1195 						Bprint(&fout, "\\\n");
1196 						i = 0;
1197 					}
1198 				} else {
1199 					for (ucp = trans(*rp); *ucp; ucp++){
1200 						c = *ucp;
1201 						Bputc(&fout, c);
1202 						if(i++ > 71) {
1203 							Bprint(&fout, "\\\n");
1204 							i = 0;
1205 						}
1206 					}
1207 				}
1208 			}
1209 			if(c == ' ')
1210 				Bprint(&fout, "\\n");
1211 			Bputc(&fout, '\n');
1212 			break;
1213 		case NCOM:
1214 			if(!nflag)
1215 				putline(&fout, linebuf, spend-linebuf);
1216 
1217 			if(aptr > abuf)
1218 				arout();
1219 			if((execp = gline(linebuf)) == 0) {
1220 				delflag = 1;
1221 				break;
1222 			}
1223 			spend = execp;
1224 			break;
1225 		case CNCOM:
1226 			if(aptr > abuf)
1227 				arout();
1228 			*spend++ = '\n';
1229 			if((execp = gline(spend)) == 0) {
1230 				delflag = 1;
1231 				break;
1232 			}
1233 			spend = execp;
1234 			break;
1235 		case PCOM:
1236 			putline(&fout, linebuf, spend-linebuf);
1237 			break;
1238 		case CPCOM:
1239 	cpcom:
1240 			for(rp = linebuf; *rp && *rp != '\n'; rp++)
1241 				Bputc(&fout, *rp);
1242 			Bputc(&fout, '\n');
1243 			break;
1244 		case QCOM:
1245 			if(!nflag)
1246 				putline(&fout, linebuf, spend-linebuf);
1247 			if(aptr > abuf)
1248 				arout();
1249 			exits(0);
1250 		case RCOM:
1251 			*aptr++ = ipc;
1252 			if(aptr >= &abuf[MAXADDS])
1253 				quit("sed: Too many reads after line %ld\n",
1254 					(char *) lnum);
1255 			*aptr = 0;
1256 			break;
1257 		case SCOM:
1258 			i = substitute(ipc);
1259 			if(i && ipc->pfl)
1260 				if(ipc->pfl == 1)
1261 					putline(&fout, linebuf, spend-linebuf);
1262 				else
1263 					goto cpcom;
1264 			if(i && ipc->fcode)
1265 				goto wcom;
1266 			break;
1267 
1268 		case TCOM:
1269 			if(sflag == 0)	break;
1270 			sflag = 0;
1271 			jflag = 1;
1272 			break;
1273 
1274 		wcom:
1275 		case WCOM:
1276 			putline(ipc->fcode,linebuf, spend-linebuf);
1277 			break;
1278 		case XCOM:
1279 			p1 = linebuf;
1280 			p2 = genbuf;
1281 			while(*p2++ = *p1++);
1282 			p1 = holdsp;
1283 			p2 = linebuf;
1284 			while(*p2++ = *p1++);
1285 			spend = p2 - 1;
1286 			p1 = genbuf;
1287 			p2 = holdsp;
1288 			while(*p2++ = *p1++);
1289 			hspend = p2 - 1;
1290 			break;
1291 		case YCOM:
1292 			p1 = linebuf;
1293 			p2 = ipc->text;
1294 			for (i = *p2++;	*p1; p1++){
1295 				if (*p1 <= i) *p1 = p2[*p1];
1296 			}
1297 			break;
1298 	}
1299 
1300 }
1301 
1302 void
1303 putline(Biobuf *bp, Rune *buf, int n)
1304 {
1305 	while (n--)
1306 		Bputrune(bp, *buf++);
1307 	Bputc(bp, '\n');
1308 }
1309 ecmp(Rune *a, Rune *b, int count)
1310 {
1311 	while(count--)
1312 		if(*a++ != *b++)	return(0);
1313 	return(1);
1314 }
1315 
1316 void
1317 arout(void)
1318 {
1319 	Rune	*p1;
1320 	Biobuf	*fi;
1321 	int	c;
1322 	char	*s;
1323 	char	buf[128];
1324 
1325 	for (aptr = abuf; *aptr; aptr++) {
1326 		if((*aptr)->command == ACOM) {
1327 			for(p1 = (*aptr)->text; *p1; p1++ )
1328 				Bputrune(&fout, *p1);
1329 			Bputc(&fout, '\n');
1330 		} else {
1331 			for(s = buf, p1= (*aptr)->text; *p1; p1++)
1332 					s += runetochar(s, p1);
1333 			*s = '\0';
1334 			if((fi = Bopen(buf, OREAD)) == 0)
1335 				continue;
1336 			while((c = Bgetc(fi)) >= 0)
1337 				Bputc(&fout, c);
1338 			Bterm(fi);
1339 		}
1340 	}
1341 	aptr = abuf;
1342 	*aptr = 0;
1343 }
1344 
1345 void
1346 errexit(void)
1347 {
1348 	exits("error");
1349 }
1350 
1351 void
1352 quit (char *msg, char *arg)
1353 {
1354 	fprint(2, "sed: ");
1355 	fprint(2, msg, arg);
1356 	fprint(2, "\n");
1357 	errexit();
1358 }
1359 
1360 Rune *
1361 gline(Rune *addr)
1362 {
1363 	long	c;
1364 	Rune *p;
1365 
1366 	static long peekc = 0;
1367 
1368 	if (f == 0 && opendata() < 0)
1369 		return 0;
1370 	sflag = 0;
1371 	lnum++;
1372 /*	Bflush(&fout);********* dumped 4/30/92 - bobf****/
1373 	do {
1374 		p = addr;
1375 		for (c = (peekc ? peekc : Bgetrune(f)); c >= 0; c = Bgetrune(f)) {
1376 			if (c == '\n') {
1377 				if ((peekc = Bgetrune(f)) < 0) {
1378 					Bterm(f);
1379 					if (fhead == 0)
1380 						dolflag = 1;
1381 				}
1382 				*p = '\0';
1383 				return p;
1384 			}
1385 			if (c && p < lbend)
1386 				*p++ = c;
1387 		}
1388 		peekc = 0;
1389 	} while (opendata() > 0);	/* Switch to next stream */
1390 	return 0;
1391 }
1392 
1393 	/* Data file input section - the intent is to transparently
1394 	 *	catenate all data input streams.
1395 	 */
1396 void
1397 enroll(char *filename)		/* Add a file to the input file cache */
1398 {
1399 	FileCache *fp;
1400 
1401 	if ((fp = (FileCache *) malloc(sizeof (FileCache))) == 0)
1402 		quit("Out of memory", 0);
1403 	if (ftail == 0)
1404 		fhead = fp;
1405 	else
1406 		ftail->next = fp;
1407 	ftail = fp;
1408 	fp->next = 0;
1409 	fp->name = filename;	/* 0 => stdin */
1410 }
1411 
1412 int
1413 opendata(void)
1414 {
1415 	if (fhead == 0)
1416 		return -1;
1417 	if (fhead->name) {
1418 		if ((f = Bopen(fhead->name, OREAD)) == 0)
1419 			quit("Can't open %s", fhead->name);
1420 	} else {
1421 		Binit(&stdin, 0, OREAD);
1422 		f = &stdin;
1423 	}
1424 	fhead = fhead->next;
1425 	return 1;
1426 }
1427