xref: /csrg-svn/usr.bin/ex/ex_re.c (revision 63046)
148255Sbostic /*-
2*63046Sbostic  * Copyright (c) 1980, 1993
3*63046Sbostic  *	The Regents of the University of California.  All rights reserved.
448255Sbostic  *
548255Sbostic  * %sccs.include.proprietary.c%
621663Sdist  */
721663Sdist 
821663Sdist #ifndef lint
9*63046Sbostic static char sccsid[] = "@(#)ex_re.c	8.1 (Berkeley) 06/09/93";
1048255Sbostic #endif /* not lint */
1121663Sdist 
12438Smark #include "ex.h"
13438Smark #include "ex_re.h"
14438Smark 
15438Smark /*
16438Smark  * Global, substitute and regular expressions.
17438Smark  * Very similar to ed, with some re extensions and
18438Smark  * confirmed substitute.
19438Smark  */
/*
 * global: implement the global command.
 *
 * Reads a regular expression and then a command list from the input,
 * marks every line in addr1..addr2 for which execute() returns k, and
 * runs the command list once for each marked line with dot set there.
 * k is compared directly with the result of execute(), so it selects
 * whether matching or non-matching lines are marked (presumably 1 for
 * g and 0 for v -- confirm against the caller).
 *
 * inglobal, globp and Command are saved on entry and restored on exit.
 */
global(k)20438Smark global(k)
21438Smark 	bool k;
22438Smark {
23438Smark 	register char *gp;
24438Smark 	register int c;
25438Smark 	register line *a1;
26438Smark 	char globuf[GBSIZE], *Cwas;
27438Smark 	int lines = lineDOL();
28438Smark 	int oinglobal = inglobal;
29438Smark 	char *oglobp = globp;
30438Smark 
31438Smark 	Cwas = Command;
32438Smark 	/*
33438Smark 	 * States of inglobal:
34438Smark 	 *  0: ordinary - not in a global command.
35438Smark 	 *  1: text coming from some buffer, not tty.
36438Smark 	 *  2: like 1, but the source of the buffer is a global command.
37438Smark 	 * Hence you're only in a global command if inglobal==2. This
38438Smark 	 * strange sounding convention is historically derived from
39438Smark 	 * everybody simulating a global command.
40438Smark 	 */
41438Smark 	if (inglobal==2)
42438Smark 		error("Global within global@not allowed");
43438Smark 	markDOT();
44438Smark 	setall();
45438Smark 	nonzero();
46438Smark 	if (skipend())
47438Smark 		error("Global needs re|Missing regular expression for global");
	/* The next character is the delimiter of the regular expression. */
4830596Sconrad 	c = ex_getchar();
49438Smark 	ignore(compile(c, 1));
50438Smark 	savere(scanre);
	/*
	 * Collect the command list into globuf, up to an unescaped newline.
	 * Backslash-newline stores just the newline; backslash-backslash
	 * stores one backslash and pushes the second back so that it is
	 * re-read as the start of the next escape; any other escaped
	 * character is stored together with its backslash.
	 */
51438Smark 	gp = globuf;
5230596Sconrad 	while ((c = ex_getchar()) != '\n') {
53438Smark 		switch (c) {
54438Smark 
55438Smark 		case EOF:
56438Smark 			c = '\n';
57438Smark 			goto brkwh;
58438Smark 
59438Smark 		case '\\':
6030596Sconrad 			c = ex_getchar();
61438Smark 			switch (c) {
62438Smark 
63438Smark 			case '\\':
64438Smark 				ungetchar(c);
65438Smark 				break;
66438Smark 
67438Smark 			case '\n':
68438Smark 				break;
69438Smark 
70438Smark 			default:
71438Smark 				*gp++ = '\\';
72438Smark 				break;
73438Smark 			}
74438Smark 			break;
75438Smark 		}
76438Smark 		*gp++ = c;
		/* Keep two bytes free for the final newline and the NUL. */
77438Smark 		if (gp >= &globuf[GBSIZE - 2])
78438Smark 			error("Global command too long");
79438Smark 	}
80438Smark brkwh:
81438Smark 	ungetchar(c);
82438Smark 	newline();
83438Smark 	*gp++ = c;
84438Smark 	*gp++ = 0;
85492Smark 	saveall();
86438Smark 	inglobal = 2;
	/*
	 * Marking pass: the low bit of each line entry is the mark.  It is
	 * cleared on every line and set on those inside addr1..addr2 for
	 * which execute() returns k.
	 */
87438Smark 	for (a1 = one; a1 <= dol; a1++) {
88438Smark 		*a1 &= ~01;
89438Smark 		if (a1 >= addr1 && a1 <= addr2 && execute(0, a1) == k)
90438Smark 			*a1 |= 01;
91438Smark 	}
9221688Sdist #ifdef notdef
9321688Sdist /*
9421688Sdist  * This code is commented out for now.  The problem is that we don't
9521688Sdist  * fix up the undo area the way we should.  Basically, I think what has
9621688Sdist  * to be done is to copy the undo area down (since we shrunk everything)
9721688Sdist  * and move the various pointers into it down too.  I will do this later
9821688Sdist  * when I have time. (Mark, 10-20-80)
9921688Sdist  */
10021688Sdist 	/*
10121688Sdist 	 * Special case: g/.../d (avoid n^2 algorithm)
10221688Sdist 	 */
10321688Sdist 	if (globuf[0]=='d' && globuf[1]=='\n' && globuf[2]=='\0') {
10421688Sdist 		gdelete();
10521688Sdist 		return;
10621688Sdist 	}
10721688Sdist #endif
	/* NOTE(review): inopen is parked at -1 while the commands run and
	 * put back to 1 below; the meaning of -1 is not visible here. */
108438Smark 	if (inopen)
109438Smark 		inopen = -1;
11021688Sdist 	/*
11121688Sdist 	 * Now for each marked line, set dot there and do the commands.
11221688Sdist 	 * Note the n^2 behavior here for lots of lines matching.
11321688Sdist 	 * This is really needed: in some cases you could delete lines,
11421688Sdist 	 * causing a marked line to be moved before a1 and missed if
11521688Sdist 	 * we didn't restart at zero each time.
11621688Sdist 	 */
117438Smark 	for (a1 = one; a1 <= dol; a1++) {
118438Smark 		if (*a1 & 01) {
119438Smark 			*a1 &= ~01;
120438Smark 			dot = a1;
121438Smark 			globp = globuf;
122438Smark 			commands(1, 1);
			/* The loop increment then restarts the scan at one. */
123438Smark 			a1 = zero;
124438Smark 		}
125438Smark 	}
	/* Restore the state saved on entry. */
126438Smark 	globp = oglobp;
127438Smark 	inglobal = oinglobal;
128438Smark 	endline = 1;
129438Smark 	Command = Cwas;
130438Smark 	netchHAD(lines);
131438Smark 	setlastchar(EOF);
132438Smark 	if (inopen) {
133438Smark 		ungetchar(EOF);
134438Smark 		inopen = 1;
135438Smark 	}
136438Smark }
137438Smark 
13821688Sdist /*
13921688Sdist  * gdelete: delete inside a global command. Handles the
14021688Sdist  * special case g/r.e./d. All lines to be deleted have
14121688Sdist  * already been marked. Squeeze the remaining lines together.
14221688Sdist  * Note that other cases such as g/r.e./p, g/r.e./s/r.e.2/rhs/,
14321688Sdist  * and g/r.e./.,/r.e.2/d are not treated specially.  There is no
14421688Sdist  * good reason for this except the question: where to you draw the line?
14521688Sdist  */
gdelete()14621688Sdist gdelete()
14721688Sdist {
14821688Sdist 	register line *a1, *a2, *a3;
14921688Sdist 
15021688Sdist 	a3 = dol;
15121688Sdist 	/* find first marked line. can skip all before it */
15221688Sdist 	for (a1=zero; (*a1&01)==0; a1++)
15321688Sdist 		if (a1>=a3)
15421688Sdist 			return;
15521688Sdist 	/* copy down unmarked lines, compacting as we go. */
15621688Sdist 	for (a2=a1+1; a2<=a3;) {
15721688Sdist 		if (*a2&01) {
15821688Sdist 			a2++;		/* line is marked, skip it */
15921688Sdist 			dot = a1;	/* dot left after line deletion */
16021688Sdist 		} else
16121688Sdist 			*a1++ = *a2++;	/* unmarked, copy it */
16221688Sdist 	}
16321688Sdist 	dol = a1-1;
16421688Sdist 	if (dot>dol)
16521688Sdist 		dot = dol;
16621688Sdist 	change();
16721688Sdist }
16821688Sdist 
169518Smark bool	cflag;
170438Smark int	scount, slines, stotal;
171438Smark 
substitute(c)172438Smark substitute(c)
173438Smark 	int c;
174438Smark {
175438Smark 	register line *addr;
176438Smark 	register int n;
17721688Sdist 	int gsubf, hopcount;
178438Smark 
179438Smark 	gsubf = compsub(c);
180492Smark 	if(FIXUNDO)
181438Smark 		save12(), undkind = UNDCHANGE;
182438Smark 	stotal = 0;
183438Smark 	slines = 0;
184438Smark 	for (addr = addr1; addr <= addr2; addr++) {
18521688Sdist 		scount = hopcount = 0;
186438Smark 		if (dosubcon(0, addr) == 0)
187438Smark 			continue;
188438Smark 		if (gsubf) {
189438Smark 			/*
19021688Sdist 			 * The loop can happen from s/\</&/g
19121688Sdist 			 * but we don't want to break other, reasonable cases.
192438Smark 			 */
19321688Sdist 			while (*loc2) {
19421688Sdist 				if (++hopcount > sizeof linebuf)
19521688Sdist 					error("substitution loop");
196438Smark 				if (dosubcon(1, addr) == 0)
197438Smark 					break;
19821688Sdist 			}
199438Smark 		}
200438Smark 		if (scount) {
201438Smark 			stotal += scount;
202438Smark 			slines++;
203438Smark 			putmark(addr);
204438Smark 			n = append(getsub, addr);
205438Smark 			addr += n;
206438Smark 			addr2 += n;
207438Smark 		}
208438Smark 	}
209518Smark 	if (stotal == 0 && !inglobal && !cflag)
210438Smark 		error("Fail|Substitute pattern match failed");
211438Smark 	snote(stotal, slines);
212438Smark 	return (stotal);
213438Smark }
214438Smark 
/*
 * compsub: parse the remainder of a substitute command.
 *
 * ch is the command character: 's' reads a new pattern and replacement,
 * '~' and '&' repeat an earlier substitute.  Trailing g, c and r
 * suffixes are then consumed.  Returns the resulting gsubf flag.
 *
 * gsubf is static, so together with cflag it persists between calls;
 * both are cleared on entry unless the EDCOMPATIBLE option is set.
 */
compsub(ch)215438Smark compsub(ch)
216438Smark {
217438Smark 	register int seof, c, uselastre;
218438Smark 	static int gsubf;
219438Smark 
220438Smark 	if (!value(EDCOMPATIBLE))
221518Smark 		gsubf = cflag = 0;
222438Smark 	uselastre = 0;
223438Smark 	switch (ch) {
224438Smark 
225438Smark 	case 's':
226438Smark 		ignore(skipwh());
22730596Sconrad 		seof = ex_getchar();
		/* A bare "s", or "s" followed only by suffixes, repeats the
		 * previous substitute. */
228438Smark 		if (endcmd(seof) || any(seof, "gcr")) {
229438Smark 			ungetchar(seof);
230438Smark 			goto redo;
231438Smark 		}
232438Smark 		if (isalpha(seof) || isdigit(seof))
233438Smark 			error("Substitute needs re|Missing regular expression for substitute");
		/* seof is the delimiter; compile the pattern, then read the
		 * replacement text. */
234438Smark 		seof = compile(seof, 1);
235438Smark 		uselastre = 1;
236438Smark 		comprhs(seof);
		/* A freshly typed substitute always starts with both flags off. */
237438Smark 		gsubf = 0;
238518Smark 		cflag = 0;
239438Smark 		break;
240438Smark 
241438Smark 	case '~':
242438Smark 		uselastre = 1;
243438Smark 		/* fall into ... */
244438Smark 	case '&':
245438Smark 	redo:
246438Smark 		if (re.Expbuf[0] == 0)
247438Smark 			error("No previous re|No previous regular expression");
248518Smark 		if (subre.Expbuf[0] == 0)
249518Smark 			error("No previous substitute re|No previous substitute to repeat");
250438Smark 		break;
251438Smark 	}
	/* Suffixes: g and c toggle their flags, r sets uselastre. */
252438Smark 	for (;;) {
25330596Sconrad 		c = ex_getchar();
254438Smark 		switch (c) {
255438Smark 
256438Smark 		case 'g':
257438Smark 			gsubf = !gsubf;
258438Smark 			continue;
259438Smark 
260438Smark 		case 'c':
261518Smark 			cflag = !cflag;
262438Smark 			continue;
263438Smark 
264438Smark 		case 'r':
265438Smark 			uselastre = 1;
266438Smark 			continue;
267438Smark 
268438Smark 		default:
269438Smark 			ungetchar(c);
270438Smark 			setcount();
271438Smark 			newline();
			/* NOTE(review): savere/resre are defined elsewhere;
			 * presumably savere(subre) records the current
			 * expression as the substitute re and resre(subre)
			 * reinstates the recorded one -- confirm in ex_re.h. */
272438Smark 			if (uselastre)
273438Smark 				savere(subre);
274438Smark 			else
275438Smark 				resre(subre);
276438Smark 			return (gsubf);
277438Smark 		}
278438Smark 	}
279438Smark }
280438Smark 
/*
 * comprhs: read the replacement text of a substitute into rhsbuf.
 *
 * Characters are read until the delimiter seof.  Characters that are to
 * be treated specially when the replacement is applied are stored with
 * the QUOTE bit set.  The previous contents of rhsbuf are kept in
 * orhsbuf so that ~ can splice them into the new text.
 */
comprhs(seof)281438Smark comprhs(seof)
282438Smark 	int seof;
283438Smark {
284438Smark 	register char *rp, *orp;
285438Smark 	register int c;
28621688Sdist 	char orhsbuf[RHSSIZE];
287438Smark 
288438Smark 	rp = rhsbuf;
	/* CP is defined elsewhere; presumably a string copy of rhsbuf into
	 * orhsbuf -- confirm in ex.h. */
289438Smark 	CP(orhsbuf, rp);
290438Smark 	for (;;) {
29130596Sconrad 		c = ex_getchar();
292438Smark 		if (c == seof)
293438Smark 			break;
294438Smark 		switch (c) {
295438Smark 
296438Smark 		case '\\':
29730596Sconrad 			c = ex_getchar();
298438Smark 			if (c == EOF) {
299438Smark 				ungetchar(c);
300438Smark 				break;
301438Smark 			}
302438Smark 			if (value(MAGIC)) {
303438Smark 				/*
304438Smark 				 * When "magic", \& turns into a plain &,
305438Smark 				 * and all other chars work fine quoted.
306438Smark 				 */
307438Smark 				if (c != '&')
308438Smark 					c |= QUOTE;
309438Smark 				break;
310438Smark 			}
			/*
			 * Reached by falling in with magic off (escaped
			 * character) or by goto from the '~' / '&' case with
			 * magic on (unescaped character): ~ expands to the
			 * previous replacement, anything else is quoted.
			 */
311438Smark magic:
312438Smark 			if (c == '~') {
313438Smark 				for (orp = orhsbuf; *orp; *rp++ = *orp++)
31421688Sdist 					if (rp >= &rhsbuf[RHSSIZE - 1])
315438Smark 						goto toobig;
316438Smark 				continue;
317438Smark 			}
318438Smark 			c |= QUOTE;
319438Smark 			break;
320438Smark 
321438Smark 		case '\n':
322438Smark 		case EOF:
			/* Unless text is still pending in globp, a newline or
			 * EOF ends the replacement. */
323492Smark 			if (!(globp && globp[0])) {
324492Smark 				ungetchar(c);
325492Smark 				goto endrhs;
326492Smark 			}
			/* NOTE(review): otherwise control falls through into
			 * the '~' / '&' case below; this looks deliberate but
			 * is not marked as such. */
327438Smark 
328438Smark 		case '~':
329438Smark 		case '&':
330438Smark 			if (value(MAGIC))
331438Smark 				goto magic;
332438Smark 			break;
333438Smark 		}
		/* Leave room for the terminating NUL. */
33421688Sdist 		if (rp >= &rhsbuf[RHSSIZE - 1]) {
335438Smark toobig:
33621688Sdist 			*rp = 0;
			/* NOTE(review): the message says 256 but the limit
			 * actually enforced is RHSSIZE. */
337438Smark 			error("Replacement pattern too long@- limit 256 characters");
33821688Sdist 		}
339438Smark 		*rp++ = c;
340438Smark 	}
341438Smark endrhs:
342438Smark 	*rp++ = 0;
343438Smark }
344438Smark 
getsub()345438Smark getsub()
346438Smark {
347438Smark 	register char *p;
348438Smark 
349438Smark 	if ((p = linebp) == 0)
350438Smark 		return (EOF);
351438Smark 	strcLIN(p);
352438Smark 	linebp = 0;
353438Smark 	return (0);
354438Smark }
355438Smark 
dosubcon(f,a)356438Smark dosubcon(f, a)
357438Smark 	bool f;
358438Smark 	line *a;
359438Smark {
360438Smark 
361438Smark 	if (execute(f, a) == 0)
362438Smark 		return (0);
363438Smark 	if (confirmed(a)) {
364438Smark 		dosub();
365438Smark 		scount++;
366438Smark 	}
367438Smark 	return (1);
368438Smark }
369438Smark 
confirmed(a)370438Smark confirmed(a)
371438Smark 	line *a;
372438Smark {
373438Smark 	register int c, ch;
374438Smark 
375518Smark 	if (cflag == 0)
376438Smark 		return (1);
377438Smark 	pofix();
378438Smark 	pline(lineno(a));
379438Smark 	if (inopen)
38030596Sconrad 		ex_putchar('\n' | QUOTE);
381438Smark 	c = column(loc1 - 1);
382438Smark 	ugo(c - 1 + (inopen ? 1 : 0), ' ');
383438Smark 	ugo(column(loc2 - 1) - c, '^');
384438Smark 	flush();
385438Smark 	ch = c = getkey();
386438Smark again:
387438Smark 	if (c == '\r')
388438Smark 		c = '\n';
389438Smark 	if (inopen)
39030596Sconrad 		ex_putchar(c), flush();
391438Smark 	if (c != '\n' && c != EOF) {
392438Smark 		c = getkey();
393438Smark 		goto again;
394438Smark 	}
395438Smark 	noteinp();
396438Smark 	return (ch == 'y');
397438Smark }
398438Smark 
getch()399438Smark getch()
400438Smark {
401438Smark 	char c;
402438Smark 
403438Smark 	if (read(2, &c, 1) != 1)
404438Smark 		return (EOF);
405438Smark 	return (c & TRIM);
406438Smark }
407438Smark 
/*
 * ugo: output the character `with' cnt times.
 * Nothing is written when cnt is zero or negative.
 */
ugo(cnt, with)
	int cnt;
	int with;
{

	for (; cnt > 0; cnt--)
		ex_putchar(with);
}
418438Smark 
419438Smark int	casecnt;
420438Smark bool	destuc;
421438Smark 
dosub()422438Smark dosub()
423438Smark {
424438Smark 	register char *lp, *sp, *rp;
425438Smark 	int c;
426438Smark 
427438Smark 	lp = linebuf;
428438Smark 	sp = genbuf;
429438Smark 	rp = rhsbuf;
430438Smark 	while (lp < loc1)
431438Smark 		*sp++ = *lp++;
432438Smark 	casecnt = 0;
433438Smark 	while (c = *rp++) {
43421688Sdist 		/* ^V <return> from vi to split lines */
43521688Sdist 		if (c == '\r')
43621688Sdist 			c = '\n';
43721688Sdist 
438438Smark 		if (c & QUOTE)
439438Smark 			switch (c & TRIM) {
440438Smark 
441438Smark 			case '&':
442438Smark 				sp = place(sp, loc1, loc2);
443438Smark 				if (sp == 0)
444438Smark 					goto ovflo;
445438Smark 				continue;
446438Smark 
447438Smark 			case 'l':
448438Smark 				casecnt = 1;
449438Smark 				destuc = 0;
450438Smark 				continue;
451438Smark 
452438Smark 			case 'L':
453438Smark 				casecnt = LBSIZE;
454438Smark 				destuc = 0;
455438Smark 				continue;
456438Smark 
457438Smark 			case 'u':
458438Smark 				casecnt = 1;
459438Smark 				destuc = 1;
460438Smark 				continue;
461438Smark 
462438Smark 			case 'U':
463438Smark 				casecnt = LBSIZE;
464438Smark 				destuc = 1;
465438Smark 				continue;
466438Smark 
467438Smark 			case 'E':
468438Smark 			case 'e':
469438Smark 				casecnt = 0;
470438Smark 				continue;
471438Smark 			}
472438Smark 		if (c < 0 && (c &= TRIM) >= '1' && c < nbra + '1') {
473438Smark 			sp = place(sp, braslist[c - '1'], braelist[c - '1']);
474438Smark 			if (sp == 0)
475438Smark 				goto ovflo;
476438Smark 			continue;
477438Smark 		}
478438Smark 		if (casecnt)
479438Smark 			*sp++ = fixcase(c & TRIM);
480438Smark 		else
481438Smark 			*sp++ = c & TRIM;
482438Smark 		if (sp >= &genbuf[LBSIZE])
483438Smark ovflo:
484473Smark 			error("Line overflow@in substitute");
485438Smark 	}
486438Smark 	lp = loc2;
487438Smark 	loc2 = sp + (linebuf - genbuf);
488438Smark 	while (*sp++ = *lp++)
489438Smark 		if (sp >= &genbuf[LBSIZE])
490438Smark 			goto ovflo;
491438Smark 	strcLIN(genbuf);
492438Smark }
493438Smark 
fixcase(c)494438Smark fixcase(c)
495438Smark 	register int c;
496438Smark {
497438Smark 
498438Smark 	if (casecnt == 0)
499438Smark 		return (c);
500438Smark 	casecnt--;
501438Smark 	if (destuc) {
502438Smark 		if (islower(c))
503438Smark 			c = toupper(c);
504438Smark 	} else
505438Smark 		if (isupper(c))
506438Smark 			c = tolower(c);
507438Smark 	return (c);
508438Smark }
509438Smark 
510438Smark char *
place(sp,l1,l2)511438Smark place(sp, l1, l2)
512438Smark 	register char *sp, *l1, *l2;
513438Smark {
514438Smark 
515438Smark 	while (l1 < l2) {
516438Smark 		*sp++ = fixcase(*l1++);
517438Smark 		if (sp >= &genbuf[LBSIZE])
518438Smark 			return (0);
519438Smark 	}
520438Smark 	return (sp);
521438Smark }
522438Smark 
/*
 * snote: report how many substitutions were made.
 *
 * Prints nothing unless notable(total) holds.  The number of lines is
 * added only when it is neither 1 nor equal to the total.
 */
snote(total, lines)
	register int total, lines;
{

	if (notable(total)) {
		ex_printf(mesg("%d subs|%d substitutions"), total);
		if (!(lines == 1 || lines == total))
			ex_printf(" on %d lines", lines);
		noonl();
		flush();
	}
}
535438Smark 
/*
 * compile: read a regular expression from the input and compile it
 * into expbuf.
 *
 * eof is the delimiter that ends the expression.  When oknl is nonzero
 * a newline may end it as well (the newline is pushed back).  An empty
 * expression leaves the expression already in expbuf in place.
 *
 * Returns eof, except for the forms \/ \? and \& (delimiter is a
 * backslash), where the character after the backslash is returned and
 * scanre or subre is reinstated with resre().
 *
 * Errors go through error() or cerror(); cerror() also clears
 * expbuf[0], so a partly compiled expression is not left behind.
 */
compile(eof,oknl)536438Smark compile(eof, oknl)
537438Smark 	int eof;
538438Smark 	int oknl;
539438Smark {
540438Smark 	register int c;
541438Smark 	register char *ep;
542438Smark 	char *lastep;
543438Smark 	char bracket[NBRA], *bracketp, *rhsp;
544438Smark 	int cclcnt;
545438Smark 
546438Smark 	if (isalpha(eof) || isdigit(eof))
547438Smark 		error("Regular expressions cannot be delimited by letters or digits");
548438Smark 	ep = expbuf;
54930596Sconrad 	c = ex_getchar();
550438Smark 	if (eof == '\\')
551438Smark 		switch (c) {
552438Smark 
553438Smark 		case '/':
554438Smark 		case '?':
555438Smark 			if (scanre.Expbuf[0] == 0)
556438Smark error("No previous scan re|No previous scanning regular expression");
557438Smark 			resre(scanre);
558438Smark 			return (c);
559438Smark 
560438Smark 		case '&':
561438Smark 			if (subre.Expbuf[0] == 0)
562438Smark error("No previous substitute re|No previous substitute regular expression");
563438Smark 			resre(subre);
564438Smark 			return (c);
565438Smark 
566438Smark 		default:
567438Smark 			error("Badly formed re|Regular expression \\ must be followed by / or ?");
568438Smark 		}
	/* Empty expression: keep using whatever is already in expbuf. */
569438Smark 	if (c == eof || c == '\n' || c == EOF) {
570438Smark 		if (*ep == 0)
571438Smark 			error("No previous re|No previous regular expression");
572438Smark 		if (c == '\n' && oknl == 0)
573438Smark 			error("Missing closing delimiter@for regular expression");
574438Smark 		if (c != eof)
575438Smark 			ungetchar(c);
576438Smark 		return (eof);
577438Smark 	}
	/* bracket[] is a stack of the \( groups that are still open. */
578438Smark 	bracketp = bracket;
579438Smark 	nbra = 0;
	/* A leading ^ is not compiled; it is recorded in circfl instead. */
580438Smark 	circfl = 0;
581438Smark 	if (c == '^') {
58230596Sconrad 		c = ex_getchar();
583438Smark 		circfl++;
584438Smark 	}
585438Smark 	ungetchar(c);
586438Smark 	for (;;) {
		/* Keep room for the largest item emitted in one step. */
587438Smark 		if (ep >= &expbuf[ESIZE - 2])
588438Smark complex:
589438Smark 			cerror("Re too complex|Regular expression too complicated");
59030596Sconrad 		c = ex_getchar();
591438Smark 		if (c == eof || c == EOF) {
592438Smark 			if (bracketp != bracket)
593438Smark cerror("Unmatched \\(|More \\('s than \\)'s in regular expression");
594518Smark 			*ep++ = CEOFC;
595438Smark 			if (c == EOF)
596438Smark 				ungetchar(c);
597438Smark 			return (eof);
598438Smark 		}
		/*
		 * lastep tracks the start of the item a following star
		 * applies to, so it is left alone when the current input is
		 * itself a star (* with magic, \* without).
		 */
599438Smark 		if (value(MAGIC)) {
600438Smark 			if (c != '*' || ep == expbuf)
601438Smark 				lastep = ep;
602438Smark 		} else
603438Smark 			if (c != '\\' || peekchar() != '*' || ep == expbuf)
604438Smark 				lastep = ep;
605438Smark 		switch (c) {
606438Smark 
607438Smark 		case '\\':
60830596Sconrad 			c = ex_getchar();
609438Smark 			switch (c) {
610438Smark 
611438Smark 			case '(':
612438Smark 				if (nbra >= NBRA)
613438Smark cerror("Awash in \\('s!|Too many \\('d subexressions in a regular expression");
614438Smark 				*bracketp++ = nbra;
615438Smark 				*ep++ = CBRA;
616438Smark 				*ep++ = nbra++;
617438Smark 				continue;
618438Smark 
619438Smark 			case ')':
620438Smark 				if (bracketp <= bracket)
621438Smark cerror("Extra \\)|More \\)'s than \\('s in regular expression");
622438Smark 				*ep++ = CKET;
623438Smark 				*ep++ = *--bracketp;
624438Smark 				continue;
625438Smark 
626438Smark 			case '<':
627438Smark 				*ep++ = CBRC;
628438Smark 				continue;
629438Smark 
630438Smark 			case '>':
631438Smark 				*ep++ = CLET;
632438Smark 				continue;
633438Smark 			}
			/*
			 * The switch below handles the magic characters.  It
			 * is entered here for an escaped character when magic
			 * is off, and by goto from the unescaped . ~ * [ case
			 * when magic is on.
			 */
634438Smark 			if (value(MAGIC) == 0)
635438Smark magic:
636438Smark 			switch (c) {
637438Smark 
638438Smark 			case '.':
639438Smark 				*ep++ = CDOT;
640438Smark 				continue;
641438Smark 
642438Smark 			case '~':
				/* Insert the last replacement text as literal
				 * characters. */
643438Smark 				rhsp = rhsbuf;
644438Smark 				while (*rhsp) {
645438Smark 					if (*rhsp & QUOTE) {
646438Smark 						c = *rhsp & TRIM;
647438Smark 						if (c == '&')
648438Smark error("Replacement pattern contains &@- cannot use in re");
649438Smark 						if (c >= '1' && c <= '9')
650438Smark error("Replacement pattern contains \\d@- cannot use in re");
651438Smark 					}
652438Smark 					if (ep >= &expbuf[ESIZE-2])
653438Smark 						goto complex;
654438Smark 					*ep++ = CCHR;
655438Smark 					*ep++ = *rhsp++ & TRIM;
656438Smark 				}
657438Smark 				continue;
658438Smark 
659438Smark 			case '*':
				/* A star at the very start is an ordinary
				 * character. */
660438Smark 				if (ep == expbuf)
661438Smark 					break;
662438Smark 				if (*lastep == CBRA || *lastep == CKET)
663438Smark cerror("Illegal *|Can't * a \\( ... \\) in regular expression");
664438Smark 				if (*lastep == CCHR && (lastep[1] & QUOTE))
665438Smark cerror("Illegal *|Can't * a \\n in regular expression");
666438Smark 				*lastep |= STAR;
667438Smark 				continue;
668438Smark 
669438Smark 			case '[':
				/*
				 * Character class: CCL (or NCCL after ^), a
				 * count byte, then the members.  The count
				 * includes the count byte itself and is filled
				 * in through lastep once the ] is seen.
				 */
670438Smark 				*ep++ = CCL;
671438Smark 				*ep++ = 0;
672438Smark 				cclcnt = 1;
67330596Sconrad 				c = ex_getchar();
674438Smark 				if (c == '^') {
67530596Sconrad 					c = ex_getchar();
676438Smark 					ep[-2] = NCCL;
677438Smark 				}
678438Smark 				if (c == ']')
679438Smark cerror("Bad character class|Empty character class '[]' or '[^]' cannot match");
680438Smark 				while (c != ']') {
					/* \] \- \^ and \\ are stored quoted. */
681438Smark 					if (c == '\\' && any(peekchar(), "]-^\\"))
68230596Sconrad 						c = ex_getchar() | QUOTE;
683438Smark 					if (c == '\n' || c == EOF)
684438Smark 						cerror("Missing ]");
685438Smark 					*ep++ = c;
686438Smark 					cclcnt++;
687438Smark 					if (ep >= &expbuf[ESIZE])
688438Smark 						goto complex;
68930596Sconrad 					c = ex_getchar();
690438Smark 				}
691438Smark 				lastep[1] = cclcnt;
692438Smark 				continue;
693438Smark 			}
			/* Reached for an escaped character that is not
			 * special, and for a star at the start of the
			 * expression. */
694438Smark 			if (c == EOF) {
695438Smark 				ungetchar(EOF);
696438Smark 				c = '\\';
697438Smark 				goto defchar;
698438Smark 			}
699438Smark 			*ep++ = CCHR;
700438Smark 			if (c == '\n')
701438Smark cerror("No newlines in re's|Can't escape newlines into regular expressions");
702438Smark /*
703438Smark 			if (c < '1' || c > NBRA + '1') {
704438Smark */
705438Smark 				*ep++ = c;
706438Smark 				continue;
707438Smark /*
708438Smark 			}
709438Smark 			c -= '1';
710438Smark 			if (c >= nbra)
711438Smark cerror("Bad \\n|\\n in regular expression with n greater than the number of \\('s");
712438Smark 			*ep++ = c | QUOTE;
713438Smark 			continue;
714438Smark */
715438Smark 
716438Smark 		case '\n':
717438Smark 			if (oknl) {
718438Smark 				ungetchar(c);
719518Smark 				*ep++ = CEOFC;
720438Smark 				return (eof);
721438Smark 			}
722438Smark cerror("Badly formed re|Missing closing delimiter for regular expression");
723438Smark 
724438Smark 		case '$':
			/* $ is an anchor only at the very end of the
			 * expression; elsewhere it is an ordinary character. */
725438Smark 			if (peekchar() == eof || peekchar() == EOF || oknl && peekchar() == '\n') {
726438Smark 				*ep++ = CDOL;
727438Smark 				continue;
728438Smark 			}
729438Smark 			goto defchar;
730438Smark 
731438Smark 		case '.':
732438Smark 		case '~':
733438Smark 		case '*':
734438Smark 		case '[':
735438Smark 			if (value(MAGIC))
736438Smark 				goto magic;
737438Smark defchar:
738438Smark 		default:
739438Smark 			*ep++ = CCHR;
740438Smark 			*ep++ = c;
741438Smark 			continue;
742438Smark 		}
743438Smark 	}
744438Smark }
745438Smark 
cerror(s)746438Smark cerror(s)
747438Smark 	char *s;
748438Smark {
749438Smark 
750438Smark 	expbuf[0] = 0;
751438Smark 	error(s);
752438Smark }
753438Smark 
same(a,b)754438Smark same(a, b)
755438Smark 	register int a, b;
756438Smark {
757438Smark 
758438Smark 	return (a == b || value(IGNORECASE) &&
759438Smark 	   ((islower(a) && toupper(a) == b) || (islower(b) && toupper(b) == a)));
760438Smark }
761438Smark 
762438Smark char	*locs;
763438Smark 
76430596Sconrad /* VARARGS1 */
execute(gf,addr)765438Smark execute(gf, addr)
766438Smark 	line *addr;
767438Smark {
768438Smark 	register char *p1, *p2;
769438Smark 	register int c;
770438Smark 
771438Smark 	if (gf) {
772438Smark 		if (circfl)
773438Smark 			return (0);
774438Smark 		locs = p1 = loc2;
775438Smark 	} else {
776438Smark 		if (addr == zero)
777438Smark 			return (0);
778438Smark 		p1 = linebuf;
779438Smark 		getline(*addr);
780438Smark 		locs = 0;
781438Smark 	}
782438Smark 	p2 = expbuf;
783438Smark 	if (circfl) {
784438Smark 		loc1 = p1;
785438Smark 		return (advance(p1, p2));
786438Smark 	}
787438Smark 	/* fast check for first character */
788438Smark 	if (*p2 == CCHR) {
789438Smark 		c = p2[1];
790438Smark 		do {
791438Smark 			if (c != *p1 && (!value(IGNORECASE) ||
792438Smark 			   !((islower(c) && toupper(c) == *p1) ||
793438Smark 			   (islower(*p1) && toupper(*p1) == c))))
794438Smark 				continue;
795438Smark 			if (advance(p1, p2)) {
796438Smark 				loc1 = p1;
797438Smark 				return (1);
798438Smark 			}
799438Smark 		} while (*p1++);
800438Smark 		return (0);
801438Smark 	}
802438Smark 	/* regular algorithm */
803438Smark 	do {
804438Smark 		if (advance(p1, p2)) {
805438Smark 			loc1 = p1;
806438Smark 			return (1);
807438Smark 		}
808438Smark 	} while (*p1++);
809438Smark 	return (0);
810438Smark }
811438Smark 
/* uletter: true for a letter or an underscore. */
812438Smark #define	uletter(c)	(isalpha(c) || c == '_')
813438Smark 
/*
 * advance: try to match the compiled expression ep against the text
 * starting at lp.
 *
 * Returns 1 on success, with loc2 set to the first character after the
 * match, and 0 on failure.  braslist[] and braelist[] receive the start
 * and end of each \( \) group as it is passed.  Starred items are
 * matched as far as possible and then backed off one character at a
 * time, calling advance() recursively for the rest of the expression.
 */
advance(lp,ep)814438Smark advance(lp, ep)
815438Smark 	register char *lp, *ep;
816438Smark {
817438Smark 	register char *curlp;
818438Smark 
819438Smark 	for (;;) switch (*ep++) {
820438Smark 
821438Smark 	case CCHR:
822438Smark /* useless
823438Smark 		if (*ep & QUOTE) {
824438Smark 			c = *ep++ & TRIM;
825438Smark 			sp = braslist[c];
826438Smark 			sp1 = braelist[c];
827438Smark 			while (sp < sp1) {
828438Smark 				if (!same(*sp, *lp))
829438Smark 					return (0);
830438Smark 				sp++, lp++;
831438Smark 			}
832438Smark 			continue;
833438Smark 		}
834438Smark */
835438Smark 		if (!same(*ep, *lp))
836438Smark 			return (0);
837438Smark 		ep++, lp++;
838438Smark 		continue;
839438Smark 
840438Smark 	case CDOT:
		/* Any character except the terminating NUL. */
841438Smark 		if (*lp++)
842438Smark 			continue;
843438Smark 		return (0);
844438Smark 
845438Smark 	case CDOL:
846438Smark 		if (*lp == 0)
847438Smark 			continue;
848438Smark 		return (0);
849438Smark 
850518Smark 	case CEOFC:
		/* End of the expression: the match is complete. */
851438Smark 		loc2 = lp;
852438Smark 		return (1);
853438Smark 
854438Smark 	case CCL:
		/* *ep is the length of the class, count byte included. */
855438Smark 		if (cclass(ep, *lp++, 1)) {
856438Smark 			ep += *ep;
857438Smark 			continue;
858438Smark 		}
859438Smark 		return (0);
860438Smark 
861438Smark 	case NCCL:
862438Smark 		if (cclass(ep, *lp++, 0)) {
863438Smark 			ep += *ep;
864438Smark 			continue;
865438Smark 		}
866438Smark 		return (0);
867438Smark 
868438Smark 	case CBRA:
869438Smark 		braslist[*ep++] = lp;
870438Smark 		continue;
871438Smark 
872438Smark 	case CKET:
873438Smark 		braelist[*ep++] = lp;
874438Smark 		continue;
875438Smark 
	/*
	 * Starred items: run lp one past the longest run of matching
	 * characters, then back off at the star label.
	 */
876438Smark 	case CDOT|STAR:
877438Smark 		curlp = lp;
878438Smark 		while (*lp++)
879438Smark 			continue;
880438Smark 		goto star;
881438Smark 
882438Smark 	case CCHR|STAR:
883438Smark 		curlp = lp;
884438Smark 		while (same(*lp, *ep))
885438Smark 			lp++;
886438Smark 		lp++;
887438Smark 		ep++;
888438Smark 		goto star;
889438Smark 
890438Smark 	case CCL|STAR:
891438Smark 	case NCCL|STAR:
892438Smark 		curlp = lp;
893438Smark 		while (cclass(ep, *lp++, ep[-1] == (CCL|STAR)))
894438Smark 			continue;
895438Smark 		ep += *ep;
896438Smark 		goto star;
897438Smark star:
		/*
		 * Try the rest of the expression at each position from the
		 * longest run down to curlp.  Backing off stops at locs,
		 * which execute() sets to the resume point of a repeated
		 * search.
		 */
898438Smark 		do {
899438Smark 			lp--;
900438Smark 			if (lp == locs)
901438Smark 				break;
902438Smark 			if (advance(lp, ep))
903438Smark 				return (1);
904438Smark 		} while (lp > curlp);
905438Smark 		return (0);
906438Smark 
907438Smark 	case CBRC:
		/* Beginning of word: always succeeds at the start of linebuf;
		 * elsewhere needs a letter, digit or underscore here and none
		 * of those just before. */
90821688Sdist 		if (lp == linebuf)
909438Smark 			continue;
910438Smark 		if ((isdigit(*lp) || uletter(*lp)) && !uletter(lp[-1]) && !isdigit(lp[-1]))
911438Smark 			continue;
912438Smark 		return (0);
913438Smark 
914438Smark 	case CLET:
		/* End of word: the next character is not a letter, digit or
		 * underscore. */
915438Smark 		if (!uletter(*lp) && !isdigit(*lp))
916438Smark 			continue;
917438Smark 		return (0);
918438Smark 
919438Smark 	default:
920438Smark 		error("Re internal error");
921438Smark 	}
922438Smark }
923438Smark 
cclass(set,c,af)924438Smark cclass(set, c, af)
925438Smark 	register char *set;
926438Smark 	register int c;
927438Smark 	int af;
928438Smark {
929438Smark 	register int n;
930438Smark 
931438Smark 	if (c == 0)
932438Smark 		return (0);
933438Smark 	if (value(IGNORECASE) && isupper(c))
934438Smark 		c = tolower(c);
935438Smark 	n = *set++;
936438Smark 	while (--n)
937438Smark 		if (n > 2 && set[1] == '-') {
938438Smark 			if (c >= (set[0] & TRIM) && c <= (set[2] & TRIM))
939438Smark 				return (af);
940438Smark 			set += 3;
941438Smark 			n -= 2;
942438Smark 		} else
943438Smark 			if ((*set++ & TRIM) == c)
944438Smark 				return (af);
945438Smark 	return (!af);
946438Smark }
947