xref: /csrg-svn/usr.bin/sed/compile.c (revision 55992)
1*55992Sbostic /*-
2*55992Sbostic  * Copyright (c) 1992 Diomidis Spinellis.
3*55992Sbostic  * Copyright (c) 1992 The Regents of the University of California.
4*55992Sbostic  * All rights reserved.
5*55992Sbostic  *
6*55992Sbostic  * This code is derived from software contributed to Berkeley by
7*55992Sbostic  * Diomidis Spinellis of Imperial College, University of London.
8*55992Sbostic  *
9*55992Sbostic  * %sccs.include.redist.c%
10*55992Sbostic  */
11*55992Sbostic 
12*55992Sbostic #ifndef lint
13*55992Sbostic static char sccsid[] = "@(#)compile.c	5.1 (Berkeley) 08/23/92";
14*55992Sbostic #endif /* not lint */
15*55992Sbostic 
16*55992Sbostic #include <sys/types.h>
17*55992Sbostic #include  <sys/stat.h>
18*55992Sbostic 
19*55992Sbostic #include <ctype.h>
20*55992Sbostic #include <errno.h>
21*55992Sbostic #include <fcntl.h>
22*55992Sbostic #include <limits.h>
23*55992Sbostic #include <regex.h>
24*55992Sbostic #include <stdio.h>
25*55992Sbostic #include <stdlib.h>
26*55992Sbostic #include <string.h>
27*55992Sbostic 
28*55992Sbostic #include "defs.h"
29*55992Sbostic #include "extern.h"
30*55992Sbostic 
31*55992Sbostic static char	 *compile_addr __P((char *, struct s_addr *));
32*55992Sbostic static char	 *compile_delimited __P((char *, char *));
33*55992Sbostic static char	 *compile_flags __P((char *, struct s_subst *));
34*55992Sbostic static char	 *compile_re __P((char *, regex_t *, int));
35*55992Sbostic static char	 *compile_subst __P((char *, char **, size_t));
36*55992Sbostic static char	 *compile_text __P((void));
37*55992Sbostic static char	 *compile_tr __P((char *, char **));
38*55992Sbostic static struct s_command
39*55992Sbostic 		**compile_stream __P((char *, struct s_command **, char *));
40*55992Sbostic static char	 *duptoeol __P((char *));
41*55992Sbostic static struct s_command
42*55992Sbostic 		 *findlabel __P((struct s_command *, struct s_command *));
43*55992Sbostic static void	  fixuplabel __P((struct s_command *, struct s_command *));
44*55992Sbostic 
45*55992Sbostic /*
46*55992Sbostic  * Command specification.  This is used to drive the command parser.
47*55992Sbostic  */
48*55992Sbostic struct s_format {
49*55992Sbostic 	char code;				/* Command code */
50*55992Sbostic 	int naddr;				/* Number of address args */
51*55992Sbostic 	enum e_args args;			/* Argument type */
52*55992Sbostic };
53*55992Sbostic 
54*55992Sbostic static struct s_format cmd_fmts[] = {
55*55992Sbostic 	{'{', 2, GROUP},
56*55992Sbostic 	{'a', 1, TEXT},
57*55992Sbostic 	{'b', 2, BRANCH},
58*55992Sbostic 	{'c', 2, TEXT},
59*55992Sbostic 	{'d', 2, EMPTY},
60*55992Sbostic 	{'D', 2, EMPTY},
61*55992Sbostic 	{'g', 2, EMPTY},
62*55992Sbostic 	{'G', 2, EMPTY},
63*55992Sbostic 	{'h', 2, EMPTY},
64*55992Sbostic 	{'H', 2, EMPTY},
65*55992Sbostic 	{'i', 1, TEXT},
66*55992Sbostic 	{'l', 2, EMPTY},
67*55992Sbostic 	{'n', 2, EMPTY},
68*55992Sbostic 	{'N', 2, EMPTY},
69*55992Sbostic 	{'p', 2, EMPTY},
70*55992Sbostic 	{'P', 2, EMPTY},
71*55992Sbostic 	{'q', 1, EMPTY},
72*55992Sbostic 	{'r', 1, RFILE},
73*55992Sbostic 	{'s', 2, SUBST},
74*55992Sbostic 	{'t', 2, BRANCH},
75*55992Sbostic 	{'w', 2, WFILE},
76*55992Sbostic 	{'x', 2, EMPTY},
77*55992Sbostic 	{'y', 2, TR},
78*55992Sbostic 	{'!', 2, NONSEL},
79*55992Sbostic 	{':', 0, LABEL},
80*55992Sbostic 	{'#', 0, COMMENT},
81*55992Sbostic 	{'=', 1, EMPTY},
82*55992Sbostic 	{'\0', 0, COMMENT},
83*55992Sbostic };
84*55992Sbostic 
85*55992Sbostic /* The compiled program */
86*55992Sbostic struct s_command *prog;
87*55992Sbostic 
88*55992Sbostic /*
89*55992Sbostic  * Compile the program into prog.
90*55992Sbostic  * Initialise appends
91*55992Sbostic  */
92*55992Sbostic void
93*55992Sbostic compile()
94*55992Sbostic {
95*55992Sbostic 	*compile_stream(NULL, &prog, NULL) = NULL;
96*55992Sbostic 	fixuplabel(prog, prog);
97*55992Sbostic 	appends = xmalloc(sizeof(struct s_appends) * appendnum);
98*55992Sbostic }
99*55992Sbostic 
/*
 * Advance the variable `p' that is in scope at the point of use past any
 * ASCII white space.  Does nothing when p is NULL.
 */
#define EATSPACE() do {							\
	if (p != NULL) {						\
		while (*p != '\0' && isascii(*p) && isspace(*p))	\
			++p;						\
	}								\
} while (0)
105*55992Sbostic 
106*55992Sbostic static struct s_command **
107*55992Sbostic compile_stream(terminator, link, p)
108*55992Sbostic 	char *terminator;
109*55992Sbostic 	struct s_command **link;
110*55992Sbostic 	register char *p;
111*55992Sbostic {
112*55992Sbostic 	static char lbuf[_POSIX2_LINE_MAX + 1];	/* To save stack */
113*55992Sbostic 	struct s_command *cmd, *cmd2;
114*55992Sbostic 	struct s_format *fp;
115*55992Sbostic 	int naddr;				/* Number of addresses */
116*55992Sbostic 
117*55992Sbostic 	if (p != NULL)
118*55992Sbostic 		goto semicolon;
119*55992Sbostic 	for (;;) {
120*55992Sbostic 		if ((p = cu_fgets(lbuf, sizeof(lbuf))) == NULL) {
121*55992Sbostic 			if (terminator != NULL)
122*55992Sbostic 				err(COMPILE, "unexpected EOF (pending }'s)");
123*55992Sbostic 			return (link);
124*55992Sbostic 		}
125*55992Sbostic 
126*55992Sbostic semicolon:	EATSPACE();
127*55992Sbostic 		if (p && (*p == '#' || *p == '\0'))
128*55992Sbostic 			continue;
129*55992Sbostic 		if (*p == '}') {
130*55992Sbostic 			if (terminator == NULL)
131*55992Sbostic 				err(COMPILE, "unexpected }");
132*55992Sbostic 			return (link);
133*55992Sbostic 		}
134*55992Sbostic 		*link = cmd = xmalloc(sizeof(struct s_command));
135*55992Sbostic 		link = &cmd->next;
136*55992Sbostic 		cmd->nonsel = cmd->inrange = 0;
137*55992Sbostic 		/* First parse the addresses */
138*55992Sbostic 		naddr = 0;
139*55992Sbostic 		cmd->a1 = cmd->a2 = NULL;
140*55992Sbostic 
141*55992Sbostic /* Valid characters to start an address */
142*55992Sbostic #define	addrchar(c)	(strchr("0123456789/\\$", (c)))
143*55992Sbostic 		if (addrchar(*p)) {
144*55992Sbostic 			naddr++;
145*55992Sbostic 			cmd->a1 = xmalloc(sizeof(struct s_addr));
146*55992Sbostic 			p = compile_addr(p, cmd->a1);
147*55992Sbostic 			EATSPACE();				/* EXTENSION */
148*55992Sbostic 			if (*p == ',') {
149*55992Sbostic 				naddr++;
150*55992Sbostic 				p++;
151*55992Sbostic 				EATSPACE();			/* EXTENSION */
152*55992Sbostic 				cmd->a2 = xmalloc(sizeof(struct s_addr));
153*55992Sbostic 				p = compile_addr(p, cmd->a2);
154*55992Sbostic 			}
155*55992Sbostic 		}
156*55992Sbostic 
157*55992Sbostic nonsel:		/* Now parse the command */
158*55992Sbostic 		EATSPACE();
159*55992Sbostic 		if (!*p)
160*55992Sbostic 			err(COMPILE, "command expected");
161*55992Sbostic 		cmd->code = *p;
162*55992Sbostic 		for (fp = cmd_fmts; fp->code; fp++)
163*55992Sbostic 			if (fp->code == *p)
164*55992Sbostic 				break;
165*55992Sbostic 		if (!fp->code)
166*55992Sbostic 			err(COMPILE, "invalid command code %c", *p);
167*55992Sbostic 		if (naddr > fp->naddr)
168*55992Sbostic 			err(COMPILE,
169*55992Sbostic "command %c expects up to %d address(es), found %d", *p, fp->naddr, naddr);
170*55992Sbostic 		switch (fp->args) {
171*55992Sbostic 		case NONSEL:			/* ! */
172*55992Sbostic 			cmd->nonsel = ! cmd->nonsel;
173*55992Sbostic 			p++;
174*55992Sbostic 			goto nonsel;
175*55992Sbostic 		case GROUP:			/* { */
176*55992Sbostic 			p++;
177*55992Sbostic 			EATSPACE();
178*55992Sbostic 			if (!*p)
179*55992Sbostic 				p = NULL;
180*55992Sbostic 			cmd2 = xmalloc(sizeof(struct s_command));
181*55992Sbostic 			cmd2->code = '}';
182*55992Sbostic 			*compile_stream("}", &cmd->u.c, p) = cmd2;
183*55992Sbostic 			cmd->next = cmd2;
184*55992Sbostic 			link = &cmd2->next;
185*55992Sbostic 			break;
186*55992Sbostic 		case EMPTY:		/* d D g G h H l n N p P q x = \0 */
187*55992Sbostic 			p++;
188*55992Sbostic 			EATSPACE();
189*55992Sbostic 			if (*p == ';') {
190*55992Sbostic 				p++;
191*55992Sbostic 				link = &cmd->next;
192*55992Sbostic 				goto semicolon;
193*55992Sbostic 			}
194*55992Sbostic 			if (*p)
195*55992Sbostic 				err(COMPILE,
196*55992Sbostic "extra characters at the end of %c command", cmd->code);
197*55992Sbostic 			break;
198*55992Sbostic 		case TEXT:			/* a c i */
199*55992Sbostic 			p++;
200*55992Sbostic 			EATSPACE();
201*55992Sbostic 			if (*p != '\\')
202*55992Sbostic 				err(COMPILE,
203*55992Sbostic "command %c expects \\ followed by text", cmd->code);
204*55992Sbostic 			p++;
205*55992Sbostic 			EATSPACE();
206*55992Sbostic 			if (*p)
207*55992Sbostic 				err(COMPILE,
208*55992Sbostic "extra characters after \\ at the end of %c command", cmd->code);
209*55992Sbostic 			cmd->t = compile_text();
210*55992Sbostic 			break;
211*55992Sbostic 		case COMMENT:			/* \0 # */
212*55992Sbostic 			break;
213*55992Sbostic 		case WFILE:			/* w */
214*55992Sbostic 			p++;
215*55992Sbostic 			EATSPACE();
216*55992Sbostic 			if (*p == '\0')
217*55992Sbostic 				err(COMPILE, "filename expected");
218*55992Sbostic 			cmd->t = duptoeol(p);
219*55992Sbostic 			if (aflag)
220*55992Sbostic 				cmd->u.fd = -1;
221*55992Sbostic 			else if ((cmd->u.fd = open(p,
222*55992Sbostic 			    O_WRONLY|O_APPEND|O_CREAT|O_TRUNC,
223*55992Sbostic 			    DEFFILEMODE)) == -1)
224*55992Sbostic 				err(FATAL, "%s: %s\n", p, strerror(errno));
225*55992Sbostic 			break;
226*55992Sbostic 		case RFILE:			/* r */
227*55992Sbostic 			p++;
228*55992Sbostic 			EATSPACE();
229*55992Sbostic 			if (*p == '\0')
230*55992Sbostic 				err(COMPILE, "filename expected");
231*55992Sbostic 			else
232*55992Sbostic 				cmd->t = duptoeol(p);
233*55992Sbostic 			break;
234*55992Sbostic 		case BRANCH:			/* b t */
235*55992Sbostic 			p++;
236*55992Sbostic 			EATSPACE();
237*55992Sbostic 			if (*p == '\0')
238*55992Sbostic 				cmd->t = NULL;
239*55992Sbostic 			else
240*55992Sbostic 				cmd->t = duptoeol(p);
241*55992Sbostic 			break;
242*55992Sbostic 		case LABEL:			/* : */
243*55992Sbostic 			p++;
244*55992Sbostic 			EATSPACE();
245*55992Sbostic 			cmd->t = duptoeol(p);
246*55992Sbostic 			if (strlen(p) == 0)
247*55992Sbostic 				err(COMPILE, "empty label");
248*55992Sbostic 			break;
249*55992Sbostic 		case SUBST:			/* s */
250*55992Sbostic 			p++;
251*55992Sbostic 			if (*p == '\0' || *p == '\\')
252*55992Sbostic 				err(COMPILE,
253*55992Sbostic "substitute pattern can not be delimited by newline or backslash");
254*55992Sbostic 			cmd->u.s = xmalloc(sizeof(struct s_subst));
255*55992Sbostic 			p = compile_re(p, &cmd->u.s->re, 0);
256*55992Sbostic 			if (p == NULL)
257*55992Sbostic 				err(COMPILE, "newline in substitution pattern");
258*55992Sbostic 			cmd->u.s->pmatch = xmalloc((cmd->u.s->re.re_nsub + 1) *
259*55992Sbostic 			    sizeof(regmatch_t));
260*55992Sbostic 			p--;
261*55992Sbostic 			p = compile_subst(p,
262*55992Sbostic 			    &cmd->u.s->new, cmd->u.s->re.re_nsub);
263*55992Sbostic 			if (p == NULL)
264*55992Sbostic 				err(COMPILE,
265*55992Sbostic "unterminated substitute replace in regular expression");
266*55992Sbostic 			p = compile_flags(p, cmd->u.s);
267*55992Sbostic 			EATSPACE();
268*55992Sbostic 			if (*p == ';') {
269*55992Sbostic 				p++;
270*55992Sbostic 				link = &cmd->next;
271*55992Sbostic 				goto semicolon;
272*55992Sbostic 			}
273*55992Sbostic 			break;
274*55992Sbostic 		case TR:			/* y */
275*55992Sbostic 			p++;
276*55992Sbostic 			p = compile_tr(p, (char **)&cmd->u.y);
277*55992Sbostic 			EATSPACE();
278*55992Sbostic 			if (*p == ';') {
279*55992Sbostic 				p++;
280*55992Sbostic 				link = &cmd->next;
281*55992Sbostic 				goto semicolon;
282*55992Sbostic 			}
283*55992Sbostic 			if (*p)
284*55992Sbostic 				err(COMPILE,
285*55992Sbostic "extra text at the end of a transform command");
286*55992Sbostic 			break;
287*55992Sbostic 		}
288*55992Sbostic 	}
289*55992Sbostic }
290*55992Sbostic 
291*55992Sbostic /*
292*55992Sbostic  * Get a delimited string.  P points to the delimeter of the string; d points
293*55992Sbostic  * to a buffer area.  Newline and delimiter escapes are processed; other
294*55992Sbostic  * escapes are ignored.
295*55992Sbostic  *
296*55992Sbostic  * Returns a pointer to the first character after the final delimiter or NULL
297*55992Sbostic  * in the case of a non-terminated string.  The character array d is filled
298*55992Sbostic  * with the processed string.
299*55992Sbostic  */
300*55992Sbostic static char *
301*55992Sbostic compile_delimited(p, d)
302*55992Sbostic 	char *p, *d;
303*55992Sbostic {
304*55992Sbostic 	char c;
305*55992Sbostic 
306*55992Sbostic 	c = *p++;
307*55992Sbostic 	if (c == '\0')
308*55992Sbostic 		return (NULL);
309*55992Sbostic 	else if (c == '\\')
310*55992Sbostic 		err(COMPILE, "\\ can not be used as a string delimiter");
311*55992Sbostic 	else if (c == '\n')
312*55992Sbostic 		err(COMPILE, "newline can not be used as a string delimiter");
313*55992Sbostic 	while (*p) {
314*55992Sbostic 		if (*p == '\\' && p[1] == c)
315*55992Sbostic 				p++;
316*55992Sbostic 		else if (*p == '\\' && p[1] == 'n') {
317*55992Sbostic 				*d++ = '\n';
318*55992Sbostic 				p += 2;
319*55992Sbostic 				continue;
320*55992Sbostic 		} else if (*p == c) {
321*55992Sbostic 			*d = '\0';
322*55992Sbostic 			return (p + 1);
323*55992Sbostic 		}
324*55992Sbostic 		*d++ = *p++;
325*55992Sbostic 	}
326*55992Sbostic 	return (NULL);
327*55992Sbostic }
328*55992Sbostic 
329*55992Sbostic /*
330*55992Sbostic  * Get a regular expression.  P points to the delimeter of the regular
331*55992Sbostic  * expression; d points a regexp pointer.  Newline and delimiter escapes
332*55992Sbostic  * are processed; other escapes are ignored.
333*55992Sbostic  * Returns a pointer to the first character after the final delimiter
334*55992Sbostic  * or NULL in the case of a non terminated regular expression.
335*55992Sbostic  * The regexp pointer is set to the compiled regular expression.
336*55992Sbostic  * Cflags are passed to regcomp.
337*55992Sbostic  */
338*55992Sbostic static char *
339*55992Sbostic compile_re(p, rep, cflags)
340*55992Sbostic 	char *p;
341*55992Sbostic 	regex_t *rep;
342*55992Sbostic 	int cflags;
343*55992Sbostic {
344*55992Sbostic 	int eval;
345*55992Sbostic 	char re[_POSIX2_LINE_MAX + 1];
346*55992Sbostic 
347*55992Sbostic 	p = compile_delimited(p, re);
348*55992Sbostic 	if (p && (eval = regcomp(rep, re, cflags)) != 0)
349*55992Sbostic 		err(COMPILE, "RE error: %s", strregerror(eval, rep));
350*55992Sbostic 	return (p);
351*55992Sbostic }
352*55992Sbostic 
353*55992Sbostic /*
354*55992Sbostic  * Compile the substitution string of a regular expression and set res to
355*55992Sbostic  * point to a saved copy of it.  Nsub is the number of parenthesized regular
356*55992Sbostic  * expressions.
357*55992Sbostic  */
358*55992Sbostic static char *
359*55992Sbostic compile_subst(p, res, nsub)
360*55992Sbostic 	char *p, **res;
361*55992Sbostic 	size_t nsub;
362*55992Sbostic {
363*55992Sbostic 	static char lbuf[_POSIX2_LINE_MAX + 1];
364*55992Sbostic 	int asize, size;
365*55992Sbostic 	char c, *text, *op, *s;
366*55992Sbostic 
367*55992Sbostic 	c = *p++;			/* Terminator character */
368*55992Sbostic 	if (c == '\0')
369*55992Sbostic 		return (NULL);
370*55992Sbostic 
371*55992Sbostic 	asize = 2 * _POSIX2_LINE_MAX + 1;
372*55992Sbostic 	text = xmalloc(asize);
373*55992Sbostic 	size = 0;
374*55992Sbostic 	do {
375*55992Sbostic 		op = s = text + size;
376*55992Sbostic 		for (; *p; p++) {
377*55992Sbostic 			if (*p == '\\') {
378*55992Sbostic 				p++;
379*55992Sbostic 				if (strchr("123456789", *p) != NULL) {
380*55992Sbostic 					*s++ = '\\';
381*55992Sbostic 					if (*p - '1' > nsub)
382*55992Sbostic 						err(COMPILE,
383*55992Sbostic "\\%c not defined in regular expression (use \\1-\\%d)", *p, nsub + 1);
384*55992Sbostic 				} else if (*p == '&')
385*55992Sbostic 					*s++ = '\\';
386*55992Sbostic 			} else if (*p == c) {
387*55992Sbostic 				p++;
388*55992Sbostic 				*s++ = '\0';
389*55992Sbostic 				size += s - op;
390*55992Sbostic 				*res = xrealloc(text, size);
391*55992Sbostic 				return (p);
392*55992Sbostic 			} else if (*p == '\n') {
393*55992Sbostic 				err(COMPILE,
394*55992Sbostic "unescaped newline inside substitute pattern");
395*55992Sbostic 				return (NULL);
396*55992Sbostic 			}
397*55992Sbostic 			*s++ = *p;
398*55992Sbostic 		}
399*55992Sbostic 		size += s - op;
400*55992Sbostic 		if (asize - size < _POSIX2_LINE_MAX + 1) {
401*55992Sbostic 			asize *= 2;
402*55992Sbostic 			text = xmalloc(asize);
403*55992Sbostic 		}
404*55992Sbostic 	} while (cu_fgets(p = lbuf, sizeof(lbuf)));
405*55992Sbostic 	err(COMPILE, "EOF in substitute pattern");
406*55992Sbostic 	return (NULL);
407*55992Sbostic }
408*55992Sbostic 
409*55992Sbostic /*
410*55992Sbostic  * Compile the flags of the s command
411*55992Sbostic  */
412*55992Sbostic static char *
413*55992Sbostic compile_flags(p, s)
414*55992Sbostic 	char *p;
415*55992Sbostic 	struct s_subst *s;
416*55992Sbostic {
417*55992Sbostic 	int gn;			/* True if we have seen g or n */
418*55992Sbostic 	char wfile[_POSIX2_LINE_MAX + 1], *q;
419*55992Sbostic 
420*55992Sbostic 	s->n = 1;				/* Default */
421*55992Sbostic 	s->p = 0;
422*55992Sbostic 	s->wfile = NULL;
423*55992Sbostic 	s->wfd = -1;
424*55992Sbostic 	for (gn = 0;;) {
425*55992Sbostic 		EATSPACE();			/* EXTENSION */
426*55992Sbostic 		switch (*p) {
427*55992Sbostic 		case 'g':
428*55992Sbostic 			if (gn)
429*55992Sbostic 				err(WARNING,
430*55992Sbostic 				    "both g and number in substitute flags");
431*55992Sbostic 			gn = 1;
432*55992Sbostic 			s->n = 0;
433*55992Sbostic 			break;
434*55992Sbostic 		case '\0':
435*55992Sbostic 		case '\n':
436*55992Sbostic 		case ';':
437*55992Sbostic 			return (p);
438*55992Sbostic 		case 'p':
439*55992Sbostic 			s->p = 1;
440*55992Sbostic 			break;
441*55992Sbostic 		case '1': case '2': case '3':
442*55992Sbostic 		case '4': case '5': case '6':
443*55992Sbostic 		case '7': case '8': case '9':
444*55992Sbostic 			if (gn)
445*55992Sbostic 				err(WARNING,
446*55992Sbostic 				    "both g and number in substitute flags");
447*55992Sbostic 			gn = 1;
448*55992Sbostic 			/* XXX Check for overflow */
449*55992Sbostic 			s->n = (int)strtol(p, &p, 10);
450*55992Sbostic 			break;
451*55992Sbostic 		case 'w':
452*55992Sbostic 			p++;
453*55992Sbostic #ifdef HISTORIC_PRACTICE
454*55992Sbostic 			if (*p != ' ') {
455*55992Sbostic 				err(WARNING, "space missing before w wfile");
456*55992Sbostic 				return (p);
457*55992Sbostic 			}
458*55992Sbostic #endif
459*55992Sbostic 			EATSPACE();
460*55992Sbostic 			q = wfile;
461*55992Sbostic 			while (*p) {
462*55992Sbostic 				if (*p == '\n')
463*55992Sbostic 					break;
464*55992Sbostic 				*q++ = *p++;
465*55992Sbostic 			}
466*55992Sbostic 			*q = '\0';
467*55992Sbostic 			if (q == wfile)
468*55992Sbostic 				err(COMPILE, "empty wfile specified");
469*55992Sbostic 			s->wfile = strdup(wfile);
470*55992Sbostic 			if (!aflag && (s->wfd = open(wfile,
471*55992Sbostic 			    O_WRONLY|O_APPEND|O_CREAT|O_TRUNC,
472*55992Sbostic 			    DEFFILEMODE)) == -1)
473*55992Sbostic 				err(FATAL, "%s: %s\n", wfile, strerror(errno));
474*55992Sbostic 			return (p);
475*55992Sbostic 		default:
476*55992Sbostic 			err(COMPILE,
477*55992Sbostic 			    "bad flag in substitute command: '%c'", p[-1]);
478*55992Sbostic 			break;
479*55992Sbostic 		}
480*55992Sbostic 		p++;
481*55992Sbostic 	}
482*55992Sbostic }
483*55992Sbostic 
484*55992Sbostic /*
485*55992Sbostic  * Compile a translation set of strings into a lookup table.
486*55992Sbostic  */
487*55992Sbostic static char *
488*55992Sbostic compile_tr(p, transtab)
489*55992Sbostic 	char *p;
490*55992Sbostic 	char **transtab;
491*55992Sbostic {
492*55992Sbostic 	int i;
493*55992Sbostic 	char *lt, *op, *np;
494*55992Sbostic 	char old[_POSIX2_LINE_MAX + 1];
495*55992Sbostic 	char new[_POSIX2_LINE_MAX + 1];
496*55992Sbostic 
497*55992Sbostic 	if (*p == '\0' || *p == '\\')
498*55992Sbostic 		err(COMPILE,
499*55992Sbostic "transform pattern can not be delimited by newline or backslash");
500*55992Sbostic 	p = compile_delimited(p, old);
501*55992Sbostic 	if (p == NULL) {
502*55992Sbostic 		err(COMPILE, "unterminated transform source string");
503*55992Sbostic 		return (NULL);
504*55992Sbostic 	}
505*55992Sbostic 	p = compile_delimited(--p, new);
506*55992Sbostic 	if (p == NULL) {
507*55992Sbostic 		err(COMPILE, "unterminated transform target string");
508*55992Sbostic 		return (NULL);
509*55992Sbostic 	}
510*55992Sbostic 	EATSPACE();
511*55992Sbostic 	if (strlen(new) != strlen(old)) {
512*55992Sbostic 		err(COMPILE, "transform strings are not the same length");
513*55992Sbostic 		return (NULL);
514*55992Sbostic 	}
515*55992Sbostic 	/* We assume characters are 8 bits */
516*55992Sbostic 	lt = xmalloc(UCHAR_MAX);
517*55992Sbostic 	for (i = 0; i <= UCHAR_MAX; i++)
518*55992Sbostic 		lt[i] = (char)i;
519*55992Sbostic 	for (op = old, np = new; *op; op++, np++)
520*55992Sbostic 		lt[(u_char)*op] = *np;
521*55992Sbostic 	*transtab = lt;
522*55992Sbostic 	return (p);
523*55992Sbostic }
524*55992Sbostic 
525*55992Sbostic /*
526*55992Sbostic  * Compile the text following an a or i command.
527*55992Sbostic  */
528*55992Sbostic static char *
529*55992Sbostic compile_text()
530*55992Sbostic {
531*55992Sbostic 	int asize, size;
532*55992Sbostic 	char *text, *p, *op, *s;
533*55992Sbostic 	char lbuf[_POSIX2_LINE_MAX + 1];
534*55992Sbostic 
535*55992Sbostic 	asize = 2 * _POSIX2_LINE_MAX + 1;
536*55992Sbostic 	text = xmalloc(asize);
537*55992Sbostic 	size = 0;
538*55992Sbostic 	while (cu_fgets(lbuf, sizeof(lbuf))) {
539*55992Sbostic 		op = s = text + size;
540*55992Sbostic 		p = lbuf;
541*55992Sbostic 		EATSPACE();
542*55992Sbostic 		for (; *p; p++) {
543*55992Sbostic 			if (*p == '\\')
544*55992Sbostic 				p++;
545*55992Sbostic 			*s++ = *p;
546*55992Sbostic 		}
547*55992Sbostic 		size += s - op;
548*55992Sbostic 		if (p[-2] != '\\') {
549*55992Sbostic 			*s = '\0';
550*55992Sbostic 			break;
551*55992Sbostic 		}
552*55992Sbostic 		if (asize - size < _POSIX2_LINE_MAX + 1) {
553*55992Sbostic 			asize *= 2;
554*55992Sbostic 			text = xmalloc(asize);
555*55992Sbostic 		}
556*55992Sbostic 	}
557*55992Sbostic 	return (xrealloc(text, size + 1));
558*55992Sbostic }
559*55992Sbostic 
560*55992Sbostic /*
561*55992Sbostic  * Get an address and return a pointer to the first character after
562*55992Sbostic  * it.  Fill the structure pointed to according to the address.
563*55992Sbostic  */
564*55992Sbostic static char *
565*55992Sbostic compile_addr(p, a)
566*55992Sbostic 	char *p;
567*55992Sbostic 	struct s_addr *a;
568*55992Sbostic {
569*55992Sbostic 	regex_t *re;
570*55992Sbostic 	char *end;
571*55992Sbostic 
572*55992Sbostic 	switch (*p) {
573*55992Sbostic 	case '\\':				/* Context address */
574*55992Sbostic 		re = xmalloc(sizeof(regex_t));
575*55992Sbostic 		a->u.r = re;
576*55992Sbostic 		p = compile_re(p + 1, re, REG_NOSUB);
577*55992Sbostic 		if (p == NULL)
578*55992Sbostic 			err(COMPILE, "unterminated regular expression");
579*55992Sbostic 		a->type = AT_RE;
580*55992Sbostic 		return (p);
581*55992Sbostic 	case '/':				/* Context address */
582*55992Sbostic 		re = xmalloc(sizeof(regex_t));
583*55992Sbostic 		a->u.r = re;
584*55992Sbostic 		p = compile_re(p, a->u.r, REG_NOSUB);
585*55992Sbostic 		if (p == NULL)
586*55992Sbostic 			err(COMPILE, "unterminated regular expression");
587*55992Sbostic 		a->type = AT_RE;
588*55992Sbostic 		return (p);
589*55992Sbostic 	case '$':				/* Last line */
590*55992Sbostic 		a->type = AT_LAST;
591*55992Sbostic 		return (p + 1);
592*55992Sbostic 						/* Line number */
593*55992Sbostic 	case '0': case '1': case '2': case '3': case '4':
594*55992Sbostic 	case '5': case '6': case '7': case '8': case '9':
595*55992Sbostic 		a->type = AT_LINE;
596*55992Sbostic 		a->u.l = strtol(p, &end, 10);
597*55992Sbostic 		return (end);
598*55992Sbostic 	default:
599*55992Sbostic 		err(COMPILE, "expected context address");
600*55992Sbostic 		return (NULL);
601*55992Sbostic 	}
602*55992Sbostic }
603*55992Sbostic 
/*
 * Return a newly allocated copy of the characters of s up to, but not
 * including, the first newline or the terminating NUL.  As a side effect
 * s itself is cut off at that point.
 */
static char *
duptoeol(s)
	register char *s;
{
	size_t n;
	char *copy;

	n = strcspn(s, "\n");
	s[n] = '\0';
	copy = xmalloc(n + 1);
	return (memcpy(copy, s, n + 1));
}
619*55992Sbostic 
620*55992Sbostic /*
621*55992Sbostic  * Find the label contained in the command l in the command linked list cp.
622*55992Sbostic  * L is excluded from the search.  Return NULL if not found.
623*55992Sbostic  */
624*55992Sbostic static struct s_command *
625*55992Sbostic findlabel(l, cp)
626*55992Sbostic 	struct s_command *l, *cp;
627*55992Sbostic {
628*55992Sbostic 	struct s_command *r;
629*55992Sbostic 
630*55992Sbostic 	for (; cp; cp = cp->next)
631*55992Sbostic 		if (cp->code == ':' && cp != l && strcmp(l->t, cp->t) == 0)
632*55992Sbostic 			return (cp);
633*55992Sbostic 		else if (cp->code == '{' && (r = findlabel(l, cp->u.c)))
634*55992Sbostic 			return (r);
635*55992Sbostic 	return (NULL);
636*55992Sbostic }
637*55992Sbostic 
638*55992Sbostic /*
639*55992Sbostic  * Convert goto label names to addresses.
640*55992Sbostic  * Detect duplicate labels.
641*55992Sbostic  * Set appendnum to the number of a and r commands in the script.
642*55992Sbostic  * Free the memory used by labels in b and t commands (but not by :)
643*55992Sbostic  * Root is a pointer to the script linked list; cp points to the
644*55992Sbostic  * search start.
645*55992Sbostic  * TODO: Remove } nodes
646*55992Sbostic  */
647*55992Sbostic static void
648*55992Sbostic fixuplabel(root, cp)
649*55992Sbostic 	struct s_command *root, *cp;
650*55992Sbostic {
651*55992Sbostic 	struct s_command *cp2;
652*55992Sbostic 
653*55992Sbostic 	for (; cp; cp = cp->next)
654*55992Sbostic 		switch (cp->code) {
655*55992Sbostic 		case 'a':
656*55992Sbostic 		case 'r':
657*55992Sbostic 			appendnum++;
658*55992Sbostic 			break;
659*55992Sbostic 		case 'b':
660*55992Sbostic 		case 't':
661*55992Sbostic 			if (cp->t == NULL) {
662*55992Sbostic 				cp->u.c = NULL;
663*55992Sbostic 				break;
664*55992Sbostic 			}
665*55992Sbostic 			if ((cp2 = findlabel(cp, root)) == NULL)
666*55992Sbostic 				err(COMPILE2, "unspecified label %s", cp->t);
667*55992Sbostic 			free(cp->t);
668*55992Sbostic 			cp->u.c = cp2;
669*55992Sbostic 			break;
670*55992Sbostic 		case '{':
671*55992Sbostic 			fixuplabel(root, cp->u.c);
672*55992Sbostic 			break;
673*55992Sbostic 		case ':':
674*55992Sbostic 			if (findlabel(cp, root))
675*55992Sbostic 				err(COMPILE2, "duplicate label %s", cp->t);
676*55992Sbostic 			break;
677*55992Sbostic 		}
678*55992Sbostic }
679