xref: /minix3/usr.bin/sed/compile.c (revision 0a6a1f1d05b60e214de2f05a7310ddd1f0e590e7)
1*0a6a1f1dSLionel Sambuc /*	$NetBSD: compile.c,v 1.46 2015/03/12 12:40:41 christos Exp $	*/
2f789fee2SBen Gras 
3f789fee2SBen Gras /*-
4*0a6a1f1dSLionel Sambuc  * Copyright (c) 1992 Diomidis Spinellis.
5f789fee2SBen Gras  * Copyright (c) 1992, 1993
6f789fee2SBen Gras  *	The Regents of the University of California.  All rights reserved.
7f789fee2SBen Gras  *
8f789fee2SBen Gras  * This code is derived from software contributed to Berkeley by
9f789fee2SBen Gras  * Diomidis Spinellis of Imperial College, University of London.
10f789fee2SBen Gras  *
11f789fee2SBen Gras  * Redistribution and use in source and binary forms, with or without
12f789fee2SBen Gras  * modification, are permitted provided that the following conditions
13f789fee2SBen Gras  * are met:
14f789fee2SBen Gras  * 1. Redistributions of source code must retain the above copyright
15f789fee2SBen Gras  *    notice, this list of conditions and the following disclaimer.
16f789fee2SBen Gras  * 2. Redistributions in binary form must reproduce the above copyright
17f789fee2SBen Gras  *    notice, this list of conditions and the following disclaimer in the
18f789fee2SBen Gras  *    documentation and/or other materials provided with the distribution.
19f789fee2SBen Gras  * 3. Neither the name of the University nor the names of its contributors
20f789fee2SBen Gras  *    may be used to endorse or promote products derived from this software
21f789fee2SBen Gras  *    without specific prior written permission.
22f789fee2SBen Gras  *
23f789fee2SBen Gras  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
24f789fee2SBen Gras  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
25f789fee2SBen Gras  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
26f789fee2SBen Gras  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
27f789fee2SBen Gras  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
28f789fee2SBen Gras  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
29f789fee2SBen Gras  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
30f789fee2SBen Gras  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
31f789fee2SBen Gras  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
32f789fee2SBen Gras  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
33f789fee2SBen Gras  * SUCH DAMAGE.
34f789fee2SBen Gras  */
35f789fee2SBen Gras 
36f789fee2SBen Gras #if HAVE_NBTOOL_CONFIG_H
37f789fee2SBen Gras #include "nbtool_config.h"
38f789fee2SBen Gras #endif
39f789fee2SBen Gras 
40f789fee2SBen Gras #include <sys/cdefs.h>
41*0a6a1f1dSLionel Sambuc __RCSID("$NetBSD: compile.c,v 1.46 2015/03/12 12:40:41 christos Exp $");
42*0a6a1f1dSLionel Sambuc #ifdef __FBSDID
43*0a6a1f1dSLionel Sambuc __FBSDID("$FreeBSD: head/usr.bin/sed/compile.c 259132 2013-12-09 18:57:20Z eadler $");
44f789fee2SBen Gras #endif
45*0a6a1f1dSLionel Sambuc 
46*0a6a1f1dSLionel Sambuc #if 0
47*0a6a1f1dSLionel Sambuc static const char sccsid[] = "@(#)compile.c	8.1 (Berkeley) 6/6/93";
48*0a6a1f1dSLionel Sambuc #endif
49f789fee2SBen Gras 
50f789fee2SBen Gras #include <sys/types.h>
51f789fee2SBen Gras #include <sys/stat.h>
52f789fee2SBen Gras 
53f789fee2SBen Gras #include <ctype.h>
54*0a6a1f1dSLionel Sambuc #include <err.h>
55f789fee2SBen Gras #include <errno.h>
56f789fee2SBen Gras #include <fcntl.h>
57f789fee2SBen Gras #include <limits.h>
58f789fee2SBen Gras #include <regex.h>
59f789fee2SBen Gras #include <stdio.h>
60f789fee2SBen Gras #include <stdlib.h>
61f789fee2SBen Gras #include <string.h>
62*0a6a1f1dSLionel Sambuc #include <wchar.h>
63f789fee2SBen Gras 
64f789fee2SBen Gras #include "defs.h"
65f789fee2SBen Gras #include "extern.h"
66f789fee2SBen Gras 
67f789fee2SBen Gras #define LHSZ	128
68f789fee2SBen Gras #define	LHMASK	(LHSZ - 1)
69f789fee2SBen Gras static struct labhash {
70f789fee2SBen Gras 	struct	labhash *lh_next;
71f789fee2SBen Gras 	u_int	lh_hash;
72f789fee2SBen Gras 	struct	s_command *lh_cmd;
73f789fee2SBen Gras 	int	lh_ref;
74f789fee2SBen Gras } *labels[LHSZ];
75f789fee2SBen Gras 
/* Forward declarations for the file-local compiler helpers. */
static char		 *compile_addr(char *p, struct s_addr *a);
static char		 *compile_ccl(char **sp, char *t);
static char		 *compile_delimited(char *p, char *d, int is_tr);
static char		 *compile_flags(char *p, struct s_subst *s);
static regex_t		 *compile_re(char *re, int case_insensitive);
static char		 *compile_subst(char *p, struct s_subst *s);
static char		 *compile_text(void);
static char		 *compile_tr(char *p, struct s_tr **py);
static struct s_command	**compile_stream(struct s_command **link);
static char		 *duptoeol(char *s, const char *ctype);
static void		  enterlabel(struct s_command *cp);
static struct s_command	 *findlabel(char *name);
static void		  fixuplabel(struct s_command *cp, struct s_command *end);
static void		  uselabel(void);
92f789fee2SBen Gras 
93f789fee2SBen Gras /*
94f789fee2SBen Gras  * Command specification.  This is used to drive the command parser.
95f789fee2SBen Gras  */
96f789fee2SBen Gras struct s_format {
97f789fee2SBen Gras 	char code;				/* Command code */
98f789fee2SBen Gras 	int naddr;				/* Number of address args */
99f789fee2SBen Gras 	enum e_args args;			/* Argument type */
100f789fee2SBen Gras };
101f789fee2SBen Gras 
102f789fee2SBen Gras static struct s_format cmd_fmts[] = {
103f789fee2SBen Gras 	{'{', 2, GROUP},
104f789fee2SBen Gras 	{'}', 0, ENDGROUP},
105f789fee2SBen Gras 	{'a', 1, TEXT},
106f789fee2SBen Gras 	{'b', 2, BRANCH},
107f789fee2SBen Gras 	{'c', 2, TEXT},
108f789fee2SBen Gras 	{'d', 2, EMPTY},
109f789fee2SBen Gras 	{'D', 2, EMPTY},
110f789fee2SBen Gras 	{'g', 2, EMPTY},
111f789fee2SBen Gras 	{'G', 2, EMPTY},
112f789fee2SBen Gras 	{'h', 2, EMPTY},
113f789fee2SBen Gras 	{'H', 2, EMPTY},
114f789fee2SBen Gras 	{'i', 1, TEXT},
115f789fee2SBen Gras 	{'l', 2, EMPTY},
116f789fee2SBen Gras 	{'n', 2, EMPTY},
117f789fee2SBen Gras 	{'N', 2, EMPTY},
118f789fee2SBen Gras 	{'p', 2, EMPTY},
119f789fee2SBen Gras 	{'P', 2, EMPTY},
120f789fee2SBen Gras 	{'q', 1, EMPTY},
121f789fee2SBen Gras 	{'r', 1, RFILE},
122f789fee2SBen Gras 	{'s', 2, SUBST},
123f789fee2SBen Gras 	{'t', 2, BRANCH},
124f789fee2SBen Gras 	{'w', 2, WFILE},
125f789fee2SBen Gras 	{'x', 2, EMPTY},
126f789fee2SBen Gras 	{'y', 2, TR},
127f789fee2SBen Gras 	{'!', 2, NONSEL},
128f789fee2SBen Gras 	{':', 0, LABEL},
129f789fee2SBen Gras 	{'#', 0, COMMENT},
130f789fee2SBen Gras 	{'=', 1, EMPTY},
131f789fee2SBen Gras 	{'\0', 0, COMMENT},
132f789fee2SBen Gras };
133f789fee2SBen Gras 
134f789fee2SBen Gras /* The compiled program. */
135f789fee2SBen Gras struct s_command *prog;
136f789fee2SBen Gras 
137f789fee2SBen Gras /*
138f789fee2SBen Gras  * Compile the program into prog.
139f789fee2SBen Gras  * Initialise appends.
140f789fee2SBen Gras  */
141f789fee2SBen Gras void
compile(void)142f789fee2SBen Gras compile(void)
143f789fee2SBen Gras {
144f789fee2SBen Gras 	*compile_stream(&prog) = NULL;
145f789fee2SBen Gras 	fixuplabel(prog, NULL);
146f789fee2SBen Gras 	uselabel();
147f789fee2SBen Gras 	if (appendnum > 0)
148f789fee2SBen Gras 		appends = xmalloc(sizeof(struct s_appends) * appendnum);
149f789fee2SBen Gras 	match = xmalloc((maxnsub + 1) * sizeof(regmatch_t));
150f789fee2SBen Gras }
151f789fee2SBen Gras 
/*
 * Advance the char pointer named `p' in the enclosing scope past any
 * leading whitespace.  A NULL p is left untouched.
 */
#define EATSPACE() do {							\
	if (p != NULL)							\
		for (; *p != '\0' && isspace((unsigned char)*p); p++)	\
			;						\
} while (0)
157f789fee2SBen Gras 
158f789fee2SBen Gras static struct s_command **
compile_stream(struct s_command ** link)159f789fee2SBen Gras compile_stream(struct s_command **link)
160f789fee2SBen Gras {
161f789fee2SBen Gras 	char *p;
162*0a6a1f1dSLionel Sambuc 	static char lbuf[_POSIX2_LINE_MAX + 1];	/* To save stack */
163f789fee2SBen Gras 	struct s_command *cmd, *cmd2, *stack;
164f789fee2SBen Gras 	struct s_format *fp;
165*0a6a1f1dSLionel Sambuc 	char re[_POSIX2_LINE_MAX + 1];
166f789fee2SBen Gras 	int naddr;				/* Number of addresses */
167f789fee2SBen Gras 
168f789fee2SBen Gras 	stack = 0;
169f789fee2SBen Gras 	for (;;) {
170*0a6a1f1dSLionel Sambuc 		if ((p = cu_fgets(lbuf, sizeof(lbuf), NULL)) == NULL) {
171f789fee2SBen Gras 			if (stack != 0)
172*0a6a1f1dSLionel Sambuc 				errx(1, "%lu: %s: unexpected EOF (pending }'s)",
173*0a6a1f1dSLionel Sambuc 							linenum, fname);
174f789fee2SBen Gras 			return (link);
175f789fee2SBen Gras 		}
176f789fee2SBen Gras 
177f789fee2SBen Gras semicolon:	EATSPACE();
178*0a6a1f1dSLionel Sambuc 		if (p) {
179f789fee2SBen Gras 			if (*p == '#' || *p == '\0')
180f789fee2SBen Gras 				continue;
181f789fee2SBen Gras 			else if (*p == ';') {
182f789fee2SBen Gras 				p++;
183f789fee2SBen Gras 				goto semicolon;
184f789fee2SBen Gras 			}
185*0a6a1f1dSLionel Sambuc 		}
186f789fee2SBen Gras 		*link = cmd = xmalloc(sizeof(struct s_command));
187f789fee2SBen Gras 		link = &cmd->next;
188*0a6a1f1dSLionel Sambuc 		cmd->startline = cmd->nonsel = 0;
189f789fee2SBen Gras 		/* First parse the addresses */
190f789fee2SBen Gras 		naddr = 0;
191f789fee2SBen Gras 
192f789fee2SBen Gras /* Valid characters to start an address */
193f789fee2SBen Gras #define	addrchar(c)	(strchr("0123456789/\\$", (c)))
194f789fee2SBen Gras 		if (addrchar(*p)) {
195f789fee2SBen Gras 			naddr++;
196f789fee2SBen Gras 			cmd->a1 = xmalloc(sizeof(struct s_addr));
197f789fee2SBen Gras 			p = compile_addr(p, cmd->a1);
198f789fee2SBen Gras 			EATSPACE();				/* EXTENSION */
199f789fee2SBen Gras 			if (*p == ',') {
200f789fee2SBen Gras 				p++;
201f789fee2SBen Gras 				EATSPACE();			/* EXTENSION */
202f789fee2SBen Gras 				naddr++;
203f789fee2SBen Gras 				cmd->a2 = xmalloc(sizeof(struct s_addr));
204f789fee2SBen Gras 				p = compile_addr(p, cmd->a2);
205f789fee2SBen Gras 				EATSPACE();
206f789fee2SBen Gras 			} else
207f789fee2SBen Gras 				cmd->a2 = 0;
208f789fee2SBen Gras 		} else
209f789fee2SBen Gras 			cmd->a1 = cmd->a2 = 0;
210f789fee2SBen Gras 
211f789fee2SBen Gras nonsel:		/* Now parse the command */
212f789fee2SBen Gras 		if (!*p)
213*0a6a1f1dSLionel Sambuc 			errx(1, "%lu: %s: command expected", linenum, fname);
214f789fee2SBen Gras 		cmd->code = *p;
215f789fee2SBen Gras 		for (fp = cmd_fmts; fp->code; fp++)
216f789fee2SBen Gras 			if (fp->code == *p)
217f789fee2SBen Gras 				break;
218f789fee2SBen Gras 		if (!fp->code)
219*0a6a1f1dSLionel Sambuc 			errx(1, "%lu: %s: invalid command code %c", linenum, fname, *p);
220f789fee2SBen Gras 		if (naddr > fp->naddr)
221*0a6a1f1dSLionel Sambuc 			errx(1,
222*0a6a1f1dSLionel Sambuc 				"%lu: %s: command %c expects up to %d address(es), found %d",
223*0a6a1f1dSLionel Sambuc 				linenum, fname, *p, fp->naddr, naddr);
224f789fee2SBen Gras 		switch (fp->args) {
225f789fee2SBen Gras 		case NONSEL:			/* ! */
226f789fee2SBen Gras 			p++;
227f789fee2SBen Gras 			EATSPACE();
228f789fee2SBen Gras 			cmd->nonsel = ! cmd->nonsel;
229f789fee2SBen Gras 			goto nonsel;
230f789fee2SBen Gras 		case GROUP:			/* { */
231f789fee2SBen Gras 			p++;
232f789fee2SBen Gras 			EATSPACE();
233f789fee2SBen Gras 			cmd->next = stack;
234f789fee2SBen Gras 			stack = cmd;
235f789fee2SBen Gras 			link = &cmd->u.c;
236f789fee2SBen Gras 			if (*p)
237f789fee2SBen Gras 				goto semicolon;
238f789fee2SBen Gras 			break;
239f789fee2SBen Gras 		case ENDGROUP:
240f789fee2SBen Gras 			/*
241f789fee2SBen Gras 			 * Short-circuit command processing, since end of
242f789fee2SBen Gras 			 * group is really just a noop.
243f789fee2SBen Gras 			 */
244f789fee2SBen Gras 			cmd->nonsel = 1;
245f789fee2SBen Gras 			if (stack == 0)
246*0a6a1f1dSLionel Sambuc 				errx(1, "%lu: %s: unexpected }", linenum, fname);
247f789fee2SBen Gras 			cmd2 = stack;
248f789fee2SBen Gras 			stack = cmd2->next;
249f789fee2SBen Gras 			cmd2->next = cmd;
250f789fee2SBen Gras 			/*FALLTHROUGH*/
251f789fee2SBen Gras 		case EMPTY:		/* d D g G h H l n N p P q x = \0 */
252f789fee2SBen Gras 			p++;
253f789fee2SBen Gras 			EATSPACE();
254*0a6a1f1dSLionel Sambuc 			switch (*p) {
255*0a6a1f1dSLionel Sambuc 			case ';':
256f789fee2SBen Gras 				p++;
257f789fee2SBen Gras 				link = &cmd->next;
258f789fee2SBen Gras 				goto semicolon;
259*0a6a1f1dSLionel Sambuc 			case '}':
260*0a6a1f1dSLionel Sambuc 				goto semicolon;
261*0a6a1f1dSLionel Sambuc 			case '\0':
262*0a6a1f1dSLionel Sambuc 				break;
263*0a6a1f1dSLionel Sambuc 			default:
264*0a6a1f1dSLionel Sambuc 				errx(1, "%lu: %s: extra characters at the end of %c command",
265*0a6a1f1dSLionel Sambuc 						linenum, fname, cmd->code);
266f789fee2SBen Gras 			}
267f789fee2SBen Gras 			break;
268f789fee2SBen Gras 		case TEXT:			/* a c i */
269f789fee2SBen Gras 			p++;
270f789fee2SBen Gras 			EATSPACE();
271f789fee2SBen Gras 			if (*p != '\\')
272*0a6a1f1dSLionel Sambuc 				errx(1,
273*0a6a1f1dSLionel Sambuc "%lu: %s: command %c expects \\ followed by text", linenum, fname, cmd->code);
274f789fee2SBen Gras 			p++;
275f789fee2SBen Gras 			EATSPACE();
276f789fee2SBen Gras 			if (*p)
277*0a6a1f1dSLionel Sambuc 				errx(1,
278*0a6a1f1dSLionel Sambuc 				"%lu: %s: extra characters after \\ at the end of %c command",
279*0a6a1f1dSLionel Sambuc 				linenum, fname, cmd->code);
280f789fee2SBen Gras 			cmd->t = compile_text();
281f789fee2SBen Gras 			break;
282f789fee2SBen Gras 		case COMMENT:			/* \0 # */
283f789fee2SBen Gras 			break;
284f789fee2SBen Gras 		case WFILE:			/* w */
285f789fee2SBen Gras 			p++;
286f789fee2SBen Gras 			EATSPACE();
287f789fee2SBen Gras 			if (*p == '\0')
288*0a6a1f1dSLionel Sambuc 				errx(1, "%lu: %s: filename expected", linenum, fname);
289f789fee2SBen Gras 			cmd->t = duptoeol(p, "w command");
290f789fee2SBen Gras 			if (aflag)
291f789fee2SBen Gras 				cmd->u.fd = -1;
292f789fee2SBen Gras 			else if ((cmd->u.fd = open(p,
293f789fee2SBen Gras 			    O_WRONLY|O_APPEND|O_CREAT|O_TRUNC,
294f789fee2SBen Gras 			    DEFFILEMODE)) == -1)
295*0a6a1f1dSLionel Sambuc 				err(1, "%s", p);
296f789fee2SBen Gras 			break;
297f789fee2SBen Gras 		case RFILE:			/* r */
298f789fee2SBen Gras 			p++;
299f789fee2SBen Gras 			EATSPACE();
300f789fee2SBen Gras 			if (*p == '\0')
301*0a6a1f1dSLionel Sambuc 				errx(1, "%lu: %s: filename expected", linenum, fname);
302f789fee2SBen Gras 			else
303f789fee2SBen Gras 				cmd->t = duptoeol(p, "read command");
304f789fee2SBen Gras 			break;
305f789fee2SBen Gras 		case BRANCH:			/* b t */
306f789fee2SBen Gras 			p++;
307f789fee2SBen Gras 			EATSPACE();
308f789fee2SBen Gras 			if (*p == '\0')
309f789fee2SBen Gras 				cmd->t = NULL;
310f789fee2SBen Gras 			else
311f789fee2SBen Gras 				cmd->t = duptoeol(p, "branch");
312f789fee2SBen Gras 			break;
313f789fee2SBen Gras 		case LABEL:			/* : */
314f789fee2SBen Gras 			p++;
315f789fee2SBen Gras 			EATSPACE();
316f789fee2SBen Gras 			cmd->t = duptoeol(p, "label");
317f789fee2SBen Gras 			if (strlen(p) == 0)
318*0a6a1f1dSLionel Sambuc 				errx(1, "%lu: %s: empty label", linenum, fname);
319f789fee2SBen Gras 			enterlabel(cmd);
320f789fee2SBen Gras 			break;
321f789fee2SBen Gras 		case SUBST:			/* s */
322f789fee2SBen Gras 			p++;
323f789fee2SBen Gras 			if (*p == '\0' || *p == '\\')
324*0a6a1f1dSLionel Sambuc 				errx(1,
325*0a6a1f1dSLionel Sambuc "%lu: %s: substitute pattern can not be delimited by newline or backslash",
326*0a6a1f1dSLionel Sambuc 					linenum, fname);
327*0a6a1f1dSLionel Sambuc 			cmd->u.s = xcalloc(1, sizeof(struct s_subst));
328*0a6a1f1dSLionel Sambuc 			p = compile_delimited(p, re, 0);
329f789fee2SBen Gras 			if (p == NULL)
330*0a6a1f1dSLionel Sambuc 				errx(1,
331*0a6a1f1dSLionel Sambuc 				"%lu: %s: unterminated substitute pattern", linenum, fname);
332*0a6a1f1dSLionel Sambuc 
333*0a6a1f1dSLionel Sambuc 			/* Compile RE with no case sensitivity temporarily */
334*0a6a1f1dSLionel Sambuc 			if (*re == '\0')
335*0a6a1f1dSLionel Sambuc 				cmd->u.s->re = NULL;
336*0a6a1f1dSLionel Sambuc 			else
337*0a6a1f1dSLionel Sambuc 				cmd->u.s->re = compile_re(re, 0);
338f789fee2SBen Gras 			--p;
339f789fee2SBen Gras 			p = compile_subst(p, cmd->u.s);
340f789fee2SBen Gras 			p = compile_flags(p, cmd->u.s);
341*0a6a1f1dSLionel Sambuc 
342*0a6a1f1dSLionel Sambuc 			/* Recompile RE with case sensitivity from "I" flag if any */
343*0a6a1f1dSLionel Sambuc 			if (*re == '\0')
344*0a6a1f1dSLionel Sambuc 				cmd->u.s->re = NULL;
345*0a6a1f1dSLionel Sambuc 			else
346*0a6a1f1dSLionel Sambuc 				cmd->u.s->re = compile_re(re, cmd->u.s->icase);
347f789fee2SBen Gras 			EATSPACE();
348f789fee2SBen Gras 			if (*p == ';') {
349f789fee2SBen Gras 				p++;
350f789fee2SBen Gras 				link = &cmd->next;
351f789fee2SBen Gras 				goto semicolon;
352f789fee2SBen Gras 			}
353f789fee2SBen Gras 			break;
354f789fee2SBen Gras 		case TR:			/* y */
355f789fee2SBen Gras 			p++;
356*0a6a1f1dSLionel Sambuc 			p = compile_tr(p, &cmd->u.y);
357f789fee2SBen Gras 			EATSPACE();
358*0a6a1f1dSLionel Sambuc 			switch (*p) {
359*0a6a1f1dSLionel Sambuc 			case ';':
360f789fee2SBen Gras 				p++;
361f789fee2SBen Gras 				link = &cmd->next;
362f789fee2SBen Gras 				goto semicolon;
363*0a6a1f1dSLionel Sambuc 			case '}':
364*0a6a1f1dSLionel Sambuc 				goto semicolon;
365*0a6a1f1dSLionel Sambuc 			case '\0':
366*0a6a1f1dSLionel Sambuc 				break;
367*0a6a1f1dSLionel Sambuc 			default:
368*0a6a1f1dSLionel Sambuc 				errx(1,
369*0a6a1f1dSLionel Sambuc "%lu: %s: extra text at the end of a transform command", linenum, fname);
370f789fee2SBen Gras 			}
371f789fee2SBen Gras 			if (*p)
372f789fee2SBen Gras 			break;
373f789fee2SBen Gras 		}
374f789fee2SBen Gras 	}
375f789fee2SBen Gras }
376f789fee2SBen Gras 
377f789fee2SBen Gras /*
378*0a6a1f1dSLionel Sambuc  * Get a delimited string.  P points to the delimeter of the string; d points
379f789fee2SBen Gras  * to a buffer area.  Newline and delimiter escapes are processed; other
380f789fee2SBen Gras  * escapes are ignored.
381f789fee2SBen Gras  *
382f789fee2SBen Gras  * Returns a pointer to the first character after the final delimiter or NULL
383f789fee2SBen Gras  * in the case of a non-terminated string.  The character array d is filled
384f789fee2SBen Gras  * with the processed string.
385f789fee2SBen Gras  */
386f789fee2SBen Gras static char *
compile_delimited(char * p,char * d,int is_tr)387*0a6a1f1dSLionel Sambuc compile_delimited(char *p, char *d, int is_tr)
388f789fee2SBen Gras {
389f789fee2SBen Gras 	char c;
390f789fee2SBen Gras 
391f789fee2SBen Gras 	c = *p++;
392f789fee2SBen Gras 	if (c == '\0')
393f789fee2SBen Gras 		return (NULL);
394f789fee2SBen Gras 	else if (c == '\\')
395*0a6a1f1dSLionel Sambuc 		errx(1, "%lu: %s: \\ can not be used as a string delimiter",
396*0a6a1f1dSLionel Sambuc 				linenum, fname);
397f789fee2SBen Gras 	else if (c == '\n')
398*0a6a1f1dSLionel Sambuc 		errx(1, "%lu: %s: newline can not be used as a string delimiter",
399*0a6a1f1dSLionel Sambuc 				linenum, fname);
400f789fee2SBen Gras 	while (*p) {
401*0a6a1f1dSLionel Sambuc 		if (*p == '[' && *p != c) {
402f789fee2SBen Gras 			if ((d = compile_ccl(&p, d)) == NULL)
403*0a6a1f1dSLionel Sambuc 				errx(1, "%lu: %s: unbalanced brackets ([])", linenum, fname);
404f789fee2SBen Gras 			continue;
405f789fee2SBen Gras 		} else if (*p == '\\' && p[1] == '[') {
406f789fee2SBen Gras 			*d++ = *p++;
407f789fee2SBen Gras 		} else if (*p == '\\' && p[1] == c)
408f789fee2SBen Gras 			p++;
409f789fee2SBen Gras 		else if (*p == '\\' && p[1] == 'n') {
410f789fee2SBen Gras 			*d++ = '\n';
411f789fee2SBen Gras 			p += 2;
412f789fee2SBen Gras 			continue;
413*0a6a1f1dSLionel Sambuc 		} else if (*p == '\\' && p[1] == '\\') {
414*0a6a1f1dSLionel Sambuc 			if (is_tr)
415*0a6a1f1dSLionel Sambuc 				p++;
416*0a6a1f1dSLionel Sambuc 			else
417f789fee2SBen Gras 				*d++ = *p++;
418*0a6a1f1dSLionel Sambuc 		} else if (*p == c) {
419f789fee2SBen Gras 			*d = '\0';
420f789fee2SBen Gras 			return (p + 1);
421f789fee2SBen Gras 		}
422f789fee2SBen Gras 		*d++ = *p++;
423f789fee2SBen Gras 	}
424f789fee2SBen Gras 	return (NULL);
425f789fee2SBen Gras }
426f789fee2SBen Gras 
427f789fee2SBen Gras 
/*
 * compile_ccl: copy one bracket expression verbatim.
 *
 * *sp points at the opening '['; t is the output position.  A leading
 * '^' and a ']' in first position are taken literally, and "[.", "[:"
 * and "[=" sub-expressions are copied through to their matching ".]",
 * ":]" or "=]".
 *
 * On success *sp is advanced past the closing ']' and the output position
 * just after the copied ']' is returned.  If the expression is not
 * terminated, NULL is returned and *sp is left unchanged.
 */
static char *
compile_ccl(char **sp, char *t)
{
	char *s = *sp;
	int prev, kind;

	*t++ = *s++;			/* opening bracket */
	if (*s == '^')
		*t++ = *s++;
	if (*s == ']')
		*t++ = *s++;

	while (*s != '\0') {
		*t = *s;
		if (*s == ']')
			break;
		if (*s == '[' &&
		    (s[1] == '.' || s[1] == ':' || s[1] == '=')) {
			kind = s[1];
			*++t = *++s;	/* the '.', ':' or '=' */
			t++;
			s++;
			/* Copy up to the ']' that directly follows kind. */
			prev = *s;
			for (;;) {
				*t = *s;
				if (*s == ']' && prev == kind)
					break;
				prev = *s;
				if (prev == '\0')
					return (NULL);
				s++;
				t++;
			}
		}
		s++;
		t++;
	}
	if (*s != ']')
		return (NULL);
	*sp = s + 1;
	return (t + 1);
}
449f789fee2SBen Gras 
450f789fee2SBen Gras /*
451*0a6a1f1dSLionel Sambuc  * Compiles the regular expression in RE and returns a pointer to the compiled
452*0a6a1f1dSLionel Sambuc  * regular expression.
453f789fee2SBen Gras  * Cflags are passed to regcomp.
454f789fee2SBen Gras  */
455*0a6a1f1dSLionel Sambuc static regex_t *
compile_re(char * re,int case_insensitive)456*0a6a1f1dSLionel Sambuc compile_re(char *re, int case_insensitive)
457f789fee2SBen Gras {
458*0a6a1f1dSLionel Sambuc 	regex_t *rep;
459*0a6a1f1dSLionel Sambuc 	int eval, flags;
460f789fee2SBen Gras 
461*0a6a1f1dSLionel Sambuc 
462*0a6a1f1dSLionel Sambuc 	flags = rflags;
463*0a6a1f1dSLionel Sambuc 	if (case_insensitive)
464*0a6a1f1dSLionel Sambuc 		flags |= REG_ICASE;
465*0a6a1f1dSLionel Sambuc 	rep = xmalloc(sizeof(regex_t));
466*0a6a1f1dSLionel Sambuc 	if ((eval = regcomp(rep, re, flags)) != 0)
467*0a6a1f1dSLionel Sambuc 		errx(1, "%lu: %s: RE error: %s",
468*0a6a1f1dSLionel Sambuc 				linenum, fname, strregerror(eval, rep));
469*0a6a1f1dSLionel Sambuc 	if (maxnsub < rep->re_nsub)
470*0a6a1f1dSLionel Sambuc 		maxnsub = rep->re_nsub;
471*0a6a1f1dSLionel Sambuc 	return (rep);
472f789fee2SBen Gras }
473f789fee2SBen Gras 
474f789fee2SBen Gras /*
475f789fee2SBen Gras  * Compile the substitution string of a regular expression and set res to
476f789fee2SBen Gras  * point to a saved copy of it.  Nsub is the number of parenthesized regular
477f789fee2SBen Gras  * expressions.
478f789fee2SBen Gras  */
479f789fee2SBen Gras static char *
compile_subst(char * p,struct s_subst * s)480f789fee2SBen Gras compile_subst(char *p, struct s_subst *s)
481f789fee2SBen Gras {
482*0a6a1f1dSLionel Sambuc 	static char lbuf[_POSIX2_LINE_MAX + 1];
483*0a6a1f1dSLionel Sambuc 	size_t asize, size;
484*0a6a1f1dSLionel Sambuc 	u_char ref;
485f789fee2SBen Gras 	char c, *text, *op, *sp;
486*0a6a1f1dSLionel Sambuc 	int more = 1, sawesc = 0;
487f789fee2SBen Gras 
488f789fee2SBen Gras 	c = *p++;			/* Terminator character */
489f789fee2SBen Gras 	if (c == '\0')
490f789fee2SBen Gras 		return (NULL);
491f789fee2SBen Gras 
492f789fee2SBen Gras 	s->maxbref = 0;
493f789fee2SBen Gras 	s->linenum = linenum;
494*0a6a1f1dSLionel Sambuc 	asize = 2 * _POSIX2_LINE_MAX + 1;
495*0a6a1f1dSLionel Sambuc 	text = xmalloc(asize);
496*0a6a1f1dSLionel Sambuc 	size = 0;
497f789fee2SBen Gras 	do {
498f789fee2SBen Gras 		op = sp = text + size;
499f789fee2SBen Gras 		for (; *p; p++) {
500f789fee2SBen Gras 			if (*p == '\\' || sawesc) {
501f789fee2SBen Gras 				/*
502f789fee2SBen Gras 				 * If this is a continuation from the last
503f789fee2SBen Gras 				 * buffer, we won't have a character to
504f789fee2SBen Gras 				 * skip over.
505f789fee2SBen Gras 				 */
506f789fee2SBen Gras 				if (sawesc)
507f789fee2SBen Gras 					sawesc = 0;
508f789fee2SBen Gras 				else
509f789fee2SBen Gras 					p++;
510f789fee2SBen Gras 
511f789fee2SBen Gras 				if (*p == '\0') {
512f789fee2SBen Gras 					/*
513f789fee2SBen Gras 					 * This escaped character is continued
514f789fee2SBen Gras 					 * in the next part of the line.  Note
515f789fee2SBen Gras 					 * this fact, then cause the loop to
516f789fee2SBen Gras 					 * exit w/ normal EOL case and reenter
517f789fee2SBen Gras 					 * above with the new buffer.
518f789fee2SBen Gras 					 */
519f789fee2SBen Gras 					sawesc = 1;
520f789fee2SBen Gras 					p--;
521f789fee2SBen Gras 					continue;
522f789fee2SBen Gras 				} else if (strchr("123456789", *p) != NULL) {
523f789fee2SBen Gras 					*sp++ = '\\';
524*0a6a1f1dSLionel Sambuc 					ref = (u_char)(*p - '0');
525f789fee2SBen Gras 					if (s->re != NULL &&
526*0a6a1f1dSLionel Sambuc 					    ref > s->re->re_nsub)
527*0a6a1f1dSLionel Sambuc 						errx(1, "%lu: %s: \\%c not defined in the RE",
528*0a6a1f1dSLionel Sambuc 								linenum, fname, *p);
529f789fee2SBen Gras 					if (s->maxbref < ref)
530f789fee2SBen Gras 						s->maxbref = ref;
531f789fee2SBen Gras 				} else if (*p == '&' || *p == '\\')
532f789fee2SBen Gras 					*sp++ = '\\';
533f789fee2SBen Gras 			} else if (*p == c) {
534*0a6a1f1dSLionel Sambuc 				if (*++p == '\0' && more) {
535*0a6a1f1dSLionel Sambuc 					if (cu_fgets(lbuf, sizeof(lbuf), &more))
536*0a6a1f1dSLionel Sambuc 						p = lbuf;
537*0a6a1f1dSLionel Sambuc 				}
538f789fee2SBen Gras 				*sp++ = '\0';
539*0a6a1f1dSLionel Sambuc 				size += (size_t)(sp - op);
540f789fee2SBen Gras 				s->new = xrealloc(text, size);
541f789fee2SBen Gras 				return (p);
542f789fee2SBen Gras 			} else if (*p == '\n') {
543*0a6a1f1dSLionel Sambuc 				errx(1,
544*0a6a1f1dSLionel Sambuc "%lu: %s: unescaped newline inside substitute pattern", linenum, fname);
545f789fee2SBen Gras 				/* NOTREACHED */
546f789fee2SBen Gras 			}
547f789fee2SBen Gras 			*sp++ = *p;
548f789fee2SBen Gras 		}
549*0a6a1f1dSLionel Sambuc 		size += (size_t)(sp - op);
550*0a6a1f1dSLionel Sambuc 		if (asize - size < _POSIX2_LINE_MAX + 1) {
551*0a6a1f1dSLionel Sambuc 			asize *= 2;
552*0a6a1f1dSLionel Sambuc 			text = xrealloc(text, asize);
553*0a6a1f1dSLionel Sambuc 		}
554*0a6a1f1dSLionel Sambuc 	} while (cu_fgets(p = lbuf, sizeof(lbuf), &more));
555*0a6a1f1dSLionel Sambuc 	errx(1, "%lu: %s: unterminated substitute in regular expression",
556*0a6a1f1dSLionel Sambuc 			linenum, fname);
557f789fee2SBen Gras 	/* NOTREACHED */
558f789fee2SBen Gras }
559f789fee2SBen Gras 
560f789fee2SBen Gras /*
561f789fee2SBen Gras  * Compile the flags of the s command
562f789fee2SBen Gras  */
563f789fee2SBen Gras static char *
compile_flags(char * p,struct s_subst * s)564f789fee2SBen Gras compile_flags(char *p, struct s_subst *s)
565f789fee2SBen Gras {
566f789fee2SBen Gras 	int gn;			/* True if we have seen g or n */
567*0a6a1f1dSLionel Sambuc 	unsigned long nval;
568*0a6a1f1dSLionel Sambuc 	char wfile[_POSIX2_LINE_MAX + 1], *q;
569f789fee2SBen Gras 
570f789fee2SBen Gras 	s->n = 1;				/* Default */
571f789fee2SBen Gras 	s->p = 0;
572f789fee2SBen Gras 	s->wfile = NULL;
573f789fee2SBen Gras 	s->wfd = -1;
574*0a6a1f1dSLionel Sambuc 	s->icase = 0;
575f789fee2SBen Gras 	for (gn = 0;;) {
576f789fee2SBen Gras 		EATSPACE();			/* EXTENSION */
577f789fee2SBen Gras 		switch (*p) {
578f789fee2SBen Gras 		case 'g':
579f789fee2SBen Gras 			if (gn)
580*0a6a1f1dSLionel Sambuc 				errx(1,
581*0a6a1f1dSLionel Sambuc "%lu: %s: more than one number or 'g' in substitute flags", linenum, fname);
582f789fee2SBen Gras 			gn = 1;
583f789fee2SBen Gras 			s->n = 0;
584f789fee2SBen Gras 			break;
585f789fee2SBen Gras 		case '\0':
586f789fee2SBen Gras 		case '\n':
587f789fee2SBen Gras 		case ';':
588f789fee2SBen Gras 			return (p);
589f789fee2SBen Gras 		case 'p':
590f789fee2SBen Gras 			s->p = 1;
591f789fee2SBen Gras 			break;
592*0a6a1f1dSLionel Sambuc 		case 'i':
593*0a6a1f1dSLionel Sambuc 		case 'I':
594*0a6a1f1dSLionel Sambuc 			s->icase = 1;
595*0a6a1f1dSLionel Sambuc 			break;
596f789fee2SBen Gras 		case '1': case '2': case '3':
597f789fee2SBen Gras 		case '4': case '5': case '6':
598f789fee2SBen Gras 		case '7': case '8': case '9':
599f789fee2SBen Gras 			if (gn)
600*0a6a1f1dSLionel Sambuc 				errx(1,
601*0a6a1f1dSLionel Sambuc "%lu: %s: more than one number or 'g' in substitute flags", linenum, fname);
602f789fee2SBen Gras 			gn = 1;
603*0a6a1f1dSLionel Sambuc 			errno = 0;
604*0a6a1f1dSLionel Sambuc 			nval = strtoul(p, &p, 10);
605*0a6a1f1dSLionel Sambuc 			if (errno == ERANGE || nval > INT_MAX)
606*0a6a1f1dSLionel Sambuc 				errx(1,
607*0a6a1f1dSLionel Sambuc "%lu: %s: overflow in the 'N' substitute flag", linenum, fname);
608*0a6a1f1dSLionel Sambuc 			s->n = (int)nval;
609f789fee2SBen Gras 			p--;
610f789fee2SBen Gras 			break;
611f789fee2SBen Gras 		case 'w':
612f789fee2SBen Gras 			p++;
613f789fee2SBen Gras #ifdef HISTORIC_PRACTICE
614f789fee2SBen Gras 			if (*p != ' ') {
615*0a6a1f1dSLionel Sambuc 				warnx("%lu: %s: space missing before w wfile", linenum, fname);
616f789fee2SBen Gras 				return (p);
617f789fee2SBen Gras 			}
618f789fee2SBen Gras #endif
619f789fee2SBen Gras 			EATSPACE();
620f789fee2SBen Gras 			q = wfile;
621f789fee2SBen Gras 			while (*p) {
622f789fee2SBen Gras 				if (*p == '\n')
623f789fee2SBen Gras 					break;
624f789fee2SBen Gras 				*q++ = *p++;
625f789fee2SBen Gras 			}
626f789fee2SBen Gras 			*q = '\0';
627f789fee2SBen Gras 			if (q == wfile)
628*0a6a1f1dSLionel Sambuc 				errx(1, "%lu: %s: no wfile specified", linenum, fname);
629f789fee2SBen Gras 			s->wfile = strdup(wfile);
630f789fee2SBen Gras 			if (!aflag && (s->wfd = open(wfile,
631f789fee2SBen Gras 			    O_WRONLY|O_APPEND|O_CREAT|O_TRUNC,
632f789fee2SBen Gras 			    DEFFILEMODE)) == -1)
633*0a6a1f1dSLionel Sambuc 				err(1, "%s", wfile);
634f789fee2SBen Gras 			return (p);
635f789fee2SBen Gras 		default:
636*0a6a1f1dSLionel Sambuc 			errx(1, "%lu: %s: bad flag in substitute command: '%c'",
637*0a6a1f1dSLionel Sambuc 					linenum, fname, *p);
638f789fee2SBen Gras 			break;
639f789fee2SBen Gras 		}
640f789fee2SBen Gras 		p++;
641f789fee2SBen Gras 	}
642f789fee2SBen Gras }
643f789fee2SBen Gras 
644f789fee2SBen Gras /*
645f789fee2SBen Gras  * Compile a translation set of strings into a lookup table.
646f789fee2SBen Gras  */
647f789fee2SBen Gras static char *
compile_tr(char * p,struct s_tr ** py)648*0a6a1f1dSLionel Sambuc compile_tr(char *p, struct s_tr **py)
649f789fee2SBen Gras {
650*0a6a1f1dSLionel Sambuc 	struct s_tr *y;
651*0a6a1f1dSLionel Sambuc 	size_t i;
652*0a6a1f1dSLionel Sambuc 	const char *op, *np;
653*0a6a1f1dSLionel Sambuc 	char old[_POSIX2_LINE_MAX + 1];
654*0a6a1f1dSLionel Sambuc 	char new[_POSIX2_LINE_MAX + 1];
655*0a6a1f1dSLionel Sambuc 	size_t oclen, oldlen, nclen, newlen;
656*0a6a1f1dSLionel Sambuc 	mbstate_t mbs1, mbs2;
657*0a6a1f1dSLionel Sambuc 
658*0a6a1f1dSLionel Sambuc 	*py = y = xmalloc(sizeof(*y));
659*0a6a1f1dSLionel Sambuc 	y->multis = NULL;
660*0a6a1f1dSLionel Sambuc 	y->nmultis = 0;
661f789fee2SBen Gras 
662f789fee2SBen Gras 	if (*p == '\0' || *p == '\\')
663*0a6a1f1dSLionel Sambuc 		errx(1,
664*0a6a1f1dSLionel Sambuc 	"%lu: %s: transform pattern can not be delimited by newline or backslash",
665*0a6a1f1dSLionel Sambuc 			linenum, fname);
666*0a6a1f1dSLionel Sambuc 	p = compile_delimited(p, old, 1);
667*0a6a1f1dSLionel Sambuc 	if (p == NULL)
668*0a6a1f1dSLionel Sambuc 		errx(1, "%lu: %s: unterminated transform source string",
669*0a6a1f1dSLionel Sambuc 				linenum, fname);
670*0a6a1f1dSLionel Sambuc 	p = compile_delimited(p - 1, new, 1);
671*0a6a1f1dSLionel Sambuc 	if (p == NULL)
672*0a6a1f1dSLionel Sambuc 		errx(1, "%lu: %s: unterminated transform target string",
673*0a6a1f1dSLionel Sambuc 				linenum, fname);
674f789fee2SBen Gras 	EATSPACE();
675*0a6a1f1dSLionel Sambuc 	op = old;
676*0a6a1f1dSLionel Sambuc 	oldlen = mbsrtowcs(NULL, &op, 0, NULL);
677*0a6a1f1dSLionel Sambuc 	if (oldlen == (size_t)-1)
678*0a6a1f1dSLionel Sambuc 		err(1, NULL);
679*0a6a1f1dSLionel Sambuc 	np = new;
680*0a6a1f1dSLionel Sambuc 	newlen = mbsrtowcs(NULL, &np, 0, NULL);
681*0a6a1f1dSLionel Sambuc 	if (newlen == (size_t)-1)
682*0a6a1f1dSLionel Sambuc 		err(1, NULL);
683*0a6a1f1dSLionel Sambuc 	if (newlen != oldlen)
684*0a6a1f1dSLionel Sambuc 		errx(1, "%lu: %s: transform strings are not the same length",
685*0a6a1f1dSLionel Sambuc 				linenum, fname);
686*0a6a1f1dSLionel Sambuc 	if (MB_CUR_MAX == 1) {
687*0a6a1f1dSLionel Sambuc 		/*
688*0a6a1f1dSLionel Sambuc 		 * The single-byte encoding case is easy: generate a
689*0a6a1f1dSLionel Sambuc 		 * lookup table.
690*0a6a1f1dSLionel Sambuc 		 */
691f789fee2SBen Gras 		for (i = 0; i <= UCHAR_MAX; i++)
692*0a6a1f1dSLionel Sambuc 			y->bytetab[i] = (u_char)i;
693*0a6a1f1dSLionel Sambuc 		for (; *op; op++, np++)
694*0a6a1f1dSLionel Sambuc 			y->bytetab[(u_char)*op] = (u_char)*np;
695*0a6a1f1dSLionel Sambuc 	} else {
696*0a6a1f1dSLionel Sambuc 		/*
697*0a6a1f1dSLionel Sambuc 		 * Multi-byte encoding case: generate a lookup table as
698*0a6a1f1dSLionel Sambuc 		 * above, but only for single-byte characters. The first
699*0a6a1f1dSLionel Sambuc 		 * bytes of multi-byte characters have their lookup table
700*0a6a1f1dSLionel Sambuc 		 * entries set to 0, which causes do_tr() to search through
701*0a6a1f1dSLionel Sambuc 		 * an auxiliary vector of multi-byte mappings.
702*0a6a1f1dSLionel Sambuc 		 */
703*0a6a1f1dSLionel Sambuc 		memset(&mbs1, 0, sizeof(mbs1));
704*0a6a1f1dSLionel Sambuc 		memset(&mbs2, 0, sizeof(mbs2));
705*0a6a1f1dSLionel Sambuc 		for (i = 0; i <= UCHAR_MAX; i++)
706*0a6a1f1dSLionel Sambuc 			y->bytetab[i] = (u_char)((btowc((int)i) != WEOF) ? i : 0);
707*0a6a1f1dSLionel Sambuc 		while (*op != '\0') {
708*0a6a1f1dSLionel Sambuc 			oclen = mbrlen(op, MB_LEN_MAX, &mbs1);
709*0a6a1f1dSLionel Sambuc 			if (oclen == (size_t)-1 || oclen == (size_t)-2)
710*0a6a1f1dSLionel Sambuc 				errc(1, EILSEQ, NULL);
711*0a6a1f1dSLionel Sambuc 			nclen = mbrlen(np, MB_LEN_MAX, &mbs2);
712*0a6a1f1dSLionel Sambuc 			if (nclen == (size_t)-1 || nclen == (size_t)-2)
713*0a6a1f1dSLionel Sambuc 				errc(1, EILSEQ, NULL);
714*0a6a1f1dSLionel Sambuc 			if (oclen == 1 && nclen == 1)
715*0a6a1f1dSLionel Sambuc 				y->bytetab[(u_char)*op] = (u_char)*np;
716*0a6a1f1dSLionel Sambuc 			else {
717*0a6a1f1dSLionel Sambuc 				y->bytetab[(u_char)*op] = 0;
718*0a6a1f1dSLionel Sambuc 				y->multis = xrealloc(y->multis,
719*0a6a1f1dSLionel Sambuc 				    (y->nmultis + 1) * sizeof(*y->multis));
720*0a6a1f1dSLionel Sambuc 				i = y->nmultis++;
721*0a6a1f1dSLionel Sambuc 				y->multis[i].fromlen = oclen;
722*0a6a1f1dSLionel Sambuc 				memcpy(y->multis[i].from, op, oclen);
723*0a6a1f1dSLionel Sambuc 				y->multis[i].tolen = nclen;
724*0a6a1f1dSLionel Sambuc 				memcpy(y->multis[i].to, np, nclen);
725*0a6a1f1dSLionel Sambuc 			}
726*0a6a1f1dSLionel Sambuc 			op += oclen;
727*0a6a1f1dSLionel Sambuc 			np += nclen;
728*0a6a1f1dSLionel Sambuc 		}
729*0a6a1f1dSLionel Sambuc 	}
730f789fee2SBen Gras 	return (p);
731f789fee2SBen Gras }
732f789fee2SBen Gras 
733f789fee2SBen Gras /*
734*0a6a1f1dSLionel Sambuc  * Compile the text following an a or i command.
735f789fee2SBen Gras  */
736f789fee2SBen Gras static char *
compile_text(void)737f789fee2SBen Gras compile_text(void)
738f789fee2SBen Gras {
739*0a6a1f1dSLionel Sambuc 	size_t asize, size;
740*0a6a1f1dSLionel Sambuc 	int esc_nl;
741*0a6a1f1dSLionel Sambuc 	char *text, *p, *op, *s;
742*0a6a1f1dSLionel Sambuc 	char lbuf[_POSIX2_LINE_MAX + 1];
743f789fee2SBen Gras 
744*0a6a1f1dSLionel Sambuc 	asize = 2 * _POSIX2_LINE_MAX + 1;
745*0a6a1f1dSLionel Sambuc 	text = xmalloc(asize);
746*0a6a1f1dSLionel Sambuc 	size = 0;
747*0a6a1f1dSLionel Sambuc 	while (cu_fgets(lbuf, sizeof(lbuf), NULL)) {
748f789fee2SBen Gras 		op = s = text + size;
749*0a6a1f1dSLionel Sambuc 		p = lbuf;
750*0a6a1f1dSLionel Sambuc 		EATSPACE();
751*0a6a1f1dSLionel Sambuc 		for (esc_nl = 0; *p != '\0'; p++) {
752*0a6a1f1dSLionel Sambuc 			if (*p == '\\' && p[1] != '\0' && *++p == '\n')
753*0a6a1f1dSLionel Sambuc 				esc_nl = 1;
754f789fee2SBen Gras 			*s++ = *p;
755f789fee2SBen Gras 		}
756*0a6a1f1dSLionel Sambuc 		size += (size_t)(s - op);
757*0a6a1f1dSLionel Sambuc 		if (!esc_nl) {
758f789fee2SBen Gras 			*s = '\0';
759f789fee2SBen Gras 			break;
760f789fee2SBen Gras 		}
761*0a6a1f1dSLionel Sambuc 		if (asize - size < _POSIX2_LINE_MAX + 1) {
762*0a6a1f1dSLionel Sambuc 			asize *= 2;
763*0a6a1f1dSLionel Sambuc 			text = xrealloc(text, asize);
764f789fee2SBen Gras 		}
765*0a6a1f1dSLionel Sambuc 	}
766*0a6a1f1dSLionel Sambuc 	text[size] = '\0';
767*0a6a1f1dSLionel Sambuc 	p = xrealloc(text, size + 1);
768*0a6a1f1dSLionel Sambuc 	return (p);
769f789fee2SBen Gras }
770f789fee2SBen Gras 
771f789fee2SBen Gras /*
772f789fee2SBen Gras  * Get an address and return a pointer to the first character after
773f789fee2SBen Gras  * it.  Fill the structure pointed to according to the address.
774f789fee2SBen Gras  */
775f789fee2SBen Gras static char *
compile_addr(char * p,struct s_addr * a)776f789fee2SBen Gras compile_addr(char *p, struct s_addr *a)
777f789fee2SBen Gras {
778*0a6a1f1dSLionel Sambuc 	char *end, re[_POSIX2_LINE_MAX + 1];
779*0a6a1f1dSLionel Sambuc 	int icase;
780f789fee2SBen Gras 
781*0a6a1f1dSLionel Sambuc 	icase = 0;
782*0a6a1f1dSLionel Sambuc 
783*0a6a1f1dSLionel Sambuc 	a->type = 0;
784f789fee2SBen Gras 	switch (*p) {
785f789fee2SBen Gras 	case '\\':				/* Context address */
786f789fee2SBen Gras 		++p;
787f789fee2SBen Gras 		/* FALLTHROUGH */
788f789fee2SBen Gras 	case '/':				/* Context address */
789*0a6a1f1dSLionel Sambuc 		p = compile_delimited(p, re, 0);
790f789fee2SBen Gras 		if (p == NULL)
791*0a6a1f1dSLionel Sambuc 			errx(1, "%lu: %s: unterminated regular expression", linenum, fname);
792*0a6a1f1dSLionel Sambuc 		/* Check for case insensitive regexp flag */
793*0a6a1f1dSLionel Sambuc 		if (*p == 'I') {
794*0a6a1f1dSLionel Sambuc 			icase = 1;
795*0a6a1f1dSLionel Sambuc 			p++;
796*0a6a1f1dSLionel Sambuc 		}
797*0a6a1f1dSLionel Sambuc 		if (*re == '\0')
798*0a6a1f1dSLionel Sambuc 			a->u.r = NULL;
799*0a6a1f1dSLionel Sambuc 		else
800*0a6a1f1dSLionel Sambuc 			a->u.r = compile_re(re, icase);
801f789fee2SBen Gras 		a->type = AT_RE;
802f789fee2SBen Gras 		return (p);
803f789fee2SBen Gras 
804f789fee2SBen Gras 	case '$':				/* Last line */
805f789fee2SBen Gras 		a->type = AT_LAST;
806f789fee2SBen Gras 		return (p + 1);
807*0a6a1f1dSLionel Sambuc 
808*0a6a1f1dSLionel Sambuc 	case '+':				/* Relative line number */
809*0a6a1f1dSLionel Sambuc 		a->type = AT_RELLINE;
810*0a6a1f1dSLionel Sambuc 		p++;
811*0a6a1f1dSLionel Sambuc 		/* FALLTHROUGH */
812f789fee2SBen Gras 						/* Line number */
813f789fee2SBen Gras 	case '0': case '1': case '2': case '3': case '4':
814f789fee2SBen Gras 	case '5': case '6': case '7': case '8': case '9':
815*0a6a1f1dSLionel Sambuc 		if (a->type == 0)
816f789fee2SBen Gras 			a->type = AT_LINE;
817*0a6a1f1dSLionel Sambuc 		a->u.l = strtoul(p, &end, 10);
818f789fee2SBen Gras 		return (end);
819f789fee2SBen Gras 	default:
820*0a6a1f1dSLionel Sambuc 		errx(1, "%lu: %s: expected context address", linenum, fname);
821f789fee2SBen Gras 		return (NULL);
822f789fee2SBen Gras 	}
823f789fee2SBen Gras }
824f789fee2SBen Gras 
825f789fee2SBen Gras /*
826f789fee2SBen Gras  * duptoeol --
827f789fee2SBen Gras  *	Return a copy of all the characters up to \n or \0.
828f789fee2SBen Gras  */
829f789fee2SBen Gras static char *
duptoeol(char * s,const char * ctype)830f789fee2SBen Gras duptoeol(char *s, const char *ctype)
831f789fee2SBen Gras {
832f789fee2SBen Gras 	size_t len;
833f789fee2SBen Gras 	int ws;
834*0a6a1f1dSLionel Sambuc 	char *p, *start;
835f789fee2SBen Gras 
836f789fee2SBen Gras 	ws = 0;
837f789fee2SBen Gras 	for (start = s; *s != '\0' && *s != '\n'; ++s)
838f789fee2SBen Gras 		ws = isspace((unsigned char)*s);
839f789fee2SBen Gras 	*s = '\0';
840f789fee2SBen Gras 	if (ws)
841*0a6a1f1dSLionel Sambuc 		warnx("%lu: %s: whitespace after %s", linenum, fname, ctype);
842*0a6a1f1dSLionel Sambuc 	len = (size_t)(s - start + 1);
843*0a6a1f1dSLionel Sambuc 	p = xmalloc(len);
844*0a6a1f1dSLionel Sambuc 	return (memmove(p, start, len));
845f789fee2SBen Gras }
846f789fee2SBen Gras 
847f789fee2SBen Gras /*
848f789fee2SBen Gras  * Convert goto label names to addresses, and count a and r commands, in
849f789fee2SBen Gras  * the given subset of the script.  Free the memory used by labels in b
850f789fee2SBen Gras  * and t commands (but not by :).
851f789fee2SBen Gras  *
852f789fee2SBen Gras  * TODO: Remove } nodes
853f789fee2SBen Gras  */
854f789fee2SBen Gras static void
fixuplabel(struct s_command * cp,struct s_command * end)855f789fee2SBen Gras fixuplabel(struct s_command *cp, struct s_command *end)
856f789fee2SBen Gras {
857f789fee2SBen Gras 
858f789fee2SBen Gras 	for (; cp != end; cp = cp->next)
859f789fee2SBen Gras 		switch (cp->code) {
860f789fee2SBen Gras 		case 'a':
861f789fee2SBen Gras 		case 'r':
862f789fee2SBen Gras 			appendnum++;
863f789fee2SBen Gras 			break;
864f789fee2SBen Gras 		case 'b':
865f789fee2SBen Gras 		case 't':
866f789fee2SBen Gras 			/* Resolve branch target. */
867f789fee2SBen Gras 			if (cp->t == NULL) {
868f789fee2SBen Gras 				cp->u.c = NULL;
869f789fee2SBen Gras 				break;
870f789fee2SBen Gras 			}
871f789fee2SBen Gras 			if ((cp->u.c = findlabel(cp->t)) == NULL)
872*0a6a1f1dSLionel Sambuc 				errx(1, "%lu: %s: undefined label '%s'", linenum, fname, cp->t);
873f789fee2SBen Gras 			free(cp->t);
874f789fee2SBen Gras 			break;
875f789fee2SBen Gras 		case '{':
876f789fee2SBen Gras 			/* Do interior commands. */
877f789fee2SBen Gras 			fixuplabel(cp->u.c, cp->next);
878f789fee2SBen Gras 			break;
879f789fee2SBen Gras 		}
880f789fee2SBen Gras }
881f789fee2SBen Gras 
882f789fee2SBen Gras /*
883f789fee2SBen Gras  * Associate the given command label for later lookup.
884f789fee2SBen Gras  */
885f789fee2SBen Gras static void
enterlabel(struct s_command * cp)886f789fee2SBen Gras enterlabel(struct s_command *cp)
887f789fee2SBen Gras {
888f789fee2SBen Gras 	struct labhash **lhp, *lh;
889f789fee2SBen Gras 	u_char *p;
890f789fee2SBen Gras 	u_int h, c;
891f789fee2SBen Gras 
892f789fee2SBen Gras 	for (h = 0, p = (u_char *)cp->t; (c = *p) != 0; p++)
893f789fee2SBen Gras 		h = (h << 5) + h + c;
894f789fee2SBen Gras 	lhp = &labels[h & LHMASK];
895f789fee2SBen Gras 	for (lh = *lhp; lh != NULL; lh = lh->lh_next)
896f789fee2SBen Gras 		if (lh->lh_hash == h && strcmp(cp->t, lh->lh_cmd->t) == 0)
897*0a6a1f1dSLionel Sambuc 			errx(1, "%lu: %s: duplicate label '%s'", linenum, fname, cp->t);
898f789fee2SBen Gras 	lh = xmalloc(sizeof *lh);
899f789fee2SBen Gras 	lh->lh_next = *lhp;
900f789fee2SBen Gras 	lh->lh_hash = h;
901f789fee2SBen Gras 	lh->lh_cmd = cp;
902f789fee2SBen Gras 	lh->lh_ref = 0;
903f789fee2SBen Gras 	*lhp = lh;
904f789fee2SBen Gras }
905f789fee2SBen Gras 
906f789fee2SBen Gras /*
907f789fee2SBen Gras  * Find the label contained in the command l in the command linked
908f789fee2SBen Gras  * list cp.  L is excluded from the search.  Return NULL if not found.
909f789fee2SBen Gras  */
910f789fee2SBen Gras static struct s_command *
findlabel(char * name)911f789fee2SBen Gras findlabel(char *name)
912f789fee2SBen Gras {
913f789fee2SBen Gras 	struct labhash *lh;
914f789fee2SBen Gras 	u_char *p;
915f789fee2SBen Gras 	u_int h, c;
916f789fee2SBen Gras 
917f789fee2SBen Gras 	for (h = 0, p = (u_char *)name; (c = *p) != 0; p++)
918f789fee2SBen Gras 		h = (h << 5) + h + c;
919f789fee2SBen Gras 	for (lh = labels[h & LHMASK]; lh != NULL; lh = lh->lh_next) {
920f789fee2SBen Gras 		if (lh->lh_hash == h && strcmp(name, lh->lh_cmd->t) == 0) {
921f789fee2SBen Gras 			lh->lh_ref = 1;
922f789fee2SBen Gras 			return (lh->lh_cmd);
923f789fee2SBen Gras 		}
924f789fee2SBen Gras 	}
925f789fee2SBen Gras 	return (NULL);
926f789fee2SBen Gras }
927f789fee2SBen Gras 
928f789fee2SBen Gras /*
929f789fee2SBen Gras  * Warn about any unused labels.  As a side effect, release the label hash
930f789fee2SBen Gras  * table space.
931f789fee2SBen Gras  */
932f789fee2SBen Gras static void
uselabel(void)933f789fee2SBen Gras uselabel(void)
934f789fee2SBen Gras {
935f789fee2SBen Gras 	struct labhash *lh, *next;
936f789fee2SBen Gras 	int i;
937f789fee2SBen Gras 
938f789fee2SBen Gras 	for (i = 0; i < LHSZ; i++) {
939f789fee2SBen Gras 		for (lh = labels[i]; lh != NULL; lh = next) {
940f789fee2SBen Gras 			next = lh->lh_next;
941f789fee2SBen Gras 			if (!lh->lh_ref)
942*0a6a1f1dSLionel Sambuc 				warnx("%lu: %s: unused label '%s'",
943*0a6a1f1dSLionel Sambuc 				    linenum, fname, lh->lh_cmd->t);
944f789fee2SBen Gras 			free(lh);
945f789fee2SBen Gras 		}
946f789fee2SBen Gras 	}
947f789fee2SBen Gras }
948