1*0a6a1f1dSLionel Sambuc /* $NetBSD: compile.c,v 1.46 2015/03/12 12:40:41 christos Exp $ */
2f789fee2SBen Gras
3f789fee2SBen Gras /*-
4*0a6a1f1dSLionel Sambuc * Copyright (c) 1992 Diomidis Spinellis.
5f789fee2SBen Gras * Copyright (c) 1992, 1993
6f789fee2SBen Gras * The Regents of the University of California. All rights reserved.
7f789fee2SBen Gras *
8f789fee2SBen Gras * This code is derived from software contributed to Berkeley by
9f789fee2SBen Gras * Diomidis Spinellis of Imperial College, University of London.
10f789fee2SBen Gras *
11f789fee2SBen Gras * Redistribution and use in source and binary forms, with or without
12f789fee2SBen Gras * modification, are permitted provided that the following conditions
13f789fee2SBen Gras * are met:
14f789fee2SBen Gras * 1. Redistributions of source code must retain the above copyright
15f789fee2SBen Gras * notice, this list of conditions and the following disclaimer.
16f789fee2SBen Gras * 2. Redistributions in binary form must reproduce the above copyright
17f789fee2SBen Gras * notice, this list of conditions and the following disclaimer in the
18f789fee2SBen Gras * documentation and/or other materials provided with the distribution.
19f789fee2SBen Gras * 3. Neither the name of the University nor the names of its contributors
20f789fee2SBen Gras * may be used to endorse or promote products derived from this software
21f789fee2SBen Gras * without specific prior written permission.
22f789fee2SBen Gras *
23f789fee2SBen Gras * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
24f789fee2SBen Gras * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
25f789fee2SBen Gras * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
26f789fee2SBen Gras * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
27f789fee2SBen Gras * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
28f789fee2SBen Gras * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
29f789fee2SBen Gras * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
30f789fee2SBen Gras * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
31f789fee2SBen Gras * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
32f789fee2SBen Gras * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
33f789fee2SBen Gras * SUCH DAMAGE.
34f789fee2SBen Gras */
35f789fee2SBen Gras
36f789fee2SBen Gras #if HAVE_NBTOOL_CONFIG_H
37f789fee2SBen Gras #include "nbtool_config.h"
38f789fee2SBen Gras #endif
39f789fee2SBen Gras
40f789fee2SBen Gras #include <sys/cdefs.h>
41*0a6a1f1dSLionel Sambuc __RCSID("$NetBSD: compile.c,v 1.46 2015/03/12 12:40:41 christos Exp $");
42*0a6a1f1dSLionel Sambuc #ifdef __FBSDID
43*0a6a1f1dSLionel Sambuc __FBSDID("$FreeBSD: head/usr.bin/sed/compile.c 259132 2013-12-09 18:57:20Z eadler $");
44f789fee2SBen Gras #endif
45*0a6a1f1dSLionel Sambuc
46*0a6a1f1dSLionel Sambuc #if 0
47*0a6a1f1dSLionel Sambuc static const char sccsid[] = "@(#)compile.c 8.1 (Berkeley) 6/6/93";
48*0a6a1f1dSLionel Sambuc #endif
49f789fee2SBen Gras
50f789fee2SBen Gras #include <sys/types.h>
51f789fee2SBen Gras #include <sys/stat.h>
52f789fee2SBen Gras
53f789fee2SBen Gras #include <ctype.h>
54*0a6a1f1dSLionel Sambuc #include <err.h>
55f789fee2SBen Gras #include <errno.h>
56f789fee2SBen Gras #include <fcntl.h>
57f789fee2SBen Gras #include <limits.h>
58f789fee2SBen Gras #include <regex.h>
59f789fee2SBen Gras #include <stdio.h>
60f789fee2SBen Gras #include <stdlib.h>
61f789fee2SBen Gras #include <string.h>
62*0a6a1f1dSLionel Sambuc #include <wchar.h>
63f789fee2SBen Gras
64f789fee2SBen Gras #include "defs.h"
65f789fee2SBen Gras #include "extern.h"
66f789fee2SBen Gras
67f789fee2SBen Gras #define LHSZ 128
68f789fee2SBen Gras #define LHMASK (LHSZ - 1)
69f789fee2SBen Gras static struct labhash {
70f789fee2SBen Gras struct labhash *lh_next;
71f789fee2SBen Gras u_int lh_hash;
72f789fee2SBen Gras struct s_command *lh_cmd;
73f789fee2SBen Gras int lh_ref;
74f789fee2SBen Gras } *labels[LHSZ];
75f789fee2SBen Gras
/* Forward declarations of the file-local helpers. */
static char		 *compile_addr(char *, struct s_addr *);
static char		 *compile_ccl(char **, char *);
static char		 *compile_delimited(char *, char *, int);
static char		 *compile_flags(char *, struct s_subst *);
static regex_t		 *compile_re(char *, int);
static char		 *compile_subst(char *, struct s_subst *);
static char		 *compile_text(void);
static char		 *compile_tr(char *, struct s_tr **);
static struct s_command	**compile_stream(struct s_command **);
static char		 *duptoeol(char *, const char *);
static void		  enterlabel(struct s_command *);
static struct s_command	 *findlabel(char *);
static void		  fixuplabel(struct s_command *, struct s_command *);
static void		  uselabel(void);
92f789fee2SBen Gras
93f789fee2SBen Gras /*
94f789fee2SBen Gras * Command specification. This is used to drive the command parser.
95f789fee2SBen Gras */
96f789fee2SBen Gras struct s_format {
97f789fee2SBen Gras char code; /* Command code */
98f789fee2SBen Gras int naddr; /* Number of address args */
99f789fee2SBen Gras enum e_args args; /* Argument type */
100f789fee2SBen Gras };
101f789fee2SBen Gras
102f789fee2SBen Gras static struct s_format cmd_fmts[] = {
103f789fee2SBen Gras {'{', 2, GROUP},
104f789fee2SBen Gras {'}', 0, ENDGROUP},
105f789fee2SBen Gras {'a', 1, TEXT},
106f789fee2SBen Gras {'b', 2, BRANCH},
107f789fee2SBen Gras {'c', 2, TEXT},
108f789fee2SBen Gras {'d', 2, EMPTY},
109f789fee2SBen Gras {'D', 2, EMPTY},
110f789fee2SBen Gras {'g', 2, EMPTY},
111f789fee2SBen Gras {'G', 2, EMPTY},
112f789fee2SBen Gras {'h', 2, EMPTY},
113f789fee2SBen Gras {'H', 2, EMPTY},
114f789fee2SBen Gras {'i', 1, TEXT},
115f789fee2SBen Gras {'l', 2, EMPTY},
116f789fee2SBen Gras {'n', 2, EMPTY},
117f789fee2SBen Gras {'N', 2, EMPTY},
118f789fee2SBen Gras {'p', 2, EMPTY},
119f789fee2SBen Gras {'P', 2, EMPTY},
120f789fee2SBen Gras {'q', 1, EMPTY},
121f789fee2SBen Gras {'r', 1, RFILE},
122f789fee2SBen Gras {'s', 2, SUBST},
123f789fee2SBen Gras {'t', 2, BRANCH},
124f789fee2SBen Gras {'w', 2, WFILE},
125f789fee2SBen Gras {'x', 2, EMPTY},
126f789fee2SBen Gras {'y', 2, TR},
127f789fee2SBen Gras {'!', 2, NONSEL},
128f789fee2SBen Gras {':', 0, LABEL},
129f789fee2SBen Gras {'#', 0, COMMENT},
130f789fee2SBen Gras {'=', 1, EMPTY},
131f789fee2SBen Gras {'\0', 0, COMMENT},
132f789fee2SBen Gras };
133f789fee2SBen Gras
134f789fee2SBen Gras /* The compiled program. */
135f789fee2SBen Gras struct s_command *prog;
136f789fee2SBen Gras
137f789fee2SBen Gras /*
138f789fee2SBen Gras * Compile the program into prog.
139f789fee2SBen Gras * Initialise appends.
140f789fee2SBen Gras */
141f789fee2SBen Gras void
compile(void)142f789fee2SBen Gras compile(void)
143f789fee2SBen Gras {
144f789fee2SBen Gras *compile_stream(&prog) = NULL;
145f789fee2SBen Gras fixuplabel(prog, NULL);
146f789fee2SBen Gras uselabel();
147f789fee2SBen Gras if (appendnum > 0)
148f789fee2SBen Gras appends = xmalloc(sizeof(struct s_appends) * appendnum);
149f789fee2SBen Gras match = xmalloc((maxnsub + 1) * sizeof(regmatch_t));
150f789fee2SBen Gras }
151f789fee2SBen Gras
/*
 * Advance the caller's local `p' past any white space.  A NULL `p' is
 * left untouched.
 */
#define	EATSPACE() do {							\
	if (p != NULL)							\
		for (; *p != '\0' && isspace((unsigned char)*p); p++)	\
			continue;					\
} while (0)
157f789fee2SBen Gras
158f789fee2SBen Gras static struct s_command **
compile_stream(struct s_command ** link)159f789fee2SBen Gras compile_stream(struct s_command **link)
160f789fee2SBen Gras {
161f789fee2SBen Gras char *p;
162*0a6a1f1dSLionel Sambuc static char lbuf[_POSIX2_LINE_MAX + 1]; /* To save stack */
163f789fee2SBen Gras struct s_command *cmd, *cmd2, *stack;
164f789fee2SBen Gras struct s_format *fp;
165*0a6a1f1dSLionel Sambuc char re[_POSIX2_LINE_MAX + 1];
166f789fee2SBen Gras int naddr; /* Number of addresses */
167f789fee2SBen Gras
168f789fee2SBen Gras stack = 0;
169f789fee2SBen Gras for (;;) {
170*0a6a1f1dSLionel Sambuc if ((p = cu_fgets(lbuf, sizeof(lbuf), NULL)) == NULL) {
171f789fee2SBen Gras if (stack != 0)
172*0a6a1f1dSLionel Sambuc errx(1, "%lu: %s: unexpected EOF (pending }'s)",
173*0a6a1f1dSLionel Sambuc linenum, fname);
174f789fee2SBen Gras return (link);
175f789fee2SBen Gras }
176f789fee2SBen Gras
177f789fee2SBen Gras semicolon: EATSPACE();
178*0a6a1f1dSLionel Sambuc if (p) {
179f789fee2SBen Gras if (*p == '#' || *p == '\0')
180f789fee2SBen Gras continue;
181f789fee2SBen Gras else if (*p == ';') {
182f789fee2SBen Gras p++;
183f789fee2SBen Gras goto semicolon;
184f789fee2SBen Gras }
185*0a6a1f1dSLionel Sambuc }
186f789fee2SBen Gras *link = cmd = xmalloc(sizeof(struct s_command));
187f789fee2SBen Gras link = &cmd->next;
188*0a6a1f1dSLionel Sambuc cmd->startline = cmd->nonsel = 0;
189f789fee2SBen Gras /* First parse the addresses */
190f789fee2SBen Gras naddr = 0;
191f789fee2SBen Gras
192f789fee2SBen Gras /* Valid characters to start an address */
193f789fee2SBen Gras #define addrchar(c) (strchr("0123456789/\\$", (c)))
194f789fee2SBen Gras if (addrchar(*p)) {
195f789fee2SBen Gras naddr++;
196f789fee2SBen Gras cmd->a1 = xmalloc(sizeof(struct s_addr));
197f789fee2SBen Gras p = compile_addr(p, cmd->a1);
198f789fee2SBen Gras EATSPACE(); /* EXTENSION */
199f789fee2SBen Gras if (*p == ',') {
200f789fee2SBen Gras p++;
201f789fee2SBen Gras EATSPACE(); /* EXTENSION */
202f789fee2SBen Gras naddr++;
203f789fee2SBen Gras cmd->a2 = xmalloc(sizeof(struct s_addr));
204f789fee2SBen Gras p = compile_addr(p, cmd->a2);
205f789fee2SBen Gras EATSPACE();
206f789fee2SBen Gras } else
207f789fee2SBen Gras cmd->a2 = 0;
208f789fee2SBen Gras } else
209f789fee2SBen Gras cmd->a1 = cmd->a2 = 0;
210f789fee2SBen Gras
211f789fee2SBen Gras nonsel: /* Now parse the command */
212f789fee2SBen Gras if (!*p)
213*0a6a1f1dSLionel Sambuc errx(1, "%lu: %s: command expected", linenum, fname);
214f789fee2SBen Gras cmd->code = *p;
215f789fee2SBen Gras for (fp = cmd_fmts; fp->code; fp++)
216f789fee2SBen Gras if (fp->code == *p)
217f789fee2SBen Gras break;
218f789fee2SBen Gras if (!fp->code)
219*0a6a1f1dSLionel Sambuc errx(1, "%lu: %s: invalid command code %c", linenum, fname, *p);
220f789fee2SBen Gras if (naddr > fp->naddr)
221*0a6a1f1dSLionel Sambuc errx(1,
222*0a6a1f1dSLionel Sambuc "%lu: %s: command %c expects up to %d address(es), found %d",
223*0a6a1f1dSLionel Sambuc linenum, fname, *p, fp->naddr, naddr);
224f789fee2SBen Gras switch (fp->args) {
225f789fee2SBen Gras case NONSEL: /* ! */
226f789fee2SBen Gras p++;
227f789fee2SBen Gras EATSPACE();
228f789fee2SBen Gras cmd->nonsel = ! cmd->nonsel;
229f789fee2SBen Gras goto nonsel;
230f789fee2SBen Gras case GROUP: /* { */
231f789fee2SBen Gras p++;
232f789fee2SBen Gras EATSPACE();
233f789fee2SBen Gras cmd->next = stack;
234f789fee2SBen Gras stack = cmd;
235f789fee2SBen Gras link = &cmd->u.c;
236f789fee2SBen Gras if (*p)
237f789fee2SBen Gras goto semicolon;
238f789fee2SBen Gras break;
239f789fee2SBen Gras case ENDGROUP:
240f789fee2SBen Gras /*
241f789fee2SBen Gras * Short-circuit command processing, since end of
242f789fee2SBen Gras * group is really just a noop.
243f789fee2SBen Gras */
244f789fee2SBen Gras cmd->nonsel = 1;
245f789fee2SBen Gras if (stack == 0)
246*0a6a1f1dSLionel Sambuc errx(1, "%lu: %s: unexpected }", linenum, fname);
247f789fee2SBen Gras cmd2 = stack;
248f789fee2SBen Gras stack = cmd2->next;
249f789fee2SBen Gras cmd2->next = cmd;
250f789fee2SBen Gras /*FALLTHROUGH*/
251f789fee2SBen Gras case EMPTY: /* d D g G h H l n N p P q x = \0 */
252f789fee2SBen Gras p++;
253f789fee2SBen Gras EATSPACE();
254*0a6a1f1dSLionel Sambuc switch (*p) {
255*0a6a1f1dSLionel Sambuc case ';':
256f789fee2SBen Gras p++;
257f789fee2SBen Gras link = &cmd->next;
258f789fee2SBen Gras goto semicolon;
259*0a6a1f1dSLionel Sambuc case '}':
260*0a6a1f1dSLionel Sambuc goto semicolon;
261*0a6a1f1dSLionel Sambuc case '\0':
262*0a6a1f1dSLionel Sambuc break;
263*0a6a1f1dSLionel Sambuc default:
264*0a6a1f1dSLionel Sambuc errx(1, "%lu: %s: extra characters at the end of %c command",
265*0a6a1f1dSLionel Sambuc linenum, fname, cmd->code);
266f789fee2SBen Gras }
267f789fee2SBen Gras break;
268f789fee2SBen Gras case TEXT: /* a c i */
269f789fee2SBen Gras p++;
270f789fee2SBen Gras EATSPACE();
271f789fee2SBen Gras if (*p != '\\')
272*0a6a1f1dSLionel Sambuc errx(1,
273*0a6a1f1dSLionel Sambuc "%lu: %s: command %c expects \\ followed by text", linenum, fname, cmd->code);
274f789fee2SBen Gras p++;
275f789fee2SBen Gras EATSPACE();
276f789fee2SBen Gras if (*p)
277*0a6a1f1dSLionel Sambuc errx(1,
278*0a6a1f1dSLionel Sambuc "%lu: %s: extra characters after \\ at the end of %c command",
279*0a6a1f1dSLionel Sambuc linenum, fname, cmd->code);
280f789fee2SBen Gras cmd->t = compile_text();
281f789fee2SBen Gras break;
282f789fee2SBen Gras case COMMENT: /* \0 # */
283f789fee2SBen Gras break;
284f789fee2SBen Gras case WFILE: /* w */
285f789fee2SBen Gras p++;
286f789fee2SBen Gras EATSPACE();
287f789fee2SBen Gras if (*p == '\0')
288*0a6a1f1dSLionel Sambuc errx(1, "%lu: %s: filename expected", linenum, fname);
289f789fee2SBen Gras cmd->t = duptoeol(p, "w command");
290f789fee2SBen Gras if (aflag)
291f789fee2SBen Gras cmd->u.fd = -1;
292f789fee2SBen Gras else if ((cmd->u.fd = open(p,
293f789fee2SBen Gras O_WRONLY|O_APPEND|O_CREAT|O_TRUNC,
294f789fee2SBen Gras DEFFILEMODE)) == -1)
295*0a6a1f1dSLionel Sambuc err(1, "%s", p);
296f789fee2SBen Gras break;
297f789fee2SBen Gras case RFILE: /* r */
298f789fee2SBen Gras p++;
299f789fee2SBen Gras EATSPACE();
300f789fee2SBen Gras if (*p == '\0')
301*0a6a1f1dSLionel Sambuc errx(1, "%lu: %s: filename expected", linenum, fname);
302f789fee2SBen Gras else
303f789fee2SBen Gras cmd->t = duptoeol(p, "read command");
304f789fee2SBen Gras break;
305f789fee2SBen Gras case BRANCH: /* b t */
306f789fee2SBen Gras p++;
307f789fee2SBen Gras EATSPACE();
308f789fee2SBen Gras if (*p == '\0')
309f789fee2SBen Gras cmd->t = NULL;
310f789fee2SBen Gras else
311f789fee2SBen Gras cmd->t = duptoeol(p, "branch");
312f789fee2SBen Gras break;
313f789fee2SBen Gras case LABEL: /* : */
314f789fee2SBen Gras p++;
315f789fee2SBen Gras EATSPACE();
316f789fee2SBen Gras cmd->t = duptoeol(p, "label");
317f789fee2SBen Gras if (strlen(p) == 0)
318*0a6a1f1dSLionel Sambuc errx(1, "%lu: %s: empty label", linenum, fname);
319f789fee2SBen Gras enterlabel(cmd);
320f789fee2SBen Gras break;
321f789fee2SBen Gras case SUBST: /* s */
322f789fee2SBen Gras p++;
323f789fee2SBen Gras if (*p == '\0' || *p == '\\')
324*0a6a1f1dSLionel Sambuc errx(1,
325*0a6a1f1dSLionel Sambuc "%lu: %s: substitute pattern can not be delimited by newline or backslash",
326*0a6a1f1dSLionel Sambuc linenum, fname);
327*0a6a1f1dSLionel Sambuc cmd->u.s = xcalloc(1, sizeof(struct s_subst));
328*0a6a1f1dSLionel Sambuc p = compile_delimited(p, re, 0);
329f789fee2SBen Gras if (p == NULL)
330*0a6a1f1dSLionel Sambuc errx(1,
331*0a6a1f1dSLionel Sambuc "%lu: %s: unterminated substitute pattern", linenum, fname);
332*0a6a1f1dSLionel Sambuc
333*0a6a1f1dSLionel Sambuc /* Compile RE with no case sensitivity temporarily */
334*0a6a1f1dSLionel Sambuc if (*re == '\0')
335*0a6a1f1dSLionel Sambuc cmd->u.s->re = NULL;
336*0a6a1f1dSLionel Sambuc else
337*0a6a1f1dSLionel Sambuc cmd->u.s->re = compile_re(re, 0);
338f789fee2SBen Gras --p;
339f789fee2SBen Gras p = compile_subst(p, cmd->u.s);
340f789fee2SBen Gras p = compile_flags(p, cmd->u.s);
341*0a6a1f1dSLionel Sambuc
342*0a6a1f1dSLionel Sambuc /* Recompile RE with case sensitivity from "I" flag if any */
343*0a6a1f1dSLionel Sambuc if (*re == '\0')
344*0a6a1f1dSLionel Sambuc cmd->u.s->re = NULL;
345*0a6a1f1dSLionel Sambuc else
346*0a6a1f1dSLionel Sambuc cmd->u.s->re = compile_re(re, cmd->u.s->icase);
347f789fee2SBen Gras EATSPACE();
348f789fee2SBen Gras if (*p == ';') {
349f789fee2SBen Gras p++;
350f789fee2SBen Gras link = &cmd->next;
351f789fee2SBen Gras goto semicolon;
352f789fee2SBen Gras }
353f789fee2SBen Gras break;
354f789fee2SBen Gras case TR: /* y */
355f789fee2SBen Gras p++;
356*0a6a1f1dSLionel Sambuc p = compile_tr(p, &cmd->u.y);
357f789fee2SBen Gras EATSPACE();
358*0a6a1f1dSLionel Sambuc switch (*p) {
359*0a6a1f1dSLionel Sambuc case ';':
360f789fee2SBen Gras p++;
361f789fee2SBen Gras link = &cmd->next;
362f789fee2SBen Gras goto semicolon;
363*0a6a1f1dSLionel Sambuc case '}':
364*0a6a1f1dSLionel Sambuc goto semicolon;
365*0a6a1f1dSLionel Sambuc case '\0':
366*0a6a1f1dSLionel Sambuc break;
367*0a6a1f1dSLionel Sambuc default:
368*0a6a1f1dSLionel Sambuc errx(1,
369*0a6a1f1dSLionel Sambuc "%lu: %s: extra text at the end of a transform command", linenum, fname);
370f789fee2SBen Gras }
371f789fee2SBen Gras if (*p)
372f789fee2SBen Gras break;
373f789fee2SBen Gras }
374f789fee2SBen Gras }
375f789fee2SBen Gras }
376f789fee2SBen Gras
377f789fee2SBen Gras /*
378*0a6a1f1dSLionel Sambuc * Get a delimited string. P points to the delimeter of the string; d points
379f789fee2SBen Gras * to a buffer area. Newline and delimiter escapes are processed; other
380f789fee2SBen Gras * escapes are ignored.
381f789fee2SBen Gras *
382f789fee2SBen Gras * Returns a pointer to the first character after the final delimiter or NULL
383f789fee2SBen Gras * in the case of a non-terminated string. The character array d is filled
384f789fee2SBen Gras * with the processed string.
385f789fee2SBen Gras */
386f789fee2SBen Gras static char *
compile_delimited(char * p,char * d,int is_tr)387*0a6a1f1dSLionel Sambuc compile_delimited(char *p, char *d, int is_tr)
388f789fee2SBen Gras {
389f789fee2SBen Gras char c;
390f789fee2SBen Gras
391f789fee2SBen Gras c = *p++;
392f789fee2SBen Gras if (c == '\0')
393f789fee2SBen Gras return (NULL);
394f789fee2SBen Gras else if (c == '\\')
395*0a6a1f1dSLionel Sambuc errx(1, "%lu: %s: \\ can not be used as a string delimiter",
396*0a6a1f1dSLionel Sambuc linenum, fname);
397f789fee2SBen Gras else if (c == '\n')
398*0a6a1f1dSLionel Sambuc errx(1, "%lu: %s: newline can not be used as a string delimiter",
399*0a6a1f1dSLionel Sambuc linenum, fname);
400f789fee2SBen Gras while (*p) {
401*0a6a1f1dSLionel Sambuc if (*p == '[' && *p != c) {
402f789fee2SBen Gras if ((d = compile_ccl(&p, d)) == NULL)
403*0a6a1f1dSLionel Sambuc errx(1, "%lu: %s: unbalanced brackets ([])", linenum, fname);
404f789fee2SBen Gras continue;
405f789fee2SBen Gras } else if (*p == '\\' && p[1] == '[') {
406f789fee2SBen Gras *d++ = *p++;
407f789fee2SBen Gras } else if (*p == '\\' && p[1] == c)
408f789fee2SBen Gras p++;
409f789fee2SBen Gras else if (*p == '\\' && p[1] == 'n') {
410f789fee2SBen Gras *d++ = '\n';
411f789fee2SBen Gras p += 2;
412f789fee2SBen Gras continue;
413*0a6a1f1dSLionel Sambuc } else if (*p == '\\' && p[1] == '\\') {
414*0a6a1f1dSLionel Sambuc if (is_tr)
415*0a6a1f1dSLionel Sambuc p++;
416*0a6a1f1dSLionel Sambuc else
417f789fee2SBen Gras *d++ = *p++;
418*0a6a1f1dSLionel Sambuc } else if (*p == c) {
419f789fee2SBen Gras *d = '\0';
420f789fee2SBen Gras return (p + 1);
421f789fee2SBen Gras }
422f789fee2SBen Gras *d++ = *p++;
423f789fee2SBen Gras }
424f789fee2SBen Gras return (NULL);
425f789fee2SBen Gras }
426f789fee2SBen Gras
427f789fee2SBen Gras
/*
 * compile_ccl: copy one bracket expression verbatim.
 *
 * *sp points to the opening '['; t is where the copy goes.  A leading
 * '^' and a ']' in first position are taken literally.  Inside the
 * expression the constructs "[.", "[:" and "[=" are copied up to their
 * matching ".]", ":]" or "=]", so a ']' within them does not close the
 * expression.
 *
 * On success *sp is moved past the closing ']' and the position just
 * after the copied ']' in t is returned.  On an unterminated expression
 * NULL is returned and *sp is left alone.
 */
static char *
compile_ccl(char **sp, char *t)
{
	char *src = *sp;
	int prev, kind;

	*t++ = *src++;				/* the opening '[' */
	if (*src == '^')
		*t++ = *src++;
	if (*src == ']')
		*t++ = *src++;

	while (*src != '\0') {
		*t = *src;
		if (*src == ']')
			break;
		if (*src == '[' &&
		    ((kind = src[1]) == '.' || kind == ':' || kind == '=')) {
			/* Copy the introducer's second character too. */
			t[1] = src[1];
			t += 2;
			src += 2;
			/* Copy until a ']' directly preceded by `kind'. */
			prev = *src;
			for (;;) {
				*t = *src;
				if (*src == ']' && prev == kind)
					break;
				prev = *src;
				if (prev == '\0')
					return NULL;
				src++;
				t++;
			}
		}
		src++;
		t++;
	}

	if (*src != ']')
		return NULL;
	*sp = src + 1;
	return t + 1;
}
449f789fee2SBen Gras
450f789fee2SBen Gras /*
451*0a6a1f1dSLionel Sambuc * Compiles the regular expression in RE and returns a pointer to the compiled
452*0a6a1f1dSLionel Sambuc * regular expression.
453f789fee2SBen Gras * Cflags are passed to regcomp.
454f789fee2SBen Gras */
455*0a6a1f1dSLionel Sambuc static regex_t *
compile_re(char * re,int case_insensitive)456*0a6a1f1dSLionel Sambuc compile_re(char *re, int case_insensitive)
457f789fee2SBen Gras {
458*0a6a1f1dSLionel Sambuc regex_t *rep;
459*0a6a1f1dSLionel Sambuc int eval, flags;
460f789fee2SBen Gras
461*0a6a1f1dSLionel Sambuc
462*0a6a1f1dSLionel Sambuc flags = rflags;
463*0a6a1f1dSLionel Sambuc if (case_insensitive)
464*0a6a1f1dSLionel Sambuc flags |= REG_ICASE;
465*0a6a1f1dSLionel Sambuc rep = xmalloc(sizeof(regex_t));
466*0a6a1f1dSLionel Sambuc if ((eval = regcomp(rep, re, flags)) != 0)
467*0a6a1f1dSLionel Sambuc errx(1, "%lu: %s: RE error: %s",
468*0a6a1f1dSLionel Sambuc linenum, fname, strregerror(eval, rep));
469*0a6a1f1dSLionel Sambuc if (maxnsub < rep->re_nsub)
470*0a6a1f1dSLionel Sambuc maxnsub = rep->re_nsub;
471*0a6a1f1dSLionel Sambuc return (rep);
472f789fee2SBen Gras }
473f789fee2SBen Gras
474f789fee2SBen Gras /*
475f789fee2SBen Gras * Compile the substitution string of a regular expression and set res to
476f789fee2SBen Gras * point to a saved copy of it. Nsub is the number of parenthesized regular
477f789fee2SBen Gras * expressions.
478f789fee2SBen Gras */
479f789fee2SBen Gras static char *
compile_subst(char * p,struct s_subst * s)480f789fee2SBen Gras compile_subst(char *p, struct s_subst *s)
481f789fee2SBen Gras {
482*0a6a1f1dSLionel Sambuc static char lbuf[_POSIX2_LINE_MAX + 1];
483*0a6a1f1dSLionel Sambuc size_t asize, size;
484*0a6a1f1dSLionel Sambuc u_char ref;
485f789fee2SBen Gras char c, *text, *op, *sp;
486*0a6a1f1dSLionel Sambuc int more = 1, sawesc = 0;
487f789fee2SBen Gras
488f789fee2SBen Gras c = *p++; /* Terminator character */
489f789fee2SBen Gras if (c == '\0')
490f789fee2SBen Gras return (NULL);
491f789fee2SBen Gras
492f789fee2SBen Gras s->maxbref = 0;
493f789fee2SBen Gras s->linenum = linenum;
494*0a6a1f1dSLionel Sambuc asize = 2 * _POSIX2_LINE_MAX + 1;
495*0a6a1f1dSLionel Sambuc text = xmalloc(asize);
496*0a6a1f1dSLionel Sambuc size = 0;
497f789fee2SBen Gras do {
498f789fee2SBen Gras op = sp = text + size;
499f789fee2SBen Gras for (; *p; p++) {
500f789fee2SBen Gras if (*p == '\\' || sawesc) {
501f789fee2SBen Gras /*
502f789fee2SBen Gras * If this is a continuation from the last
503f789fee2SBen Gras * buffer, we won't have a character to
504f789fee2SBen Gras * skip over.
505f789fee2SBen Gras */
506f789fee2SBen Gras if (sawesc)
507f789fee2SBen Gras sawesc = 0;
508f789fee2SBen Gras else
509f789fee2SBen Gras p++;
510f789fee2SBen Gras
511f789fee2SBen Gras if (*p == '\0') {
512f789fee2SBen Gras /*
513f789fee2SBen Gras * This escaped character is continued
514f789fee2SBen Gras * in the next part of the line. Note
515f789fee2SBen Gras * this fact, then cause the loop to
516f789fee2SBen Gras * exit w/ normal EOL case and reenter
517f789fee2SBen Gras * above with the new buffer.
518f789fee2SBen Gras */
519f789fee2SBen Gras sawesc = 1;
520f789fee2SBen Gras p--;
521f789fee2SBen Gras continue;
522f789fee2SBen Gras } else if (strchr("123456789", *p) != NULL) {
523f789fee2SBen Gras *sp++ = '\\';
524*0a6a1f1dSLionel Sambuc ref = (u_char)(*p - '0');
525f789fee2SBen Gras if (s->re != NULL &&
526*0a6a1f1dSLionel Sambuc ref > s->re->re_nsub)
527*0a6a1f1dSLionel Sambuc errx(1, "%lu: %s: \\%c not defined in the RE",
528*0a6a1f1dSLionel Sambuc linenum, fname, *p);
529f789fee2SBen Gras if (s->maxbref < ref)
530f789fee2SBen Gras s->maxbref = ref;
531f789fee2SBen Gras } else if (*p == '&' || *p == '\\')
532f789fee2SBen Gras *sp++ = '\\';
533f789fee2SBen Gras } else if (*p == c) {
534*0a6a1f1dSLionel Sambuc if (*++p == '\0' && more) {
535*0a6a1f1dSLionel Sambuc if (cu_fgets(lbuf, sizeof(lbuf), &more))
536*0a6a1f1dSLionel Sambuc p = lbuf;
537*0a6a1f1dSLionel Sambuc }
538f789fee2SBen Gras *sp++ = '\0';
539*0a6a1f1dSLionel Sambuc size += (size_t)(sp - op);
540f789fee2SBen Gras s->new = xrealloc(text, size);
541f789fee2SBen Gras return (p);
542f789fee2SBen Gras } else if (*p == '\n') {
543*0a6a1f1dSLionel Sambuc errx(1,
544*0a6a1f1dSLionel Sambuc "%lu: %s: unescaped newline inside substitute pattern", linenum, fname);
545f789fee2SBen Gras /* NOTREACHED */
546f789fee2SBen Gras }
547f789fee2SBen Gras *sp++ = *p;
548f789fee2SBen Gras }
549*0a6a1f1dSLionel Sambuc size += (size_t)(sp - op);
550*0a6a1f1dSLionel Sambuc if (asize - size < _POSIX2_LINE_MAX + 1) {
551*0a6a1f1dSLionel Sambuc asize *= 2;
552*0a6a1f1dSLionel Sambuc text = xrealloc(text, asize);
553*0a6a1f1dSLionel Sambuc }
554*0a6a1f1dSLionel Sambuc } while (cu_fgets(p = lbuf, sizeof(lbuf), &more));
555*0a6a1f1dSLionel Sambuc errx(1, "%lu: %s: unterminated substitute in regular expression",
556*0a6a1f1dSLionel Sambuc linenum, fname);
557f789fee2SBen Gras /* NOTREACHED */
558f789fee2SBen Gras }
559f789fee2SBen Gras
560f789fee2SBen Gras /*
561f789fee2SBen Gras * Compile the flags of the s command
562f789fee2SBen Gras */
563f789fee2SBen Gras static char *
compile_flags(char * p,struct s_subst * s)564f789fee2SBen Gras compile_flags(char *p, struct s_subst *s)
565f789fee2SBen Gras {
566f789fee2SBen Gras int gn; /* True if we have seen g or n */
567*0a6a1f1dSLionel Sambuc unsigned long nval;
568*0a6a1f1dSLionel Sambuc char wfile[_POSIX2_LINE_MAX + 1], *q;
569f789fee2SBen Gras
570f789fee2SBen Gras s->n = 1; /* Default */
571f789fee2SBen Gras s->p = 0;
572f789fee2SBen Gras s->wfile = NULL;
573f789fee2SBen Gras s->wfd = -1;
574*0a6a1f1dSLionel Sambuc s->icase = 0;
575f789fee2SBen Gras for (gn = 0;;) {
576f789fee2SBen Gras EATSPACE(); /* EXTENSION */
577f789fee2SBen Gras switch (*p) {
578f789fee2SBen Gras case 'g':
579f789fee2SBen Gras if (gn)
580*0a6a1f1dSLionel Sambuc errx(1,
581*0a6a1f1dSLionel Sambuc "%lu: %s: more than one number or 'g' in substitute flags", linenum, fname);
582f789fee2SBen Gras gn = 1;
583f789fee2SBen Gras s->n = 0;
584f789fee2SBen Gras break;
585f789fee2SBen Gras case '\0':
586f789fee2SBen Gras case '\n':
587f789fee2SBen Gras case ';':
588f789fee2SBen Gras return (p);
589f789fee2SBen Gras case 'p':
590f789fee2SBen Gras s->p = 1;
591f789fee2SBen Gras break;
592*0a6a1f1dSLionel Sambuc case 'i':
593*0a6a1f1dSLionel Sambuc case 'I':
594*0a6a1f1dSLionel Sambuc s->icase = 1;
595*0a6a1f1dSLionel Sambuc break;
596f789fee2SBen Gras case '1': case '2': case '3':
597f789fee2SBen Gras case '4': case '5': case '6':
598f789fee2SBen Gras case '7': case '8': case '9':
599f789fee2SBen Gras if (gn)
600*0a6a1f1dSLionel Sambuc errx(1,
601*0a6a1f1dSLionel Sambuc "%lu: %s: more than one number or 'g' in substitute flags", linenum, fname);
602f789fee2SBen Gras gn = 1;
603*0a6a1f1dSLionel Sambuc errno = 0;
604*0a6a1f1dSLionel Sambuc nval = strtoul(p, &p, 10);
605*0a6a1f1dSLionel Sambuc if (errno == ERANGE || nval > INT_MAX)
606*0a6a1f1dSLionel Sambuc errx(1,
607*0a6a1f1dSLionel Sambuc "%lu: %s: overflow in the 'N' substitute flag", linenum, fname);
608*0a6a1f1dSLionel Sambuc s->n = (int)nval;
609f789fee2SBen Gras p--;
610f789fee2SBen Gras break;
611f789fee2SBen Gras case 'w':
612f789fee2SBen Gras p++;
613f789fee2SBen Gras #ifdef HISTORIC_PRACTICE
614f789fee2SBen Gras if (*p != ' ') {
615*0a6a1f1dSLionel Sambuc warnx("%lu: %s: space missing before w wfile", linenum, fname);
616f789fee2SBen Gras return (p);
617f789fee2SBen Gras }
618f789fee2SBen Gras #endif
619f789fee2SBen Gras EATSPACE();
620f789fee2SBen Gras q = wfile;
621f789fee2SBen Gras while (*p) {
622f789fee2SBen Gras if (*p == '\n')
623f789fee2SBen Gras break;
624f789fee2SBen Gras *q++ = *p++;
625f789fee2SBen Gras }
626f789fee2SBen Gras *q = '\0';
627f789fee2SBen Gras if (q == wfile)
628*0a6a1f1dSLionel Sambuc errx(1, "%lu: %s: no wfile specified", linenum, fname);
629f789fee2SBen Gras s->wfile = strdup(wfile);
630f789fee2SBen Gras if (!aflag && (s->wfd = open(wfile,
631f789fee2SBen Gras O_WRONLY|O_APPEND|O_CREAT|O_TRUNC,
632f789fee2SBen Gras DEFFILEMODE)) == -1)
633*0a6a1f1dSLionel Sambuc err(1, "%s", wfile);
634f789fee2SBen Gras return (p);
635f789fee2SBen Gras default:
636*0a6a1f1dSLionel Sambuc errx(1, "%lu: %s: bad flag in substitute command: '%c'",
637*0a6a1f1dSLionel Sambuc linenum, fname, *p);
638f789fee2SBen Gras break;
639f789fee2SBen Gras }
640f789fee2SBen Gras p++;
641f789fee2SBen Gras }
642f789fee2SBen Gras }
643f789fee2SBen Gras
644f789fee2SBen Gras /*
645f789fee2SBen Gras * Compile a translation set of strings into a lookup table.
646f789fee2SBen Gras */
647f789fee2SBen Gras static char *
compile_tr(char * p,struct s_tr ** py)648*0a6a1f1dSLionel Sambuc compile_tr(char *p, struct s_tr **py)
649f789fee2SBen Gras {
650*0a6a1f1dSLionel Sambuc struct s_tr *y;
651*0a6a1f1dSLionel Sambuc size_t i;
652*0a6a1f1dSLionel Sambuc const char *op, *np;
653*0a6a1f1dSLionel Sambuc char old[_POSIX2_LINE_MAX + 1];
654*0a6a1f1dSLionel Sambuc char new[_POSIX2_LINE_MAX + 1];
655*0a6a1f1dSLionel Sambuc size_t oclen, oldlen, nclen, newlen;
656*0a6a1f1dSLionel Sambuc mbstate_t mbs1, mbs2;
657*0a6a1f1dSLionel Sambuc
658*0a6a1f1dSLionel Sambuc *py = y = xmalloc(sizeof(*y));
659*0a6a1f1dSLionel Sambuc y->multis = NULL;
660*0a6a1f1dSLionel Sambuc y->nmultis = 0;
661f789fee2SBen Gras
662f789fee2SBen Gras if (*p == '\0' || *p == '\\')
663*0a6a1f1dSLionel Sambuc errx(1,
664*0a6a1f1dSLionel Sambuc "%lu: %s: transform pattern can not be delimited by newline or backslash",
665*0a6a1f1dSLionel Sambuc linenum, fname);
666*0a6a1f1dSLionel Sambuc p = compile_delimited(p, old, 1);
667*0a6a1f1dSLionel Sambuc if (p == NULL)
668*0a6a1f1dSLionel Sambuc errx(1, "%lu: %s: unterminated transform source string",
669*0a6a1f1dSLionel Sambuc linenum, fname);
670*0a6a1f1dSLionel Sambuc p = compile_delimited(p - 1, new, 1);
671*0a6a1f1dSLionel Sambuc if (p == NULL)
672*0a6a1f1dSLionel Sambuc errx(1, "%lu: %s: unterminated transform target string",
673*0a6a1f1dSLionel Sambuc linenum, fname);
674f789fee2SBen Gras EATSPACE();
675*0a6a1f1dSLionel Sambuc op = old;
676*0a6a1f1dSLionel Sambuc oldlen = mbsrtowcs(NULL, &op, 0, NULL);
677*0a6a1f1dSLionel Sambuc if (oldlen == (size_t)-1)
678*0a6a1f1dSLionel Sambuc err(1, NULL);
679*0a6a1f1dSLionel Sambuc np = new;
680*0a6a1f1dSLionel Sambuc newlen = mbsrtowcs(NULL, &np, 0, NULL);
681*0a6a1f1dSLionel Sambuc if (newlen == (size_t)-1)
682*0a6a1f1dSLionel Sambuc err(1, NULL);
683*0a6a1f1dSLionel Sambuc if (newlen != oldlen)
684*0a6a1f1dSLionel Sambuc errx(1, "%lu: %s: transform strings are not the same length",
685*0a6a1f1dSLionel Sambuc linenum, fname);
686*0a6a1f1dSLionel Sambuc if (MB_CUR_MAX == 1) {
687*0a6a1f1dSLionel Sambuc /*
688*0a6a1f1dSLionel Sambuc * The single-byte encoding case is easy: generate a
689*0a6a1f1dSLionel Sambuc * lookup table.
690*0a6a1f1dSLionel Sambuc */
691f789fee2SBen Gras for (i = 0; i <= UCHAR_MAX; i++)
692*0a6a1f1dSLionel Sambuc y->bytetab[i] = (u_char)i;
693*0a6a1f1dSLionel Sambuc for (; *op; op++, np++)
694*0a6a1f1dSLionel Sambuc y->bytetab[(u_char)*op] = (u_char)*np;
695*0a6a1f1dSLionel Sambuc } else {
696*0a6a1f1dSLionel Sambuc /*
697*0a6a1f1dSLionel Sambuc * Multi-byte encoding case: generate a lookup table as
698*0a6a1f1dSLionel Sambuc * above, but only for single-byte characters. The first
699*0a6a1f1dSLionel Sambuc * bytes of multi-byte characters have their lookup table
700*0a6a1f1dSLionel Sambuc * entries set to 0, which causes do_tr() to search through
701*0a6a1f1dSLionel Sambuc * an auxiliary vector of multi-byte mappings.
702*0a6a1f1dSLionel Sambuc */
703*0a6a1f1dSLionel Sambuc memset(&mbs1, 0, sizeof(mbs1));
704*0a6a1f1dSLionel Sambuc memset(&mbs2, 0, sizeof(mbs2));
705*0a6a1f1dSLionel Sambuc for (i = 0; i <= UCHAR_MAX; i++)
706*0a6a1f1dSLionel Sambuc y->bytetab[i] = (u_char)((btowc((int)i) != WEOF) ? i : 0);
707*0a6a1f1dSLionel Sambuc while (*op != '\0') {
708*0a6a1f1dSLionel Sambuc oclen = mbrlen(op, MB_LEN_MAX, &mbs1);
709*0a6a1f1dSLionel Sambuc if (oclen == (size_t)-1 || oclen == (size_t)-2)
710*0a6a1f1dSLionel Sambuc errc(1, EILSEQ, NULL);
711*0a6a1f1dSLionel Sambuc nclen = mbrlen(np, MB_LEN_MAX, &mbs2);
712*0a6a1f1dSLionel Sambuc if (nclen == (size_t)-1 || nclen == (size_t)-2)
713*0a6a1f1dSLionel Sambuc errc(1, EILSEQ, NULL);
714*0a6a1f1dSLionel Sambuc if (oclen == 1 && nclen == 1)
715*0a6a1f1dSLionel Sambuc y->bytetab[(u_char)*op] = (u_char)*np;
716*0a6a1f1dSLionel Sambuc else {
717*0a6a1f1dSLionel Sambuc y->bytetab[(u_char)*op] = 0;
718*0a6a1f1dSLionel Sambuc y->multis = xrealloc(y->multis,
719*0a6a1f1dSLionel Sambuc (y->nmultis + 1) * sizeof(*y->multis));
720*0a6a1f1dSLionel Sambuc i = y->nmultis++;
721*0a6a1f1dSLionel Sambuc y->multis[i].fromlen = oclen;
722*0a6a1f1dSLionel Sambuc memcpy(y->multis[i].from, op, oclen);
723*0a6a1f1dSLionel Sambuc y->multis[i].tolen = nclen;
724*0a6a1f1dSLionel Sambuc memcpy(y->multis[i].to, np, nclen);
725*0a6a1f1dSLionel Sambuc }
726*0a6a1f1dSLionel Sambuc op += oclen;
727*0a6a1f1dSLionel Sambuc np += nclen;
728*0a6a1f1dSLionel Sambuc }
729*0a6a1f1dSLionel Sambuc }
730f789fee2SBen Gras return (p);
731f789fee2SBen Gras }
732f789fee2SBen Gras
733f789fee2SBen Gras /*
734*0a6a1f1dSLionel Sambuc * Compile the text following an a or i command.
735f789fee2SBen Gras */
736f789fee2SBen Gras static char *
compile_text(void)737f789fee2SBen Gras compile_text(void)
738f789fee2SBen Gras {
739*0a6a1f1dSLionel Sambuc size_t asize, size;
740*0a6a1f1dSLionel Sambuc int esc_nl;
741*0a6a1f1dSLionel Sambuc char *text, *p, *op, *s;
742*0a6a1f1dSLionel Sambuc char lbuf[_POSIX2_LINE_MAX + 1];
743f789fee2SBen Gras
744*0a6a1f1dSLionel Sambuc asize = 2 * _POSIX2_LINE_MAX + 1;
745*0a6a1f1dSLionel Sambuc text = xmalloc(asize);
746*0a6a1f1dSLionel Sambuc size = 0;
747*0a6a1f1dSLionel Sambuc while (cu_fgets(lbuf, sizeof(lbuf), NULL)) {
748f789fee2SBen Gras op = s = text + size;
749*0a6a1f1dSLionel Sambuc p = lbuf;
750*0a6a1f1dSLionel Sambuc EATSPACE();
751*0a6a1f1dSLionel Sambuc for (esc_nl = 0; *p != '\0'; p++) {
752*0a6a1f1dSLionel Sambuc if (*p == '\\' && p[1] != '\0' && *++p == '\n')
753*0a6a1f1dSLionel Sambuc esc_nl = 1;
754f789fee2SBen Gras *s++ = *p;
755f789fee2SBen Gras }
756*0a6a1f1dSLionel Sambuc size += (size_t)(s - op);
757*0a6a1f1dSLionel Sambuc if (!esc_nl) {
758f789fee2SBen Gras *s = '\0';
759f789fee2SBen Gras break;
760f789fee2SBen Gras }
761*0a6a1f1dSLionel Sambuc if (asize - size < _POSIX2_LINE_MAX + 1) {
762*0a6a1f1dSLionel Sambuc asize *= 2;
763*0a6a1f1dSLionel Sambuc text = xrealloc(text, asize);
764f789fee2SBen Gras }
765*0a6a1f1dSLionel Sambuc }
766*0a6a1f1dSLionel Sambuc text[size] = '\0';
767*0a6a1f1dSLionel Sambuc p = xrealloc(text, size + 1);
768*0a6a1f1dSLionel Sambuc return (p);
769f789fee2SBen Gras }
770f789fee2SBen Gras
771f789fee2SBen Gras /*
772f789fee2SBen Gras * Get an address and return a pointer to the first character after
773f789fee2SBen Gras * it. Fill the structure pointed to according to the address.
774f789fee2SBen Gras */
775f789fee2SBen Gras static char *
compile_addr(char * p,struct s_addr * a)776f789fee2SBen Gras compile_addr(char *p, struct s_addr *a)
777f789fee2SBen Gras {
778*0a6a1f1dSLionel Sambuc char *end, re[_POSIX2_LINE_MAX + 1];
779*0a6a1f1dSLionel Sambuc int icase;
780f789fee2SBen Gras
781*0a6a1f1dSLionel Sambuc icase = 0;
782*0a6a1f1dSLionel Sambuc
783*0a6a1f1dSLionel Sambuc a->type = 0;
784f789fee2SBen Gras switch (*p) {
785f789fee2SBen Gras case '\\': /* Context address */
786f789fee2SBen Gras ++p;
787f789fee2SBen Gras /* FALLTHROUGH */
788f789fee2SBen Gras case '/': /* Context address */
789*0a6a1f1dSLionel Sambuc p = compile_delimited(p, re, 0);
790f789fee2SBen Gras if (p == NULL)
791*0a6a1f1dSLionel Sambuc errx(1, "%lu: %s: unterminated regular expression", linenum, fname);
792*0a6a1f1dSLionel Sambuc /* Check for case insensitive regexp flag */
793*0a6a1f1dSLionel Sambuc if (*p == 'I') {
794*0a6a1f1dSLionel Sambuc icase = 1;
795*0a6a1f1dSLionel Sambuc p++;
796*0a6a1f1dSLionel Sambuc }
797*0a6a1f1dSLionel Sambuc if (*re == '\0')
798*0a6a1f1dSLionel Sambuc a->u.r = NULL;
799*0a6a1f1dSLionel Sambuc else
800*0a6a1f1dSLionel Sambuc a->u.r = compile_re(re, icase);
801f789fee2SBen Gras a->type = AT_RE;
802f789fee2SBen Gras return (p);
803f789fee2SBen Gras
804f789fee2SBen Gras case '$': /* Last line */
805f789fee2SBen Gras a->type = AT_LAST;
806f789fee2SBen Gras return (p + 1);
807*0a6a1f1dSLionel Sambuc
808*0a6a1f1dSLionel Sambuc case '+': /* Relative line number */
809*0a6a1f1dSLionel Sambuc a->type = AT_RELLINE;
810*0a6a1f1dSLionel Sambuc p++;
811*0a6a1f1dSLionel Sambuc /* FALLTHROUGH */
812f789fee2SBen Gras /* Line number */
813f789fee2SBen Gras case '0': case '1': case '2': case '3': case '4':
814f789fee2SBen Gras case '5': case '6': case '7': case '8': case '9':
815*0a6a1f1dSLionel Sambuc if (a->type == 0)
816f789fee2SBen Gras a->type = AT_LINE;
817*0a6a1f1dSLionel Sambuc a->u.l = strtoul(p, &end, 10);
818f789fee2SBen Gras return (end);
819f789fee2SBen Gras default:
820*0a6a1f1dSLionel Sambuc errx(1, "%lu: %s: expected context address", linenum, fname);
821f789fee2SBen Gras return (NULL);
822f789fee2SBen Gras }
823f789fee2SBen Gras }
824f789fee2SBen Gras
825f789fee2SBen Gras /*
826f789fee2SBen Gras * duptoeol --
827f789fee2SBen Gras * Return a copy of all the characters up to \n or \0.
828f789fee2SBen Gras */
829f789fee2SBen Gras static char *
duptoeol(char * s,const char * ctype)830f789fee2SBen Gras duptoeol(char *s, const char *ctype)
831f789fee2SBen Gras {
832f789fee2SBen Gras size_t len;
833f789fee2SBen Gras int ws;
834*0a6a1f1dSLionel Sambuc char *p, *start;
835f789fee2SBen Gras
836f789fee2SBen Gras ws = 0;
837f789fee2SBen Gras for (start = s; *s != '\0' && *s != '\n'; ++s)
838f789fee2SBen Gras ws = isspace((unsigned char)*s);
839f789fee2SBen Gras *s = '\0';
840f789fee2SBen Gras if (ws)
841*0a6a1f1dSLionel Sambuc warnx("%lu: %s: whitespace after %s", linenum, fname, ctype);
842*0a6a1f1dSLionel Sambuc len = (size_t)(s - start + 1);
843*0a6a1f1dSLionel Sambuc p = xmalloc(len);
844*0a6a1f1dSLionel Sambuc return (memmove(p, start, len));
845f789fee2SBen Gras }
846f789fee2SBen Gras
847f789fee2SBen Gras /*
848f789fee2SBen Gras * Convert goto label names to addresses, and count a and r commands, in
849f789fee2SBen Gras * the given subset of the script. Free the memory used by labels in b
850f789fee2SBen Gras * and t commands (but not by :).
851f789fee2SBen Gras *
852f789fee2SBen Gras * TODO: Remove } nodes
853f789fee2SBen Gras */
854f789fee2SBen Gras static void
fixuplabel(struct s_command * cp,struct s_command * end)855f789fee2SBen Gras fixuplabel(struct s_command *cp, struct s_command *end)
856f789fee2SBen Gras {
857f789fee2SBen Gras
858f789fee2SBen Gras for (; cp != end; cp = cp->next)
859f789fee2SBen Gras switch (cp->code) {
860f789fee2SBen Gras case 'a':
861f789fee2SBen Gras case 'r':
862f789fee2SBen Gras appendnum++;
863f789fee2SBen Gras break;
864f789fee2SBen Gras case 'b':
865f789fee2SBen Gras case 't':
866f789fee2SBen Gras /* Resolve branch target. */
867f789fee2SBen Gras if (cp->t == NULL) {
868f789fee2SBen Gras cp->u.c = NULL;
869f789fee2SBen Gras break;
870f789fee2SBen Gras }
871f789fee2SBen Gras if ((cp->u.c = findlabel(cp->t)) == NULL)
872*0a6a1f1dSLionel Sambuc errx(1, "%lu: %s: undefined label '%s'", linenum, fname, cp->t);
873f789fee2SBen Gras free(cp->t);
874f789fee2SBen Gras break;
875f789fee2SBen Gras case '{':
876f789fee2SBen Gras /* Do interior commands. */
877f789fee2SBen Gras fixuplabel(cp->u.c, cp->next);
878f789fee2SBen Gras break;
879f789fee2SBen Gras }
880f789fee2SBen Gras }
881f789fee2SBen Gras
882f789fee2SBen Gras /*
883f789fee2SBen Gras * Associate the given command label for later lookup.
884f789fee2SBen Gras */
885f789fee2SBen Gras static void
enterlabel(struct s_command * cp)886f789fee2SBen Gras enterlabel(struct s_command *cp)
887f789fee2SBen Gras {
888f789fee2SBen Gras struct labhash **lhp, *lh;
889f789fee2SBen Gras u_char *p;
890f789fee2SBen Gras u_int h, c;
891f789fee2SBen Gras
892f789fee2SBen Gras for (h = 0, p = (u_char *)cp->t; (c = *p) != 0; p++)
893f789fee2SBen Gras h = (h << 5) + h + c;
894f789fee2SBen Gras lhp = &labels[h & LHMASK];
895f789fee2SBen Gras for (lh = *lhp; lh != NULL; lh = lh->lh_next)
896f789fee2SBen Gras if (lh->lh_hash == h && strcmp(cp->t, lh->lh_cmd->t) == 0)
897*0a6a1f1dSLionel Sambuc errx(1, "%lu: %s: duplicate label '%s'", linenum, fname, cp->t);
898f789fee2SBen Gras lh = xmalloc(sizeof *lh);
899f789fee2SBen Gras lh->lh_next = *lhp;
900f789fee2SBen Gras lh->lh_hash = h;
901f789fee2SBen Gras lh->lh_cmd = cp;
902f789fee2SBen Gras lh->lh_ref = 0;
903f789fee2SBen Gras *lhp = lh;
904f789fee2SBen Gras }
905f789fee2SBen Gras
906f789fee2SBen Gras /*
907f789fee2SBen Gras * Find the label contained in the command l in the command linked
908f789fee2SBen Gras * list cp. L is excluded from the search. Return NULL if not found.
909f789fee2SBen Gras */
910f789fee2SBen Gras static struct s_command *
findlabel(char * name)911f789fee2SBen Gras findlabel(char *name)
912f789fee2SBen Gras {
913f789fee2SBen Gras struct labhash *lh;
914f789fee2SBen Gras u_char *p;
915f789fee2SBen Gras u_int h, c;
916f789fee2SBen Gras
917f789fee2SBen Gras for (h = 0, p = (u_char *)name; (c = *p) != 0; p++)
918f789fee2SBen Gras h = (h << 5) + h + c;
919f789fee2SBen Gras for (lh = labels[h & LHMASK]; lh != NULL; lh = lh->lh_next) {
920f789fee2SBen Gras if (lh->lh_hash == h && strcmp(name, lh->lh_cmd->t) == 0) {
921f789fee2SBen Gras lh->lh_ref = 1;
922f789fee2SBen Gras return (lh->lh_cmd);
923f789fee2SBen Gras }
924f789fee2SBen Gras }
925f789fee2SBen Gras return (NULL);
926f789fee2SBen Gras }
927f789fee2SBen Gras
928f789fee2SBen Gras /*
929f789fee2SBen Gras * Warn about any unused labels. As a side effect, release the label hash
930f789fee2SBen Gras * table space.
931f789fee2SBen Gras */
932f789fee2SBen Gras static void
uselabel(void)933f789fee2SBen Gras uselabel(void)
934f789fee2SBen Gras {
935f789fee2SBen Gras struct labhash *lh, *next;
936f789fee2SBen Gras int i;
937f789fee2SBen Gras
938f789fee2SBen Gras for (i = 0; i < LHSZ; i++) {
939f789fee2SBen Gras for (lh = labels[i]; lh != NULL; lh = next) {
940f789fee2SBen Gras next = lh->lh_next;
941f789fee2SBen Gras if (!lh->lh_ref)
942*0a6a1f1dSLionel Sambuc warnx("%lu: %s: unused label '%s'",
943*0a6a1f1dSLionel Sambuc linenum, fname, lh->lh_cmd->t);
944f789fee2SBen Gras free(lh);
945f789fee2SBen Gras }
946f789fee2SBen Gras }
947f789fee2SBen Gras }
948