xref: /openbsd-src/usr.bin/sed/compile.c (revision 850e275390052b330d93020bf619a739a3c277ac)
1 /*	$OpenBSD: compile.c,v 1.24 2007/03/20 03:50:39 tedu Exp $	*/
2 
3 /*-
4  * Copyright (c) 1992 Diomidis Spinellis.
5  * Copyright (c) 1992, 1993
6  *	The Regents of the University of California.  All rights reserved.
7  *
8  * This code is derived from software contributed to Berkeley by
9  * Diomidis Spinellis of Imperial College, University of London.
10  *
11  * Redistribution and use in source and binary forms, with or without
12  * modification, are permitted provided that the following conditions
13  * are met:
14  * 1. Redistributions of source code must retain the above copyright
15  *    notice, this list of conditions and the following disclaimer.
16  * 2. Redistributions in binary form must reproduce the above copyright
17  *    notice, this list of conditions and the following disclaimer in the
18  *    documentation and/or other materials provided with the distribution.
19  * 3. Neither the name of the University nor the names of its contributors
20  *    may be used to endorse or promote products derived from this software
21  *    without specific prior written permission.
22  *
23  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
24  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
25  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
26  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
27  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
28  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
29  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
30  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
31  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
32  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
33  * SUCH DAMAGE.
34  */
35 
36 #ifndef lint
37 /* from: static char sccsid[] = "@(#)compile.c	8.2 (Berkeley) 4/28/95"; */
38 static const char rcsid[] = "$OpenBSD: compile.c,v 1.24 2007/03/20 03:50:39 tedu Exp $";
39 #endif /* not lint */
40 
41 #include <sys/types.h>
42 #include <sys/stat.h>
43 
44 #include <ctype.h>
45 #include <errno.h>
46 #include <fcntl.h>
47 #include <limits.h>
48 #include <regex.h>
49 #include <stdio.h>
50 #include <stdlib.h>
51 #include <string.h>
52 
53 #include "defs.h"
54 #include "extern.h"
55 
/*
 * Label hash table.  labels[] holds LHSZ bucket heads; a label's bucket is
 * selected with (hash & LHMASK).  Entries are added by enterlabel(), looked
 * up by findlabel() and released by uselabel().
 */
#define LHSZ	128
#define	LHMASK	(LHSZ - 1)
static struct labhash {
	struct	labhash *lh_next;	/* Next entry in the same bucket */
	u_int	lh_hash;		/* Full hash of the label name */
	struct	s_command *lh_cmd;	/* Command passed to enterlabel() */
	int	lh_ref;			/* Set to 1 once findlabel() hits it */
} *labels[LHSZ];
64 
/* Forward declarations of the file-local helpers defined below. */
static char	 *compile_addr(char *, struct s_addr *);
static char	 *compile_ccl(char **, char *);
static char	 *compile_delimited(char *, char *);
static char	 *compile_flags(char *, struct s_subst *);
static char	 *compile_re(char *, regex_t **);
static char	 *compile_subst(char *, struct s_subst *);
static char	 *compile_text(void);
static char	 *compile_tr(char *, char **);
static struct s_command
		**compile_stream(struct s_command **);
static char	 *duptoeol(char *, char *, char **);
static void	  enterlabel(struct s_command *);
static struct s_command
		 *findlabel(char *);
static void	  fixuplabel(struct s_command *, struct s_command *);
static void	  uselabel(void);
81 
/*
 * Command specification.  This is used to drive the command parser:
 * compile_stream() looks a command letter up by `code', rejects the command
 * if more than `naddr' addresses were given, and switches on `args' to
 * decide how the rest of the command is parsed.
 */
struct s_format {
	char code;				/* Command code */
	int naddr;				/* Number of address args */
	enum e_args args;			/* Argument type */
};
90 
/*
 * Table of known commands.  The final entry has code '\0'; the lookup loop
 * in compile_stream() stops there and treats it as "no such command".
 */
static struct s_format cmd_fmts[] = {
	{'{', 2, GROUP},
	{'}', 0, ENDGROUP},
	{'a', 1, TEXT},
	{'b', 2, BRANCH},
	{'c', 2, TEXT},
	{'d', 2, EMPTY},
	{'D', 2, EMPTY},
	{'g', 2, EMPTY},
	{'G', 2, EMPTY},
	{'h', 2, EMPTY},
	{'H', 2, EMPTY},
	{'i', 1, TEXT},
	{'l', 2, EMPTY},
	{'n', 2, EMPTY},
	{'N', 2, EMPTY},
	{'p', 2, EMPTY},
	{'P', 2, EMPTY},
	{'q', 1, EMPTY},
	{'r', 1, RFILE},
	{'s', 2, SUBST},
	{'t', 2, BRANCH},
	{'w', 2, WFILE},
	{'x', 2, EMPTY},
	{'y', 2, TR},
	{'!', 2, NONSEL},
	{':', 0, LABEL},
	{'#', 0, COMMENT},
	{'=', 1, EMPTY},
	{'\0', 0, COMMENT},
};
122 
/*
 * The compiled program: head of the command list that compile() builds
 * through compile_stream(&prog).
 */
struct s_command *prog;
125 
126 /*
127  * Compile the program into prog.
128  * Initialise appends.
129  */
130 void
131 compile(void)
132 {
133 	*compile_stream(&prog) = NULL;
134 	fixuplabel(prog, NULL);
135 	uselabel();
136 	appends = xmalloc(sizeof(struct s_appends) * appendnum);
137 	match = xmalloc((maxnsub + 1) * sizeof(regmatch_t));
138 }
139 
/*
 * Advance the local pointer `p' past ASCII whitespace.  A NULL `p' is left
 * alone.  The isascii() test runs first so isspace() only ever sees values
 * in the ASCII range.
 */
#define EATSPACE() do {							\
	while (p != NULL && isascii(*p) && isspace(*p))			\
		p++;							\
} while (0)
145 
/*
 * Parse the script, one cu_fgets() line at a time, into a chain of
 * s_command structures.
 *
 * `link' is the slot (a "next"-style pointer) where the next command is to
 * be stored.  Each new command is written through it and `link' then moves
 * to that command's own next pointer, or to its u.c pointer for '{' so that
 * the group body is chained there.  When input is exhausted the current
 * slot is returned so the caller can terminate the list.
 *
 * `stack' tracks unclosed '{' commands; while a group is open its `next'
 * field temporarily links to the enclosing open group, and on '}' it is
 * repointed at the '}' command itself.
 *
 * Syntax errors are reported through err(COMPILE, ...).
 */
static struct s_command **
compile_stream(struct s_command **link)
{
	char *p;
	static char lbuf[_POSIX2_LINE_MAX + 1];	/* To save stack */
	struct s_command *cmd, *cmd2, *stack;
	struct s_format *fp;
	int naddr;				/* Number of addresses */

	stack = 0;
	for (;;) {
		if ((p = cu_fgets(lbuf, sizeof(lbuf))) == NULL) {
			if (stack != 0)
				err(COMPILE, "unexpected EOF (pending }'s)");
			return (link);
		}

		/* Re-entry point for a further command on the same line. */
semicolon:	EATSPACE();
		if (*p == '#' || *p == '\0')
			continue;
		if (*p == ';') {
			p++;
			goto semicolon;
		}
		/* Append a fresh command and advance the link slot. */
		*link = cmd = xmalloc(sizeof(struct s_command));
		link = &cmd->next;
		cmd->nonsel = cmd->inrange = 0;
		/* First parse the addresses */
		naddr = 0;

/* Valid characters to start an address */
#define	addrchar(c)	(strchr("0123456789/\\$", (c)))
		if (addrchar(*p)) {
			naddr++;
			cmd->a1 = xmalloc(sizeof(struct s_addr));
			p = compile_addr(p, cmd->a1);
			EATSPACE();				/* EXTENSION */
			if (*p == ',') {
				p++;
				EATSPACE();			/* EXTENSION */
				naddr++;
				cmd->a2 = xmalloc(sizeof(struct s_addr));
				p = compile_addr(p, cmd->a2);
				EATSPACE();
			} else {
				cmd->a2 = 0;
			}
		} else {
			cmd->a1 = cmd->a2 = 0;
		}

		/* Re-entered after each '!' so the real command is parsed. */
nonsel:		/* Now parse the command */
		if (!*p)
			err(COMPILE, "command expected");
		cmd->code = *p;
		/* Linear search; stops on the '\0' entry if nothing matches. */
		for (fp = cmd_fmts; fp->code; fp++)
			if (fp->code == *p)
				break;
		if (!fp->code)
			err(COMPILE, "invalid command code %c", *p);
		if (naddr > fp->naddr)
			err(COMPILE,
			    "command %c expects up to %d address(es), found %d",
			    *p, fp->naddr, naddr);
		switch (fp->args) {
		case NONSEL:			/* ! */
			p++;
			EATSPACE();
			cmd->nonsel = ! cmd->nonsel;
			goto nonsel;
		case GROUP:			/* { */
			p++;
			EATSPACE();
			/* Push onto the open-group stack via the next field. */
			cmd->next = stack;
			stack = cmd;
			/* Following commands are chained as the group body. */
			link = &cmd->u.c;
			if (*p)
				goto semicolon;
			break;
		case ENDGROUP:
			/*
			 * Short-circuit command processing, since end of
			 * group is really just a noop.
			 */
			cmd->nonsel = 1;
			if (stack == 0)
				err(COMPILE, "unexpected }");
			/* Pop the matching '{' and make it point at this '}'. */
			cmd2 = stack;
			stack = cmd2->next;
			cmd2->next = cmd;
			/*FALLTHROUGH*/
		case EMPTY:		/* d D g G h H l n N p P q x = \0 */
			p++;
			EATSPACE();
			if (*p == ';') {
				p++;
				link = &cmd->next;
				goto semicolon;
			}
			if (*p)
				err(COMPILE,
"extra characters at the end of %c command", cmd->code);
			break;
		case TEXT:			/* a c i */
			p++;
			EATSPACE();
			if (*p != '\\')
				err(COMPILE, "command %c expects \\ followed by"
				    " text", cmd->code);
			p++;
			EATSPACE();
			if (*p)
				err(COMPILE, "extra characters after \\ at the"
				    " end of %c command", cmd->code);
			/* The text itself is read from the following lines. */
			cmd->t = compile_text();
			break;
		case COMMENT:			/* \0 # */
			break;
		case WFILE:			/* w */
			p++;
			EATSPACE();
			if (*p == '\0')
				err(COMPILE, "filename expected");
			/*
			 * With a NULL third argument duptoeol() also
			 * terminates the string at p in place, so p can be
			 * handed to open() below.
			 */
			cmd->t = duptoeol(p, "w command", NULL);
			if (aflag)
				cmd->u.fd = -1;
			else if ((cmd->u.fd = open(p,
			    O_WRONLY|O_APPEND|O_CREAT|O_TRUNC,
			    DEFFILEMODE)) == -1)
				err(FATAL, "%s: %s", p, strerror(errno));
			break;
		case RFILE:			/* r */
			p++;
			EATSPACE();
			cmd->t = duptoeol(p, "read command", NULL);
			break;
		case BRANCH:			/* b t */
			p++;
			EATSPACE();
			/* No label: cmd->t stays NULL (see fixuplabel()). */
			if (*p == '\0')
				cmd->t = NULL;
			else
				cmd->t = duptoeol(p, "branch", &p);
			if (*p == ';') {
				p++;
				goto semicolon;
			}
			break;
		case LABEL:			/* : */
			p++;
			EATSPACE();
			cmd->t = duptoeol(p, "label", &p);
			if (strlen(cmd->t) == 0)
				err(COMPILE, "empty label");
			enterlabel(cmd);
			if (*p == ';') {
				p++;
				goto semicolon;
			}
			break;
		case SUBST:			/* s */
			p++;
			if (*p == '\0' || *p == '\\')
				err(COMPILE, "substitute pattern can not be"
				    " delimited by newline or backslash");
			cmd->u.s = xmalloc(sizeof(struct s_subst));
			p = compile_re(p, &cmd->u.s->re);
			if (p == NULL)
				err(COMPILE, "unterminated substitute pattern");
			/*
			 * Step back onto the delimiter that closed the RE;
			 * compile_subst() expects to start on it.
			 */
			--p;
			p = compile_subst(p, cmd->u.s);
			p = compile_flags(p, cmd->u.s);
			EATSPACE();
			if (*p == ';') {
				p++;
				link = &cmd->next;
				goto semicolon;
			}
			break;
		case TR:			/* y */
			p++;
			p = compile_tr(p, (char **)&cmd->u.y);
			EATSPACE();
			if (*p == ';') {
				p++;
				link = &cmd->next;
				goto semicolon;
			}
			if (*p)
				err(COMPILE, "extra text at the end of a"
				    " transform command");
			break;
		}
	}
}
341 
/*
 * Get a delimited string.  P points to the delimiter of the string; d points
 * to a buffer area.  Newline and delimiter escapes are processed; other
 * escapes are copied through unchanged.  Bracket expressions are copied by
 * compile_ccl() so that a delimiter inside [...] does not end the string.
 *
 * Returns a pointer to the first character after the final delimiter or NULL
 * in the case of a non-terminated string.  The character array d is filled
 * with the processed string; it is NUL-terminated only when the closing
 * delimiter is found.
 */
static char *
compile_delimited(char *p, char *d)
{
	char c;

	c = *p++;
	if (c == '\0')
		return (NULL);
	else if (c == '\\')
		err(COMPILE, "\\ can not be used as a string delimiter");
	else if (c == '\n')
		err(COMPILE, "newline can not be used as a string delimiter");
	while (*p) {
		if (*p == '[') {
			/* compile_ccl() advances both p and d itself. */
			if ((d = compile_ccl(&p, d)) == NULL)
				err(COMPILE, "unbalanced brackets ([])");
			continue;
		} else if (*p == '\\' && p[1] == '[') {
			/* Keep the backslash; the '[' is copied below. */
			*d++ = *p++;
		} else if (*p == '\\' && p[1] == c) {
			/* Escaped delimiter: drop the backslash. */
			p++;
		} else if (*p == '\\' && p[1] == 'n') {
			/* "\n" becomes a literal newline. */
			*d++ = '\n';
			p += 2;
			continue;
		} else if (*p == '\\' && p[1] == '\\') {
			/* Keep both backslashes; the second is copied below. */
			*d++ = *p++;
		} else if (*p == c) {
			*d = '\0';
			return (p + 1);
		}
		/* Copy the current character (also reached from above). */
		*d++ = *p++;
	}
	return (NULL);
}
386 
387 
/*
 * compile_ccl: copy one bracket expression.
 *
 * *sp points at the opening '['; t is the output position.  The expression
 * is copied verbatim, including a leading '^' and a leading literal ']',
 * except that the two characters "\n" are stored as a single newline.
 * Nested [. .], [: :] and [= =] elements are copied up to their matching
 * close so that a ']' inside them does not end the expression.
 *
 * On success *sp is advanced past the closing ']' and the new output
 * position is returned.  If the input ends first, NULL is returned and *sp
 * is left unchanged.
 */
static char *
compile_ccl(char **sp, char *t)
{
	char *src = *sp;
	int kind, prev;

	*t++ = *src++;			/* the opening '[' */
	if (*src == '^')
		*t++ = *src++;
	if (*src == ']')		/* a leading ']' is a literal */
		*t++ = *src++;

	while (*src != '\0') {
		*t = *src;
		if (*src == ']')
			break;
		if (*src == '[' &&
		    (src[1] == '.' || src[1] == ':' || src[1] == '=')) {
			kind = src[1];
			*++t = *++src;
			t++;
			src++;
			/* Copy until ']' directly preceded by `kind'. */
			prev = *src;
			for (;;) {
				*t = *src;
				if (*src == ']' && prev == kind)
					break;
				prev = *src;
				if (prev == '\0')
					return (NULL);
				src++;
				t++;
			}
		} else if (*src == '\\' && src[1] == 'n') {
			*t = '\n';
			src++;
		}
		src++;
		t++;
	}

	if (*src != ']')
		return (NULL);
	*sp = src + 1;
	return (t + 1);
}
417 
418 /*
419  * Get a regular expression.  P points to the delimiter of the regular
420  * expression; repp points to the address of a regexp pointer.  Newline
421  * and delimiter escapes are processed; other escapes are ignored.
422  * Returns a pointer to the first character after the final delimiter
423  * or NULL in the case of a non terminated regular expression.  The regexp
424  * pointer is set to the compiled regular expression.
425  * Cflags are passed to regcomp.
426  */
427 static char *
428 compile_re(char *p, regex_t **repp)
429 {
430 	int eval;
431 	char re[_POSIX2_LINE_MAX + 1];
432 
433 	p = compile_delimited(p, re);
434 	if (p && strlen(re) == 0) {
435 		*repp = NULL;
436 		return (p);
437 	}
438 	*repp = xmalloc(sizeof(regex_t));
439 	if (p && (eval = regcomp(*repp, re, 0)) != 0)
440 		err(COMPILE, "RE error: %s", strregerror(eval, *repp));
441 	if (maxnsub < (*repp)->re_nsub)
442 		maxnsub = (*repp)->re_nsub;
443 	return (p);
444 }
445 
/*
 * Compile the replacement string of an s command and store a saved copy
 * of it in s->new.
 *
 * P points at the delimiter that precedes the replacement text.  The text
 * is copied up to the next unescaped delimiter.  "\1".."\9", "\&" and "\\"
 * keep their backslash in the copy; any other escaped character is stored
 * without it.  s->maxbref records the highest back-reference seen and
 * s->linenum the current script line number.  If the current buffer ends
 * before the delimiter, further input is read with cu_fgets().
 *
 * Returns a pointer to the first character after the closing delimiter, or
 * NULL if p points at an empty string.  Errors (undefined back-reference,
 * unescaped newline, end of script) go through err(COMPILE, ...).
 */
static char *
compile_subst(char *p, struct s_subst *s)
{
	static char lbuf[_POSIX2_LINE_MAX + 1];
	int asize, ref, size;
	char c, *text, *op, *sp;
	int sawesc = 0;

	c = *p++;			/* Terminator character */
	if (c == '\0')
		return (NULL);

	s->maxbref = 0;
	s->linenum = linenum;
	asize = 2 * _POSIX2_LINE_MAX + 1;
	text = xmalloc(asize);
	size = 0;
	do {
		/* op marks where this buffer's output starts; sp advances. */
		op = sp = text + size;
		for (; *p; p++) {
			if (*p == '\\' || sawesc) {
				/*
				 * If this is a continuation from the last
				 * buffer, we won't have a character to
				 * skip over.
				 */
				if (sawesc)
					sawesc = 0;
				else
					p++;

				if (*p == '\0') {
					/*
					 * This escaped character is continued
					 * in the next part of the line.  Note
					 * this fact, then cause the loop to
					 * exit w/ normal EOL case and reenter
					 * above with the new buffer.
					 */
					sawesc = 1;
					p--;
					continue;
				} else if (strchr("123456789", *p) != NULL) {
					/* Back-reference: keep the backslash. */
					*sp++ = '\\';
					ref = *p - '0';
					if (s->re != NULL &&
					    ref > s->re->re_nsub)
						err(COMPILE,
"\\%c not defined in the RE", *p);
					if (s->maxbref < ref)
						s->maxbref = ref;
				} else if (*p == '&' || *p == '\\')
					*sp++ = '\\';
			} else if (*p == c) {
				/* Closing delimiter: terminate and trim. */
				p++;
				*sp++ = '\0';
				size += sp - op;
				s->new = xrealloc(text, size);
				return (p);
			} else if (*p == '\n') {
				err(COMPILE,
"unescaped newline inside substitute pattern");
				/* NOTREACHED */
			}
			*sp++ = *p;
		}
		size += sp - op;
		/* Keep room for one more full input buffer. */
		if (asize - size < _POSIX2_LINE_MAX + 1) {
			asize *= 2;
			text = xrealloc(text, asize);
		}
	} while (cu_fgets(p = lbuf, sizeof(lbuf)));
	err(COMPILE, "unterminated substitute in regular expression");
	/* NOTREACHED */
}
526 
527 /*
528  * Compile the flags of the s command
529  */
530 static char *
531 compile_flags(char *p, struct s_subst *s)
532 {
533 	int gn;			/* True if we have seen g or n */
534 	long l;
535 	char wfile[_POSIX2_LINE_MAX + 1], *q;
536 
537 	s->n = 1;				/* Default */
538 	s->p = 0;
539 	s->wfile = NULL;
540 	s->wfd = -1;
541 	for (gn = 0;;) {
542 		EATSPACE();			/* EXTENSION */
543 		switch (*p) {
544 		case 'g':
545 			if (gn)
546 				err(COMPILE, "more than one number or 'g' in"
547 				    " substitute flags");
548 			gn = 1;
549 			s->n = 0;
550 			break;
551 		case '\0':
552 		case '\n':
553 		case ';':
554 			return (p);
555 		case 'p':
556 			s->p = 1;
557 			break;
558 		case '1': case '2': case '3':
559 		case '4': case '5': case '6':
560 		case '7': case '8': case '9':
561 			if (gn)
562 				err(COMPILE, "more than one number or 'g' in"
563 				    " substitute flags");
564 			gn = 1;
565 			l = strtol(p, &p, 10);
566 			if (l <= 0 || l >= INT_MAX)
567 				err(COMPILE,
568 				    "number in substitute flags out of range");
569 			s->n = (int)l;
570 			continue;
571 		case 'w':
572 			p++;
573 #ifdef HISTORIC_PRACTICE
574 			if (*p != ' ') {
575 				err(WARNING, "space missing before w wfile");
576 				return (p);
577 			}
578 #endif
579 			EATSPACE();
580 			q = wfile;
581 			while (*p) {
582 				if (*p == '\n')
583 					break;
584 				*q++ = *p++;
585 			}
586 			*q = '\0';
587 			if (q == wfile)
588 				err(COMPILE, "no wfile specified");
589 			s->wfile = strdup(wfile);
590 			if (!aflag && (s->wfd = open(wfile,
591 			    O_WRONLY|O_APPEND|O_CREAT|O_TRUNC,
592 			    DEFFILEMODE)) == -1)
593 				err(FATAL, "%s: %s", wfile, strerror(errno));
594 			return (p);
595 		default:
596 			err(COMPILE,
597 			    "bad flag in substitute command: '%c'", *p);
598 			break;
599 		}
600 		p++;
601 	}
602 }
603 
604 /*
605  * Compile a translation set of strings into a lookup table.
606  */
607 static char *
608 compile_tr(char *p, char **transtab)
609 {
610 	int i;
611 	char *lt, *op, *np;
612 	char old[_POSIX2_LINE_MAX + 1];
613 	char new[_POSIX2_LINE_MAX + 1];
614 
615 	if (*p == '\0' || *p == '\\')
616 		err(COMPILE,
617 "transform pattern can not be delimited by newline or backslash");
618 	p = compile_delimited(p, old);
619 	if (p == NULL) {
620 		err(COMPILE, "unterminated transform source string");
621 		return (NULL);
622 	}
623 	p = compile_delimited(--p, new);
624 	if (p == NULL) {
625 		err(COMPILE, "unterminated transform target string");
626 		return (NULL);
627 	}
628 	EATSPACE();
629 	if (strlen(new) != strlen(old)) {
630 		err(COMPILE, "transform strings are not the same length");
631 		return (NULL);
632 	}
633 	/* We assume characters are 8 bits */
634 	lt = xmalloc(UCHAR_MAX + 1);
635 	for (i = 0; i <= UCHAR_MAX; i++)
636 		lt[i] = (char)i;
637 	for (op = old, np = new; *op; op++, np++)
638 		lt[(u_char)*op] = *np;
639 	*transtab = lt;
640 	return (p);
641 }
642 
643 /*
644  * Compile the text following an a, c, or i command.
645  */
646 static char *
647 compile_text(void)
648 {
649 	int asize, esc_nl, size;
650 	char *text, *p, *op, *s;
651 	char lbuf[_POSIX2_LINE_MAX + 1];
652 
653 	asize = 2 * _POSIX2_LINE_MAX + 1;
654 	text = xmalloc(asize);
655 	size = 0;
656 	while (cu_fgets(lbuf, sizeof(lbuf))) {
657 		op = s = text + size;
658 		p = lbuf;
659 		EATSPACE();
660 		for (esc_nl = 0; *p != '\0'; p++) {
661 			if (*p == '\\' && p[1] != '\0' && *++p == '\n')
662 				esc_nl = 1;
663 			*s++ = *p;
664 		}
665 		size += s - op;
666 		if (!esc_nl) {
667 			*s = '\0';
668 			break;
669 		}
670 		if (asize - size < _POSIX2_LINE_MAX + 1) {
671 			asize *= 2;
672 			text = xmalloc(asize);
673 		}
674 	}
675 	text[size] = '\0';
676 	return (xrealloc(text, size + 1));
677 }
678 
679 /*
680  * Get an address and return a pointer to the first character after
681  * it.  Fill the structure pointed to according to the address.
682  */
683 static char *
684 compile_addr(char *p, struct s_addr *a)
685 {
686 	char *end;
687 
688 	switch (*p) {
689 	case '\\':				/* Context address */
690 		++p;
691 		/* FALLTHROUGH */
692 	case '/':				/* Context address */
693 		p = compile_re(p, &a->u.r);
694 		if (p == NULL)
695 			err(COMPILE, "unterminated regular expression");
696 		a->type = AT_RE;
697 		return (p);
698 
699 	case '$':				/* Last line */
700 		a->type = AT_LAST;
701 		return (p + 1);
702 						/* Line number */
703 	case '0': case '1': case '2': case '3': case '4':
704 	case '5': case '6': case '7': case '8': case '9':
705 		a->type = AT_LINE;
706 		a->u.l = strtoul(p, &end, 10);
707 		return (end);
708 	default:
709 		err(COMPILE, "expected context address");
710 		return (NULL);
711 	}
712 }
713 
714 /*
715  * duptoeol --
716  *	Return a copy of all the characters up to \n or \0.
717  */
718 static char *
719 duptoeol(char *s, char *ctype, char **semi)
720 {
721 	size_t len;
722 	int ws;
723 	char *start;
724 
725 	ws = 0;
726 	if (semi) {
727 		for (start = s; *s != '\0' && *s != '\n' && *s != ';'; ++s)
728 			ws = isspace(*s);
729 	} else {
730 		for (start = s; *s != '\0' && *s != '\n'; ++s)
731 			ws = isspace(*s);
732 		*s = '\0';
733 	}
734 	if (ws)
735 		err(WARNING, "whitespace after %s", ctype);
736 	len = s - start + 1;
737 	if (semi)
738 		*semi = s;
739 	s = xmalloc(len);
740 	strlcpy(s, start, len);
741 	return (s);
742 }
743 
744 /*
745  * Convert goto label names to addresses, and count a and r commands, in
746  * the given subset of the script.  Free the memory used by labels in b
747  * and t commands (but not by :).
748  *
749  * TODO: Remove } nodes
750  */
751 static void
752 fixuplabel(struct s_command *cp, struct s_command *end)
753 {
754 
755 	for (; cp != end; cp = cp->next)
756 		switch (cp->code) {
757 		case 'a':
758 		case 'r':
759 			appendnum++;
760 			break;
761 		case 'b':
762 		case 't':
763 			/* Resolve branch target. */
764 			if (cp->t == NULL) {
765 				cp->u.c = NULL;
766 				break;
767 			}
768 			if ((cp->u.c = findlabel(cp->t)) == NULL)
769 				err(COMPILE2, "undefined label '%s'", cp->t);
770 			free(cp->t);
771 			break;
772 		case '{':
773 			/* Do interior commands. */
774 			fixuplabel(cp->u.c, cp->next);
775 			break;
776 		}
777 }
778 
779 /*
780  * Associate the given command label for later lookup.
781  */
782 static void
783 enterlabel(struct s_command *cp)
784 {
785 	struct labhash **lhp, *lh;
786 	u_char *p;
787 	u_int h, c;
788 
789 	for (h = 0, p = (u_char *)cp->t; (c = *p) != 0; p++)
790 		h = (h << 5) + h + c;
791 	lhp = &labels[h & LHMASK];
792 	for (lh = *lhp; lh != NULL; lh = lh->lh_next)
793 		if (lh->lh_hash == h && strcmp(cp->t, lh->lh_cmd->t) == 0)
794 			err(COMPILE2, "duplicate label '%s'", cp->t);
795 	lh = xmalloc(sizeof *lh);
796 	lh->lh_next = *lhp;
797 	lh->lh_hash = h;
798 	lh->lh_cmd = cp;
799 	lh->lh_ref = 0;
800 	*lhp = lh;
801 }
802 
803 /*
804  * Find the label contained in the command l in the command linked
805  * list cp.  L is excluded from the search.  Return NULL if not found.
806  */
807 static struct s_command *
808 findlabel(char *name)
809 {
810 	struct labhash *lh;
811 	u_char *p;
812 	u_int h, c;
813 
814 	for (h = 0, p = (u_char *)name; (c = *p) != 0; p++)
815 		h = (h << 5) + h + c;
816 	for (lh = labels[h & LHMASK]; lh != NULL; lh = lh->lh_next) {
817 		if (lh->lh_hash == h && strcmp(name, lh->lh_cmd->t) == 0) {
818 			lh->lh_ref = 1;
819 			return (lh->lh_cmd);
820 		}
821 	}
822 	return (NULL);
823 }
824 
825 /*
826  * Warn about any unused labels.  As a side effect, release the label hash
827  * table space.
828  */
829 static void
830 uselabel(void)
831 {
832 	struct labhash *lh, *next;
833 	int i;
834 
835 	for (i = 0; i < LHSZ; i++) {
836 		for (lh = labels[i]; lh != NULL; lh = next) {
837 			next = lh->lh_next;
838 			if (!lh->lh_ref)
839 				err(WARNING, "unused label '%s'",
840 				    lh->lh_cmd->t);
841 			free(lh);
842 		}
843 	}
844 }
845