xref: /minix3/usr.bin/tr/str.c (revision 84d9c625bfea59e274550651111ae9edfdc40fbd)
1*84d9c625SLionel Sambuc /*	$NetBSD: str.c,v 1.29 2013/08/11 01:54:35 dholland Exp $	*/
2d5c6c6a5SLionel Sambuc 
3d5c6c6a5SLionel Sambuc /*-
4d5c6c6a5SLionel Sambuc  * Copyright (c) 1991, 1993
5d5c6c6a5SLionel Sambuc  *	The Regents of the University of California.  All rights reserved.
6d5c6c6a5SLionel Sambuc  *
7d5c6c6a5SLionel Sambuc  * Redistribution and use in source and binary forms, with or without
8d5c6c6a5SLionel Sambuc  * modification, are permitted provided that the following conditions
9d5c6c6a5SLionel Sambuc  * are met:
10d5c6c6a5SLionel Sambuc  * 1. Redistributions of source code must retain the above copyright
11d5c6c6a5SLionel Sambuc  *    notice, this list of conditions and the following disclaimer.
12d5c6c6a5SLionel Sambuc  * 2. Redistributions in binary form must reproduce the above copyright
13d5c6c6a5SLionel Sambuc  *    notice, this list of conditions and the following disclaimer in the
14d5c6c6a5SLionel Sambuc  *    documentation and/or other materials provided with the distribution.
15d5c6c6a5SLionel Sambuc  * 3. Neither the name of the University nor the names of its contributors
16d5c6c6a5SLionel Sambuc  *    may be used to endorse or promote products derived from this software
17d5c6c6a5SLionel Sambuc  *    without specific prior written permission.
18d5c6c6a5SLionel Sambuc  *
19d5c6c6a5SLionel Sambuc  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
20d5c6c6a5SLionel Sambuc  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21d5c6c6a5SLionel Sambuc  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22d5c6c6a5SLionel Sambuc  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
23d5c6c6a5SLionel Sambuc  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24d5c6c6a5SLionel Sambuc  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
25d5c6c6a5SLionel Sambuc  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26d5c6c6a5SLionel Sambuc  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27d5c6c6a5SLionel Sambuc  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28d5c6c6a5SLionel Sambuc  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29d5c6c6a5SLionel Sambuc  * SUCH DAMAGE.
30d5c6c6a5SLionel Sambuc  */
31d5c6c6a5SLionel Sambuc 
32d5c6c6a5SLionel Sambuc #include <sys/cdefs.h>
33d5c6c6a5SLionel Sambuc #ifndef lint
34d5c6c6a5SLionel Sambuc #if 0
35d5c6c6a5SLionel Sambuc static char sccsid[] = "@(#)str.c	8.2 (Berkeley) 4/28/95";
36d5c6c6a5SLionel Sambuc #endif
37*84d9c625SLionel Sambuc __RCSID("$NetBSD: str.c,v 1.29 2013/08/11 01:54:35 dholland Exp $");
38d5c6c6a5SLionel Sambuc #endif /* not lint */
39d5c6c6a5SLionel Sambuc 
40d5c6c6a5SLionel Sambuc #include <sys/types.h>
41d5c6c6a5SLionel Sambuc 
#include <assert.h>
#include <ctype.h>
#include <err.h>
#include <errno.h>
#include <limits.h>
#include <stddef.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
50d5c6c6a5SLionel Sambuc 
51d5c6c6a5SLionel Sambuc #include "extern.h"
52d5c6c6a5SLionel Sambuc 
/*
 * Parsing state for one operand string.  The enumerator order is kept
 * as-is so the numeric values of the states do not change.
 */
struct str {
	enum {
		STRING1,
		STRING2
	} which;		/* operand this state belongs to */
	enum {
		EOS,
		INFINITE,
		NORMAL,
		RANGE,
		SEQUENCE,
		SET
	} state;		/* what next() is currently expanding */
	int cnt;		/* remaining count, or index into set */
	int lastch;		/* most recent character produced */
	int equiv[2];		/* storage for an equivalence "set" */
	int *set;		/* OOBCH-terminated list being walked */
	const char *str;	/* unparsed remainder of the operand */
};
62*84d9c625SLionel Sambuc 
63d5c6c6a5SLionel Sambuc static int backslash(STR *);
64d5c6c6a5SLionel Sambuc static int bracket(STR *);
65d5c6c6a5SLionel Sambuc static int c_class(const void *, const void *);
66*84d9c625SLionel Sambuc static int *genclass(const char *, size_t);
67d5c6c6a5SLionel Sambuc static void genequiv(STR *);
68d5c6c6a5SLionel Sambuc static int genrange(STR *);
69d5c6c6a5SLionel Sambuc static void genseq(STR *);
70d5c6c6a5SLionel Sambuc 
71*84d9c625SLionel Sambuc STR *
str_create(int whichstring,const char * txt)72*84d9c625SLionel Sambuc str_create(int whichstring, const char *txt)
73*84d9c625SLionel Sambuc {
74*84d9c625SLionel Sambuc 	STR *s;
75*84d9c625SLionel Sambuc 
76*84d9c625SLionel Sambuc 	s = malloc(sizeof(*s));
77*84d9c625SLionel Sambuc 	if (s == NULL) {
78*84d9c625SLionel Sambuc 		err(1, "Out of memory");
79*84d9c625SLionel Sambuc 	}
80*84d9c625SLionel Sambuc 
81*84d9c625SLionel Sambuc 	s->which = whichstring == 2 ? STRING2 : STRING1;
82*84d9c625SLionel Sambuc 	s->state = NORMAL;
83*84d9c625SLionel Sambuc 	s->cnt = 0;
84*84d9c625SLionel Sambuc 	s->lastch = OOBCH;
85*84d9c625SLionel Sambuc 	s->equiv[0] = 0;
86*84d9c625SLionel Sambuc 	s->equiv[1] = OOBCH;
87*84d9c625SLionel Sambuc 	s->set = NULL;
88*84d9c625SLionel Sambuc 	s->str = txt;
89*84d9c625SLionel Sambuc 
90*84d9c625SLionel Sambuc 	return s;
91*84d9c625SLionel Sambuc }
92*84d9c625SLionel Sambuc 
93*84d9c625SLionel Sambuc void
str_destroy(STR * s)94*84d9c625SLionel Sambuc str_destroy(STR *s)
95*84d9c625SLionel Sambuc {
96*84d9c625SLionel Sambuc 	if (s->set != NULL && s->set != s->equiv) {
97*84d9c625SLionel Sambuc 		free(s->set);
98*84d9c625SLionel Sambuc 	}
99*84d9c625SLionel Sambuc 	free(s);
100*84d9c625SLionel Sambuc }
101*84d9c625SLionel Sambuc 
102d5c6c6a5SLionel Sambuc int
next(STR * s,int * ret)103*84d9c625SLionel Sambuc next(STR *s, int *ret)
104d5c6c6a5SLionel Sambuc {
105d5c6c6a5SLionel Sambuc 	int ch;
106d5c6c6a5SLionel Sambuc 
107d5c6c6a5SLionel Sambuc 	switch (s->state) {
108d5c6c6a5SLionel Sambuc 	case EOS:
109*84d9c625SLionel Sambuc 		*ret = s->lastch;
110d5c6c6a5SLionel Sambuc 		return 0;
111d5c6c6a5SLionel Sambuc 	case INFINITE:
112*84d9c625SLionel Sambuc 		*ret = s->lastch;
113d5c6c6a5SLionel Sambuc 		return 1;
114d5c6c6a5SLionel Sambuc 	case NORMAL:
115*84d9c625SLionel Sambuc 		ch = (unsigned char)s->str[0];
116*84d9c625SLionel Sambuc 		switch (ch) {
117d5c6c6a5SLionel Sambuc 		case '\0':
118d5c6c6a5SLionel Sambuc 			s->state = EOS;
119*84d9c625SLionel Sambuc 			*ret = s->lastch;
120d5c6c6a5SLionel Sambuc 			return 0;
121d5c6c6a5SLionel Sambuc 		case '\\':
122d5c6c6a5SLionel Sambuc 			s->lastch = backslash(s);
123d5c6c6a5SLionel Sambuc 			break;
124d5c6c6a5SLionel Sambuc 		case '[':
125*84d9c625SLionel Sambuc 			if (bracket(s)) {
126*84d9c625SLionel Sambuc 				return next(s, ret);
127*84d9c625SLionel Sambuc 			}
128d5c6c6a5SLionel Sambuc 			/* FALLTHROUGH */
129d5c6c6a5SLionel Sambuc 		default:
130d5c6c6a5SLionel Sambuc 			++s->str;
131d5c6c6a5SLionel Sambuc 			s->lastch = ch;
132d5c6c6a5SLionel Sambuc 			break;
133d5c6c6a5SLionel Sambuc 		}
134d5c6c6a5SLionel Sambuc 
135d5c6c6a5SLionel Sambuc 		/* We can start a range at any time. */
136*84d9c625SLionel Sambuc 		if (s->str[0] == '-' && genrange(s)) {
137*84d9c625SLionel Sambuc 			return next(s, ret);
138*84d9c625SLionel Sambuc 		}
139*84d9c625SLionel Sambuc 		*ret = s->lastch;
140d5c6c6a5SLionel Sambuc 		return 1;
141d5c6c6a5SLionel Sambuc 	case RANGE:
142*84d9c625SLionel Sambuc 		if (s->cnt == 0) {
143d5c6c6a5SLionel Sambuc 			s->state = NORMAL;
144*84d9c625SLionel Sambuc 			return next(s, ret);
145d5c6c6a5SLionel Sambuc 		}
146*84d9c625SLionel Sambuc 		s->cnt--;
147d5c6c6a5SLionel Sambuc 		++s->lastch;
148*84d9c625SLionel Sambuc 		*ret = s->lastch;
149d5c6c6a5SLionel Sambuc 		return 1;
150d5c6c6a5SLionel Sambuc 	case SEQUENCE:
151*84d9c625SLionel Sambuc 		if (s->cnt == 0) {
152d5c6c6a5SLionel Sambuc 			s->state = NORMAL;
153*84d9c625SLionel Sambuc 			return next(s, ret);
154d5c6c6a5SLionel Sambuc 		}
155*84d9c625SLionel Sambuc 		s->cnt--;
156*84d9c625SLionel Sambuc 		*ret = s->lastch;
157d5c6c6a5SLionel Sambuc 		return 1;
158d5c6c6a5SLionel Sambuc 	case SET:
159*84d9c625SLionel Sambuc 		s->lastch = s->set[s->cnt++];
160*84d9c625SLionel Sambuc 		if (s->lastch == OOBCH) {
161d5c6c6a5SLionel Sambuc 			s->state = NORMAL;
162*84d9c625SLionel Sambuc 			if (s->set != s->equiv) {
163*84d9c625SLionel Sambuc 				free(s->set);
164d5c6c6a5SLionel Sambuc 			}
165*84d9c625SLionel Sambuc 			s->set = NULL;
166*84d9c625SLionel Sambuc 			return next(s, ret);
167*84d9c625SLionel Sambuc 		}
168*84d9c625SLionel Sambuc 		*ret = s->lastch;
169d5c6c6a5SLionel Sambuc 		return 1;
170d5c6c6a5SLionel Sambuc 	}
171d5c6c6a5SLionel Sambuc 	/* NOTREACHED */
172*84d9c625SLionel Sambuc 	assert(0);
173*84d9c625SLionel Sambuc 	*ret = s->lastch;
174d5c6c6a5SLionel Sambuc 	return 0;
175d5c6c6a5SLionel Sambuc }
176d5c6c6a5SLionel Sambuc 
177d5c6c6a5SLionel Sambuc static int
bracket(STR * s)178d5c6c6a5SLionel Sambuc bracket(STR *s)
179d5c6c6a5SLionel Sambuc {
180*84d9c625SLionel Sambuc 	const char *p;
181*84d9c625SLionel Sambuc 	int *q;
182d5c6c6a5SLionel Sambuc 
183d5c6c6a5SLionel Sambuc 	switch (s->str[1]) {
184d5c6c6a5SLionel Sambuc 	case ':':				/* "[:class:]" */
185d5c6c6a5SLionel Sambuc 		if ((p = strstr(s->str + 2, ":]")) == NULL)
186d5c6c6a5SLionel Sambuc 			return 0;
187d5c6c6a5SLionel Sambuc 		s->str += 2;
188*84d9c625SLionel Sambuc 		q = genclass(s->str, p - s->str);
189*84d9c625SLionel Sambuc 		s->state = SET;
190*84d9c625SLionel Sambuc 		s->set = q;
191*84d9c625SLionel Sambuc 		s->cnt = 0;
192d5c6c6a5SLionel Sambuc 		s->str = p + 2;
193d5c6c6a5SLionel Sambuc 		return 1;
194d5c6c6a5SLionel Sambuc 	case '=':				/* "[=equiv=]" */
195d5c6c6a5SLionel Sambuc 		if ((p = strstr(s->str + 2, "=]")) == NULL)
196d5c6c6a5SLionel Sambuc 			return 0;
197d5c6c6a5SLionel Sambuc 		s->str += 2;
198d5c6c6a5SLionel Sambuc 		genequiv(s);
199*84d9c625SLionel Sambuc 		s->str = p + 2;
200d5c6c6a5SLionel Sambuc 		return 1;
201d5c6c6a5SLionel Sambuc 	default:				/* "[\###*n]" or "[#*n]" */
202d5c6c6a5SLionel Sambuc 		if ((p = strpbrk(s->str + 2, "*]")) == NULL)
203d5c6c6a5SLionel Sambuc 			return 0;
204d5c6c6a5SLionel Sambuc 		if (p[0] != '*' || strchr(p, ']') == NULL)
205d5c6c6a5SLionel Sambuc 			return 0;
206d5c6c6a5SLionel Sambuc 		s->str += 1;
207d5c6c6a5SLionel Sambuc 		genseq(s);
208d5c6c6a5SLionel Sambuc 		return 1;
209d5c6c6a5SLionel Sambuc 	}
210d5c6c6a5SLionel Sambuc 	/* NOTREACHED */
211d5c6c6a5SLionel Sambuc }
212d5c6c6a5SLionel Sambuc 
/* One named character class and the <ctype.h> predicate that tests it. */
typedef struct {
	const char *name;
	int (*func)(int);
} CLASS;

/*
 * Known character classes.  genclass() looks names up with bsearch(),
 * so the entries must stay sorted by name.
 */
static const CLASS classes[] = {
	{ .name = "alnum",  .func = isalnum  },
	{ .name = "alpha",  .func = isalpha  },
	{ .name = "blank",  .func = isblank  },
	{ .name = "cntrl",  .func = iscntrl  },
	{ .name = "digit",  .func = isdigit  },
	{ .name = "graph",  .func = isgraph  },
	{ .name = "lower",  .func = islower  },
	{ .name = "print",  .func = isprint  },
	{ .name = "punct",  .func = ispunct  },
	{ .name = "space",  .func = isspace  },
	{ .name = "upper",  .func = isupper  },
	{ .name = "xdigit", .func = isxdigit },
};
232d5c6c6a5SLionel Sambuc 
/*
 * Lookup key for the class table: a class name given as pointer plus
 * length, because the name inside the operand is not NUL-terminated.
 */
typedef struct classkey {
	const char *name;	/* first byte of the candidate name */
	size_t len;		/* number of bytes that belong to it */
} CLASSKEY;
237*84d9c625SLionel Sambuc 
238*84d9c625SLionel Sambuc static int *
genclass(const char * class,size_t len)239*84d9c625SLionel Sambuc genclass(const char *class, size_t len)
240d5c6c6a5SLionel Sambuc {
241*84d9c625SLionel Sambuc 	int ch;
242d5c6c6a5SLionel Sambuc 	const CLASS *cp;
243*84d9c625SLionel Sambuc 	CLASSKEY key;
244d5c6c6a5SLionel Sambuc 	int *p;
245*84d9c625SLionel Sambuc 	unsigned pos, num;
246d5c6c6a5SLionel Sambuc 
247*84d9c625SLionel Sambuc 	/* Find the class */
248*84d9c625SLionel Sambuc 	key.name = class;
249*84d9c625SLionel Sambuc 	key.len = len;
250*84d9c625SLionel Sambuc 	cp = bsearch(&key, classes, __arraycount(classes), sizeof(classes[0]),
251*84d9c625SLionel Sambuc 		     c_class);
252*84d9c625SLionel Sambuc 	if (cp == NULL) {
253*84d9c625SLionel Sambuc 		errx(1, "unknown class %.*s", (int)len, class);
254*84d9c625SLionel Sambuc 	}
255d5c6c6a5SLionel Sambuc 
256*84d9c625SLionel Sambuc 	/*
257*84d9c625SLionel Sambuc 	 * Figure out what characters are in the class
258*84d9c625SLionel Sambuc 	 */
259*84d9c625SLionel Sambuc 
260*84d9c625SLionel Sambuc 	num = NCHARS + 1;
261*84d9c625SLionel Sambuc 	p = malloc(num * sizeof(*p));
262*84d9c625SLionel Sambuc 	if (p == NULL) {
263d5c6c6a5SLionel Sambuc 		err(1, "malloc");
264*84d9c625SLionel Sambuc 	}
265d5c6c6a5SLionel Sambuc 
266*84d9c625SLionel Sambuc 	pos = 0;
267*84d9c625SLionel Sambuc 	for (ch = 0; ch < NCHARS; ch++) {
268*84d9c625SLionel Sambuc 		if (cp->func(ch)) {
269*84d9c625SLionel Sambuc 			p[pos++] = ch;
270*84d9c625SLionel Sambuc 		}
271*84d9c625SLionel Sambuc 	}
272d5c6c6a5SLionel Sambuc 
273*84d9c625SLionel Sambuc 	p[pos++] = OOBCH;
274*84d9c625SLionel Sambuc 	for (; pos < num; pos++) {
275*84d9c625SLionel Sambuc 		p[pos] = 0;
276*84d9c625SLionel Sambuc 	}
277*84d9c625SLionel Sambuc 
278*84d9c625SLionel Sambuc 	return p;
279d5c6c6a5SLionel Sambuc }
280d5c6c6a5SLionel Sambuc 
281d5c6c6a5SLionel Sambuc static int
c_class(const void * av,const void * bv)282*84d9c625SLionel Sambuc c_class(const void *av, const void *bv)
283d5c6c6a5SLionel Sambuc {
284*84d9c625SLionel Sambuc 	const CLASSKEY *a = av;
285*84d9c625SLionel Sambuc 	const CLASS *b = bv;
286*84d9c625SLionel Sambuc 	size_t blen;
287*84d9c625SLionel Sambuc 	int r;
288*84d9c625SLionel Sambuc 
289*84d9c625SLionel Sambuc 	blen = strlen(b->name);
290*84d9c625SLionel Sambuc 	r = strncmp(a->name, b->name, a->len);
291*84d9c625SLionel Sambuc 	if (r != 0) {
292*84d9c625SLionel Sambuc 		return r;
293*84d9c625SLionel Sambuc 	}
294*84d9c625SLionel Sambuc 	if (a->len < blen) {
295*84d9c625SLionel Sambuc 		/* someone gave us a prefix of the right name */
296*84d9c625SLionel Sambuc 		return -1;
297*84d9c625SLionel Sambuc 	}
298*84d9c625SLionel Sambuc 	assert(a-> len == blen);
299*84d9c625SLionel Sambuc 	return 0;
300d5c6c6a5SLionel Sambuc }
301d5c6c6a5SLionel Sambuc 
302d5c6c6a5SLionel Sambuc /*
303d5c6c6a5SLionel Sambuc  * English doesn't have any equivalence classes, so for now
304d5c6c6a5SLionel Sambuc  * we just syntax check and grab the character.
305d5c6c6a5SLionel Sambuc  */
306d5c6c6a5SLionel Sambuc static void
genequiv(STR * s)307d5c6c6a5SLionel Sambuc genequiv(STR *s)
308d5c6c6a5SLionel Sambuc {
309*84d9c625SLionel Sambuc 	int ch;
310*84d9c625SLionel Sambuc 
311*84d9c625SLionel Sambuc 	ch = (unsigned char)s->str[0];
312*84d9c625SLionel Sambuc 	if (ch == '\\') {
313d5c6c6a5SLionel Sambuc 		s->equiv[0] = backslash(s);
314d5c6c6a5SLionel Sambuc 	} else {
315*84d9c625SLionel Sambuc 		s->equiv[0] = ch;
316*84d9c625SLionel Sambuc 		s->str++;
317d5c6c6a5SLionel Sambuc 	}
318*84d9c625SLionel Sambuc 	if (s->str[0] != '=') {
319*84d9c625SLionel Sambuc 		errx(1, "Misplaced equivalence equals sign");
320*84d9c625SLionel Sambuc 	}
321*84d9c625SLionel Sambuc 	s->str++;
322*84d9c625SLionel Sambuc 	if (s->str[0] != ']') {
323*84d9c625SLionel Sambuc 		errx(1, "Misplaced equivalence right bracket");
324*84d9c625SLionel Sambuc 	}
325*84d9c625SLionel Sambuc 	s->str++;
326*84d9c625SLionel Sambuc 
327d5c6c6a5SLionel Sambuc 	s->cnt = 0;
328d5c6c6a5SLionel Sambuc 	s->state = SET;
329d5c6c6a5SLionel Sambuc 	s->set = s->equiv;
330d5c6c6a5SLionel Sambuc }
331d5c6c6a5SLionel Sambuc 
332d5c6c6a5SLionel Sambuc static int
genrange(STR * s)333d5c6c6a5SLionel Sambuc genrange(STR *s)
334d5c6c6a5SLionel Sambuc {
335d5c6c6a5SLionel Sambuc 	int stopval;
336*84d9c625SLionel Sambuc 	const char *savestart;
337d5c6c6a5SLionel Sambuc 
338*84d9c625SLionel Sambuc 	savestart = s->str++;
339*84d9c625SLionel Sambuc 	stopval = s->str[0] == '\\' ? backslash(s) : (unsigned char)*s->str++;
340*84d9c625SLionel Sambuc 	if (stopval < (unsigned char)s->lastch) {
341d5c6c6a5SLionel Sambuc 		s->str = savestart;
342d5c6c6a5SLionel Sambuc 		return 0;
343d5c6c6a5SLionel Sambuc 	}
344d5c6c6a5SLionel Sambuc 	s->cnt = stopval - s->lastch + 1;
345d5c6c6a5SLionel Sambuc 	s->state = RANGE;
346d5c6c6a5SLionel Sambuc 	--s->lastch;
347d5c6c6a5SLionel Sambuc 	return 1;
348d5c6c6a5SLionel Sambuc }
349d5c6c6a5SLionel Sambuc 
350d5c6c6a5SLionel Sambuc static void
genseq(STR * s)351d5c6c6a5SLionel Sambuc genseq(STR *s)
352d5c6c6a5SLionel Sambuc {
353d5c6c6a5SLionel Sambuc 	char *ep;
354d5c6c6a5SLionel Sambuc 
355*84d9c625SLionel Sambuc 	if (s->which == STRING1) {
356*84d9c625SLionel Sambuc 		errx(1, "Sequences only valid in string2");
357*84d9c625SLionel Sambuc 	}
358d5c6c6a5SLionel Sambuc 
359*84d9c625SLionel Sambuc 	if (*s->str == '\\') {
360d5c6c6a5SLionel Sambuc 		s->lastch = backslash(s);
361*84d9c625SLionel Sambuc 	} else {
362*84d9c625SLionel Sambuc 		s->lastch = (unsigned char)*s->str++;
363*84d9c625SLionel Sambuc 	}
364*84d9c625SLionel Sambuc 	if (*s->str != '*') {
365*84d9c625SLionel Sambuc 		errx(1, "Misplaced sequence asterisk");
366*84d9c625SLionel Sambuc 	}
367d5c6c6a5SLionel Sambuc 
368*84d9c625SLionel Sambuc 	s->str++;
369*84d9c625SLionel Sambuc 	switch (s->str[0]) {
370d5c6c6a5SLionel Sambuc 	case '\\':
371d5c6c6a5SLionel Sambuc 		s->cnt = backslash(s);
372d5c6c6a5SLionel Sambuc 		break;
373d5c6c6a5SLionel Sambuc 	case ']':
374d5c6c6a5SLionel Sambuc 		s->cnt = 0;
375d5c6c6a5SLionel Sambuc 		++s->str;
376d5c6c6a5SLionel Sambuc 		break;
377d5c6c6a5SLionel Sambuc 	default:
378*84d9c625SLionel Sambuc 		if (isdigit((unsigned char)s->str[0])) {
379d5c6c6a5SLionel Sambuc 			s->cnt = strtol(s->str, &ep, 0);
380d5c6c6a5SLionel Sambuc 			if (*ep == ']') {
381d5c6c6a5SLionel Sambuc 				s->str = ep + 1;
382d5c6c6a5SLionel Sambuc 				break;
383d5c6c6a5SLionel Sambuc 			}
384d5c6c6a5SLionel Sambuc 		}
385d5c6c6a5SLionel Sambuc 		errx(1, "illegal sequence count");
386d5c6c6a5SLionel Sambuc 		/* NOTREACHED */
387d5c6c6a5SLionel Sambuc 	}
388d5c6c6a5SLionel Sambuc 
389d5c6c6a5SLionel Sambuc 	s->state = s->cnt ? SEQUENCE : INFINITE;
390d5c6c6a5SLionel Sambuc }
391d5c6c6a5SLionel Sambuc 
392d5c6c6a5SLionel Sambuc /*
393d5c6c6a5SLionel Sambuc  * Translate \??? into a character.  Up to 3 octal digits, if no digits either
394d5c6c6a5SLionel Sambuc  * an escape code or a literal character.
395d5c6c6a5SLionel Sambuc  */
396d5c6c6a5SLionel Sambuc static int
backslash(STR * s)397d5c6c6a5SLionel Sambuc backslash(STR *s)
398d5c6c6a5SLionel Sambuc {
399d5c6c6a5SLionel Sambuc 	int ch, cnt, val;
400d5c6c6a5SLionel Sambuc 
401*84d9c625SLionel Sambuc 	cnt = val = 0;
402*84d9c625SLionel Sambuc 	for (;;) {
403*84d9c625SLionel Sambuc 		/* Consume the character we're already on. */
404*84d9c625SLionel Sambuc 		s->str++;
405*84d9c625SLionel Sambuc 
406*84d9c625SLionel Sambuc 		/* Look at the next character. */
407*84d9c625SLionel Sambuc 		ch = (unsigned char)s->str[0];
408*84d9c625SLionel Sambuc 		if (!isascii(ch) || !isdigit(ch)) {
409d5c6c6a5SLionel Sambuc 			break;
410*84d9c625SLionel Sambuc 		}
411d5c6c6a5SLionel Sambuc 		val = val * 8 + ch - '0';
412d5c6c6a5SLionel Sambuc 		if (++cnt == 3) {
413*84d9c625SLionel Sambuc 			/* Enough digits; consume this one and stop */
414d5c6c6a5SLionel Sambuc 			++s->str;
415d5c6c6a5SLionel Sambuc 			break;
416d5c6c6a5SLionel Sambuc 		}
417d5c6c6a5SLionel Sambuc 	}
418*84d9c625SLionel Sambuc 	if (cnt) {
419*84d9c625SLionel Sambuc 		/* We saw digits, so return their value */
420d5c6c6a5SLionel Sambuc 		return val;
421*84d9c625SLionel Sambuc 	}
422*84d9c625SLionel Sambuc 	if (ch == '\0') {
423*84d9c625SLionel Sambuc 		/* \<end> -> \ */
424*84d9c625SLionel Sambuc 		s->state = EOS;
425*84d9c625SLionel Sambuc 		return '\\';
426*84d9c625SLionel Sambuc 	}
427*84d9c625SLionel Sambuc 
428*84d9c625SLionel Sambuc 	/* Consume the escaped character */
429*84d9c625SLionel Sambuc 	s->str++;
430*84d9c625SLionel Sambuc 
431d5c6c6a5SLionel Sambuc 	switch (ch) {
432d5c6c6a5SLionel Sambuc 	case 'a':			/* escape characters */
433d5c6c6a5SLionel Sambuc 		return '\7';
434d5c6c6a5SLionel Sambuc 	case 'b':
435d5c6c6a5SLionel Sambuc 		return '\b';
436d5c6c6a5SLionel Sambuc 	case 'e':
437d5c6c6a5SLionel Sambuc 		return '\033';
438d5c6c6a5SLionel Sambuc 	case 'f':
439d5c6c6a5SLionel Sambuc 		return '\f';
440d5c6c6a5SLionel Sambuc 	case 'n':
441d5c6c6a5SLionel Sambuc 		return '\n';
442d5c6c6a5SLionel Sambuc 	case 'r':
443d5c6c6a5SLionel Sambuc 		return '\r';
444d5c6c6a5SLionel Sambuc 	case 't':
445d5c6c6a5SLionel Sambuc 		return '\t';
446d5c6c6a5SLionel Sambuc 	case 'v':
447d5c6c6a5SLionel Sambuc 		return '\13';
448*84d9c625SLionel Sambuc 	default:			/* \q -> q */
449d5c6c6a5SLionel Sambuc 		return ch;
450d5c6c6a5SLionel Sambuc 	}
451d5c6c6a5SLionel Sambuc }
452