xref: /csrg-svn/usr.bin/tr/str.c (revision 69028)
151396Sbostic /*-
262321Sbostic  * Copyright (c) 1991, 1993
362321Sbostic  *	The Regents of the University of California.  All rights reserved.
451396Sbostic  *
551396Sbostic  * %sccs.include.redist.c%
651396Sbostic  */
751396Sbostic 
851396Sbostic #ifndef lint
9*69028Sbostic static char sccsid[] = "@(#)str.c	8.2 (Berkeley) 04/28/95";
1051396Sbostic #endif /* not lint */
1151396Sbostic 
1251396Sbostic #include <sys/cdefs.h>
1351396Sbostic #include <sys/types.h>
1458372Sbostic 
1551396Sbostic #include <errno.h>
1658372Sbostic #include <stddef.h>
1751396Sbostic #include <stdio.h>
1851396Sbostic #include <stdlib.h>
1951396Sbostic #include <string.h>
2058372Sbostic 
2151396Sbostic #include "extern.h"
2251396Sbostic 
2351396Sbostic static int	backslash __P((STR *));
2451396Sbostic static int	bracket __P((STR *));
2551396Sbostic static int	c_class __P((const void *, const void *));
2651396Sbostic static void	genclass __P((STR *));
2751396Sbostic static void	genequiv __P((STR *));
2851396Sbostic static int	genrange __P((STR *));
2951396Sbostic static void	genseq __P((STR *));
3051396Sbostic 
3151396Sbostic int
next(s)3251396Sbostic next(s)
3351396Sbostic 	register STR *s;
3451396Sbostic {
3551396Sbostic 	register int ch;
3651396Sbostic 
3751396Sbostic 	switch (s->state) {
3851396Sbostic 	case EOS:
3951396Sbostic 		return (0);
4051396Sbostic 	case INFINITE:
4151396Sbostic 		return (1);
4251396Sbostic 	case NORMAL:
4351399Sbostic 		switch (ch = *s->str) {
4451396Sbostic 		case '\0':
4551396Sbostic 			s->state = EOS;
4651396Sbostic 			return (0);
4751396Sbostic 		case '\\':
4851396Sbostic 			s->lastch = backslash(s);
4951396Sbostic 			break;
5051396Sbostic 		case '[':
5151396Sbostic 			if (bracket(s))
5251396Sbostic 				return (next(s));
5351396Sbostic 			/* FALLTHROUGH */
5451396Sbostic 		default:
5551399Sbostic 			++s->str;
5651396Sbostic 			s->lastch = ch;
5751396Sbostic 			break;
5851396Sbostic 		}
5951396Sbostic 
6051396Sbostic 		/* We can start a range at any time. */
6151396Sbostic 		if (s->str[0] == '-' && genrange(s))
6251396Sbostic 			return (next(s));
6351396Sbostic 		return (1);
6451396Sbostic 	case RANGE:
6551396Sbostic 		if (s->cnt-- == 0) {
6651396Sbostic 			s->state = NORMAL;
6751396Sbostic 			return (next(s));
6851396Sbostic 		}
6951396Sbostic 		++s->lastch;
7051396Sbostic 		return (1);
7151396Sbostic 	case SEQUENCE:
7251396Sbostic 		if (s->cnt-- == 0) {
7351396Sbostic 			s->state = NORMAL;
7451396Sbostic 			return (next(s));
7551396Sbostic 		}
7651396Sbostic 		return (1);
7751396Sbostic 	case SET:
7851396Sbostic 		if ((s->lastch = s->set[s->cnt++]) == OOBCH) {
7951396Sbostic 			s->state = NORMAL;
8051396Sbostic 			return (next(s));
8151396Sbostic 		}
8251396Sbostic 		return (1);
8351396Sbostic 	}
8451396Sbostic 	/* NOTREACHED */
8551396Sbostic }
8651396Sbostic 
8751396Sbostic static int
bracket(s)8851396Sbostic bracket(s)
8951396Sbostic 	register STR *s;
9051396Sbostic {
9151396Sbostic 	register char *p;
9251396Sbostic 
9352217Sbostic 	switch (s->str[1]) {
9451396Sbostic 	case ':':				/* "[:class:]" */
9552217Sbostic 		if ((p = strstr(s->str + 2, ":]")) == NULL)
9651396Sbostic 			return (0);
9751396Sbostic 		*p = '\0';
9852217Sbostic 		s->str += 2;
9951396Sbostic 		genclass(s);
10051396Sbostic 		s->str = p + 2;
10151396Sbostic 		return (1);
10251396Sbostic 	case '=':				/* "[=equiv=]" */
10352217Sbostic 		if ((p = strstr(s->str + 2, "=]")) == NULL)
10451396Sbostic 			return (0);
10552217Sbostic 		s->str += 2;
10651396Sbostic 		genequiv(s);
10751396Sbostic 		return (1);
10852217Sbostic 	default:				/* "[\###*n]" or "[#*n]" */
10952217Sbostic 		if ((p = strpbrk(s->str + 2, "*]")) == NULL)
11051396Sbostic 			return (0);
11151396Sbostic 		if (p[0] != '*' || index(p, ']') == NULL)
11251396Sbostic 			return (0);
11352217Sbostic 		s->str += 1;
11451396Sbostic 		genseq(s);
11551396Sbostic 		return (1);
11651396Sbostic 	}
11751396Sbostic 	/* NOTREACHED */
11851396Sbostic }
11951396Sbostic 
/*
 * The character-class predicates are declared by hand, as real functions,
 * because their addresses are stored in the class table below.  <ctype.h>
 * is deliberately not included until later in the file.
 */
int	isalnum __P((int));
int	isalpha __P((int));
int	isblank __P((int));
int	isspace __P((int));
int	iscntrl __P((int));
int	isdigit __P((int));
int	isgraph __P((int));
int	islower __P((int));
int	isprint __P((int));
int	ispunct __P((int));
int	isupper __P((int));
int	isxdigit __P((int));
13251396Sbostic 
/*
 * One named character class as accepted inside "[:" ... ":]".
 */
typedef struct {
	char *name;			/* class name, e.g. "alpha" */
	int (*func) __P((int));		/* membership test for the class */
	int *set;			/* member list built by genclass() */
} CLASS;

/*
 * The supported classes.  genclass() searches this table with bsearch(),
 * so the entries must stay sorted by name.  The set member is left NULL
 * here and filled in when a class is first expanded.
 */
static CLASS classes[] = {
	{ "alnum",  isalnum,  },
	{ "alpha",  isalpha,  },
	{ "blank",  isblank,  },
	{ "cntrl",  iscntrl,  },
	{ "digit",  isdigit,  },
	{ "graph",  isgraph,  },
	{ "lower",  islower,  },
	{ "print",  isprint,  },
	{ "punct",  ispunct,  },
	{ "space",  isspace,  },
	{ "upper",  isupper,  },
	{ "xdigit", isxdigit, },
};
15351396Sbostic 
15451396Sbostic static void
genclass(s)15551396Sbostic genclass(s)
15651396Sbostic 	STR *s;
15751396Sbostic {
15851396Sbostic 	register int cnt, (*func) __P((int));
15951396Sbostic 	CLASS *cp, tmp;
16051396Sbostic 	int *p;
16151396Sbostic 
16251396Sbostic 	tmp.name = s->str;
16351396Sbostic 	if ((cp = (CLASS *)bsearch(&tmp, classes, sizeof(classes) /
16451396Sbostic 	    sizeof(CLASS), sizeof(CLASS), c_class)) == NULL)
16551396Sbostic 		err("unknown class %s", s->str);
16651396Sbostic 
16751396Sbostic 	if ((cp->set = p = malloc((NCHARS + 1) * sizeof(int))) == NULL)
16851396Sbostic 		err("%s", strerror(errno));
16951407Sbostic 	bzero(p, (NCHARS + 1) * sizeof(int));
17051396Sbostic 	for (cnt = 0, func = cp->func; cnt < NCHARS; ++cnt)
17151396Sbostic 		if ((func)(cnt))
17251396Sbostic 			*p++ = cnt;
17351396Sbostic 	*p = OOBCH;
17451396Sbostic 
17551396Sbostic 	s->cnt = 0;
17651407Sbostic 	s->state = SET;
17751396Sbostic 	s->set = cp->set;
17851396Sbostic }
17951396Sbostic 
18051396Sbostic static int
c_class(a,b)18151396Sbostic c_class(a, b)
18251396Sbostic 	const void *a, *b;
18351396Sbostic {
18451396Sbostic 	return (strcmp(((CLASS *)a)->name, ((CLASS *)b)->name));
18551396Sbostic }
18651396Sbostic 
18751396Sbostic /*
18851396Sbostic  * English doesn't have any equivalence classes, so for now
18951396Sbostic  * we just syntax check and grab the character.
19051396Sbostic  */
19151396Sbostic static void
genequiv(s)19251396Sbostic genequiv(s)
19351396Sbostic 	STR *s;
19451396Sbostic {
19552217Sbostic 	if (*s->str == '\\') {
19651407Sbostic 		s->equiv[0] = backslash(s);
19751396Sbostic 		if (*s->str != '=')
19851396Sbostic 			err("misplaced equivalence equals sign");
19951396Sbostic 	} else {
20051407Sbostic 		s->equiv[0] = s->str[0];
20151396Sbostic 		if (s->str[1] != '=')
20251396Sbostic 			err("misplaced equivalence equals sign");
20351396Sbostic 	}
20451396Sbostic 	s->str += 2;
20551396Sbostic 	s->cnt = 0;
20651396Sbostic 	s->state = SET;
20751407Sbostic 	s->set = s->equiv;
20851396Sbostic }
20951396Sbostic 
21051396Sbostic static int
genrange(s)21151396Sbostic genrange(s)
21251396Sbostic 	STR *s;
21351396Sbostic {
21451396Sbostic 	int stopval;
21551396Sbostic 	char *savestart;
21651396Sbostic 
21751396Sbostic 	savestart = s->str;
218*69028Sbostic 	stopval = *++s->str == '\\' ? backslash(s) : *s->str++;
219*69028Sbostic 	if (stopval < (u_char)s->lastch) {
22051396Sbostic 		s->str = savestart;
22151396Sbostic 		return (0);
22251396Sbostic 	}
22351396Sbostic 	s->cnt = stopval - s->lastch + 1;
22451396Sbostic 	s->state = RANGE;
22551396Sbostic 	--s->lastch;
22651396Sbostic 	return (1);
22751396Sbostic }
22851396Sbostic 
22951396Sbostic static void
genseq(s)23051396Sbostic genseq(s)
23151396Sbostic 	STR *s;
23251396Sbostic {
23351396Sbostic 	char *ep;
23451396Sbostic 
23551407Sbostic 	if (s->which == STRING1)
23651407Sbostic 		err("sequences only valid in string2");
23751396Sbostic 
23851396Sbostic 	if (*s->str == '\\')
23951396Sbostic 		s->lastch = backslash(s);
24051396Sbostic 	else
24151396Sbostic 		s->lastch = *s->str++;
24251396Sbostic 	if (*s->str != '*')
24351396Sbostic 		err("misplaced sequence asterisk");
24451396Sbostic 
24551396Sbostic 	switch (*++s->str) {
24651396Sbostic 	case '\\':
24751396Sbostic 		s->cnt = backslash(s);
24851396Sbostic 		break;
24951396Sbostic 	case ']':
25051396Sbostic 		s->cnt = 0;
25151396Sbostic 		++s->str;
25251396Sbostic 		break;
25351396Sbostic 	default:
25451396Sbostic 		if (isdigit(*s->str)) {
25551396Sbostic 			s->cnt = strtol(s->str, &ep, 0);
25651396Sbostic 			if (*ep == ']') {
25751396Sbostic 				s->str = ep + 1;
25851396Sbostic 				break;
25951396Sbostic 			}
26051396Sbostic 		}
26151396Sbostic 		err("illegal sequence count");
26251396Sbostic 		/* NOTREACHED */
26351396Sbostic 	}
26451396Sbostic 
26551396Sbostic 	s->state = s->cnt ? SEQUENCE : INFINITE;
26651396Sbostic }
26751396Sbostic 
26851407Sbostic /* Use the #defines isXXX() here, DON'T use them above. */
26951396Sbostic #include <ctype.h>
27051396Sbostic 
27151396Sbostic /*
27251396Sbostic  * Translate \??? into a character.  Up to 3 octal digits, if no digits either
27351396Sbostic  * an escape code or a literal character.
27451396Sbostic  */
27551396Sbostic static int
backslash(s)27651396Sbostic backslash(s)
27751396Sbostic 	register STR *s;
27851396Sbostic {
27951396Sbostic 	register int ch, cnt, val;
28051396Sbostic 
28151396Sbostic 	for (cnt = val = 0;;) {
28251396Sbostic 		ch = *++s->str;
28351396Sbostic 		if (!isascii(ch) || !isdigit(ch))
28451396Sbostic 			break;
28551396Sbostic 		val = val * 8 + ch - '0';
28658372Sbostic 		if (++cnt == 3) {
28758372Sbostic 			++s->str;
28851396Sbostic 			break;
28958372Sbostic 		}
29051396Sbostic 	}
29157746Sbostic 	if (cnt)
29257746Sbostic 		return (val);
29358459Sbostic 	if (ch != '\0')
29458459Sbostic 		++s->str;
29551396Sbostic 	switch (ch) {
29651396Sbostic 		case 'a':			/* escape characters */
29751396Sbostic 			return ('\7');
29851396Sbostic 		case 'b':
29951396Sbostic 			return ('\b');
30051396Sbostic 		case 'f':
30151396Sbostic 			return ('\f');
30251396Sbostic 		case 'n':
30351396Sbostic 			return ('\n');
30451396Sbostic 		case 'r':
30551396Sbostic 			return ('\r');
30651396Sbostic 		case 't':
30751396Sbostic 			return ('\t');
30851396Sbostic 		case 'v':
30951396Sbostic 			return ('\13');
31051396Sbostic 		case '\0':			/*  \" -> \ */
31151396Sbostic 			s->state = EOS;
31251396Sbostic 			return ('\\');
31351396Sbostic 		default:			/* \x" -> x */
31451396Sbostic 			return (ch);
31551396Sbostic 	}
31651396Sbostic }
317