xref: /csrg-svn/usr.bin/tr/str.c (revision 51399)
151396Sbostic /*-
251396Sbostic  * Copyright (c) 1991 The Regents of the University of California.
351396Sbostic  * All rights reserved.
451396Sbostic  *
551396Sbostic  * %sccs.include.redist.c%
651396Sbostic  */
751396Sbostic 
851396Sbostic #ifndef lint
9*51399Sbostic static char sccsid[] = "@(#)str.c	5.2 (Berkeley) 10/24/91";
1051396Sbostic #endif /* not lint */
1151396Sbostic 
1251396Sbostic #include <sys/cdefs.h>
1351396Sbostic #include <sys/types.h>
1451396Sbostic #include <errno.h>
1551396Sbostic #include <stdio.h>
1651396Sbostic #include <stddef.h>
1751396Sbostic #include <stdlib.h>
1851396Sbostic #include <string.h>
1951396Sbostic #include "extern.h"
2051396Sbostic 
2151396Sbostic static int	backslash __P((STR *));
2251396Sbostic static int	bracket __P((STR *));
2351396Sbostic static int	c_class __P((const void *, const void *));
2451396Sbostic static void	genclass __P((STR *));
2551396Sbostic static void	genequiv __P((STR *));
2651396Sbostic static int	genrange __P((STR *));
2751396Sbostic static void	genseq __P((STR *));
2851396Sbostic 
2951396Sbostic int
3051396Sbostic next(s)
3151396Sbostic 	register STR *s;
3251396Sbostic {
3351396Sbostic 	register int ch;
3451396Sbostic 
3551396Sbostic 	switch (s->state) {
3651396Sbostic 	case EOS:
3751396Sbostic 		return (0);
3851396Sbostic 	case INFINITE:
3951396Sbostic 		return (1);
4051396Sbostic 	case NORMAL:
41*51399Sbostic 		switch (ch = *s->str) {
4251396Sbostic 		case '\0':
4351396Sbostic 			s->state = EOS;
4451396Sbostic 			return (0);
4551396Sbostic 		case '\\':
4651396Sbostic 			s->lastch = backslash(s);
4751396Sbostic 			break;
4851396Sbostic 		case '[':
4951396Sbostic 			if (bracket(s))
5051396Sbostic 				return (next(s));
5151396Sbostic 			/* FALLTHROUGH */
5251396Sbostic 		default:
53*51399Sbostic 			++s->str;
5451396Sbostic 			s->lastch = ch;
5551396Sbostic 			break;
5651396Sbostic 		}
5751396Sbostic 
5851396Sbostic 		/* We can start a range at any time. */
5951396Sbostic 		if (s->str[0] == '-' && genrange(s))
6051396Sbostic 			return (next(s));
6151396Sbostic 		return (1);
6251396Sbostic 	case RANGE:
6351396Sbostic 		if (s->cnt-- == 0) {
6451396Sbostic 			s->state = NORMAL;
6551396Sbostic 			return (next(s));
6651396Sbostic 		}
6751396Sbostic 		++s->lastch;
6851396Sbostic 		return (1);
6951396Sbostic 	case SEQUENCE:
7051396Sbostic 		if (s->cnt-- == 0) {
7151396Sbostic 			s->state = NORMAL;
7251396Sbostic 			return (next(s));
7351396Sbostic 		}
7451396Sbostic 		return (1);
7551396Sbostic 	case SET:
7651396Sbostic 	case ULSET:
7751396Sbostic 		if ((s->lastch = s->set[s->cnt++]) == OOBCH) {
7851396Sbostic 			s->state = NORMAL;
7951396Sbostic 			return (next(s));
8051396Sbostic 		}
8151396Sbostic 		return (1);
8251396Sbostic 	}
8351396Sbostic 	/* NOTREACHED */
8451396Sbostic }
8551396Sbostic 
8651396Sbostic static int
8751396Sbostic bracket(s)
8851396Sbostic 	register STR *s;
8951396Sbostic {
9051396Sbostic 	register char *p;
9151396Sbostic 
92*51399Sbostic 	switch (*++s->str) {
9351396Sbostic 	case ':':				/* "[:class:]" */
9451396Sbostic 		if ((p = strpbrk(s->str + 1, ":]")) == NULL)
9551396Sbostic 			return (0);
9651396Sbostic 		if (p[0] != ':' || p[1] != ']')
9751396Sbostic 			return (0);
9851396Sbostic 		*p = '\0';
9951396Sbostic 		++s->str;
10051396Sbostic 		genclass(s);
10151396Sbostic 		s->str = p + 2;
10251396Sbostic 		return (1);
10351396Sbostic 	case '=':				/* "[=equiv=]" */
10451396Sbostic 		if ((p = strpbrk(s->str + 1, "=]")) == NULL)
10551396Sbostic 			return (0);
10651396Sbostic 		if (p[0] != '=' || p[1] != ']')
10751396Sbostic 			return (0);
10851396Sbostic 		genequiv(s);
10951396Sbostic 		return (1);
11051396Sbostic 	default:				/* "[\###*]" or "[#*]" */
11151396Sbostic 		if ((p = strpbrk(s->str + 1, "*]")) == NULL)
11251396Sbostic 			return (0);
11351396Sbostic 		if (p[0] != '*' || index(p, ']') == NULL)
11451396Sbostic 			return (0);
11551396Sbostic 		genseq(s);
11651396Sbostic 		return (1);
11751396Sbostic 	}
11851396Sbostic 	/* NOTREACHED */
11951396Sbostic }
12051396Sbostic 
/*
 * The character classification routines, declared by hand.  <ctype.h> is
 * only included further down this file, and the class table below stores
 * these names as function pointers.
 */
int isalnum __P((int));
int isalpha __P((int));
int isblank __P((int));
int isspace __P((int));
int iscntrl __P((int));
int isdigit __P((int));
int isgraph __P((int));
int islower __P((int));
int isprint __P((int));
int ispunct __P((int));
int isupper __P((int));
int isxdigit __P((int));
13351396Sbostic 
13451396Sbostic typedef struct {
13551396Sbostic 	char *name;
13651396Sbostic 	int (*func) __P((int));
13751396Sbostic 	u_int type;
13851396Sbostic 	int *set;
13951396Sbostic } CLASS;
14051396Sbostic 
14151396Sbostic static CLASS classes[] = {
14251396Sbostic 	{ "alnum",  isalnum,  T_CLASS, },
14351396Sbostic 	{ "alpha",  isalpha,  T_CLASS, },
14451396Sbostic 	{ "blank",  isblank,  T_CLASS, },
14551396Sbostic 	{ "cntrl",  iscntrl,  T_CLASS, },
14651396Sbostic 	{ "digit",  isdigit,  T_CLASS, },
14751396Sbostic 	{ "graph",  isgraph,  T_CLASS, },
14851396Sbostic 	{ "lower",  islower,  T_UL, },
14951396Sbostic 	{ "print",  isupper,  T_CLASS, },
15051396Sbostic 	{ "punct",  ispunct,  T_CLASS, },
15151396Sbostic 	{ "space",  isspace,  T_CLASS, },
15251396Sbostic 	{ "upper",  isupper,  T_UL, },
15351396Sbostic 	{ "xdigit", isxdigit, T_CLASS, },
15451396Sbostic };
15551396Sbostic 
15651396Sbostic static void
15751396Sbostic genclass(s)
15851396Sbostic 	STR *s;
15951396Sbostic {
16051396Sbostic 	register int cnt, (*func) __P((int));
16151396Sbostic 	CLASS *cp, tmp;
16251396Sbostic 	int *p;
16351396Sbostic 
16451396Sbostic 	tmp.name = s->str;
16551396Sbostic 	if ((cp = (CLASS *)bsearch(&tmp, classes, sizeof(classes) /
16651396Sbostic 	    sizeof(CLASS), sizeof(CLASS), c_class)) == NULL)
16751396Sbostic 		err("unknown class %s", s->str);
16851396Sbostic 	if (!(cp->type | s->type))
16951396Sbostic 		err("class %s illegally used");
17051396Sbostic 
17151396Sbostic 	if ((cp->set = p = malloc((NCHARS + 1) * sizeof(int))) == NULL)
17251396Sbostic 		err("%s", strerror(errno));
17351396Sbostic 	bzero(p, NCHARS);
17451396Sbostic 	for (cnt = 0, func = cp->func; cnt < NCHARS; ++cnt)
17551396Sbostic 		if ((func)(cnt))
17651396Sbostic 			*p++ = cnt;
17751396Sbostic 	*p = OOBCH;
17851396Sbostic 
17951396Sbostic 	s->cnt = 0;
18051396Sbostic 	s->state = cp->type & T_UL ? ULSET : SET;
18151396Sbostic 	s->set = cp->set;
18251396Sbostic }
18351396Sbostic 
18451396Sbostic static int
18551396Sbostic c_class(a, b)
18651396Sbostic 	const void *a, *b;
18751396Sbostic {
18851396Sbostic 	return (strcmp(((CLASS *)a)->name, ((CLASS *)b)->name));
18951396Sbostic }
19051396Sbostic 
19151396Sbostic /*
19251396Sbostic  * English doesn't have any equivalence classes, so for now
19351396Sbostic  * we just syntax check and grab the character.
19451396Sbostic  */
19551396Sbostic static void
19651396Sbostic genequiv(s)
19751396Sbostic 	STR *s;
19851396Sbostic {
19951396Sbostic 	static int val[2] = { 0, OOBCH };
20051396Sbostic 
20151396Sbostic 	if (*++s->str == '\\') {
20251396Sbostic 		val[0] = backslash(s);
20351396Sbostic 		if (*s->str != '=')
20451396Sbostic 			err("misplaced equivalence equals sign");
20551396Sbostic 	} else {
20651396Sbostic 		val[0] = s->str[0];
20751396Sbostic 		if (s->str[1] != '=')
20851396Sbostic 			err("misplaced equivalence equals sign");
20951396Sbostic 	}
21051396Sbostic 	s->str += 2;
21151396Sbostic 	s->cnt = 0;
21251396Sbostic 	s->state = SET;
21351396Sbostic 	s->set = val;
21451396Sbostic }
21551396Sbostic 
21651396Sbostic static int
21751396Sbostic genrange(s)
21851396Sbostic 	STR *s;
21951396Sbostic {
22051396Sbostic 	int stopval;
22151396Sbostic 	char *savestart;
22251396Sbostic 
22351396Sbostic 	savestart = s->str;
22451396Sbostic 	stopval = *++s->str == '\\' ? backslash(s) : *s->str;
22551396Sbostic 	if (stopval < s->lastch) {
22651396Sbostic 		s->str = savestart;
22751396Sbostic 		return (0);
22851396Sbostic 	}
22951396Sbostic 	s->cnt = stopval - s->lastch + 1;
23051396Sbostic 	s->state = RANGE;
23151396Sbostic 	--s->lastch;
23251396Sbostic 	return (1);
23351396Sbostic }
23451396Sbostic 
23551396Sbostic static void
23651396Sbostic genseq(s)
23751396Sbostic 	STR *s;
23851396Sbostic {
23951396Sbostic 	char *ep;
24051396Sbostic 
24151396Sbostic 	if (!(s->type & T_SEQ))
24251396Sbostic 		err("sequences only valid in string1");
24351396Sbostic 
24451396Sbostic 	if (*s->str == '\\')
24551396Sbostic 		s->lastch = backslash(s);
24651396Sbostic 	else
24751396Sbostic 		s->lastch = *s->str++;
24851396Sbostic 	if (*s->str != '*')
24951396Sbostic 		err("misplaced sequence asterisk");
25051396Sbostic 
25151396Sbostic 	switch (*++s->str) {
25251396Sbostic 	case '\\':
25351396Sbostic 		s->cnt = backslash(s);
25451396Sbostic 		break;
25551396Sbostic 	case ']':
25651396Sbostic 		s->cnt = 0;
25751396Sbostic 		++s->str;
25851396Sbostic 		break;
25951396Sbostic 	default:
26051396Sbostic 		if (isdigit(*s->str)) {
26151396Sbostic 			s->cnt = strtol(s->str, &ep, 0);
26251396Sbostic 			if (*ep == ']') {
26351396Sbostic 				s->str = ep + 1;
26451396Sbostic 				break;
26551396Sbostic 			}
26651396Sbostic 		}
26751396Sbostic 		err("illegal sequence count");
26851396Sbostic 		/* NOTREACHED */
26951396Sbostic 	}
27051396Sbostic 
27151396Sbostic 	s->state = s->cnt ? SEQUENCE : INFINITE;
27251396Sbostic }
27351396Sbostic 
27451396Sbostic /* Use the #defines here, DON'T use them above. */
27551396Sbostic #include <ctype.h>
27651396Sbostic 
27751396Sbostic /*
27851396Sbostic  * Translate \??? into a character.  Up to 3 octal digits, if no digits either
27951396Sbostic  * an escape code or a literal character.
28051396Sbostic  */
28151396Sbostic static int
28251396Sbostic backslash(s)
28351396Sbostic 	register STR *s;
28451396Sbostic {
28551396Sbostic 	register int ch, cnt, val;
28651396Sbostic 
28751396Sbostic 	for (cnt = val = 0;;) {
28851396Sbostic 		ch = *++s->str;
28951396Sbostic 		if (!isascii(ch) || !isdigit(ch))
29051396Sbostic 			break;
29151396Sbostic 		val = val * 8 + ch - '0';
29251396Sbostic 		if (++cnt == 3)
29351396Sbostic 			break;
29451396Sbostic 	}
29551396Sbostic 	if (cnt)
29651396Sbostic 		return (val);
29751396Sbostic 	++s->str;
29851396Sbostic 	switch (ch) {
29951396Sbostic 		case 'a':			/* escape characters */
30051396Sbostic 			return ('\7');
30151396Sbostic 		case 'b':
30251396Sbostic 			return ('\b');
30351396Sbostic 		case 'f':
30451396Sbostic 			return ('\f');
30551396Sbostic 		case 'n':
30651396Sbostic 			return ('\n');
30751396Sbostic 		case 'r':
30851396Sbostic 			return ('\r');
30951396Sbostic 		case 't':
31051396Sbostic 			return ('\t');
31151396Sbostic 		case 'v':
31251396Sbostic 			return ('\13');
31351396Sbostic 		case '\0':			/*  \" -> \ */
31451396Sbostic 			s->state = EOS;
31551396Sbostic 			return ('\\');
31651396Sbostic 		default:			/* \x" -> x */
31751396Sbostic 			return (ch);
31851396Sbostic 	}
31951396Sbostic }
320