151396Sbostic /*-
262321Sbostic * Copyright (c) 1991, 1993
362321Sbostic * The Regents of the University of California. All rights reserved.
451396Sbostic *
551396Sbostic * %sccs.include.redist.c%
651396Sbostic */
751396Sbostic
851396Sbostic #ifndef lint
9*69028Sbostic static char sccsid[] = "@(#)str.c 8.2 (Berkeley) 04/28/95";
1051396Sbostic #endif /* not lint */
1151396Sbostic
1251396Sbostic #include <sys/cdefs.h>
1351396Sbostic #include <sys/types.h>
1458372Sbostic
1551396Sbostic #include <errno.h>
1658372Sbostic #include <stddef.h>
1751396Sbostic #include <stdio.h>
1851396Sbostic #include <stdlib.h>
1951396Sbostic #include <string.h>
2058372Sbostic
2151396Sbostic #include "extern.h"
2251396Sbostic
2351396Sbostic static int backslash __P((STR *));
2451396Sbostic static int bracket __P((STR *));
2551396Sbostic static int c_class __P((const void *, const void *));
2651396Sbostic static void genclass __P((STR *));
2751396Sbostic static void genequiv __P((STR *));
2851396Sbostic static int genrange __P((STR *));
2951396Sbostic static void genseq __P((STR *));
3051396Sbostic
3151396Sbostic int
next(s)3251396Sbostic next(s)
3351396Sbostic register STR *s;
3451396Sbostic {
3551396Sbostic register int ch;
3651396Sbostic
3751396Sbostic switch (s->state) {
3851396Sbostic case EOS:
3951396Sbostic return (0);
4051396Sbostic case INFINITE:
4151396Sbostic return (1);
4251396Sbostic case NORMAL:
4351399Sbostic switch (ch = *s->str) {
4451396Sbostic case '\0':
4551396Sbostic s->state = EOS;
4651396Sbostic return (0);
4751396Sbostic case '\\':
4851396Sbostic s->lastch = backslash(s);
4951396Sbostic break;
5051396Sbostic case '[':
5151396Sbostic if (bracket(s))
5251396Sbostic return (next(s));
5351396Sbostic /* FALLTHROUGH */
5451396Sbostic default:
5551399Sbostic ++s->str;
5651396Sbostic s->lastch = ch;
5751396Sbostic break;
5851396Sbostic }
5951396Sbostic
6051396Sbostic /* We can start a range at any time. */
6151396Sbostic if (s->str[0] == '-' && genrange(s))
6251396Sbostic return (next(s));
6351396Sbostic return (1);
6451396Sbostic case RANGE:
6551396Sbostic if (s->cnt-- == 0) {
6651396Sbostic s->state = NORMAL;
6751396Sbostic return (next(s));
6851396Sbostic }
6951396Sbostic ++s->lastch;
7051396Sbostic return (1);
7151396Sbostic case SEQUENCE:
7251396Sbostic if (s->cnt-- == 0) {
7351396Sbostic s->state = NORMAL;
7451396Sbostic return (next(s));
7551396Sbostic }
7651396Sbostic return (1);
7751396Sbostic case SET:
7851396Sbostic if ((s->lastch = s->set[s->cnt++]) == OOBCH) {
7951396Sbostic s->state = NORMAL;
8051396Sbostic return (next(s));
8151396Sbostic }
8251396Sbostic return (1);
8351396Sbostic }
8451396Sbostic /* NOTREACHED */
8551396Sbostic }
8651396Sbostic
8751396Sbostic static int
bracket(s)8851396Sbostic bracket(s)
8951396Sbostic register STR *s;
9051396Sbostic {
9151396Sbostic register char *p;
9251396Sbostic
9352217Sbostic switch (s->str[1]) {
9451396Sbostic case ':': /* "[:class:]" */
9552217Sbostic if ((p = strstr(s->str + 2, ":]")) == NULL)
9651396Sbostic return (0);
9751396Sbostic *p = '\0';
9852217Sbostic s->str += 2;
9951396Sbostic genclass(s);
10051396Sbostic s->str = p + 2;
10151396Sbostic return (1);
10251396Sbostic case '=': /* "[=equiv=]" */
10352217Sbostic if ((p = strstr(s->str + 2, "=]")) == NULL)
10451396Sbostic return (0);
10552217Sbostic s->str += 2;
10651396Sbostic genequiv(s);
10751396Sbostic return (1);
10852217Sbostic default: /* "[\###*n]" or "[#*n]" */
10952217Sbostic if ((p = strpbrk(s->str + 2, "*]")) == NULL)
11051396Sbostic return (0);
11151396Sbostic if (p[0] != '*' || index(p, ']') == NULL)
11251396Sbostic return (0);
11352217Sbostic s->str += 1;
11451396Sbostic genseq(s);
11551396Sbostic return (1);
11651396Sbostic }
11751396Sbostic /* NOTREACHED */
11851396Sbostic }
11951396Sbostic
/*
 * The character-class predicates are declared by hand here so that the
 * names below refer to real functions whose addresses can be stored in the
 * table; <ctype.h> is deliberately not included until later in the file.
 */
int	isalnum __P((int)),
	isalpha __P((int)),
	isblank __P((int)),
	isspace __P((int)),
	iscntrl __P((int)),
	isdigit __P((int)),
	isgraph __P((int)),
	islower __P((int)),
	isprint __P((int)),
	ispunct __P((int)),
	isupper __P((int)),
	isxdigit __P((int));

/* One "[:name:]" character class. */
typedef struct {
	char *name;			/* class name as written by the user */
	int (*func) __P((int));		/* membership predicate */
	int *set;			/* member list built by genclass() */
} CLASS;

/*
 * Known classes.  The entries must stay sorted by name because genclass()
 * looks them up with bsearch().  The set member starts out NULL.
 */
static CLASS classes[] = {
	{ "alnum",  isalnum,  NULL },
	{ "alpha",  isalpha,  NULL },
	{ "blank",  isblank,  NULL },
	{ "cntrl",  iscntrl,  NULL },
	{ "digit",  isdigit,  NULL },
	{ "graph",  isgraph,  NULL },
	{ "lower",  islower,  NULL },
	{ "print",  isprint,  NULL },	/* was wrongly bound to isupper */
	{ "punct",  ispunct,  NULL },
	{ "space",  isspace,  NULL },
	{ "upper",  isupper,  NULL },
	{ "xdigit", isxdigit, NULL },
};
15351396Sbostic
15451396Sbostic static void
genclass(s)15551396Sbostic genclass(s)
15651396Sbostic STR *s;
15751396Sbostic {
15851396Sbostic register int cnt, (*func) __P((int));
15951396Sbostic CLASS *cp, tmp;
16051396Sbostic int *p;
16151396Sbostic
16251396Sbostic tmp.name = s->str;
16351396Sbostic if ((cp = (CLASS *)bsearch(&tmp, classes, sizeof(classes) /
16451396Sbostic sizeof(CLASS), sizeof(CLASS), c_class)) == NULL)
16551396Sbostic err("unknown class %s", s->str);
16651396Sbostic
16751396Sbostic if ((cp->set = p = malloc((NCHARS + 1) * sizeof(int))) == NULL)
16851396Sbostic err("%s", strerror(errno));
16951407Sbostic bzero(p, (NCHARS + 1) * sizeof(int));
17051396Sbostic for (cnt = 0, func = cp->func; cnt < NCHARS; ++cnt)
17151396Sbostic if ((func)(cnt))
17251396Sbostic *p++ = cnt;
17351396Sbostic *p = OOBCH;
17451396Sbostic
17551396Sbostic s->cnt = 0;
17651407Sbostic s->state = SET;
17751396Sbostic s->set = cp->set;
17851396Sbostic }
17951396Sbostic
18051396Sbostic static int
c_class(a,b)18151396Sbostic c_class(a, b)
18251396Sbostic const void *a, *b;
18351396Sbostic {
18451396Sbostic return (strcmp(((CLASS *)a)->name, ((CLASS *)b)->name));
18551396Sbostic }
18651396Sbostic
18751396Sbostic /*
18851396Sbostic * English doesn't have any equivalence classes, so for now
18951396Sbostic * we just syntax check and grab the character.
19051396Sbostic */
19151396Sbostic static void
genequiv(s)19251396Sbostic genequiv(s)
19351396Sbostic STR *s;
19451396Sbostic {
19552217Sbostic if (*s->str == '\\') {
19651407Sbostic s->equiv[0] = backslash(s);
19751396Sbostic if (*s->str != '=')
19851396Sbostic err("misplaced equivalence equals sign");
19951396Sbostic } else {
20051407Sbostic s->equiv[0] = s->str[0];
20151396Sbostic if (s->str[1] != '=')
20251396Sbostic err("misplaced equivalence equals sign");
20351396Sbostic }
20451396Sbostic s->str += 2;
20551396Sbostic s->cnt = 0;
20651396Sbostic s->state = SET;
20751407Sbostic s->set = s->equiv;
20851396Sbostic }
20951396Sbostic
21051396Sbostic static int
genrange(s)21151396Sbostic genrange(s)
21251396Sbostic STR *s;
21351396Sbostic {
21451396Sbostic int stopval;
21551396Sbostic char *savestart;
21651396Sbostic
21751396Sbostic savestart = s->str;
218*69028Sbostic stopval = *++s->str == '\\' ? backslash(s) : *s->str++;
219*69028Sbostic if (stopval < (u_char)s->lastch) {
22051396Sbostic s->str = savestart;
22151396Sbostic return (0);
22251396Sbostic }
22351396Sbostic s->cnt = stopval - s->lastch + 1;
22451396Sbostic s->state = RANGE;
22551396Sbostic --s->lastch;
22651396Sbostic return (1);
22751396Sbostic }
22851396Sbostic
22951396Sbostic static void
genseq(s)23051396Sbostic genseq(s)
23151396Sbostic STR *s;
23251396Sbostic {
23351396Sbostic char *ep;
23451396Sbostic
23551407Sbostic if (s->which == STRING1)
23651407Sbostic err("sequences only valid in string2");
23751396Sbostic
23851396Sbostic if (*s->str == '\\')
23951396Sbostic s->lastch = backslash(s);
24051396Sbostic else
24151396Sbostic s->lastch = *s->str++;
24251396Sbostic if (*s->str != '*')
24351396Sbostic err("misplaced sequence asterisk");
24451396Sbostic
24551396Sbostic switch (*++s->str) {
24651396Sbostic case '\\':
24751396Sbostic s->cnt = backslash(s);
24851396Sbostic break;
24951396Sbostic case ']':
25051396Sbostic s->cnt = 0;
25151396Sbostic ++s->str;
25251396Sbostic break;
25351396Sbostic default:
25451396Sbostic if (isdigit(*s->str)) {
25551396Sbostic s->cnt = strtol(s->str, &ep, 0);
25651396Sbostic if (*ep == ']') {
25751396Sbostic s->str = ep + 1;
25851396Sbostic break;
25951396Sbostic }
26051396Sbostic }
26151396Sbostic err("illegal sequence count");
26251396Sbostic /* NOTREACHED */
26351396Sbostic }
26451396Sbostic
26551396Sbostic s->state = s->cnt ? SEQUENCE : INFINITE;
26651396Sbostic }
26751396Sbostic
26851407Sbostic /* Use the #defines isXXX() here, DON'T use them above. */
26951396Sbostic #include <ctype.h>
27051396Sbostic
27151396Sbostic /*
27251396Sbostic * Translate \??? into a character. Up to 3 octal digits, if no digits either
27351396Sbostic * an escape code or a literal character.
27451396Sbostic */
27551396Sbostic static int
backslash(s)27651396Sbostic backslash(s)
27751396Sbostic register STR *s;
27851396Sbostic {
27951396Sbostic register int ch, cnt, val;
28051396Sbostic
28151396Sbostic for (cnt = val = 0;;) {
28251396Sbostic ch = *++s->str;
28351396Sbostic if (!isascii(ch) || !isdigit(ch))
28451396Sbostic break;
28551396Sbostic val = val * 8 + ch - '0';
28658372Sbostic if (++cnt == 3) {
28758372Sbostic ++s->str;
28851396Sbostic break;
28958372Sbostic }
29051396Sbostic }
29157746Sbostic if (cnt)
29257746Sbostic return (val);
29358459Sbostic if (ch != '\0')
29458459Sbostic ++s->str;
29551396Sbostic switch (ch) {
29651396Sbostic case 'a': /* escape characters */
29751396Sbostic return ('\7');
29851396Sbostic case 'b':
29951396Sbostic return ('\b');
30051396Sbostic case 'f':
30151396Sbostic return ('\f');
30251396Sbostic case 'n':
30351396Sbostic return ('\n');
30451396Sbostic case 'r':
30551396Sbostic return ('\r');
30651396Sbostic case 't':
30751396Sbostic return ('\t');
30851396Sbostic case 'v':
30951396Sbostic return ('\13');
31051396Sbostic case '\0': /* \" -> \ */
31151396Sbostic s->state = EOS;
31251396Sbostic return ('\\');
31351396Sbostic default: /* \x" -> x */
31451396Sbostic return (ch);
31551396Sbostic }
31651396Sbostic }
317