1 /*-
2 * Copyright (c) 1991, 1993
3 * The Regents of the University of California. All rights reserved.
4 *
5 * %sccs.include.redist.c%
6 */
7
#ifndef lint
/* SCCS identification string for this file; omitted when `lint' is defined. */
static char sccsid[] = "@(#)str.c 8.2 (Berkeley) 04/28/95";
#endif /* not lint */
11
12 #include <sys/cdefs.h>
13 #include <sys/types.h>
14
15 #include <errno.h>
16 #include <stddef.h>
17 #include <stdio.h>
18 #include <stdlib.h>
19 #include <string.h>
20
21 #include "extern.h"
22
23 static int backslash __P((STR *));
24 static int bracket __P((STR *));
25 static int c_class __P((const void *, const void *));
26 static void genclass __P((STR *));
27 static void genequiv __P((STR *));
28 static int genrange __P((STR *));
29 static void genseq __P((STR *));
30
31 int
next(s)32 next(s)
33 register STR *s;
34 {
35 register int ch;
36
37 switch (s->state) {
38 case EOS:
39 return (0);
40 case INFINITE:
41 return (1);
42 case NORMAL:
43 switch (ch = *s->str) {
44 case '\0':
45 s->state = EOS;
46 return (0);
47 case '\\':
48 s->lastch = backslash(s);
49 break;
50 case '[':
51 if (bracket(s))
52 return (next(s));
53 /* FALLTHROUGH */
54 default:
55 ++s->str;
56 s->lastch = ch;
57 break;
58 }
59
60 /* We can start a range at any time. */
61 if (s->str[0] == '-' && genrange(s))
62 return (next(s));
63 return (1);
64 case RANGE:
65 if (s->cnt-- == 0) {
66 s->state = NORMAL;
67 return (next(s));
68 }
69 ++s->lastch;
70 return (1);
71 case SEQUENCE:
72 if (s->cnt-- == 0) {
73 s->state = NORMAL;
74 return (next(s));
75 }
76 return (1);
77 case SET:
78 if ((s->lastch = s->set[s->cnt++]) == OOBCH) {
79 s->state = NORMAL;
80 return (next(s));
81 }
82 return (1);
83 }
84 /* NOTREACHED */
85 }
86
87 static int
bracket(s)88 bracket(s)
89 register STR *s;
90 {
91 register char *p;
92
93 switch (s->str[1]) {
94 case ':': /* "[:class:]" */
95 if ((p = strstr(s->str + 2, ":]")) == NULL)
96 return (0);
97 *p = '\0';
98 s->str += 2;
99 genclass(s);
100 s->str = p + 2;
101 return (1);
102 case '=': /* "[=equiv=]" */
103 if ((p = strstr(s->str + 2, "=]")) == NULL)
104 return (0);
105 s->str += 2;
106 genequiv(s);
107 return (1);
108 default: /* "[\###*n]" or "[#*n]" */
109 if ((p = strpbrk(s->str + 2, "*]")) == NULL)
110 return (0);
111 if (p[0] != '*' || index(p, ']') == NULL)
112 return (0);
113 s->str += 1;
114 genseq(s);
115 return (1);
116 }
117 /* NOTREACHED */
118 }
119
/*
 * Declare the character classification routines as real functions.  The
 * table below stores their addresses, so <ctype.h> (which may supply
 * macro versions) must not have been included at this point.
 */
int	isalnum __P((int)),
	isalpha __P((int)),
	isblank __P((int)),
	isspace __P((int)),
	iscntrl __P((int)),
	isdigit __P((int)),
	isgraph __P((int)),
	islower __P((int)),
	isprint __P((int)),
	ispunct __P((int)),
	isupper __P((int)),
	isxdigit __P((int));

/* One named character class, as written inside "[:name:]". */
typedef struct {
	char *name;			/* class name, the bsearch() key */
	int (*func) __P((int));		/* membership predicate */
	int *set;			/* member list built by genclass() */
} CLASS;

/*
 * Known classes.  Must stay sorted by name: genclass() looks entries up
 * with bsearch().  The `set' members start out NULL.
 */
static CLASS classes[] = {
	{ "alnum", isalnum, },
	{ "alpha", isalpha, },
	{ "blank", isblank, },
	{ "cntrl", iscntrl, },
	{ "digit", isdigit, },
	{ "graph", isgraph, },
	{ "lower", islower, },
	{ "print", isprint, },		/* was wrongly wired to isupper */
	{ "punct", ispunct, },
	{ "space", isspace, },
	{ "upper", isupper, },
	{ "xdigit", isxdigit, },
};
153
154 static void
genclass(s)155 genclass(s)
156 STR *s;
157 {
158 register int cnt, (*func) __P((int));
159 CLASS *cp, tmp;
160 int *p;
161
162 tmp.name = s->str;
163 if ((cp = (CLASS *)bsearch(&tmp, classes, sizeof(classes) /
164 sizeof(CLASS), sizeof(CLASS), c_class)) == NULL)
165 err("unknown class %s", s->str);
166
167 if ((cp->set = p = malloc((NCHARS + 1) * sizeof(int))) == NULL)
168 err("%s", strerror(errno));
169 bzero(p, (NCHARS + 1) * sizeof(int));
170 for (cnt = 0, func = cp->func; cnt < NCHARS; ++cnt)
171 if ((func)(cnt))
172 *p++ = cnt;
173 *p = OOBCH;
174
175 s->cnt = 0;
176 s->state = SET;
177 s->set = cp->set;
178 }
179
180 static int
c_class(a,b)181 c_class(a, b)
182 const void *a, *b;
183 {
184 return (strcmp(((CLASS *)a)->name, ((CLASS *)b)->name));
185 }
186
187 /*
188 * English doesn't have any equivalence classes, so for now
189 * we just syntax check and grab the character.
190 */
191 static void
genequiv(s)192 genequiv(s)
193 STR *s;
194 {
195 if (*s->str == '\\') {
196 s->equiv[0] = backslash(s);
197 if (*s->str != '=')
198 err("misplaced equivalence equals sign");
199 } else {
200 s->equiv[0] = s->str[0];
201 if (s->str[1] != '=')
202 err("misplaced equivalence equals sign");
203 }
204 s->str += 2;
205 s->cnt = 0;
206 s->state = SET;
207 s->set = s->equiv;
208 }
209
210 static int
genrange(s)211 genrange(s)
212 STR *s;
213 {
214 int stopval;
215 char *savestart;
216
217 savestart = s->str;
218 stopval = *++s->str == '\\' ? backslash(s) : *s->str++;
219 if (stopval < (u_char)s->lastch) {
220 s->str = savestart;
221 return (0);
222 }
223 s->cnt = stopval - s->lastch + 1;
224 s->state = RANGE;
225 --s->lastch;
226 return (1);
227 }
228
229 static void
genseq(s)230 genseq(s)
231 STR *s;
232 {
233 char *ep;
234
235 if (s->which == STRING1)
236 err("sequences only valid in string2");
237
238 if (*s->str == '\\')
239 s->lastch = backslash(s);
240 else
241 s->lastch = *s->str++;
242 if (*s->str != '*')
243 err("misplaced sequence asterisk");
244
245 switch (*++s->str) {
246 case '\\':
247 s->cnt = backslash(s);
248 break;
249 case ']':
250 s->cnt = 0;
251 ++s->str;
252 break;
253 default:
254 if (isdigit(*s->str)) {
255 s->cnt = strtol(s->str, &ep, 0);
256 if (*ep == ']') {
257 s->str = ep + 1;
258 break;
259 }
260 }
261 err("illegal sequence count");
262 /* NOTREACHED */
263 }
264
265 s->state = s->cnt ? SEQUENCE : INFINITE;
266 }
267
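/*
 * <ctype.h> is deliberately included this late.  Code from here on may use
 * the isXXX() #defines; code above must NOT, because the class table
 * stores the addresses of the real isXXX() functions.  Don't move this
 * include up.
 */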
269 #include <ctype.h>
270
271 /*
272 * Translate \??? into a character. Up to 3 octal digits, if no digits either
273 * an escape code or a literal character.
274 */
275 static int
backslash(s)276 backslash(s)
277 register STR *s;
278 {
279 register int ch, cnt, val;
280
281 for (cnt = val = 0;;) {
282 ch = *++s->str;
283 if (!isascii(ch) || !isdigit(ch))
284 break;
285 val = val * 8 + ch - '0';
286 if (++cnt == 3) {
287 ++s->str;
288 break;
289 }
290 }
291 if (cnt)
292 return (val);
293 if (ch != '\0')
294 ++s->str;
295 switch (ch) {
296 case 'a': /* escape characters */
297 return ('\7');
298 case 'b':
299 return ('\b');
300 case 'f':
301 return ('\f');
302 case 'n':
303 return ('\n');
304 case 'r':
305 return ('\r');
306 case 't':
307 return ('\t');
308 case 'v':
309 return ('\13');
310 case '\0': /* \" -> \ */
311 s->state = EOS;
312 return ('\\');
313 default: /* \x" -> x */
314 return (ch);
315 }
316 }
317