1 /* $OpenBSD: str.c,v 1.12 2012/12/05 23:20:26 deraadt Exp $ */ 2 /* $NetBSD: str.c,v 1.7 1995/08/31 22:13:47 jtc Exp $ */ 3 4 /*- 5 * Copyright (c) 1991, 1993 6 * The Regents of the University of California. All rights reserved. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 3. Neither the name of the University nor the names of its contributors 17 * may be used to endorse or promote products derived from this software 18 * without specific prior written permission. 19 * 20 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 23 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 30 * SUCH DAMAGE. 31 */ 32 33 #include <sys/types.h> 34 35 #include <errno.h> 36 #include <stddef.h> 37 #include <stdio.h> 38 #include <stdlib.h> 39 #include <string.h> 40 #include <ctype.h> 41 #include <err.h> 42 43 #include "extern.h" 44 45 static int backslash(STR *); 46 static int bracket(STR *); 47 static int c_class(const void *, const void *); 48 static void genclass(STR *); 49 static void genequiv(STR *); 50 static int genrange(STR *); 51 static void genseq(STR *); 52 53 int 54 next(s) 55 STR *s; 56 { 57 int ch; 58 59 switch (s->state) { 60 case EOS: 61 return (0); 62 case INFINITE: 63 return (1); 64 case NORMAL: 65 switch (ch = *s->str) { 66 case '\0': 67 s->state = EOS; 68 return (0); 69 case '\\': 70 s->lastch = backslash(s); 71 break; 72 case '[': 73 if (bracket(s)) 74 return (next(s)); 75 /* FALLTHROUGH */ 76 default: 77 ++s->str; 78 s->lastch = ch; 79 break; 80 } 81 82 /* We can start a range at any time. */ 83 if (s->str[0] == '-' && genrange(s)) 84 return (next(s)); 85 return (1); 86 case RANGE: 87 if (s->cnt-- == 0) { 88 s->state = NORMAL; 89 return (next(s)); 90 } 91 ++s->lastch; 92 return (1); 93 case SEQUENCE: 94 if (s->cnt-- == 0) { 95 s->state = NORMAL; 96 return (next(s)); 97 } 98 return (1); 99 case SET: 100 if ((s->lastch = s->set[s->cnt++]) == OOBCH) { 101 s->state = NORMAL; 102 return (next(s)); 103 } 104 return (1); 105 default: 106 return 0; 107 } 108 /* NOTREACHED */ 109 } 110 111 static int 112 bracket(s) 113 STR *s; 114 { 115 char *p; 116 117 switch (s->str[1]) { 118 case ':': /* "[:class:]" */ 119 if ((p = strstr((char *)s->str + 2, ":]")) == NULL) 120 return (0); 121 *p = '\0'; 122 s->str += 2; 123 genclass(s); 124 s->str = (unsigned char *)p + 2; 125 return (1); 126 case '=': /* "[=equiv=]" */ 127 if ((p = strstr((char *)s->str + 2, "=]")) == NULL) 128 return (0); 129 s->str += 2; 130 genequiv(s); 131 return (1); 132 default: /* "[\###*n]" or "[#*n]" */ 133 if ((p = strpbrk((char *)s->str + 2, "*]")) == NULL) 134 return (0); 135 if (p[0] != '*' || strchr(p, ']') == NULL) 136 return (0); 137 s->str += 1; 138 genseq(s); 139 return (1); 140 } 141 /* NOTREACHED */ 142 } 143 144 typedef struct { 145 char *name; 146 int (*func)(int); 147 int *set; 148 } CLASS; 149 150 static CLASS classes[] = { 151 { "alnum", isalnum, }, 152 { "alpha", isalpha, }, 153 { "blank", isblank, }, 154 { "cntrl", iscntrl, }, 155 { "digit", isdigit, }, 156 { "graph", isgraph, }, 157 { "lower", islower, }, 158 { "print", isprint, }, 159 { "punct", ispunct, }, 160 { "space", isspace, }, 161 { "upper", isupper, }, 162 { "xdigit", isxdigit, }, 163 }; 164 165 static void 166 genclass(s) 167 STR *s; 168 { 169 int cnt, (*func)(int); 170 CLASS *cp, tmp; 171 int *p; 172 173 tmp.name = (char *)s->str; 174 if ((cp = (CLASS *)bsearch(&tmp, classes, sizeof(classes) / 175 sizeof(CLASS), sizeof(CLASS), c_class)) == NULL) 176 errx(1, "unknown class %s", s->str); 177 178 if ((cp->set = p = calloc(NCHARS + 1, sizeof(int))) == NULL) 179 errx(1, "no memory for a class"); 180 for (cnt = 0, func = cp->func; cnt < NCHARS; ++cnt) 181 if ((func)(cnt)) 182 *p++ = cnt; 183 *p = OOBCH; 184 185 s->cnt = 0; 186 s->state = SET; 187 s->set = cp->set; 188 } 189 190 static int 191 c_class(a, b) 192 const void *a, *b; 193 { 194 return (strcmp(((CLASS *)a)->name, ((CLASS *)b)->name)); 195 } 196 197 /* 198 * English doesn't have any equivalence classes, so for now 199 * we just syntax check and grab the character. 200 */ 201 static void 202 genequiv(s) 203 STR *s; 204 { 205 if (*s->str == '\\') { 206 s->equiv[0] = backslash(s); 207 if (*s->str != '=') 208 errx(1, "misplaced equivalence equals sign"); 209 } else { 210 s->equiv[0] = s->str[0]; 211 if (s->str[1] != '=') 212 errx(1, "misplaced equivalence equals sign"); 213 } 214 s->str += 2; 215 s->cnt = 0; 216 s->state = SET; 217 s->set = s->equiv; 218 } 219 220 static int 221 genrange(s) 222 STR *s; 223 { 224 int stopval; 225 unsigned char *savestart; 226 227 savestart = s->str; 228 stopval = *++s->str == '\\' ? backslash(s) : *s->str++; 229 if (stopval < (u_char)s->lastch) { 230 s->str = savestart; 231 return (0); 232 } 233 s->cnt = stopval - s->lastch + 1; 234 s->state = RANGE; 235 --s->lastch; 236 return (1); 237 } 238 239 static void 240 genseq(s) 241 STR *s; 242 { 243 char *ep; 244 245 if (s->which == STRING1) 246 errx(1, "sequences only valid in string2"); 247 248 if (*s->str == '\\') 249 s->lastch = backslash(s); 250 else 251 s->lastch = *s->str++; 252 if (*s->str != '*') 253 errx(1, "misplaced sequence asterisk"); 254 255 switch (*++s->str) { 256 case '\\': 257 s->cnt = backslash(s); 258 break; 259 case ']': 260 s->cnt = 0; 261 ++s->str; 262 break; 263 default: 264 if (isdigit(*s->str)) { 265 s->cnt = strtol((char *)s->str, &ep, 0); 266 if (*ep == ']') { 267 s->str = (unsigned char *)ep + 1; 268 break; 269 } 270 } 271 errx(1, "illegal sequence count"); 272 /* NOTREACHED */ 273 } 274 275 s->state = s->cnt ? SEQUENCE : INFINITE; 276 } 277 278 /* 279 * Translate \??? into a character. Up to 3 octal digits, if no digits either 280 * an escape code or a literal character. 281 */ 282 static int 283 backslash(s) 284 STR *s; 285 { 286 int ch, cnt, val; 287 288 for (cnt = val = 0;;) { 289 ch = *++s->str; 290 if (!isascii(ch) || !isdigit(ch)) 291 break; 292 val = val * 8 + ch - '0'; 293 if (++cnt == 3) { 294 ++s->str; 295 break; 296 } 297 } 298 if (cnt) 299 return (val); 300 if (ch != '\0') 301 ++s->str; 302 switch (ch) { 303 case 'a': /* escape characters */ 304 return ('\7'); 305 case 'b': 306 return ('\b'); 307 case 'f': 308 return ('\f'); 309 case 'n': 310 return ('\n'); 311 case 'r': 312 return ('\r'); 313 case 't': 314 return ('\t'); 315 case 'v': 316 return ('\13'); 317 case '\0': /* \" -> \ */ 318 s->state = EOS; 319 return ('\\'); 320 default: /* \x" -> x */ 321 return (ch); 322 } 323 } 324