1 /* $NetBSD: rxp.c,v 1.12 2004/01/27 20:30:30 jsm Exp $ */ 2 3 /*- 4 * Copyright (c) 1991, 1993 5 * The Regents of the University of California. All rights reserved. 6 * 7 * This code is derived from software contributed to Berkeley by 8 * Jim R. Oldroyd at The Instruction Set and Keith Gabryelski at 9 * Commodore Business Machines. 10 * 11 * Redistribution and use in source and binary forms, with or without 12 * modification, are permitted provided that the following conditions 13 * are met: 14 * 1. Redistributions of source code must retain the above copyright 15 * notice, this list of conditions and the following disclaimer. 16 * 2. Redistributions in binary form must reproduce the above copyright 17 * notice, this list of conditions and the following disclaimer in the 18 * documentation and/or other materials provided with the distribution. 19 * 3. Neither the name of the University nor the names of its contributors 20 * may be used to endorse or promote products derived from this software 21 * without specific prior written permission. 22 * 23 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 24 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 25 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 26 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 27 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 28 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 29 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 30 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 31 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 32 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 33 * SUCH DAMAGE. 34 */ 35 36 #include <sys/cdefs.h> 37 #ifndef lint 38 #if 0 39 static char sccsid[] = "@(#)rxp.c 8.1 (Berkeley) 5/31/93"; 40 #else 41 __RCSID("$NetBSD: rxp.c,v 1.12 2004/01/27 20:30:30 jsm Exp $"); 42 #endif 43 #endif /* not lint */ 44 45 /* 46 * regular expression parser 47 * 48 * external functions and return values are: 49 * rxp_compile(s) 50 * TRUE success 51 * FALSE parse failure; error message will be in char rxperr[] 52 * metas are: 53 * {...} optional pattern, equialent to [...|] 54 * | alternate pattern 55 * [...] pattern delimiters 56 * 57 * rxp_match(s) 58 * TRUE string s matches compiled pattern 59 * FALSE match failure or regexp error 60 * 61 * rxp_expand() 62 * char * reverse-engineered regular expression string 63 * NULL regexp error 64 */ 65 66 #include <stdio.h> 67 #include <stdlib.h> 68 #include <ctype.h> 69 #include "quiz.h" 70 /* regexp tokens, arg */ 71 #define LIT (-1) /* literal character, char */ 72 #define SOT (-2) /* start text anchor, - */ 73 #define EOT (-3) /* end text anchor, - */ 74 #define GRP_S (-4) /* start alternate grp, ptr_to_end */ 75 #define GRP_E (-5) /* end group, - */ 76 #define ALT_S (-6) /* alternate starts, ptr_to_next */ 77 #define ALT_E (-7) /* alternate ends, - */ 78 #define END (-8) /* end of regexp, - */ 79 80 typedef short Rxp_t; /* type for regexp tokens */ 81 82 static Rxp_t rxpbuf[RXP_LINE_SZ]; /* compiled regular expression buffer */ 83 char rxperr[128]; /* parser error message */ 84 85 static int rxp__compile(const char *, int); 86 static char *rxp__expand(int); 87 static int rxp__match(const char *, int, Rxp_t *, Rxp_t *, const char *); 88 89 int 90 rxp_compile(s) 91 const char * s; 92 { 93 return (rxp__compile(s, TRUE)); 94 } 95 96 static int 97 rxp__compile(s, first) 98 const char *s; 99 int first; 100 { 101 static Rxp_t *rp; 102 static const char *sp; 103 Rxp_t *grp_ptr; 104 Rxp_t *alt_ptr; 105 int esc, err; 106 107 esc = 0; 108 if (first) { 109 rp = rxpbuf; 110 sp = s; 111 *rp++ = SOT; /* auto-anchor: pat is really ^pat$ */ 112 *rp++ = GRP_S; /* auto-group: ^pat$ is really ^[pat]$ */ 113 *rp++ = 0; 114 } 115 *rp++ = ALT_S; 116 alt_ptr = rp; 117 *rp++ = 0; 118 for (; *sp; ++sp) { 119 if (rp - rxpbuf >= RXP_LINE_SZ - 4) { 120 (void)snprintf(rxperr, sizeof(rxperr), 121 "regular expression too long %s", s); 122 return (FALSE); 123 } 124 if (*sp == ':' && !esc) 125 break; 126 if (esc) { 127 *rp++ = LIT; 128 *rp++ = *sp; 129 esc = 0; 130 } 131 else switch (*sp) { 132 case '\\': 133 esc = 1; 134 break; 135 case '{': 136 case '[': 137 *rp++ = GRP_S; 138 grp_ptr = rp; 139 *rp++ = 0; 140 sp++; 141 if ((err = rxp__compile(s, FALSE)) != TRUE) 142 return (err); 143 *rp++ = GRP_E; 144 *grp_ptr = rp - rxpbuf; 145 break; 146 case '}': 147 case ']': 148 case '|': 149 *rp++ = ALT_E; 150 *alt_ptr = rp - rxpbuf; 151 if (*sp != ']') { 152 *rp++ = ALT_S; 153 alt_ptr = rp; 154 *rp++ = 0; 155 } 156 if (*sp != '|') { 157 if (*sp != ']') { 158 *rp++ = ALT_E; 159 *alt_ptr = rp - rxpbuf; 160 } 161 if (first) { 162 (void)snprintf(rxperr, sizeof(rxperr), 163 "unmatched alternator in regexp %s", 164 s); 165 return (FALSE); 166 } 167 return (TRUE); 168 } 169 break; 170 default: 171 *rp++ = LIT; 172 *rp++ = *sp; 173 esc = 0; 174 break; 175 } 176 } 177 if (!first) { 178 (void)snprintf(rxperr, sizeof(rxperr), 179 "unmatched alternator in regexp %s", s); 180 return (FALSE); 181 } 182 *rp++ = ALT_E; 183 *alt_ptr = rp - rxpbuf; 184 *rp++ = GRP_E; 185 *(rxpbuf + 2) = rp - rxpbuf; 186 *rp++ = EOT; 187 *rp = END; 188 return (TRUE); 189 } 190 191 /* 192 * match string against compiled regular expression 193 */ 194 int 195 rxp_match(s) 196 const char * s; 197 { 198 return (rxp__match(s, TRUE, NULL, NULL, NULL)); 199 } 200 201 static int 202 rxp__match(s, first, j_succ, j_fail, sp_fail) 203 const char *s; 204 int first; 205 Rxp_t *j_succ; /* jump here on successful alt match */ 206 Rxp_t *j_fail; /* jump here on failed match */ 207 const char *sp_fail; /* reset sp to here on failed match */ 208 { 209 static Rxp_t *rp; 210 static const char *sp; 211 int ch; 212 Rxp_t *grp_end = NULL; 213 214 if (first) { 215 rp = rxpbuf; 216 sp = s; 217 } 218 while (rp < rxpbuf + RXP_LINE_SZ && *rp != END) 219 switch(*rp) { 220 case LIT: 221 rp++; 222 ch = isascii(*rp) && isupper(*rp) ? tolower(*rp) : *rp; 223 if (ch != *sp++) { 224 rp = j_fail; 225 sp = sp_fail; 226 return (FALSE); 227 } 228 rp++; 229 break; 230 case SOT: 231 if (sp != s) 232 return (FALSE); 233 rp++; 234 break; 235 case EOT: 236 if (*sp != 0) 237 return (FALSE); 238 rp++; 239 break; 240 case GRP_S: 241 rp++; 242 grp_end = rxpbuf + *rp++; 243 break; 244 case ALT_S: 245 rp++; 246 rxp__match(sp, FALSE, grp_end, rxpbuf + *rp++, sp); 247 break; 248 case ALT_E: 249 rp = j_succ; 250 return (TRUE); 251 case GRP_E: 252 rp = j_fail; 253 sp = sp_fail; 254 return (FALSE); 255 default: 256 abort(); 257 } 258 return (*rp != END ? FALSE : TRUE); 259 } 260 261 /* 262 * Reverse engineer the regular expression, by picking first of all alternates. 263 */ 264 char * 265 rxp_expand() 266 { 267 return (rxp__expand(TRUE)); 268 } 269 270 static char * 271 rxp__expand(first) 272 int first; 273 { 274 static char buf[RXP_LINE_SZ/2]; 275 static Rxp_t *rp; 276 static char *bp; 277 Rxp_t *grp_ptr; 278 char *err; 279 280 if (first) { 281 rp = rxpbuf; 282 bp = buf; 283 } 284 while (rp < rxpbuf + RXP_LINE_SZ && *rp != END) 285 switch(*rp) { 286 case LIT: 287 rp++; 288 *bp++ = *rp++; 289 break; 290 case GRP_S: 291 rp++; 292 grp_ptr = rxpbuf + *rp; 293 rp++; 294 if ((err = rxp__expand(FALSE)) == NULL) 295 return (err); 296 rp = grp_ptr; 297 break; 298 case ALT_E: 299 return (buf); 300 case ALT_S: 301 rp++; 302 /* FALLTHROUGH */ 303 case SOT: 304 case EOT: 305 case GRP_E: 306 rp++; 307 break; 308 default: 309 return (NULL); 310 } 311 if (first) { 312 if (*rp != END) 313 return (NULL); 314 *bp = '\0'; 315 } 316 return (buf); 317 } 318