151611Sbostic /*- 251611Sbostic * Copyright (c) 1991 The Regents of the University of California. 351611Sbostic * All rights reserved. 451611Sbostic * 551611Sbostic * This code is derived from software contributed to Berkeley by 652145Sbostic * Jim R. Oldroyd at The Instruction Set and Keith Gabryelski at 752145Sbostic * Commodore Business Machines. 851611Sbostic * 951611Sbostic * %sccs.include.redist.c% 1051611Sbostic */ 1151611Sbostic 1251611Sbostic #ifndef lint 13*57829Storek static char sccsid[] = "@(#)rxp.c 5.3 (Berkeley) 02/03/93"; 1451611Sbostic #endif /* not lint */ 1551611Sbostic 1651611Sbostic /* 1751611Sbostic * regular expression parser 1851611Sbostic * 1951611Sbostic * external functions and return values are: 2051611Sbostic * rxp_compile(s) 2151611Sbostic * TRUE success 2251611Sbostic * FALSE parse failure; error message will be in char rxperr[] 2351611Sbostic * metas are: 2451611Sbostic * {...} optional pattern, equialent to [...|] 2551611Sbostic * | alternate pattern 2651611Sbostic * [...] pattern delimiters 2751611Sbostic * 2851611Sbostic * rxp_match(s) 2951611Sbostic * TRUE string s matches compiled pattern 3051611Sbostic * FALSE match failure or regexp error 3151611Sbostic * 3251611Sbostic * rxp_expand() 3351611Sbostic * char * reverse-engineered regular expression string 3451611Sbostic * NULL regexp error 3551611Sbostic */ 3651611Sbostic 3751611Sbostic #include <stdio.h> 3851611Sbostic #include <ctype.h> 3951611Sbostic #include "quiz.h" 4051611Sbostic /* regexp tokens, arg */ 4151611Sbostic #define LIT (-1) /* literal character, char */ 4251611Sbostic #define SOT (-2) /* start text anchor, - */ 4351611Sbostic #define EOT (-3) /* end text anchor, - */ 4451611Sbostic #define GRP_S (-4) /* start alternate grp, ptr_to_end */ 4551611Sbostic #define GRP_E (-5) /* end group, - */ 4651611Sbostic #define ALT_S (-6) /* alternate starts, ptr_to_next */ 4751611Sbostic #define ALT_E (-7) /* alternate ends, - */ 4851611Sbostic #define END (-8) /* end of regexp, - */ 4951611Sbostic 5051611Sbostic typedef short Rxp_t; /* type for regexp tokens */ 5151611Sbostic 5251611Sbostic static Rxp_t rxpbuf[RXP_LINE_SZ]; /* compiled regular expression buffer */ 5351611Sbostic char rxperr[128]; /* parser error message */ 5451611Sbostic 55*57829Storek static int rxp__compile __P((char *, int)); 56*57829Storek static char *rxp__expand __P((int)); 57*57829Storek static int rxp__match __P((char *, int, Rxp_t *, Rxp_t *, char *)); 5851611Sbostic 5951611Sbostic int 6051611Sbostic rxp_compile(s) 6151611Sbostic register char * s; 6251611Sbostic { 6351611Sbostic return (rxp__compile(s, TRUE)); 6451611Sbostic } 6551611Sbostic 6651611Sbostic static int 6751611Sbostic rxp__compile(s, first) 6851611Sbostic register char *s; 6951611Sbostic int first; 7051611Sbostic { 7151611Sbostic static Rxp_t *rp; 7251611Sbostic static char *sp; 7351611Sbostic Rxp_t *grp_ptr; 7451611Sbostic Rxp_t *alt_ptr; 7551611Sbostic int esc, err; 7651611Sbostic 7751611Sbostic esc = 0; 7851611Sbostic if (first) { 7951611Sbostic rp = rxpbuf; 8051611Sbostic sp = s; 8151611Sbostic *rp++ = SOT; /* auto-anchor: pat is really ^pat$ */ 8251611Sbostic *rp++ = GRP_S; /* auto-group: ^pat$ is really ^[pat]$ */ 8351611Sbostic *rp++ = 0; 8451611Sbostic } 8551611Sbostic *rp++ = ALT_S; 8651611Sbostic alt_ptr = rp; 8751611Sbostic *rp++ = 0; 8851611Sbostic for (; *sp; ++sp) { 8951611Sbostic if (rp - rxpbuf >= RXP_LINE_SZ - 4) { 9051611Sbostic (void)snprintf(rxperr, sizeof(rxperr), 9151611Sbostic "regular expression too long %s", s); 9251611Sbostic return (FALSE); 9351611Sbostic } 9451611Sbostic if (*sp == ':' && !esc) 9551611Sbostic break; 9651611Sbostic if (esc) { 9751611Sbostic *rp++ = LIT; 9851611Sbostic *rp++ = *sp; 9951611Sbostic esc = 0; 10051611Sbostic } 10151611Sbostic else switch (*sp) { 10251611Sbostic case '\\': 10351611Sbostic esc = 1; 10451611Sbostic break; 10551611Sbostic case '{': 10651611Sbostic case '[': 10751611Sbostic *rp++ = GRP_S; 10851611Sbostic grp_ptr = rp; 10951611Sbostic *rp++ = 0; 11051611Sbostic sp++; 11151611Sbostic if ((err = rxp__compile(s, FALSE)) != TRUE) 11251611Sbostic return (err); 11351611Sbostic *rp++ = GRP_E; 11451611Sbostic *grp_ptr = rp - rxpbuf; 11551611Sbostic break; 11651611Sbostic case '}': 11751611Sbostic case ']': 11851611Sbostic case '|': 11951611Sbostic *rp++ = ALT_E; 12051611Sbostic *alt_ptr = rp - rxpbuf; 12151611Sbostic if (*sp != ']') { 12251611Sbostic *rp++ = ALT_S; 12351611Sbostic alt_ptr = rp; 12451611Sbostic *rp++ = 0; 12551611Sbostic } 12651611Sbostic if (*sp != '|') { 12751611Sbostic if (*sp != ']') { 12851611Sbostic *rp++ = ALT_E; 12951611Sbostic *alt_ptr = rp - rxpbuf; 13051611Sbostic } 13151611Sbostic if (first) { 13251611Sbostic (void)snprintf(rxperr, sizeof(rxperr), 13351611Sbostic "unmatched alternator in regexp %s", 13451611Sbostic s); 13551611Sbostic return (FALSE); 13651611Sbostic } 13751611Sbostic return (TRUE); 13851611Sbostic } 13951611Sbostic break; 14051611Sbostic default: 14151611Sbostic *rp++ = LIT; 14251611Sbostic *rp++ = *sp; 14351611Sbostic esc = 0; 14451611Sbostic break; 14551611Sbostic } 14651611Sbostic } 14751611Sbostic if (!first) { 14851611Sbostic (void)snprintf(rxperr, sizeof(rxperr), 14951611Sbostic "unmatched alternator in regexp %s", s); 15051611Sbostic return (FALSE); 15151611Sbostic } 15251611Sbostic *rp++ = ALT_E; 15351611Sbostic *alt_ptr = rp - rxpbuf; 15451611Sbostic *rp++ = GRP_E; 15551611Sbostic *(rxpbuf + 2) = rp - rxpbuf; 15651611Sbostic *rp++ = EOT; 15751611Sbostic *rp = END; 15851611Sbostic return (TRUE); 15951611Sbostic } 16051611Sbostic 16151611Sbostic /* 16251611Sbostic * match string against compiled regular expression 16351611Sbostic */ 16451611Sbostic int 16551611Sbostic rxp_match(s) 16651611Sbostic register char * s; 16751611Sbostic { 16851611Sbostic return (rxp__match(s, TRUE, NULL, NULL, NULL)); 16951611Sbostic } 17051611Sbostic 17151611Sbostic static int 17251611Sbostic rxp__match(s, first, j_succ, j_fail, sp_fail) 17351611Sbostic char *s; 17451611Sbostic int first; 17551611Sbostic Rxp_t *j_succ; /* jump here on successful alt match */ 17651611Sbostic Rxp_t *j_fail; /* jump here on failed match */ 17751611Sbostic char *sp_fail; /* reset sp to here on failed match */ 17851611Sbostic { 17951611Sbostic static Rxp_t *rp; 18051611Sbostic static char *sp; 18151611Sbostic register int ch; 18251611Sbostic Rxp_t *grp_end; 18351611Sbostic int err; 18451611Sbostic 18551611Sbostic if (first) { 18651611Sbostic rp = rxpbuf; 18751611Sbostic sp = s; 18851611Sbostic } 18951611Sbostic while (rp < rxpbuf + RXP_LINE_SZ && *rp != END) 19051611Sbostic switch(*rp) { 19151611Sbostic case LIT: 19251611Sbostic rp++; 19351611Sbostic ch = isascii(*rp) && isupper(*rp) ? tolower(*rp) : *rp; 19451611Sbostic if (ch != *sp++) { 19551611Sbostic rp = j_fail; 19651611Sbostic sp = sp_fail; 19751611Sbostic return (TRUE); 19851611Sbostic } 19951611Sbostic rp++; 20051611Sbostic break; 20151611Sbostic case SOT: 20251611Sbostic if (sp != s) 20351611Sbostic return (FALSE); 20451611Sbostic rp++; 20551611Sbostic break; 20651611Sbostic case EOT: 20751611Sbostic if (*sp != 0) 20851611Sbostic return (FALSE); 20951611Sbostic rp++; 21051611Sbostic break; 21151611Sbostic case GRP_S: 21251611Sbostic rp++; 21351611Sbostic grp_end = rxpbuf + *rp++; 21451611Sbostic break; 21551611Sbostic case ALT_S: 21651611Sbostic rp++; 21751611Sbostic if ((err = rxp__match(sp, 21851611Sbostic FALSE, grp_end, rxpbuf + *rp++, sp)) != TRUE) 21951611Sbostic return (err); 22051611Sbostic break; 22151611Sbostic case ALT_E: 22251611Sbostic rp = j_succ; 22351611Sbostic return (TRUE); 22451611Sbostic case GRP_E: 22551611Sbostic default: 22651611Sbostic return (FALSE); 22751611Sbostic } 22851611Sbostic return (*rp != END ? FALSE : TRUE); 22951611Sbostic } 23051611Sbostic 23151611Sbostic /* 23251611Sbostic * Reverse engineer the regular expression, by picking first of all alternates. 23351611Sbostic */ 23451611Sbostic char * 23551611Sbostic rxp_expand() 23651611Sbostic { 23751611Sbostic return (rxp__expand(TRUE)); 23851611Sbostic } 23951611Sbostic 24051611Sbostic static char * 24151611Sbostic rxp__expand(first) 24251611Sbostic int first; 24351611Sbostic { 24451611Sbostic static char buf[RXP_LINE_SZ/2]; 24551611Sbostic static Rxp_t *rp; 24651611Sbostic static char *bp; 24751611Sbostic Rxp_t *grp_ptr; 24851611Sbostic char *err; 24951611Sbostic 25051611Sbostic if (first) { 25151611Sbostic rp = rxpbuf; 25251611Sbostic bp = buf; 25351611Sbostic } 25451611Sbostic while (rp < rxpbuf + RXP_LINE_SZ && *rp != END) 25551611Sbostic switch(*rp) { 25651611Sbostic case LIT: 25751611Sbostic rp++; 25851611Sbostic *bp++ = *rp++; 25951611Sbostic break; 26051611Sbostic case GRP_S: 26151611Sbostic rp++; 26251611Sbostic grp_ptr = rxpbuf + *rp; 26351611Sbostic rp++; 26451611Sbostic if ((err = rxp__expand(FALSE)) == NULL) 26551611Sbostic return (err); 26651611Sbostic rp = grp_ptr; 26751611Sbostic break; 26851611Sbostic case ALT_E: 26951611Sbostic return (buf); 27051611Sbostic case ALT_S: 27151611Sbostic rp++; 27251611Sbostic /* FALLTHROUGH */ 27351611Sbostic case SOT: 27451611Sbostic case EOT: 27551611Sbostic case GRP_E: 27651611Sbostic rp++; 27751611Sbostic break; 27851611Sbostic default: 27951611Sbostic return (NULL); 28051611Sbostic } 28151611Sbostic if (first) { 28251611Sbostic if (*rp != END) 28351611Sbostic return (NULL); 28451611Sbostic *bp = '\0'; 28551611Sbostic } 28651611Sbostic return (buf); 28751611Sbostic } 288