151611Sbostic /*-
2*60836Sbostic * Copyright (c) 1991, 1993
3*60836Sbostic * The Regents of the University of California. All rights reserved.
451611Sbostic *
551611Sbostic * This code is derived from software contributed to Berkeley by
652145Sbostic * Jim R. Oldroyd at The Instruction Set and Keith Gabryelski at
752145Sbostic * Commodore Business Machines.
851611Sbostic *
951611Sbostic * %sccs.include.redist.c%
1051611Sbostic */
1151611Sbostic
1251611Sbostic #ifndef lint
13*60836Sbostic static char sccsid[] = "@(#)rxp.c 8.1 (Berkeley) 05/31/93";
1451611Sbostic #endif /* not lint */
1551611Sbostic
1651611Sbostic /*
1751611Sbostic * regular expression parser
1851611Sbostic *
1951611Sbostic * external functions and return values are:
2051611Sbostic * rxp_compile(s)
2151611Sbostic * TRUE success
2251611Sbostic * FALSE parse failure; error message will be in char rxperr[]
2351611Sbostic * metas are:
2451611Sbostic * {...} optional pattern, equialent to [...|]
2551611Sbostic * | alternate pattern
2651611Sbostic * [...] pattern delimiters
2751611Sbostic *
2851611Sbostic * rxp_match(s)
2951611Sbostic * TRUE string s matches compiled pattern
3051611Sbostic * FALSE match failure or regexp error
3151611Sbostic *
3251611Sbostic * rxp_expand()
3351611Sbostic * char * reverse-engineered regular expression string
3451611Sbostic * NULL regexp error
3551611Sbostic */
3651611Sbostic
3751611Sbostic #include <stdio.h>
3851611Sbostic #include <ctype.h>
3951611Sbostic #include "quiz.h"
4051611Sbostic /* regexp tokens, arg */
4151611Sbostic #define LIT (-1) /* literal character, char */
4251611Sbostic #define SOT (-2) /* start text anchor, - */
4351611Sbostic #define EOT (-3) /* end text anchor, - */
4451611Sbostic #define GRP_S (-4) /* start alternate grp, ptr_to_end */
4551611Sbostic #define GRP_E (-5) /* end group, - */
4651611Sbostic #define ALT_S (-6) /* alternate starts, ptr_to_next */
4751611Sbostic #define ALT_E (-7) /* alternate ends, - */
4851611Sbostic #define END (-8) /* end of regexp, - */
4951611Sbostic
5051611Sbostic typedef short Rxp_t; /* type for regexp tokens */
5151611Sbostic
5251611Sbostic static Rxp_t rxpbuf[RXP_LINE_SZ]; /* compiled regular expression buffer */
5351611Sbostic char rxperr[128]; /* parser error message */
5451611Sbostic
5557829Storek static int rxp__compile __P((char *, int));
5657829Storek static char *rxp__expand __P((int));
5757829Storek static int rxp__match __P((char *, int, Rxp_t *, Rxp_t *, char *));
5851611Sbostic
5951611Sbostic int
rxp_compile(s)6051611Sbostic rxp_compile(s)
6151611Sbostic register char * s;
6251611Sbostic {
6351611Sbostic return (rxp__compile(s, TRUE));
6451611Sbostic }
6551611Sbostic
6651611Sbostic static int
rxp__compile(s,first)6751611Sbostic rxp__compile(s, first)
6851611Sbostic register char *s;
6951611Sbostic int first;
7051611Sbostic {
7151611Sbostic static Rxp_t *rp;
7251611Sbostic static char *sp;
7351611Sbostic Rxp_t *grp_ptr;
7451611Sbostic Rxp_t *alt_ptr;
7551611Sbostic int esc, err;
7651611Sbostic
7751611Sbostic esc = 0;
7851611Sbostic if (first) {
7951611Sbostic rp = rxpbuf;
8051611Sbostic sp = s;
8151611Sbostic *rp++ = SOT; /* auto-anchor: pat is really ^pat$ */
8251611Sbostic *rp++ = GRP_S; /* auto-group: ^pat$ is really ^[pat]$ */
8351611Sbostic *rp++ = 0;
8451611Sbostic }
8551611Sbostic *rp++ = ALT_S;
8651611Sbostic alt_ptr = rp;
8751611Sbostic *rp++ = 0;
8851611Sbostic for (; *sp; ++sp) {
8951611Sbostic if (rp - rxpbuf >= RXP_LINE_SZ - 4) {
9051611Sbostic (void)snprintf(rxperr, sizeof(rxperr),
9151611Sbostic "regular expression too long %s", s);
9251611Sbostic return (FALSE);
9351611Sbostic }
9451611Sbostic if (*sp == ':' && !esc)
9551611Sbostic break;
9651611Sbostic if (esc) {
9751611Sbostic *rp++ = LIT;
9851611Sbostic *rp++ = *sp;
9951611Sbostic esc = 0;
10051611Sbostic }
10151611Sbostic else switch (*sp) {
10251611Sbostic case '\\':
10351611Sbostic esc = 1;
10451611Sbostic break;
10551611Sbostic case '{':
10651611Sbostic case '[':
10751611Sbostic *rp++ = GRP_S;
10851611Sbostic grp_ptr = rp;
10951611Sbostic *rp++ = 0;
11051611Sbostic sp++;
11151611Sbostic if ((err = rxp__compile(s, FALSE)) != TRUE)
11251611Sbostic return (err);
11351611Sbostic *rp++ = GRP_E;
11451611Sbostic *grp_ptr = rp - rxpbuf;
11551611Sbostic break;
11651611Sbostic case '}':
11751611Sbostic case ']':
11851611Sbostic case '|':
11951611Sbostic *rp++ = ALT_E;
12051611Sbostic *alt_ptr = rp - rxpbuf;
12151611Sbostic if (*sp != ']') {
12251611Sbostic *rp++ = ALT_S;
12351611Sbostic alt_ptr = rp;
12451611Sbostic *rp++ = 0;
12551611Sbostic }
12651611Sbostic if (*sp != '|') {
12751611Sbostic if (*sp != ']') {
12851611Sbostic *rp++ = ALT_E;
12951611Sbostic *alt_ptr = rp - rxpbuf;
13051611Sbostic }
13151611Sbostic if (first) {
13251611Sbostic (void)snprintf(rxperr, sizeof(rxperr),
13351611Sbostic "unmatched alternator in regexp %s",
13451611Sbostic s);
13551611Sbostic return (FALSE);
13651611Sbostic }
13751611Sbostic return (TRUE);
13851611Sbostic }
13951611Sbostic break;
14051611Sbostic default:
14151611Sbostic *rp++ = LIT;
14251611Sbostic *rp++ = *sp;
14351611Sbostic esc = 0;
14451611Sbostic break;
14551611Sbostic }
14651611Sbostic }
14751611Sbostic if (!first) {
14851611Sbostic (void)snprintf(rxperr, sizeof(rxperr),
14951611Sbostic "unmatched alternator in regexp %s", s);
15051611Sbostic return (FALSE);
15151611Sbostic }
15251611Sbostic *rp++ = ALT_E;
15351611Sbostic *alt_ptr = rp - rxpbuf;
15451611Sbostic *rp++ = GRP_E;
15551611Sbostic *(rxpbuf + 2) = rp - rxpbuf;
15651611Sbostic *rp++ = EOT;
15751611Sbostic *rp = END;
15851611Sbostic return (TRUE);
15951611Sbostic }
16051611Sbostic
16151611Sbostic /*
16251611Sbostic * match string against compiled regular expression
16351611Sbostic */
16451611Sbostic int
rxp_match(s)16551611Sbostic rxp_match(s)
16651611Sbostic register char * s;
16751611Sbostic {
16851611Sbostic return (rxp__match(s, TRUE, NULL, NULL, NULL));
16951611Sbostic }
17051611Sbostic
17151611Sbostic static int
rxp__match(s,first,j_succ,j_fail,sp_fail)17251611Sbostic rxp__match(s, first, j_succ, j_fail, sp_fail)
17351611Sbostic char *s;
17451611Sbostic int first;
17551611Sbostic Rxp_t *j_succ; /* jump here on successful alt match */
17651611Sbostic Rxp_t *j_fail; /* jump here on failed match */
17751611Sbostic char *sp_fail; /* reset sp to here on failed match */
17851611Sbostic {
17951611Sbostic static Rxp_t *rp;
18051611Sbostic static char *sp;
18151611Sbostic register int ch;
18251611Sbostic Rxp_t *grp_end;
18351611Sbostic int err;
18451611Sbostic
18551611Sbostic if (first) {
18651611Sbostic rp = rxpbuf;
18751611Sbostic sp = s;
18851611Sbostic }
18951611Sbostic while (rp < rxpbuf + RXP_LINE_SZ && *rp != END)
19051611Sbostic switch(*rp) {
19151611Sbostic case LIT:
19251611Sbostic rp++;
19351611Sbostic ch = isascii(*rp) && isupper(*rp) ? tolower(*rp) : *rp;
19451611Sbostic if (ch != *sp++) {
19551611Sbostic rp = j_fail;
19651611Sbostic sp = sp_fail;
19751611Sbostic return (TRUE);
19851611Sbostic }
19951611Sbostic rp++;
20051611Sbostic break;
20151611Sbostic case SOT:
20251611Sbostic if (sp != s)
20351611Sbostic return (FALSE);
20451611Sbostic rp++;
20551611Sbostic break;
20651611Sbostic case EOT:
20751611Sbostic if (*sp != 0)
20851611Sbostic return (FALSE);
20951611Sbostic rp++;
21051611Sbostic break;
21151611Sbostic case GRP_S:
21251611Sbostic rp++;
21351611Sbostic grp_end = rxpbuf + *rp++;
21451611Sbostic break;
21551611Sbostic case ALT_S:
21651611Sbostic rp++;
21751611Sbostic if ((err = rxp__match(sp,
21851611Sbostic FALSE, grp_end, rxpbuf + *rp++, sp)) != TRUE)
21951611Sbostic return (err);
22051611Sbostic break;
22151611Sbostic case ALT_E:
22251611Sbostic rp = j_succ;
22351611Sbostic return (TRUE);
22451611Sbostic case GRP_E:
22551611Sbostic default:
22651611Sbostic return (FALSE);
22751611Sbostic }
22851611Sbostic return (*rp != END ? FALSE : TRUE);
22951611Sbostic }
23051611Sbostic
23151611Sbostic /*
23251611Sbostic * Reverse engineer the regular expression, by picking first of all alternates.
23351611Sbostic */
23451611Sbostic char *
rxp_expand()23551611Sbostic rxp_expand()
23651611Sbostic {
23751611Sbostic return (rxp__expand(TRUE));
23851611Sbostic }
23951611Sbostic
24051611Sbostic static char *
rxp__expand(first)24151611Sbostic rxp__expand(first)
24251611Sbostic int first;
24351611Sbostic {
24451611Sbostic static char buf[RXP_LINE_SZ/2];
24551611Sbostic static Rxp_t *rp;
24651611Sbostic static char *bp;
24751611Sbostic Rxp_t *grp_ptr;
24851611Sbostic char *err;
24951611Sbostic
25051611Sbostic if (first) {
25151611Sbostic rp = rxpbuf;
25251611Sbostic bp = buf;
25351611Sbostic }
25451611Sbostic while (rp < rxpbuf + RXP_LINE_SZ && *rp != END)
25551611Sbostic switch(*rp) {
25651611Sbostic case LIT:
25751611Sbostic rp++;
25851611Sbostic *bp++ = *rp++;
25951611Sbostic break;
26051611Sbostic case GRP_S:
26151611Sbostic rp++;
26251611Sbostic grp_ptr = rxpbuf + *rp;
26351611Sbostic rp++;
26451611Sbostic if ((err = rxp__expand(FALSE)) == NULL)
26551611Sbostic return (err);
26651611Sbostic rp = grp_ptr;
26751611Sbostic break;
26851611Sbostic case ALT_E:
26951611Sbostic return (buf);
27051611Sbostic case ALT_S:
27151611Sbostic rp++;
27251611Sbostic /* FALLTHROUGH */
27351611Sbostic case SOT:
27451611Sbostic case EOT:
27551611Sbostic case GRP_E:
27651611Sbostic rp++;
27751611Sbostic break;
27851611Sbostic default:
27951611Sbostic return (NULL);
28051611Sbostic }
28151611Sbostic if (first) {
28251611Sbostic if (*rp != END)
28351611Sbostic return (NULL);
28451611Sbostic *bp = '\0';
28551611Sbostic }
28651611Sbostic return (buf);
28751611Sbostic }
288