155847Sbostic /*-
266362Sbostic * Copyright (c) 1992, 1993, 1994 Henry Spencer.
366362Sbostic * Copyright (c) 1992, 1993, 1994
461162Sbostic * The Regents of the University of California. All rights reserved.
555847Sbostic *
655847Sbostic * This code is derived from software contributed to Berkeley by
7*66406Sbostic * Henry Spencer.
855847Sbostic *
955847Sbostic * %sccs.include.redist.c%
1055847Sbostic *
11*66406Sbostic * @(#)regcomp.c 8.5 (Berkeley) 03/20/94
1255847Sbostic */
1355847Sbostic
1455847Sbostic #if defined(LIBC_SCCS) && !defined(lint)
15*66406Sbostic static char sccsid[] = "@(#)regcomp.c 8.5 (Berkeley) 03/20/94";
1655847Sbostic #endif /* LIBC_SCCS and not lint */
1755847Sbostic
1855847Sbostic #include <sys/types.h>
1955847Sbostic #include <stdio.h>
2055847Sbostic #include <string.h>
2155847Sbostic #include <ctype.h>
2255847Sbostic #include <limits.h>
2355847Sbostic #include <stdlib.h>
2455847Sbostic #include <regex.h>
2555847Sbostic
2655847Sbostic #include "utils.h"
2755847Sbostic #include "regex2.h"
2855847Sbostic
2955847Sbostic #include "cclass.h"
3055847Sbostic #include "cname.h"
3155847Sbostic
3255847Sbostic /*
3355847Sbostic * parse structure, passed up and down to avoid global variables and
3455847Sbostic * other clumsinesses
3555847Sbostic */
3655847Sbostic struct parse {
3760201Sbostic char *next; /* next character in RE */
3860201Sbostic char *end; /* end of string (-> NUL normally) */
3955847Sbostic int error; /* has an error been seen? */
4055847Sbostic sop *strip; /* malloced strip */
4155847Sbostic sopno ssize; /* malloced strip size (allocated) */
4255847Sbostic sopno slen; /* malloced strip length (used) */
4355847Sbostic int ncsalloc; /* number of csets allocated */
4455847Sbostic struct re_guts *g;
4555847Sbostic # define NPAREN 10 /* we need to remember () 1-9 for back refs */
4655847Sbostic sopno pbegin[NPAREN]; /* -> ( ([0] unused) */
4755847Sbostic sopno pend[NPAREN]; /* -> ) ([0] unused) */
4855847Sbostic };
4955847Sbostic
5066362Sbostic /* ========= begin header generated by ./mkh ========= */
5166362Sbostic #ifdef __cplusplus
5266362Sbostic extern "C" {
5366362Sbostic #endif
5456355Sbostic
5566362Sbostic /* === regcomp.c === */
5666385Sbostic static void p_ere __P((struct parse *p, int stop));
5766385Sbostic static void p_ere_exp __P((struct parse *p));
5866385Sbostic static void p_str __P((struct parse *p));
5966385Sbostic static void p_bre __P((struct parse *p, int end1, int end2));
6066385Sbostic static int p_simp_re __P((struct parse *p, int starordinary));
6166385Sbostic static int p_count __P((struct parse *p));
6266385Sbostic static void p_bracket __P((struct parse *p));
6366385Sbostic static void p_b_term __P((struct parse *p, cset *cs));
6466385Sbostic static void p_b_cclass __P((struct parse *p, cset *cs));
6566385Sbostic static void p_b_eclass __P((struct parse *p, cset *cs));
6666385Sbostic static char p_b_symbol __P((struct parse *p));
6766385Sbostic static char p_b_coll_elem __P((struct parse *p, int endc));
6866385Sbostic static char othercase __P((int ch));
6966385Sbostic static void bothcases __P((struct parse *p, int ch));
7066385Sbostic static void ordinary __P((struct parse *p, int ch));
7166385Sbostic static void nonnewline __P((struct parse *p));
7266385Sbostic static void repeat __P((struct parse *p, sopno start, int from, int to));
7366385Sbostic static int seterr __P((struct parse *p, int e));
7466385Sbostic static cset *allocset __P((struct parse *p));
7566385Sbostic static void freeset __P((struct parse *p, cset *cs));
7666385Sbostic static int freezeset __P((struct parse *p, cset *cs));
7766385Sbostic static int firstch __P((struct parse *p, cset *cs));
7866385Sbostic static int nch __P((struct parse *p, cset *cs));
7966385Sbostic static void mcadd __P((struct parse *p, cset *cs, char *cp));
8066385Sbostic static void mcsub __P((cset *cs, char *cp));
8166385Sbostic static int mcin __P((cset *cs, char *cp));
8266385Sbostic static char *mcfind __P((cset *cs, char *cp));
8366385Sbostic static void mcinvert __P((struct parse *p, cset *cs));
8466385Sbostic static void mccase __P((struct parse *p, cset *cs));
8566385Sbostic static int isinsets __P((struct re_guts *g, int c));
8666385Sbostic static int samesets __P((struct re_guts *g, int c1, int c2));
8766385Sbostic static void categorize __P((struct parse *p, struct re_guts *g));
8866385Sbostic static sopno dupl __P((struct parse *p, sopno start, sopno finish));
8966385Sbostic static void doemit __P((struct parse *p, sop op, size_t opnd));
9066385Sbostic static void doinsert __P((struct parse *p, sop op, size_t opnd, sopno pos));
9166385Sbostic static void dofwd __P((struct parse *p, sopno pos, sop value));
9266385Sbostic static void enlarge __P((struct parse *p, sopno size));
9366385Sbostic static void stripsnug __P((struct parse *p, struct re_guts *g));
9466385Sbostic static void findmust __P((struct parse *p, struct re_guts *g));
9566385Sbostic static sopno pluscount __P((struct parse *p, struct re_guts *g));
9660201Sbostic
9766362Sbostic #ifdef __cplusplus
9866362Sbostic }
9966362Sbostic #endif
10066362Sbostic /* ========= end header generated by ./mkh ========= */
10166362Sbostic
10260201Sbostic static char nuls[10]; /* place to point scanner in event of error */
10360201Sbostic
10455847Sbostic /*
10555847Sbostic * macros for use with parse structure
10655847Sbostic * BEWARE: these know that the parse structure is named `p' !!!
10755847Sbostic */
10860201Sbostic #define PEEK() (*p->next)
10960201Sbostic #define PEEK2() (*(p->next+1))
11060201Sbostic #define MORE() (p->next < p->end)
11160201Sbostic #define MORE2() (p->next+1 < p->end)
11260201Sbostic #define SEE(c) (MORE() && PEEK() == (c))
11360201Sbostic #define SEETWO(a, b) (MORE() && MORE2() && PEEK() == (a) && PEEK2() == (b))
11455847Sbostic #define EAT(c) ((SEE(c)) ? (NEXT(), 1) : 0)
11555847Sbostic #define EATTWO(a, b) ((SEETWO(a, b)) ? (NEXT2(), 1) : 0)
11655847Sbostic #define NEXT() (p->next++)
11755847Sbostic #define NEXT2() (p->next += 2)
11855847Sbostic #define NEXTn(n) (p->next += (n))
11960201Sbostic #define GETNEXT() (*p->next++)
12055847Sbostic #define SETERROR(e) seterr(p, (e))
12155847Sbostic #define REQUIRE(co, e) ((co) || SETERROR(e))
12260201Sbostic #define MUSTSEE(c, e) (REQUIRE(MORE() && PEEK() == (c), e))
12360201Sbostic #define MUSTEAT(c, e) (REQUIRE(MORE() && GETNEXT() == (c), e))
12460201Sbostic #define MUSTNOTSEE(c, e) (REQUIRE(!MORE() || PEEK() != (c), e))
12566362Sbostic #define EMIT(op, sopnd) doemit(p, (sop)(op), (size_t)(sopnd))
12666362Sbostic #define INSERT(op, pos) doinsert(p, (sop)(op), HERE()-(pos)+1, pos)
12760201Sbostic #define AHEAD(pos) dofwd(p, pos, HERE()-(pos))
12860201Sbostic #define ASTERN(sop, pos) EMIT(sop, HERE()-pos)
12955847Sbostic #define HERE() (p->slen)
13055847Sbostic #define THERE() (p->slen - 1)
13166381Sbostic #define THERETHERE() (p->slen - 2)
13255847Sbostic #define DROP(n) (p->slen -= (n))
13355847Sbostic
13460201Sbostic #ifndef NDEBUG
13560201Sbostic static int never = 0; /* for use in asserts; shuts lint up */
13666362Sbostic #else
13766362Sbostic #define never 0 /* some <assert.h>s have bugs too */
13860201Sbostic #endif
13956357Sbostic
14055847Sbostic /*
14155847Sbostic - regcomp - interface for parser and compilation
14266362Sbostic = extern int regcomp(regex_t *, const char *, int);
14360201Sbostic = #define REG_BASIC 0000
14460201Sbostic = #define REG_EXTENDED 0001
14560201Sbostic = #define REG_ICASE 0002
14660201Sbostic = #define REG_NOSUB 0004
14760201Sbostic = #define REG_NEWLINE 0010
14860201Sbostic = #define REG_NOSPEC 0020
14960201Sbostic = #define REG_PEND 0040
15060201Sbostic = #define REG_DUMP 0200
15155847Sbostic */
15255847Sbostic int /* 0 success, otherwise REG_something */
regcomp(preg,pattern,cflags)15355847Sbostic regcomp(preg, pattern, cflags)
15455847Sbostic regex_t *preg;
15555847Sbostic const char *pattern;
15655847Sbostic int cflags;
15755847Sbostic {
15855847Sbostic struct parse pa;
15955847Sbostic register struct re_guts *g;
16055847Sbostic register struct parse *p = &pa;
16155847Sbostic register int i;
16260201Sbostic register size_t len;
16366362Sbostic #ifdef REDEBUG
16466362Sbostic # define GOODFLAGS(f) (f)
16566362Sbostic #else
16666362Sbostic # define GOODFLAGS(f) ((f)&~REG_DUMP)
16766362Sbostic #endif
16855847Sbostic
16966362Sbostic cflags = GOODFLAGS(cflags);
17060201Sbostic if ((cflags®_EXTENDED) && (cflags®_NOSPEC))
17160201Sbostic return(REG_INVARG);
17260201Sbostic
17360201Sbostic if (cflags®_PEND) {
17460201Sbostic if (preg->re_endp < pattern)
17560201Sbostic return(REG_INVARG);
17660201Sbostic len = preg->re_endp - pattern;
17760201Sbostic } else
17860201Sbostic len = strlen((char *)pattern);
17960201Sbostic
18055847Sbostic /* do the mallocs early so failure handling is easy */
18160201Sbostic g = (struct re_guts *)malloc(sizeof(struct re_guts) +
18260201Sbostic (NC-1)*sizeof(cat_t));
18355847Sbostic if (g == NULL)
18455847Sbostic return(REG_ESPACE);
18560201Sbostic p->ssize = len/(size_t)2*(size_t)3 + (size_t)1; /* ugh */
18655847Sbostic p->strip = (sop *)malloc(p->ssize * sizeof(sop));
18755847Sbostic p->slen = 0;
18855847Sbostic if (p->strip == NULL) {
18955847Sbostic free((char *)g);
19055847Sbostic return(REG_ESPACE);
19155847Sbostic }
19255847Sbostic
19355847Sbostic /* set things up */
19455847Sbostic p->g = g;
19560201Sbostic p->next = (char *)pattern; /* convenience; we do not modify it */
19660201Sbostic p->end = p->next + len;
19755847Sbostic p->error = 0;
19855847Sbostic p->ncsalloc = 0;
19955847Sbostic for (i = 0; i < NPAREN; i++) {
20055847Sbostic p->pbegin[i] = 0;
20155847Sbostic p->pend[i] = 0;
20255847Sbostic }
20360201Sbostic g->csetsize = NC;
20455847Sbostic g->sets = NULL;
20555847Sbostic g->setbits = NULL;
20655847Sbostic g->ncsets = 0;
20755847Sbostic g->cflags = cflags;
20855847Sbostic g->iflags = 0;
20960201Sbostic g->nbol = 0;
21060201Sbostic g->neol = 0;
21155847Sbostic g->must = NULL;
21255847Sbostic g->mlen = 0;
21355847Sbostic g->nsub = 0;
21455847Sbostic g->ncategories = 1; /* category 0 is "everything else" */
21560610Sbostic g->categories = &g->catspace[-(CHAR_MIN)];
21660201Sbostic (void) memset((char *)g->catspace, 0, NC*sizeof(cat_t));
21755847Sbostic g->backrefs = 0;
21855847Sbostic
21955847Sbostic /* do it */
22055847Sbostic EMIT(OEND, 0);
22155847Sbostic g->firststate = THERE();
22255847Sbostic if (cflags®_EXTENDED)
22360201Sbostic p_ere(p, OUT);
22460201Sbostic else if (cflags®_NOSPEC)
22560201Sbostic p_str(p);
22655847Sbostic else
22760201Sbostic p_bre(p, OUT, OUT);
22855847Sbostic EMIT(OEND, 0);
22955847Sbostic g->laststate = THERE();
23055847Sbostic
23155847Sbostic /* tidy up loose ends and fill things in */
23255847Sbostic categorize(p, g);
23355847Sbostic stripsnug(p, g);
23455847Sbostic findmust(p, g);
23555847Sbostic g->nplus = pluscount(p, g);
23655847Sbostic g->magic = MAGIC2;
23755847Sbostic preg->re_nsub = g->nsub;
23855847Sbostic preg->re_g = g;
23955847Sbostic preg->re_magic = MAGIC1;
24056355Sbostic #ifndef REDEBUG
24155847Sbostic /* not debugging, so can't rely on the assert() in regexec() */
24255847Sbostic if (g->iflags&BAD)
24355847Sbostic SETERROR(REG_ASSERT);
24455847Sbostic #endif
24555847Sbostic
24655847Sbostic /* win or lose, we're done */
24755847Sbostic if (p->error != 0) /* lose */
24855847Sbostic regfree(preg);
24955847Sbostic return(p->error);
25055847Sbostic }
25155847Sbostic
25255847Sbostic /*
25355847Sbostic - p_ere - ERE parser top level, concatenation and alternation
25460201Sbostic == static void p_ere(register struct parse *p, int stop);
25555847Sbostic */
25655847Sbostic static void
p_ere(p,stop)25755847Sbostic p_ere(p, stop)
25855847Sbostic register struct parse *p;
25960201Sbostic int stop; /* character this ERE should end at */
26055847Sbostic {
26160201Sbostic register char c;
26255847Sbostic register sopno prevback;
26355847Sbostic register sopno prevfwd;
26455847Sbostic register sopno conc;
26555847Sbostic register int first = 1; /* is this the first alternative? */
26655847Sbostic
26755847Sbostic for (;;) {
26855847Sbostic /* do a bunch of concatenated expressions */
26955847Sbostic conc = HERE();
27060201Sbostic while (MORE() && (c = PEEK()) != '|' && c != stop)
27155847Sbostic p_ere_exp(p);
27255847Sbostic REQUIRE(HERE() != conc, REG_EMPTY); /* require nonempty */
27355847Sbostic
27455847Sbostic if (!EAT('|'))
27555847Sbostic break; /* NOTE BREAK OUT */
27655847Sbostic
27755847Sbostic if (first) {
27855847Sbostic INSERT(OCH_, conc); /* offset is wrong */
27955847Sbostic prevfwd = conc;
28055847Sbostic prevback = conc;
28155847Sbostic first = 0;
28255847Sbostic }
28360201Sbostic ASTERN(OOR1, prevback);
28455847Sbostic prevback = THERE();
28560201Sbostic AHEAD(prevfwd); /* fix previous offset */
28655847Sbostic prevfwd = HERE();
28755847Sbostic EMIT(OOR2, 0); /* offset is very wrong */
28855847Sbostic }
28955847Sbostic
29055847Sbostic if (!first) { /* tail-end fixups */
29160201Sbostic AHEAD(prevfwd);
29260201Sbostic ASTERN(O_CH, prevback);
29355847Sbostic }
29455847Sbostic
29560201Sbostic assert(!MORE() || SEE(stop));
29655847Sbostic }
29755847Sbostic
29855847Sbostic /*
29955847Sbostic - p_ere_exp - parse one subERE, an atom possibly followed by a repetition op
30060201Sbostic == static void p_ere_exp(register struct parse *p);
30155847Sbostic */
30255847Sbostic static void
p_ere_exp(p)30355847Sbostic p_ere_exp(p)
30455847Sbostic register struct parse *p;
30555847Sbostic {
30660201Sbostic register char c;
30755847Sbostic register sopno pos;
30855847Sbostic register int count;
30955847Sbostic register int count2;
31055847Sbostic register sopno subno;
31155847Sbostic int wascaret = 0;
31255847Sbostic
31360201Sbostic assert(MORE()); /* caller should have ensured this */
31455847Sbostic c = GETNEXT();
31555847Sbostic
31655847Sbostic pos = HERE();
31755847Sbostic switch (c) {
31855847Sbostic case '(':
31960201Sbostic REQUIRE(MORE(), REG_EPAREN);
32055847Sbostic p->g->nsub++;
32155847Sbostic subno = p->g->nsub;
32255847Sbostic if (subno < NPAREN)
32355847Sbostic p->pbegin[subno] = HERE();
32455847Sbostic EMIT(OLPAREN, subno);
32555847Sbostic if (!SEE(')'))
32655847Sbostic p_ere(p, ')');
32755847Sbostic if (subno < NPAREN) {
32855847Sbostic p->pend[subno] = HERE();
32955847Sbostic assert(p->pend[subno] != 0);
33055847Sbostic }
33155847Sbostic EMIT(ORPAREN, subno);
33255847Sbostic MUSTEAT(')', REG_EPAREN);
33355847Sbostic break;
33455847Sbostic #ifndef POSIX_MISTAKE
33555847Sbostic case ')': /* happens only if no current unmatched ( */
33655847Sbostic /*
33755847Sbostic * You may ask, why the ifndef? Because I didn't notice
33855847Sbostic * this until slightly too late for 1003.2, and none of the
33955847Sbostic * other 1003.2 regular-expression reviewers noticed it at
34055847Sbostic * all. So an unmatched ) is legal POSIX, at least until
34155847Sbostic * we can get it fixed.
34255847Sbostic */
34355847Sbostic SETERROR(REG_EPAREN);
34455847Sbostic break;
34555847Sbostic #endif
34655847Sbostic case '^':
34755847Sbostic EMIT(OBOL, 0);
34855847Sbostic p->g->iflags |= USEBOL;
34960201Sbostic p->g->nbol++;
35055847Sbostic wascaret = 1;
35155847Sbostic break;
35255847Sbostic case '$':
35355847Sbostic EMIT(OEOL, 0);
35455847Sbostic p->g->iflags |= USEEOL;
35560201Sbostic p->g->neol++;
35655847Sbostic break;
35755847Sbostic case '|':
35855847Sbostic SETERROR(REG_EMPTY);
35955847Sbostic break;
36055847Sbostic case '*':
36155847Sbostic case '+':
36255847Sbostic case '?':
36355847Sbostic SETERROR(REG_BADRPT);
36455847Sbostic break;
36555847Sbostic case '.':
36655847Sbostic if (p->g->cflags®_NEWLINE)
36755847Sbostic nonnewline(p);
36855847Sbostic else
36955847Sbostic EMIT(OANY, 0);
37055847Sbostic break;
37155847Sbostic case '[':
37255847Sbostic p_bracket(p);
37355847Sbostic break;
37455847Sbostic case '\\':
37560201Sbostic REQUIRE(MORE(), REG_EESCAPE);
37655847Sbostic c = GETNEXT();
37760201Sbostic ordinary(p, c);
37855847Sbostic break;
37955847Sbostic case '{': /* okay as ordinary except if digit follows */
38060201Sbostic REQUIRE(!MORE() || !isdigit(PEEK()), REG_BADRPT);
38155847Sbostic /* FALLTHROUGH */
38255847Sbostic default:
38355847Sbostic ordinary(p, c);
38455847Sbostic break;
38555847Sbostic }
38655847Sbostic
38760201Sbostic if (!MORE())
38860201Sbostic return;
38955847Sbostic c = PEEK();
39060201Sbostic /* we call { a repetition if followed by a digit */
39160201Sbostic if (!( c == '*' || c == '+' || c == '?' ||
39260201Sbostic (c == '{' && MORE2() && isdigit(PEEK2())) ))
39355847Sbostic return; /* no repetition, we're done */
39455847Sbostic NEXT();
39555847Sbostic
39655847Sbostic REQUIRE(!wascaret, REG_BADRPT);
39755847Sbostic switch (c) {
39855847Sbostic case '*': /* implemented as +? */
39966381Sbostic /* this case does not require the (y|) trick, noKLUDGE */
40055847Sbostic INSERT(OPLUS_, pos);
40160201Sbostic ASTERN(O_PLUS, pos);
40255847Sbostic INSERT(OQUEST_, pos);
40360201Sbostic ASTERN(O_QUEST, pos);
40455847Sbostic break;
40555847Sbostic case '+':
40655847Sbostic INSERT(OPLUS_, pos);
40760201Sbostic ASTERN(O_PLUS, pos);
40855847Sbostic break;
40955847Sbostic case '?':
41066381Sbostic /* KLUDGE: emit y? as (y|) until subtle bug gets fixed */
41166381Sbostic INSERT(OCH_, pos); /* offset slightly wrong */
41266381Sbostic ASTERN(OOR1, pos); /* this one's right */
41366381Sbostic AHEAD(pos); /* fix the OCH_ */
41466381Sbostic EMIT(OOR2, 0); /* offset very wrong... */
41566381Sbostic AHEAD(THERE()); /* ...so fix it */
41666381Sbostic ASTERN(O_CH, THERETHERE());
41755847Sbostic break;
41855847Sbostic case '{':
41955847Sbostic count = p_count(p);
42055847Sbostic if (EAT(',')) {
42155847Sbostic if (isdigit(PEEK())) {
42255847Sbostic count2 = p_count(p);
42355847Sbostic REQUIRE(count <= count2, REG_BADBR);
42455847Sbostic } else /* single number with comma */
42555847Sbostic count2 = INFINITY;
42655847Sbostic } else /* just a single number */
42755847Sbostic count2 = count;
42855847Sbostic repeat(p, pos, count, count2);
42955847Sbostic if (!EAT('}')) { /* error heuristics */
43060201Sbostic while (MORE() && PEEK() != '}')
43155847Sbostic NEXT();
43260201Sbostic REQUIRE(MORE(), REG_EBRACE);
43360201Sbostic SETERROR(REG_BADBR);
43455847Sbostic }
43555847Sbostic break;
43655847Sbostic }
43755847Sbostic
43860201Sbostic if (!MORE())
43960201Sbostic return;
44055847Sbostic c = PEEK();
44160201Sbostic if (!( c == '*' || c == '+' || c == '?' ||
44260201Sbostic (c == '{' && MORE2() && isdigit(PEEK2())) ) )
44360201Sbostic return;
44460201Sbostic SETERROR(REG_BADRPT);
44555847Sbostic }
44655847Sbostic
44755847Sbostic /*
44860201Sbostic - p_str - string (no metacharacters) "parser"
44960201Sbostic == static void p_str(register struct parse *p);
45060201Sbostic */
45160201Sbostic static void
p_str(p)45260201Sbostic p_str(p)
45360201Sbostic register struct parse *p;
45460201Sbostic {
45560201Sbostic REQUIRE(MORE(), REG_EMPTY);
45660201Sbostic while (MORE())
45760201Sbostic ordinary(p, GETNEXT());
45860201Sbostic }
45960201Sbostic
46060201Sbostic /*
46155847Sbostic - p_bre - BRE parser top level, anchoring and concatenation
46260201Sbostic == static void p_bre(register struct parse *p, register int end1, \
46360201Sbostic == register int end2);
46460201Sbostic * Giving end1 as OUT essentially eliminates the end1/end2 check.
46555847Sbostic *
46656355Sbostic * This implementation is a bit of a kludge, in that a trailing $ is first
46756355Sbostic * taken as an ordinary character and then revised to be an anchor. The
46856355Sbostic * only undesirable side effect is that '$' gets included as a character
46956355Sbostic * category in such cases. This is fairly harmless; not worth fixing.
47060201Sbostic * The amount of lookahead needed to avoid this kludge is excessive.
47155847Sbostic */
47255847Sbostic static void
p_bre(p,end1,end2)47355847Sbostic p_bre(p, end1, end2)
47455847Sbostic register struct parse *p;
47560201Sbostic register int end1; /* first terminating character */
47660201Sbostic register int end2; /* second terminating character */
47755847Sbostic {
47855847Sbostic register sopno start = HERE();
47955847Sbostic register int first = 1; /* first subexpression? */
48056355Sbostic register int wasdollar = 0;
48155847Sbostic
48255847Sbostic if (EAT('^')) {
48355847Sbostic EMIT(OBOL, 0);
48455847Sbostic p->g->iflags |= USEBOL;
48560201Sbostic p->g->nbol++;
48655847Sbostic }
48760201Sbostic while (MORE() && !SEETWO(end1, end2)) {
48855847Sbostic wasdollar = p_simp_re(p, first);
48955847Sbostic first = 0;
49055847Sbostic }
49155847Sbostic if (wasdollar) { /* oops, that was a trailing anchor */
49255847Sbostic DROP(1);
49355847Sbostic EMIT(OEOL, 0);
49455847Sbostic p->g->iflags |= USEEOL;
49560201Sbostic p->g->neol++;
49655847Sbostic }
49755847Sbostic
49855847Sbostic REQUIRE(HERE() != start, REG_EMPTY); /* require nonempty */
49955847Sbostic }
50055847Sbostic
50155847Sbostic /*
50255847Sbostic - p_simp_re - parse a simple RE, an atom possibly followed by a repetition
50360201Sbostic == static int p_simp_re(register struct parse *p, int starordinary);
50455847Sbostic */
50555847Sbostic static int /* was the simple RE an unbackslashed $? */
p_simp_re(p,starordinary)50655847Sbostic p_simp_re(p, starordinary)
50755847Sbostic register struct parse *p;
50855847Sbostic int starordinary; /* is a leading * an ordinary character? */
50955847Sbostic {
51055847Sbostic register int c;
51155847Sbostic register int count;
51255847Sbostic register int count2;
51355847Sbostic register sopno pos;
51455847Sbostic register int i;
51555847Sbostic register sopno subno;
51655847Sbostic # define BACKSL (1<<CHAR_BIT)
51755847Sbostic
51855847Sbostic pos = HERE(); /* repetion op, if any, covers from here */
51955847Sbostic
52060201Sbostic assert(MORE()); /* caller should have ensured this */
52155847Sbostic c = GETNEXT();
52260201Sbostic if (c == '\\') {
52360201Sbostic REQUIRE(MORE(), REG_EESCAPE);
52460201Sbostic c = BACKSL | (unsigned char)GETNEXT();
52560201Sbostic }
52655847Sbostic switch (c) {
52755847Sbostic case '.':
52855847Sbostic if (p->g->cflags®_NEWLINE)
52955847Sbostic nonnewline(p);
53055847Sbostic else
53155847Sbostic EMIT(OANY, 0);
53255847Sbostic break;
53355847Sbostic case '[':
53455847Sbostic p_bracket(p);
53555847Sbostic break;
53655847Sbostic case BACKSL|'{':
53755847Sbostic SETERROR(REG_BADRPT);
53855847Sbostic break;
53955847Sbostic case BACKSL|'(':
54055847Sbostic p->g->nsub++;
54155847Sbostic subno = p->g->nsub;
54255847Sbostic if (subno < NPAREN)
54355847Sbostic p->pbegin[subno] = HERE();
54455847Sbostic EMIT(OLPAREN, subno);
54560201Sbostic /* the MORE here is an error heuristic */
54660201Sbostic if (MORE() && !SEETWO('\\', ')'))
54755847Sbostic p_bre(p, '\\', ')');
54855847Sbostic if (subno < NPAREN) {
54955847Sbostic p->pend[subno] = HERE();
55055847Sbostic assert(p->pend[subno] != 0);
55155847Sbostic }
55255847Sbostic EMIT(ORPAREN, subno);
55355847Sbostic REQUIRE(EATTWO('\\', ')'), REG_EPAREN);
55455847Sbostic break;
55555847Sbostic case BACKSL|')': /* should not get here -- must be user */
55655847Sbostic case BACKSL|'}':
55755847Sbostic SETERROR(REG_EPAREN);
55855847Sbostic break;
55955847Sbostic case BACKSL|'1':
56055847Sbostic case BACKSL|'2':
56155847Sbostic case BACKSL|'3':
56255847Sbostic case BACKSL|'4':
56355847Sbostic case BACKSL|'5':
56455847Sbostic case BACKSL|'6':
56555847Sbostic case BACKSL|'7':
56655847Sbostic case BACKSL|'8':
56755847Sbostic case BACKSL|'9':
56855847Sbostic i = (c&~BACKSL) - '0';
56955847Sbostic assert(i < NPAREN);
57055847Sbostic if (p->pend[i] != 0) {
57155847Sbostic assert(i <= p->g->nsub);
57255847Sbostic EMIT(OBACK_, i);
57355847Sbostic assert(p->pbegin[i] != 0);
57455847Sbostic assert(OP(p->strip[p->pbegin[i]]) == OLPAREN);
57555847Sbostic assert(OP(p->strip[p->pend[i]]) == ORPAREN);
57655847Sbostic (void) dupl(p, p->pbegin[i]+1, p->pend[i]);
57755847Sbostic EMIT(O_BACK, i);
57855847Sbostic } else
57955847Sbostic SETERROR(REG_ESUBREG);
58055847Sbostic p->g->backrefs = 1;
58155847Sbostic break;
58255847Sbostic case '*':
58355847Sbostic REQUIRE(starordinary, REG_BADRPT);
58455847Sbostic /* FALLTHROUGH */
58555847Sbostic default:
58660201Sbostic ordinary(p, c &~ BACKSL);
58755847Sbostic break;
58855847Sbostic }
58955847Sbostic
59055847Sbostic if (EAT('*')) { /* implemented as +? */
59166381Sbostic /* this case does not require the (y|) trick, noKLUDGE */
59255847Sbostic INSERT(OPLUS_, pos);
59360201Sbostic ASTERN(O_PLUS, pos);
59455847Sbostic INSERT(OQUEST_, pos);
59560201Sbostic ASTERN(O_QUEST, pos);
59655847Sbostic } else if (EATTWO('\\', '{')) {
59755847Sbostic count = p_count(p);
59855847Sbostic if (EAT(',')) {
59960201Sbostic if (MORE() && isdigit(PEEK())) {
60055847Sbostic count2 = p_count(p);
60155847Sbostic REQUIRE(count <= count2, REG_BADBR);
60255847Sbostic } else /* single number with comma */
60355847Sbostic count2 = INFINITY;
60455847Sbostic } else /* just a single number */
60555847Sbostic count2 = count;
60655847Sbostic repeat(p, pos, count, count2);
60755847Sbostic if (!EATTWO('\\', '}')) { /* error heuristics */
60860201Sbostic while (MORE() && !SEETWO('\\', '}'))
60955847Sbostic NEXT();
61060201Sbostic REQUIRE(MORE(), REG_EBRACE);
61160201Sbostic SETERROR(REG_BADBR);
61255847Sbostic }
61360201Sbostic } else if (c == (unsigned char)'$') /* $ (but not \$) ends it */
61455847Sbostic return(1);
61555847Sbostic
61655847Sbostic return(0);
61755847Sbostic }
61855847Sbostic
61955847Sbostic /*
62055847Sbostic - p_count - parse a repetition count
62160201Sbostic == static int p_count(register struct parse *p);
62255847Sbostic */
62355847Sbostic static int /* the value */
p_count(p)62455847Sbostic p_count(p)
62555847Sbostic register struct parse *p;
62655847Sbostic {
62755847Sbostic register int count = 0;
62855847Sbostic register int ndigits = 0;
62955847Sbostic
63060201Sbostic while (MORE() && isdigit(PEEK()) && count <= DUPMAX) {
63155847Sbostic count = count*10 + (GETNEXT() - '0');
63255847Sbostic ndigits++;
63355847Sbostic }
63455847Sbostic
63555847Sbostic REQUIRE(ndigits > 0 && count <= DUPMAX, REG_BADBR);
63655847Sbostic return(count);
63755847Sbostic }
63855847Sbostic
63955847Sbostic /*
64055847Sbostic - p_bracket - parse a bracketed character list
64160201Sbostic == static void p_bracket(register struct parse *p);
64255847Sbostic *
64355847Sbostic * Note a significant property of this code: if the allocset() did SETERROR,
64455847Sbostic * no set operations are done.
64555847Sbostic */
64655847Sbostic static void
p_bracket(p)64755847Sbostic p_bracket(p)
64855847Sbostic register struct parse *p;
64955847Sbostic {
65060201Sbostic register char c;
65155847Sbostic register cset *cs = allocset(p);
65255847Sbostic register int invert = 0;
65355847Sbostic
65460201Sbostic /* Dept of Truly Sickening Special-Case Kludges */
65560201Sbostic if (p->next + 5 < p->end && strncmp(p->next, "[:<:]]", 6) == 0) {
65660201Sbostic EMIT(OBOW, 0);
65760201Sbostic NEXTn(6);
65860201Sbostic return;
65960201Sbostic }
66060201Sbostic if (p->next + 5 < p->end && strncmp(p->next, "[:>:]]", 6) == 0) {
66160201Sbostic EMIT(OEOW, 0);
66260201Sbostic NEXTn(6);
66360201Sbostic return;
66460201Sbostic }
66560201Sbostic
66655847Sbostic if (EAT('^'))
66755847Sbostic invert++; /* make note to invert set at end */
66855847Sbostic if (EAT(']'))
66955847Sbostic CHadd(cs, ']');
67056618Sbostic else if (EAT('-'))
67156618Sbostic CHadd(cs, '-');
67260201Sbostic while (MORE() && PEEK() != ']' && !SEETWO('-', ']'))
67355847Sbostic p_b_term(p, cs);
67455847Sbostic if (EAT('-'))
67555847Sbostic CHadd(cs, '-');
67655847Sbostic MUSTEAT(']', REG_EBRACK);
67755847Sbostic
67860201Sbostic if (p->error != 0) /* don't mess things up further */
67960201Sbostic return;
68060201Sbostic
68160201Sbostic if (p->g->cflags®_ICASE) {
68255847Sbostic register int i;
68360201Sbostic register int ci;
68455847Sbostic
68555847Sbostic for (i = p->g->csetsize - 1; i >= 0; i--)
68660201Sbostic if (CHIN(cs, i) && isalpha(i)) {
68760201Sbostic ci = othercase(i);
68860201Sbostic if (ci != i)
68960201Sbostic CHadd(cs, ci);
69060201Sbostic }
69160201Sbostic if (cs->multis != NULL)
69260201Sbostic mccase(p, cs);
69360201Sbostic }
69460201Sbostic if (invert) {
69560201Sbostic register int i;
69660201Sbostic
69760201Sbostic for (i = p->g->csetsize - 1; i >= 0; i--)
69855847Sbostic if (CHIN(cs, i))
69955847Sbostic CHsub(cs, i);
70055847Sbostic else
70155847Sbostic CHadd(cs, i);
70255847Sbostic if (p->g->cflags®_NEWLINE)
70355847Sbostic CHsub(cs, '\n');
70455847Sbostic if (cs->multis != NULL)
70555847Sbostic mcinvert(p, cs);
70655847Sbostic }
70760201Sbostic
70855847Sbostic assert(cs->multis == NULL); /* xxx */
70960201Sbostic
71060201Sbostic if (nch(p, cs) == 1) { /* optimize singleton sets */
71160201Sbostic ordinary(p, firstch(p, cs));
71260201Sbostic freeset(p, cs);
71360201Sbostic } else
71460201Sbostic EMIT(OANYOF, freezeset(p, cs));
71555847Sbostic }
71655847Sbostic
71755847Sbostic /*
71855847Sbostic - p_b_term - parse one term of a bracketed character list
71960201Sbostic == static void p_b_term(register struct parse *p, register cset *cs);
72055847Sbostic */
72155847Sbostic static void
p_b_term(p,cs)72255847Sbostic p_b_term(p, cs)
72355847Sbostic register struct parse *p;
72455847Sbostic register cset *cs;
72555847Sbostic {
72660201Sbostic register char c;
72760201Sbostic register char start, finish;
72855847Sbostic register int i;
72955847Sbostic
73055847Sbostic /* classify what we've got */
73160201Sbostic switch ((MORE()) ? PEEK() : '\0') {
73255847Sbostic case '[':
73360201Sbostic c = (MORE2()) ? PEEK2() : '\0';
73455847Sbostic break;
73555847Sbostic case '-':
73655847Sbostic SETERROR(REG_ERANGE);
73755847Sbostic return; /* NOTE RETURN */
73855847Sbostic break;
73955847Sbostic default:
74055847Sbostic c = '\0';
74155847Sbostic break;
74255847Sbostic }
74355847Sbostic
74455847Sbostic switch (c) {
74555847Sbostic case ':': /* character class */
74655847Sbostic NEXT2();
74760201Sbostic REQUIRE(MORE(), REG_EBRACK);
74855847Sbostic c = PEEK();
74955847Sbostic REQUIRE(c != '-' && c != ']', REG_ECTYPE);
75055847Sbostic p_b_cclass(p, cs);
75160201Sbostic REQUIRE(MORE(), REG_EBRACK);
75255847Sbostic REQUIRE(EATTWO(':', ']'), REG_ECTYPE);
75355847Sbostic break;
75455847Sbostic case '=': /* equivalence class */
75555847Sbostic NEXT2();
75660201Sbostic REQUIRE(MORE(), REG_EBRACK);
75755847Sbostic c = PEEK();
75855847Sbostic REQUIRE(c != '-' && c != ']', REG_ECOLLATE);
75955847Sbostic p_b_eclass(p, cs);
76060201Sbostic REQUIRE(MORE(), REG_EBRACK);
76155847Sbostic REQUIRE(EATTWO('=', ']'), REG_ECOLLATE);
76255847Sbostic break;
76355847Sbostic default: /* symbol, ordinary character, or range */
76455847Sbostic /* xxx revision needed for multichar stuff */
76555847Sbostic start = p_b_symbol(p);
76660201Sbostic if (SEE('-') && MORE2() && PEEK2() != ']') {
76755847Sbostic /* range */
76855847Sbostic NEXT();
76955847Sbostic if (EAT('-'))
77055847Sbostic finish = '-';
77155847Sbostic else
77255847Sbostic finish = p_b_symbol(p);
77355847Sbostic } else
77455847Sbostic finish = start;
77560201Sbostic /* xxx what about signed chars here... */
77655847Sbostic REQUIRE(start <= finish, REG_ERANGE);
77760201Sbostic for (i = start; i <= finish; i++)
77855847Sbostic CHadd(cs, i);
77955847Sbostic break;
78055847Sbostic }
78155847Sbostic }
78255847Sbostic
78355847Sbostic /*
78455847Sbostic - p_b_cclass - parse a character-class name and deal with it
78560201Sbostic == static void p_b_cclass(register struct parse *p, register cset *cs);
78655847Sbostic */
78755847Sbostic static void
p_b_cclass(p,cs)78855847Sbostic p_b_cclass(p, cs)
78955847Sbostic register struct parse *p;
79055847Sbostic register cset *cs;
79155847Sbostic {
79260201Sbostic register char *sp = p->next;
79355847Sbostic register struct cclass *cp;
79460201Sbostic register size_t len;
79560201Sbostic register char *u;
79660201Sbostic register char c;
79755847Sbostic
79860201Sbostic while (MORE() && isalpha(PEEK()))
79960201Sbostic NEXT();
80060201Sbostic len = p->next - sp;
80155847Sbostic for (cp = cclasses; cp->name != NULL; cp++)
80260201Sbostic if (strncmp(cp->name, sp, len) == 0 && cp->name[len] == '\0')
80355847Sbostic break;
80455847Sbostic if (cp->name == NULL) {
80555847Sbostic /* oops, didn't find it */
80655847Sbostic SETERROR(REG_ECTYPE);
80755847Sbostic return;
80855847Sbostic }
80955847Sbostic
81060201Sbostic u = cp->chars;
81155847Sbostic while ((c = *u++) != '\0')
81255847Sbostic CHadd(cs, c);
81360201Sbostic for (u = cp->multis; *u != '\0'; u += strlen(u) + 1)
81466362Sbostic MCadd(p, cs, u);
81555847Sbostic }
81655847Sbostic
81755847Sbostic /*
81855847Sbostic - p_b_eclass - parse an equivalence-class name and deal with it
81960201Sbostic == static void p_b_eclass(register struct parse *p, register cset *cs);
82055847Sbostic *
82155847Sbostic * This implementation is incomplete. xxx
82255847Sbostic */
82355847Sbostic static void
p_b_eclass(p,cs)82455847Sbostic p_b_eclass(p, cs)
82555847Sbostic register struct parse *p;
82655847Sbostic register cset *cs;
82755847Sbostic {
82860201Sbostic register char c;
82955847Sbostic
83055847Sbostic c = p_b_coll_elem(p, '=');
83155847Sbostic CHadd(cs, c);
83255847Sbostic }
83355847Sbostic
83455847Sbostic /*
83555847Sbostic - p_b_symbol - parse a character or [..]ed multicharacter collating symbol
83660201Sbostic == static char p_b_symbol(register struct parse *p);
83755847Sbostic */
83860201Sbostic static char /* value of symbol */
p_b_symbol(p)83955847Sbostic p_b_symbol(p)
84055847Sbostic register struct parse *p;
84155847Sbostic {
84260201Sbostic register char value;
84355847Sbostic
84460201Sbostic REQUIRE(MORE(), REG_EBRACK);
84560201Sbostic if (!EATTWO('[', '.'))
84655847Sbostic return(GETNEXT());
84755847Sbostic
84855847Sbostic /* collating symbol */
84955847Sbostic value = p_b_coll_elem(p, '.');
85055847Sbostic REQUIRE(EATTWO('.', ']'), REG_ECOLLATE);
85155847Sbostic return(value);
85255847Sbostic }
85355847Sbostic
85455847Sbostic /*
85555847Sbostic - p_b_coll_elem - parse a collating-element name and look it up
85660201Sbostic == static char p_b_coll_elem(register struct parse *p, int endc);
85755847Sbostic */
85860201Sbostic static char /* value of collating element */
p_b_coll_elem(p,endc)85955847Sbostic p_b_coll_elem(p, endc)
86055847Sbostic register struct parse *p;
86160201Sbostic int endc; /* name ended by endc,']' */
86255847Sbostic {
86360201Sbostic register char *sp = p->next;
86455847Sbostic register struct cname *cp;
86555847Sbostic register int len;
86660201Sbostic register char c;
86755847Sbostic
86860201Sbostic while (MORE() && !SEETWO(endc, ']'))
86955847Sbostic NEXT();
87060201Sbostic if (!MORE()) {
87155847Sbostic SETERROR(REG_EBRACK);
87255847Sbostic return(0);
87355847Sbostic }
87455847Sbostic len = p->next - sp;
87555847Sbostic for (cp = cnames; cp->name != NULL; cp++)
87660201Sbostic if (strncmp(cp->name, sp, len) == 0 && cp->name[len] == '\0')
87755847Sbostic return(cp->code); /* known name */
87855847Sbostic if (len == 1)
87955847Sbostic return(*sp); /* single character */
88055847Sbostic SETERROR(REG_ECOLLATE); /* neither */
88155847Sbostic return(0);
88255847Sbostic }
88355847Sbostic
88455847Sbostic /*
88555847Sbostic - othercase - return the case counterpart of an alphabetic
88660201Sbostic == static char othercase(int ch);
88755847Sbostic */
88860201Sbostic static char /* if no counterpart, return ch */
othercase(ch)88955847Sbostic othercase(ch)
89060201Sbostic int ch;
89155847Sbostic {
89255847Sbostic assert(isalpha(ch));
89355847Sbostic if (isupper(ch))
89455847Sbostic return(tolower(ch));
89555847Sbostic else if (islower(ch))
89655847Sbostic return(toupper(ch));
89755847Sbostic else /* peculiar, but could happen */
89855847Sbostic return(ch);
89955847Sbostic }
90055847Sbostic
90155847Sbostic /*
90260201Sbostic - bothcases - emit a dualcase version of a two-case character
90360201Sbostic == static void bothcases(register struct parse *p, int ch);
90456355Sbostic *
90555847Sbostic * Boy, is this implementation ever a kludge...
90655847Sbostic */
90755847Sbostic static void
bothcases(p,ch)90855847Sbostic bothcases(p, ch)
90955847Sbostic register struct parse *p;
91060201Sbostic int ch;
91155847Sbostic {
91260201Sbostic register char *oldnext = p->next;
91360201Sbostic register char *oldend = p->end;
91460201Sbostic char bracket[3];
91555847Sbostic
91660201Sbostic assert(othercase(ch) != ch); /* p_bracket() would recurse */
91755847Sbostic p->next = bracket;
91860201Sbostic p->end = bracket+2;
91955847Sbostic bracket[0] = ch;
92055847Sbostic bracket[1] = ']';
92155847Sbostic bracket[2] = '\0';
92255847Sbostic p_bracket(p);
92355847Sbostic assert(p->next == bracket+2);
92455847Sbostic p->next = oldnext;
92560201Sbostic p->end = oldend;
92655847Sbostic }
92755847Sbostic
92855847Sbostic /*
92955847Sbostic - ordinary - emit an ordinary character
93060201Sbostic == static void ordinary(register struct parse *p, register int ch);
93155847Sbostic */
93255847Sbostic static void
ordinary(p,ch)93355847Sbostic ordinary(p, ch)
93455847Sbostic register struct parse *p;
93560201Sbostic register int ch;
93655847Sbostic {
93760201Sbostic register cat_t *cap = p->g->categories;
93855847Sbostic
93960201Sbostic if ((p->g->cflags®_ICASE) && isalpha(ch) && othercase(ch) != ch)
94055847Sbostic bothcases(p, ch);
94160201Sbostic else {
94260201Sbostic EMIT(OCHAR, (unsigned char)ch);
94360201Sbostic if (cap[ch] == 0)
94460201Sbostic cap[ch] = p->g->ncategories++;
94555847Sbostic }
94655847Sbostic }
94755847Sbostic
94855847Sbostic /*
94955847Sbostic - nonnewline - emit REG_NEWLINE version of OANY
95060201Sbostic == static void nonnewline(register struct parse *p);
95156355Sbostic *
95255847Sbostic * Boy, is this implementation ever a kludge...
95355847Sbostic */
95455847Sbostic static void
nonnewline(p)95555847Sbostic nonnewline(p)
95655847Sbostic register struct parse *p;
95755847Sbostic {
95860201Sbostic register char *oldnext = p->next;
95960201Sbostic register char *oldend = p->end;
96060201Sbostic char bracket[4];
96155847Sbostic
96255847Sbostic p->next = bracket;
96360201Sbostic p->end = bracket+3;
96455847Sbostic bracket[0] = '^';
96555847Sbostic bracket[1] = '\n';
96655847Sbostic bracket[2] = ']';
96755847Sbostic bracket[3] = '\0';
96855847Sbostic p_bracket(p);
96955847Sbostic assert(p->next == bracket+3);
97055847Sbostic p->next = oldnext;
97160201Sbostic p->end = oldend;
97255847Sbostic }
97355847Sbostic
97455847Sbostic /*
97555847Sbostic - repeat - generate code for a bounded repetition, recursively if needed
97660201Sbostic == static void repeat(register struct parse *p, sopno start, int from, int to);
97755847Sbostic */
97855847Sbostic static void
repeat(p,start,from,to)97955847Sbostic repeat(p, start, from, to)
98055847Sbostic register struct parse *p;
98155847Sbostic sopno start; /* operand from here to end of strip */
98255847Sbostic int from; /* repeated from this number */
98355847Sbostic int to; /* to this number of times (maybe INFINITY) */
98455847Sbostic {
98555847Sbostic register sopno finish = HERE();
98655847Sbostic # define N 2
98755847Sbostic # define INF 3
98855847Sbostic # define REP(f, t) ((f)*8 + (t))
98955847Sbostic # define MAP(n) (((n) <= 1) ? (n) : ((n) == INFINITY) ? INF : N)
99055847Sbostic register sopno copy;
99155847Sbostic
99255847Sbostic if (p->error != 0) /* head off possible runaway recursion */
99355847Sbostic return;
99455847Sbostic
99555847Sbostic assert(from <= to);
99655847Sbostic
99755847Sbostic switch (REP(MAP(from), MAP(to))) {
99855847Sbostic case REP(0, 0): /* must be user doing this */
99955847Sbostic DROP(finish-start); /* drop the operand */
100055847Sbostic break;
100155847Sbostic case REP(0, 1): /* as x{1,1}? */
100255847Sbostic case REP(0, N): /* as x{1,n}? */
100355847Sbostic case REP(0, INF): /* as x{1,}? */
100466381Sbostic /* KLUDGE: emit y? as (y|) until subtle bug gets fixed */
100566381Sbostic INSERT(OCH_, start); /* offset is wrong... */
100655847Sbostic repeat(p, start+1, 1, to);
100766381Sbostic ASTERN(OOR1, start);
100860201Sbostic AHEAD(start); /* ... fix it */
100966381Sbostic EMIT(OOR2, 0);
101066381Sbostic AHEAD(THERE());
101166381Sbostic ASTERN(O_CH, THERETHERE());
101255847Sbostic break;
101355847Sbostic case REP(1, 1): /* trivial case */
101455847Sbostic /* done */
101555847Sbostic break;
101655847Sbostic case REP(1, N): /* as x?x{1,n-1} */
101766381Sbostic /* KLUDGE: emit y? as (y|) until subtle bug gets fixed */
101866381Sbostic INSERT(OCH_, start);
101966381Sbostic ASTERN(OOR1, start);
102066381Sbostic AHEAD(start);
102166381Sbostic EMIT(OOR2, 0); /* offset very wrong... */
102266381Sbostic AHEAD(THERE()); /* ...so fix it */
102366381Sbostic ASTERN(O_CH, THERETHERE());
102455847Sbostic copy = dupl(p, start+1, finish+1);
102566381Sbostic assert(copy == finish+4);
102655847Sbostic repeat(p, copy, 1, to-1);
102755847Sbostic break;
102855847Sbostic case REP(1, INF): /* as x+ */
102955847Sbostic INSERT(OPLUS_, start);
103060201Sbostic ASTERN(O_PLUS, start);
103155847Sbostic break;
103255847Sbostic case REP(N, N): /* as xx{m-1,n-1} */
103355847Sbostic copy = dupl(p, start, finish);
103455847Sbostic repeat(p, copy, from-1, to-1);
103555847Sbostic break;
103655847Sbostic case REP(N, INF): /* as xx{n-1,INF} */
103755847Sbostic copy = dupl(p, start, finish);
103855847Sbostic repeat(p, copy, from-1, to);
103955847Sbostic break;
104055847Sbostic default: /* "can't happen" */
104155847Sbostic SETERROR(REG_ASSERT); /* just in case */
104255847Sbostic break;
104355847Sbostic }
104455847Sbostic }
104555847Sbostic
104655847Sbostic /*
104755847Sbostic - seterr - set an error condition
104860201Sbostic == static int seterr(register struct parse *p, int e);
104955847Sbostic */
105055847Sbostic static int /* useless but makes type checking happy */
seterr(p,e)105155847Sbostic seterr(p, e)
105255847Sbostic register struct parse *p;
105355847Sbostic int e;
105455847Sbostic {
105555847Sbostic if (p->error == 0) /* keep earliest error condition */
105655847Sbostic p->error = e;
105755847Sbostic p->next = nuls; /* try to bring things to a halt */
105860201Sbostic p->end = nuls;
105955847Sbostic return(0); /* make the return value well-defined */
106055847Sbostic }
106155847Sbostic
106255847Sbostic /*
106355847Sbostic - allocset - allocate a set of characters for []
106460201Sbostic == static cset *allocset(register struct parse *p);
106555847Sbostic */
106655847Sbostic static cset *
allocset(p)106755847Sbostic allocset(p)
106855847Sbostic register struct parse *p;
106955847Sbostic {
107055847Sbostic register int no = p->g->ncsets++;
107155847Sbostic register size_t nc;
107255847Sbostic register size_t nbytes;
107355847Sbostic register cset *cs;
107455847Sbostic register size_t css = (size_t)p->g->csetsize;
107566362Sbostic register int i;
107655847Sbostic
107755847Sbostic if (no >= p->ncsalloc) { /* need another column of space */
107855847Sbostic p->ncsalloc += CHAR_BIT;
107955847Sbostic nc = p->ncsalloc;
108055847Sbostic assert(nc % CHAR_BIT == 0);
108155847Sbostic nbytes = nc / CHAR_BIT * css;
108255847Sbostic if (p->g->sets == NULL)
108355847Sbostic p->g->sets = (cset *)malloc(nc * sizeof(cset));
108455847Sbostic else
108555847Sbostic p->g->sets = (cset *)realloc((char *)p->g->sets,
108655847Sbostic nc * sizeof(cset));
108755847Sbostic if (p->g->setbits == NULL)
108866362Sbostic p->g->setbits = (uch *)malloc(nbytes);
108966362Sbostic else {
109066362Sbostic p->g->setbits = (uch *)realloc((char *)p->g->setbits,
109155847Sbostic nbytes);
109266362Sbostic /* xxx this isn't right if setbits is now NULL */
109366362Sbostic for (i = 0; i < no; i++)
109466362Sbostic p->g->sets[i].ptr = p->g->setbits + css*(i/CHAR_BIT);
109566362Sbostic }
109655847Sbostic if (p->g->sets != NULL && p->g->setbits != NULL)
109755847Sbostic (void) memset((char *)p->g->setbits + (nbytes - css),
109855847Sbostic 0, css);
109955847Sbostic else {
110055847Sbostic no = 0;
110155847Sbostic SETERROR(REG_ESPACE);
110255847Sbostic /* caller's responsibility not to do set ops */
110355847Sbostic }
110455847Sbostic }
110555847Sbostic
110655847Sbostic assert(p->g->sets != NULL); /* xxx */
110755847Sbostic cs = &p->g->sets[no];
110855847Sbostic cs->ptr = p->g->setbits + css*((no)/CHAR_BIT);
110955847Sbostic cs->mask = 1 << ((no) % CHAR_BIT);
111055847Sbostic cs->hash = 0;
111155847Sbostic cs->smultis = 0;
111255847Sbostic cs->multis = NULL;
111355847Sbostic
111455847Sbostic return(cs);
111555847Sbostic }
111655847Sbostic
111755847Sbostic /*
111860201Sbostic - freeset - free a now-unused set
111960201Sbostic == static void freeset(register struct parse *p, register cset *cs);
112060201Sbostic */
112160201Sbostic static void
freeset(p,cs)112260201Sbostic freeset(p, cs)
112360201Sbostic register struct parse *p;
112460201Sbostic register cset *cs;
112560201Sbostic {
112660201Sbostic register int i;
112760201Sbostic register cset *top = &p->g->sets[p->g->ncsets];
112860201Sbostic register size_t css = (size_t)p->g->csetsize;
112960201Sbostic
113060201Sbostic for (i = 0; i < css; i++)
113160201Sbostic CHsub(cs, i);
113260201Sbostic if (cs == top-1) /* recover only the easy case */
113360201Sbostic p->g->ncsets--;
113460201Sbostic }
113560201Sbostic
113660201Sbostic /*
113755847Sbostic - freezeset - final processing on a set of characters
113860201Sbostic == static int freezeset(register struct parse *p, register cset *cs);
113955847Sbostic *
114055847Sbostic * The main task here is merging identical sets. This is usually a waste
114155847Sbostic * of time (although the hash code minimizes the overhead), but can win
114255847Sbostic * big if REG_ICASE is being used. REG_ICASE, by the way, is why the hash
114355847Sbostic * is done using addition rather than xor -- all ASCII [aA] sets xor to
114455847Sbostic * the same value!
114555847Sbostic */
114655847Sbostic static int /* set number */
freezeset(p,cs)114755847Sbostic freezeset(p, cs)
114855847Sbostic register struct parse *p;
114955847Sbostic register cset *cs;
115055847Sbostic {
115166362Sbostic register uch h = cs->hash;
115255847Sbostic register int i;
115355847Sbostic register cset *top = &p->g->sets[p->g->ncsets];
115455847Sbostic register cset *cs2;
115555847Sbostic register size_t css = (size_t)p->g->csetsize;
115655847Sbostic
115755847Sbostic /* look for an earlier one which is the same */
115855847Sbostic for (cs2 = &p->g->sets[0]; cs2 < top; cs2++)
115955847Sbostic if (cs2->hash == h && cs2 != cs) {
116055847Sbostic /* maybe */
116155847Sbostic for (i = 0; i < css; i++)
116255847Sbostic if (!!CHIN(cs2, i) != !!CHIN(cs, i))
116355847Sbostic break; /* no */
116455847Sbostic if (i == css)
116555847Sbostic break; /* yes */
116655847Sbostic }
116755847Sbostic
116855847Sbostic if (cs2 < top) { /* found one */
116960201Sbostic freeset(p, cs);
117055847Sbostic cs = cs2;
117155847Sbostic }
117255847Sbostic
117355847Sbostic return((int)(cs - p->g->sets));
117455847Sbostic }
117555847Sbostic
117655847Sbostic /*
117760201Sbostic - firstch - return first character in a set (which must have at least one)
117860201Sbostic == static int firstch(register struct parse *p, register cset *cs);
117960201Sbostic */
118060201Sbostic static int /* character; there is no "none" value */
firstch(p,cs)118160201Sbostic firstch(p, cs)
118260201Sbostic register struct parse *p;
118360201Sbostic register cset *cs;
118460201Sbostic {
118560201Sbostic register int i;
118660201Sbostic register size_t css = (size_t)p->g->csetsize;
118760201Sbostic
118860201Sbostic for (i = 0; i < css; i++)
118960201Sbostic if (CHIN(cs, i))
119060201Sbostic return((char)i);
119160201Sbostic assert(never);
119260201Sbostic return(0); /* arbitrary */
119360201Sbostic }
119460201Sbostic
119560201Sbostic /*
119660201Sbostic - nch - number of characters in a set
119760201Sbostic == static int nch(register struct parse *p, register cset *cs);
119860201Sbostic */
119960201Sbostic static int
nch(p,cs)120060201Sbostic nch(p, cs)
120160201Sbostic register struct parse *p;
120260201Sbostic register cset *cs;
120360201Sbostic {
120460201Sbostic register int i;
120560201Sbostic register size_t css = (size_t)p->g->csetsize;
120660201Sbostic register int n = 0;
120760201Sbostic
120860201Sbostic for (i = 0; i < css; i++)
120960201Sbostic if (CHIN(cs, i))
121060201Sbostic n++;
121160201Sbostic return(n);
121260201Sbostic }
121360201Sbostic
121460201Sbostic /*
121555847Sbostic - mcadd - add a collating element to a cset
121660201Sbostic == static void mcadd(register struct parse *p, register cset *cs, \
121760201Sbostic == register char *cp);
121855847Sbostic */
121955847Sbostic static void
mcadd(p,cs,cp)122055847Sbostic mcadd(p, cs, cp)
122155847Sbostic register struct parse *p;
122255847Sbostic register cset *cs;
122360201Sbostic register char *cp;
122455847Sbostic {
122555847Sbostic register size_t oldend = cs->smultis;
122655847Sbostic
122760201Sbostic cs->smultis += strlen(cp) + 1;
122855847Sbostic if (cs->multis == NULL)
122960201Sbostic cs->multis = malloc(cs->smultis);
123055847Sbostic else
123160201Sbostic cs->multis = realloc(cs->multis, cs->smultis);
123255847Sbostic if (cs->multis == NULL) {
123355847Sbostic SETERROR(REG_ESPACE);
123455847Sbostic return;
123555847Sbostic }
123655847Sbostic
123760201Sbostic (void) strcpy(cs->multis + oldend - 1, cp);
123855847Sbostic cs->multis[cs->smultis - 1] = '\0';
123955847Sbostic }
124055847Sbostic
124155847Sbostic /*
124255847Sbostic - mcsub - subtract a collating element from a cset
124360201Sbostic == static void mcsub(register cset *cs, register char *cp);
124455847Sbostic */
124555847Sbostic static void
mcsub(cs,cp)124660201Sbostic mcsub(cs, cp)
124755847Sbostic register cset *cs;
124860201Sbostic register char *cp;
124955847Sbostic {
125060201Sbostic register char *fp = mcfind(cs, cp);
125160201Sbostic register size_t len = strlen(fp);
125255847Sbostic
125360201Sbostic assert(fp != NULL);
125460201Sbostic (void) memmove(fp, fp + len + 1,
125555847Sbostic cs->smultis - (fp + len + 1 - cs->multis));
125655847Sbostic cs->smultis -= len;
125755847Sbostic
125855847Sbostic if (cs->smultis == 0) {
125960201Sbostic free(cs->multis);
126055847Sbostic cs->multis = NULL;
126155847Sbostic return;
126255847Sbostic }
126355847Sbostic
126460201Sbostic cs->multis = realloc(cs->multis, cs->smultis);
126555847Sbostic assert(cs->multis != NULL);
126655847Sbostic }
126755847Sbostic
126855847Sbostic /*
126955847Sbostic - mcin - is a collating element in a cset?
127060201Sbostic == static int mcin(register cset *cs, register char *cp);
127155847Sbostic */
127255847Sbostic static int
mcin(cs,cp)127360201Sbostic mcin(cs, cp)
127455847Sbostic register cset *cs;
127560201Sbostic register char *cp;
127655847Sbostic {
127755847Sbostic return(mcfind(cs, cp) != NULL);
127855847Sbostic }
127955847Sbostic
128055847Sbostic /*
128155847Sbostic - mcfind - find a collating element in a cset
128260201Sbostic == static char *mcfind(register cset *cs, register char *cp);
128355847Sbostic */
128460201Sbostic static char *
mcfind(cs,cp)128555847Sbostic mcfind(cs, cp)
128655847Sbostic register cset *cs;
128760201Sbostic register char *cp;
128855847Sbostic {
128960201Sbostic register char *p;
129055847Sbostic
129155847Sbostic if (cs->multis == NULL)
129255847Sbostic return(NULL);
129360201Sbostic for (p = cs->multis; *p != '\0'; p += strlen(p) + 1)
129460201Sbostic if (strcmp(cp, p) == 0)
129555847Sbostic return(p);
129655847Sbostic return(NULL);
129755847Sbostic }
129855847Sbostic
129955847Sbostic /*
130055847Sbostic - mcinvert - invert the list of collating elements in a cset
130166362Sbostic == static void mcinvert(register struct parse *p, register cset *cs);
130255847Sbostic *
130355847Sbostic * This would have to know the set of possibilities. Implementation
130455847Sbostic * is deferred.
130555847Sbostic */
130655847Sbostic static void
mcinvert(p,cs)130766362Sbostic mcinvert(p, cs)
130866362Sbostic register struct parse *p;
130955847Sbostic register cset *cs;
131055847Sbostic {
131155847Sbostic assert(cs->multis == NULL); /* xxx */
131255847Sbostic }
131355847Sbostic
131455847Sbostic /*
131560201Sbostic - mccase - add case counterparts of the list of collating elements in a cset
131666362Sbostic == static void mccase(register struct parse *p, register cset *cs);
131760201Sbostic *
131860201Sbostic * This would have to know the set of possibilities. Implementation
131960201Sbostic * is deferred.
132060201Sbostic */
132160201Sbostic static void
mccase(p,cs)132266362Sbostic mccase(p, cs)
132366362Sbostic register struct parse *p;
132460201Sbostic register cset *cs;
132560201Sbostic {
132660201Sbostic assert(cs->multis == NULL); /* xxx */
132760201Sbostic }
132860201Sbostic
132960201Sbostic /*
133055847Sbostic - isinsets - is this character in any sets?
133160201Sbostic == static int isinsets(register struct re_guts *g, int c);
133255847Sbostic */
133355847Sbostic static int /* predicate */
isinsets(g,c)133455847Sbostic isinsets(g, c)
133555847Sbostic register struct re_guts *g;
133660201Sbostic int c;
133755847Sbostic {
133866362Sbostic register uch *col;
133955847Sbostic register int i;
134055847Sbostic register int ncols = (g->ncsets+(CHAR_BIT-1)) / CHAR_BIT;
134160201Sbostic register unsigned uc = (unsigned char)c;
134255847Sbostic
134355847Sbostic for (i = 0, col = g->setbits; i < ncols; i++, col += g->csetsize)
134460201Sbostic if (col[uc] != 0)
134555847Sbostic return(1);
134655847Sbostic return(0);
134755847Sbostic }
134855847Sbostic
134955847Sbostic /*
135055847Sbostic - samesets - are these two characters in exactly the same sets?
135160201Sbostic == static int samesets(register struct re_guts *g, int c1, int c2);
135255847Sbostic */
135355847Sbostic static int /* predicate */
samesets(g,c1,c2)135455847Sbostic samesets(g, c1, c2)
135555847Sbostic register struct re_guts *g;
135660201Sbostic int c1;
135760201Sbostic int c2;
135855847Sbostic {
135966362Sbostic register uch *col;
136055847Sbostic register int i;
136155847Sbostic register int ncols = (g->ncsets+(CHAR_BIT-1)) / CHAR_BIT;
136260201Sbostic register unsigned uc1 = (unsigned char)c1;
136360201Sbostic register unsigned uc2 = (unsigned char)c2;
136455847Sbostic
136555847Sbostic for (i = 0, col = g->setbits; i < ncols; i++, col += g->csetsize)
136660201Sbostic if (col[uc1] != col[uc2])
136755847Sbostic return(0);
136855847Sbostic return(1);
136955847Sbostic }
137055847Sbostic
137155847Sbostic /*
137255847Sbostic - categorize - sort out character categories
137360201Sbostic == static void categorize(struct parse *p, register struct re_guts *g);
137455847Sbostic */
137555847Sbostic static void
categorize(p,g)137655847Sbostic categorize(p, g)
137755847Sbostic struct parse *p;
137855847Sbostic register struct re_guts *g;
137955847Sbostic {
138060201Sbostic register cat_t *cats = g->categories;
138160201Sbostic register int c;
138260201Sbostic register int c2;
138360201Sbostic register cat_t cat;
138455847Sbostic
138555847Sbostic /* avoid making error situations worse */
138655847Sbostic if (p->error != 0)
138755847Sbostic return;
138855847Sbostic
138960201Sbostic for (c = CHAR_MIN; c <= CHAR_MAX; c++)
139055847Sbostic if (cats[c] == 0 && isinsets(g, c)) {
139155847Sbostic cat = g->ncategories++;
139255847Sbostic cats[c] = cat;
139360201Sbostic for (c2 = c+1; c2 <= CHAR_MAX; c2++)
139455847Sbostic if (cats[c2] == 0 && samesets(g, c, c2))
139555847Sbostic cats[c2] = cat;
139655847Sbostic }
139755847Sbostic }
139855847Sbostic
139955847Sbostic /*
140055847Sbostic - dupl - emit a duplicate of a bunch of sops
140160201Sbostic == static sopno dupl(register struct parse *p, sopno start, sopno finish);
140255847Sbostic */
140355847Sbostic static sopno /* start of duplicate */
dupl(p,start,finish)140455847Sbostic dupl(p, start, finish)
140555847Sbostic register struct parse *p;
140655847Sbostic sopno start; /* from here */
140755847Sbostic sopno finish; /* to this less one */
140855847Sbostic {
140955847Sbostic register sopno ret = HERE();
141055847Sbostic register sopno len = finish - start;
141155847Sbostic
141255847Sbostic assert(finish >= start);
141355847Sbostic if (len == 0)
141455847Sbostic return(ret);
141555847Sbostic enlarge(p, p->ssize + len); /* this many unexpected additions */
141655847Sbostic assert(p->ssize >= p->slen + len);
141755847Sbostic (void) memcpy((char *)(p->strip + p->slen),
141855847Sbostic (char *)(p->strip + start), (size_t)len*sizeof(sop));
141955847Sbostic p->slen += len;
142055847Sbostic return(ret);
142155847Sbostic }
142255847Sbostic
142355847Sbostic /*
142455847Sbostic - doemit - emit a strip operator
142560201Sbostic == static void doemit(register struct parse *p, sop op, size_t opnd);
142655847Sbostic *
142755847Sbostic * It might seem better to implement this as a macro with a function as
142855847Sbostic * hard-case backup, but it's just too big and messy unless there are
142955847Sbostic * some changes to the data structures. Maybe later.
143055847Sbostic */
143155847Sbostic static void
doemit(p,op,opnd)143255847Sbostic doemit(p, op, opnd)
143355847Sbostic register struct parse *p;
143455847Sbostic sop op;
143555847Sbostic size_t opnd;
143655847Sbostic {
143755847Sbostic /* avoid making error situations worse */
143855847Sbostic if (p->error != 0)
143955847Sbostic return;
144055847Sbostic
144155847Sbostic /* deal with oversize operands ("can't happen", more or less) */
144255847Sbostic assert(opnd < 1<<OPSHIFT);
144355847Sbostic
144455847Sbostic /* deal with undersized strip */
144555847Sbostic if (p->slen >= p->ssize)
144655847Sbostic enlarge(p, (p->ssize+1) / 2 * 3); /* +50% */
144755847Sbostic assert(p->slen < p->ssize);
144855847Sbostic
144955847Sbostic /* finally, it's all reduced to the easy case */
145055847Sbostic p->strip[p->slen++] = SOP(op, opnd);
145155847Sbostic }
145255847Sbostic
145355847Sbostic /*
145455847Sbostic - doinsert - insert a sop into the strip
145560201Sbostic == static void doinsert(register struct parse *p, sop op, size_t opnd, sopno pos);
145655847Sbostic */
145755847Sbostic static void
doinsert(p,op,opnd,pos)145855847Sbostic doinsert(p, op, opnd, pos)
145955847Sbostic register struct parse *p;
146055847Sbostic sop op;
146155847Sbostic size_t opnd;
146255847Sbostic sopno pos;
146355847Sbostic {
146455847Sbostic register sopno sn;
146555847Sbostic register sop s;
146655847Sbostic register int i;
146755847Sbostic
146855847Sbostic /* avoid making error situations worse */
146955847Sbostic if (p->error != 0)
147055847Sbostic return;
147155847Sbostic
147255847Sbostic sn = HERE();
147355847Sbostic EMIT(op, opnd); /* do checks, ensure space */
147455847Sbostic assert(HERE() == sn+1);
147555847Sbostic s = p->strip[sn];
147655847Sbostic
147755847Sbostic /* adjust paren pointers */
147855847Sbostic assert(pos > 0);
147955847Sbostic for (i = 1; i < NPAREN; i++) {
148055847Sbostic if (p->pbegin[i] >= pos) {
148155847Sbostic p->pbegin[i]++;
148255847Sbostic }
148355847Sbostic if (p->pend[i] >= pos) {
148455847Sbostic p->pend[i]++;
148555847Sbostic }
148655847Sbostic }
148755847Sbostic
148855847Sbostic memmove((char *)&p->strip[pos+1], (char *)&p->strip[pos],
148955847Sbostic (HERE()-pos-1)*sizeof(sop));
149055847Sbostic p->strip[pos] = s;
149155847Sbostic }
149255847Sbostic
149355847Sbostic /*
149455847Sbostic - dofwd - complete a forward reference
149560201Sbostic == static void dofwd(register struct parse *p, sopno pos, sop value);
149655847Sbostic */
149755847Sbostic static void
dofwd(p,pos,value)149855847Sbostic dofwd(p, pos, value)
149955847Sbostic register struct parse *p;
150055847Sbostic register sopno pos;
150155847Sbostic sop value;
150255847Sbostic {
150355847Sbostic /* avoid making error situations worse */
150455847Sbostic if (p->error != 0)
150555847Sbostic return;
150655847Sbostic
150755847Sbostic assert(value < 1<<OPSHIFT);
150855847Sbostic p->strip[pos] = OP(p->strip[pos]) | value;
150955847Sbostic }
151055847Sbostic
151155847Sbostic /*
151255847Sbostic - enlarge - enlarge the strip
151360201Sbostic == static void enlarge(register struct parse *p, sopno size);
151455847Sbostic */
151555847Sbostic static void
enlarge(p,size)151655847Sbostic enlarge(p, size)
151755847Sbostic register struct parse *p;
151855847Sbostic register sopno size;
151955847Sbostic {
152055847Sbostic register sop *sp;
152155847Sbostic
152255847Sbostic if (p->ssize >= size)
152355847Sbostic return;
152455847Sbostic
152555847Sbostic sp = (sop *)realloc(p->strip, size*sizeof(sop));
152655847Sbostic if (sp == NULL) {
152755847Sbostic SETERROR(REG_ESPACE);
152855847Sbostic return;
152955847Sbostic }
153055847Sbostic p->strip = sp;
153155847Sbostic p->ssize = size;
153255847Sbostic }
153355847Sbostic
153455847Sbostic /*
153555847Sbostic - stripsnug - compact the strip
153660201Sbostic == static void stripsnug(register struct parse *p, register struct re_guts *g);
153755847Sbostic */
153855847Sbostic static void
stripsnug(p,g)153955847Sbostic stripsnug(p, g)
154055847Sbostic register struct parse *p;
154155847Sbostic register struct re_guts *g;
154255847Sbostic {
154355847Sbostic g->nstates = p->slen;
154466362Sbostic g->strip = (sop *)realloc((char *)p->strip, p->slen * sizeof(sop));
154555847Sbostic if (g->strip == NULL) {
154655847Sbostic SETERROR(REG_ESPACE);
154755847Sbostic g->strip = p->strip;
154855847Sbostic }
154955847Sbostic }
155055847Sbostic
155155847Sbostic /*
155255847Sbostic - findmust - fill in must and mlen with longest mandatory literal string
155360201Sbostic == static void findmust(register struct parse *p, register struct re_guts *g);
155455847Sbostic *
155555847Sbostic * This algorithm could do fancy things like analyzing the operands of |
155655847Sbostic * for common subsequences. Someday. This code is simple and finds most
155755847Sbostic * of the interesting cases.
155855847Sbostic *
155955847Sbostic * Note that must and mlen got initialized during setup.
156055847Sbostic */
156156355Sbostic static void
findmust(p,g)156255847Sbostic findmust(p, g)
156355847Sbostic struct parse *p;
156455847Sbostic register struct re_guts *g;
156555847Sbostic {
156655847Sbostic register sop *scan;
156755847Sbostic sop *start;
156855847Sbostic register sop *newstart;
156955847Sbostic register sopno newlen;
157055847Sbostic register sop s;
157155847Sbostic register char *cp;
157255847Sbostic register sopno i;
157355847Sbostic
157455847Sbostic /* avoid making error situations worse */
157555847Sbostic if (p->error != 0)
157655847Sbostic return;
157755847Sbostic
157855847Sbostic /* find the longest OCHAR sequence in strip */
157955847Sbostic newlen = 0;
158055847Sbostic scan = g->strip + 1;
158155847Sbostic do {
158255847Sbostic s = *scan++;
158355847Sbostic switch (OP(s)) {
158455847Sbostic case OCHAR: /* sequence member */
158555847Sbostic if (newlen == 0) /* new sequence */
158655847Sbostic newstart = scan - 1;
158755847Sbostic newlen++;
158855847Sbostic break;
158955847Sbostic case OPLUS_: /* things that don't break one */
159055847Sbostic case OLPAREN:
159155847Sbostic case ORPAREN:
159255847Sbostic break;
159355847Sbostic case OQUEST_: /* things that must be skipped */
159455847Sbostic case OCH_:
159555847Sbostic scan--;
159655847Sbostic do {
159755847Sbostic scan += OPND(s);
159855847Sbostic s = *scan;
159955847Sbostic /* assert() interferes w debug printouts */
160055847Sbostic if (OP(s) != O_QUEST && OP(s) != O_CH &&
160155847Sbostic OP(s) != OOR2) {
160255847Sbostic g->iflags |= BAD;
160355847Sbostic return;
160455847Sbostic }
160555847Sbostic } while (OP(s) != O_QUEST && OP(s) != O_CH);
160655847Sbostic /* fallthrough */
160755847Sbostic default: /* things that break a sequence */
160855847Sbostic if (newlen > g->mlen) { /* ends one */
160955847Sbostic start = newstart;
161055847Sbostic g->mlen = newlen;
161155847Sbostic }
161255847Sbostic newlen = 0;
161355847Sbostic break;
161455847Sbostic }
161555847Sbostic } while (OP(s) != OEND);
161655847Sbostic
161755847Sbostic if (g->mlen == 0) /* there isn't one */
161855847Sbostic return;
161955847Sbostic
162055847Sbostic /* turn it into a character string */
162155847Sbostic g->must = malloc((size_t)g->mlen + 1);
162255847Sbostic if (g->must == NULL) { /* argh; just forget it */
162355847Sbostic g->mlen = 0;
162455847Sbostic return;
162555847Sbostic }
162655847Sbostic cp = g->must;
162755847Sbostic scan = start;
162855847Sbostic for (i = g->mlen; i > 0; i--) {
162955847Sbostic while (OP(s = *scan++) != OCHAR)
163055847Sbostic continue;
163166362Sbostic assert(cp < g->must + g->mlen);
163260201Sbostic *cp++ = (char)OPND(s);
163355847Sbostic }
163466362Sbostic assert(cp == g->must + g->mlen);
163555847Sbostic *cp++ = '\0'; /* just on general principles */
163655847Sbostic }
163755847Sbostic
163855847Sbostic /*
163955847Sbostic - pluscount - count + nesting
164060201Sbostic == static sopno pluscount(register struct parse *p, register struct re_guts *g);
164155847Sbostic */
164256355Sbostic static sopno /* nesting depth */
pluscount(p,g)164355847Sbostic pluscount(p, g)
164455847Sbostic struct parse *p;
164555847Sbostic register struct re_guts *g;
164655847Sbostic {
164755847Sbostic register sop *scan;
164855847Sbostic register sop s;
164955847Sbostic register sopno plusnest = 0;
165055847Sbostic register sopno maxnest = 0;
165155847Sbostic
165255847Sbostic if (p->error != 0)
165355847Sbostic return(0); /* there may not be an OEND */
165455847Sbostic
165555847Sbostic scan = g->strip + 1;
165655847Sbostic do {
165755847Sbostic s = *scan++;
165855847Sbostic switch (OP(s)) {
165955847Sbostic case OPLUS_:
166055847Sbostic plusnest++;
166155847Sbostic break;
166255847Sbostic case O_PLUS:
166355847Sbostic if (plusnest > maxnest)
166455847Sbostic maxnest = plusnest;
166555847Sbostic plusnest--;
166655847Sbostic break;
166755847Sbostic }
166855847Sbostic } while (OP(s) != OEND);
166955847Sbostic if (plusnest != 0)
167055847Sbostic g->iflags |= BAD;
167155847Sbostic return(maxnest);
167255847Sbostic }
1673