155847Sbostic /*- 2*66362Sbostic * Copyright (c) 1992, 1993, 1994 Henry Spencer. 3*66362Sbostic * Copyright (c) 1992, 1993, 1994 461162Sbostic * The Regents of the University of California. All rights reserved. 555847Sbostic * 655847Sbostic * This code is derived from software contributed to Berkeley by 755847Sbostic * Henry Spencer of the University of Toronto. 855847Sbostic * 955847Sbostic * %sccs.include.redist.c% 1055847Sbostic * 11*66362Sbostic * @(#)regcomp.c 8.2 (Berkeley) 03/16/94 1255847Sbostic */ 1355847Sbostic 1455847Sbostic #if defined(LIBC_SCCS) && !defined(lint) 15*66362Sbostic static char sccsid[] = "@(#)regcomp.c 8.2 (Berkeley) 03/16/94"; 1655847Sbostic #endif /* LIBC_SCCS and not lint */ 1755847Sbostic 1855847Sbostic #include <sys/types.h> 1955847Sbostic #include <stdio.h> 2055847Sbostic #include <string.h> 2155847Sbostic #include <ctype.h> 2255847Sbostic #include <limits.h> 2355847Sbostic #include <stdlib.h> 2455847Sbostic #include <regex.h> 2555847Sbostic 2655847Sbostic #include "utils.h" 2755847Sbostic #include "regex2.h" 2855847Sbostic 2955847Sbostic #include "cclass.h" 3055847Sbostic #include "cname.h" 3155847Sbostic 3255847Sbostic /* 3355847Sbostic * parse structure, passed up and down to avoid global variables and 3455847Sbostic * other clumsinesses 3555847Sbostic */ 3655847Sbostic struct parse { 3760201Sbostic char *next; /* next character in RE */ 3860201Sbostic char *end; /* end of string (-> NUL normally) */ 3955847Sbostic int error; /* has an error been seen? */ 4055847Sbostic sop *strip; /* malloced strip */ 4155847Sbostic sopno ssize; /* malloced strip size (allocated) */ 4255847Sbostic sopno slen; /* malloced strip length (used) */ 4355847Sbostic int ncsalloc; /* number of csets allocated */ 4455847Sbostic struct re_guts *g; 4555847Sbostic # define NPAREN 10 /* we need to remember () 1-9 for back refs */ 4655847Sbostic sopno pbegin[NPAREN]; /* -> ( ([0] unused) */ 4755847Sbostic sopno pend[NPAREN]; /* -> ) ([0] unused) */ 4855847Sbostic }; 4955847Sbostic 50*66362Sbostic /* ========= begin header generated by ./mkh ========= */ 51*66362Sbostic #ifdef __cplusplus 52*66362Sbostic extern "C" { 53*66362Sbostic #endif 5456355Sbostic 55*66362Sbostic /* === regcomp.c === */ 56*66362Sbostic static void p_ere(register struct parse *p, int stop); 57*66362Sbostic static void p_ere_exp(register struct parse *p); 58*66362Sbostic static void p_str(register struct parse *p); 59*66362Sbostic static void p_bre(register struct parse *p, register int end1, register int end2); 60*66362Sbostic static int p_simp_re(register struct parse *p, int starordinary); 61*66362Sbostic static int p_count(register struct parse *p); 62*66362Sbostic static void p_bracket(register struct parse *p); 63*66362Sbostic static void p_b_term(register struct parse *p, register cset *cs); 64*66362Sbostic static void p_b_cclass(register struct parse *p, register cset *cs); 65*66362Sbostic static void p_b_eclass(register struct parse *p, register cset *cs); 66*66362Sbostic static char p_b_symbol(register struct parse *p); 67*66362Sbostic static char p_b_coll_elem(register struct parse *p, int endc); 68*66362Sbostic static char othercase(int ch); 69*66362Sbostic static void bothcases(register struct parse *p, int ch); 70*66362Sbostic static void ordinary(register struct parse *p, register int ch); 71*66362Sbostic static void nonnewline(register struct parse *p); 72*66362Sbostic static void repeat(register struct parse *p, sopno start, int from, int to); 73*66362Sbostic static int seterr(register struct parse *p, int e); 74*66362Sbostic static cset *allocset(register struct parse *p); 75*66362Sbostic static void freeset(register struct parse *p, register cset *cs); 76*66362Sbostic static int freezeset(register struct parse *p, register cset *cs); 77*66362Sbostic static int firstch(register struct parse *p, register cset *cs); 78*66362Sbostic static int nch(register struct parse *p, register cset *cs); 79*66362Sbostic static void mcadd(register struct parse *p, register cset *cs, register char *cp); 80*66362Sbostic static void mcsub(register cset *cs, register char *cp); 81*66362Sbostic static int mcin(register cset *cs, register char *cp); 82*66362Sbostic static char *mcfind(register cset *cs, register char *cp); 83*66362Sbostic static void mcinvert(register struct parse *p, register cset *cs); 84*66362Sbostic static void mccase(register struct parse *p, register cset *cs); 85*66362Sbostic static int isinsets(register struct re_guts *g, int c); 86*66362Sbostic static int samesets(register struct re_guts *g, int c1, int c2); 87*66362Sbostic static void categorize(struct parse *p, register struct re_guts *g); 88*66362Sbostic static sopno dupl(register struct parse *p, sopno start, sopno finish); 89*66362Sbostic static void doemit(register struct parse *p, sop op, size_t opnd); 90*66362Sbostic static void doinsert(register struct parse *p, sop op, size_t opnd, sopno pos); 91*66362Sbostic static void dofwd(register struct parse *p, sopno pos, sop value); 92*66362Sbostic static void enlarge(register struct parse *p, sopno size); 93*66362Sbostic static void stripsnug(register struct parse *p, register struct re_guts *g); 94*66362Sbostic static void findmust(register struct parse *p, register struct re_guts *g); 95*66362Sbostic static sopno pluscount(register struct parse *p, register struct re_guts *g); 9660201Sbostic 97*66362Sbostic #ifdef __cplusplus 98*66362Sbostic } 99*66362Sbostic #endif 100*66362Sbostic /* ========= end header generated by ./mkh ========= */ 101*66362Sbostic 10260201Sbostic static char nuls[10]; /* place to point scanner in event of error */ 10360201Sbostic 10455847Sbostic /* 10555847Sbostic * macros for use with parse structure 10655847Sbostic * BEWARE: these know that the parse structure is named `p' !!! 10755847Sbostic */ 10860201Sbostic #define PEEK() (*p->next) 10960201Sbostic #define PEEK2() (*(p->next+1)) 11060201Sbostic #define MORE() (p->next < p->end) 11160201Sbostic #define MORE2() (p->next+1 < p->end) 11260201Sbostic #define SEE(c) (MORE() && PEEK() == (c)) 11360201Sbostic #define SEETWO(a, b) (MORE() && MORE2() && PEEK() == (a) && PEEK2() == (b)) 11455847Sbostic #define EAT(c) ((SEE(c)) ? (NEXT(), 1) : 0) 11555847Sbostic #define EATTWO(a, b) ((SEETWO(a, b)) ? (NEXT2(), 1) : 0) 11655847Sbostic #define NEXT() (p->next++) 11755847Sbostic #define NEXT2() (p->next += 2) 11855847Sbostic #define NEXTn(n) (p->next += (n)) 11960201Sbostic #define GETNEXT() (*p->next++) 12055847Sbostic #define SETERROR(e) seterr(p, (e)) 12155847Sbostic #define REQUIRE(co, e) ((co) || SETERROR(e)) 12260201Sbostic #define MUSTSEE(c, e) (REQUIRE(MORE() && PEEK() == (c), e)) 12360201Sbostic #define MUSTEAT(c, e) (REQUIRE(MORE() && GETNEXT() == (c), e)) 12460201Sbostic #define MUSTNOTSEE(c, e) (REQUIRE(!MORE() || PEEK() != (c), e)) 125*66362Sbostic #define EMIT(op, sopnd) doemit(p, (sop)(op), (size_t)(sopnd)) 126*66362Sbostic #define INSERT(op, pos) doinsert(p, (sop)(op), HERE()-(pos)+1, pos) 12760201Sbostic #define AHEAD(pos) dofwd(p, pos, HERE()-(pos)) 12860201Sbostic #define ASTERN(sop, pos) EMIT(sop, HERE()-pos) 12955847Sbostic #define HERE() (p->slen) 13055847Sbostic #define THERE() (p->slen - 1) 13155847Sbostic #define DROP(n) (p->slen -= (n)) 13255847Sbostic 13360201Sbostic #ifndef NDEBUG 13460201Sbostic static int never = 0; /* for use in asserts; shuts lint up */ 135*66362Sbostic #else 136*66362Sbostic #define never 0 /* some <assert.h>s have bugs too */ 13760201Sbostic #endif 13856357Sbostic 13955847Sbostic /* 14055847Sbostic - regcomp - interface for parser and compilation 141*66362Sbostic = extern int regcomp(regex_t *, const char *, int); 14260201Sbostic = #define REG_BASIC 0000 14360201Sbostic = #define REG_EXTENDED 0001 14460201Sbostic = #define REG_ICASE 0002 14560201Sbostic = #define REG_NOSUB 0004 14660201Sbostic = #define REG_NEWLINE 0010 14760201Sbostic = #define REG_NOSPEC 0020 14860201Sbostic = #define REG_PEND 0040 14960201Sbostic = #define REG_DUMP 0200 15055847Sbostic */ 15155847Sbostic int /* 0 success, otherwise REG_something */ 15255847Sbostic regcomp(preg, pattern, cflags) 15355847Sbostic regex_t *preg; 15455847Sbostic const char *pattern; 15555847Sbostic int cflags; 15655847Sbostic { 15755847Sbostic struct parse pa; 15855847Sbostic register struct re_guts *g; 15955847Sbostic register struct parse *p = &pa; 16055847Sbostic register int i; 16160201Sbostic register size_t len; 162*66362Sbostic #ifdef REDEBUG 163*66362Sbostic # define GOODFLAGS(f) (f) 164*66362Sbostic #else 165*66362Sbostic # define GOODFLAGS(f) ((f)&~REG_DUMP) 166*66362Sbostic #endif 16755847Sbostic 168*66362Sbostic cflags = GOODFLAGS(cflags); 16960201Sbostic if ((cflags®_EXTENDED) && (cflags®_NOSPEC)) 17060201Sbostic return(REG_INVARG); 17160201Sbostic 17260201Sbostic if (cflags®_PEND) { 17360201Sbostic if (preg->re_endp < pattern) 17460201Sbostic return(REG_INVARG); 17560201Sbostic len = preg->re_endp - pattern; 17660201Sbostic } else 17760201Sbostic len = strlen((char *)pattern); 17860201Sbostic 17955847Sbostic /* do the mallocs early so failure handling is easy */ 18060201Sbostic g = (struct re_guts *)malloc(sizeof(struct re_guts) + 18160201Sbostic (NC-1)*sizeof(cat_t)); 18255847Sbostic if (g == NULL) 18355847Sbostic return(REG_ESPACE); 18460201Sbostic p->ssize = len/(size_t)2*(size_t)3 + (size_t)1; /* ugh */ 18555847Sbostic p->strip = (sop *)malloc(p->ssize * sizeof(sop)); 18655847Sbostic p->slen = 0; 18755847Sbostic if (p->strip == NULL) { 18855847Sbostic free((char *)g); 18955847Sbostic return(REG_ESPACE); 19055847Sbostic } 19155847Sbostic 19255847Sbostic /* set things up */ 19355847Sbostic p->g = g; 19460201Sbostic p->next = (char *)pattern; /* convenience; we do not modify it */ 19560201Sbostic p->end = p->next + len; 19655847Sbostic p->error = 0; 19755847Sbostic p->ncsalloc = 0; 19855847Sbostic for (i = 0; i < NPAREN; i++) { 19955847Sbostic p->pbegin[i] = 0; 20055847Sbostic p->pend[i] = 0; 20155847Sbostic } 20260201Sbostic g->csetsize = NC; 20355847Sbostic g->sets = NULL; 20455847Sbostic g->setbits = NULL; 20555847Sbostic g->ncsets = 0; 20655847Sbostic g->cflags = cflags; 20755847Sbostic g->iflags = 0; 20860201Sbostic g->nbol = 0; 20960201Sbostic g->neol = 0; 21055847Sbostic g->must = NULL; 21155847Sbostic g->mlen = 0; 21255847Sbostic g->nsub = 0; 21355847Sbostic g->ncategories = 1; /* category 0 is "everything else" */ 21460610Sbostic g->categories = &g->catspace[-(CHAR_MIN)]; 21560201Sbostic (void) memset((char *)g->catspace, 0, NC*sizeof(cat_t)); 21655847Sbostic g->backrefs = 0; 21755847Sbostic 21855847Sbostic /* do it */ 21955847Sbostic EMIT(OEND, 0); 22055847Sbostic g->firststate = THERE(); 22155847Sbostic if (cflags®_EXTENDED) 22260201Sbostic p_ere(p, OUT); 22360201Sbostic else if (cflags®_NOSPEC) 22460201Sbostic p_str(p); 22555847Sbostic else 22660201Sbostic p_bre(p, OUT, OUT); 22755847Sbostic EMIT(OEND, 0); 22855847Sbostic g->laststate = THERE(); 22955847Sbostic 23055847Sbostic /* tidy up loose ends and fill things in */ 23155847Sbostic categorize(p, g); 23255847Sbostic stripsnug(p, g); 23355847Sbostic findmust(p, g); 23455847Sbostic g->nplus = pluscount(p, g); 23555847Sbostic g->magic = MAGIC2; 23655847Sbostic preg->re_nsub = g->nsub; 23755847Sbostic preg->re_g = g; 23855847Sbostic preg->re_magic = MAGIC1; 23956355Sbostic #ifndef REDEBUG 24055847Sbostic /* not debugging, so can't rely on the assert() in regexec() */ 24155847Sbostic if (g->iflags&BAD) 24255847Sbostic SETERROR(REG_ASSERT); 24355847Sbostic #endif 24455847Sbostic 24555847Sbostic /* win or lose, we're done */ 24655847Sbostic if (p->error != 0) /* lose */ 24755847Sbostic regfree(preg); 24855847Sbostic return(p->error); 24955847Sbostic } 25055847Sbostic 25155847Sbostic /* 25255847Sbostic - p_ere - ERE parser top level, concatenation and alternation 25360201Sbostic == static void p_ere(register struct parse *p, int stop); 25455847Sbostic */ 25555847Sbostic static void 25655847Sbostic p_ere(p, stop) 25755847Sbostic register struct parse *p; 25860201Sbostic int stop; /* character this ERE should end at */ 25955847Sbostic { 26060201Sbostic register char c; 26155847Sbostic register sopno prevback; 26255847Sbostic register sopno prevfwd; 26355847Sbostic register sopno conc; 26455847Sbostic register int first = 1; /* is this the first alternative? */ 26555847Sbostic 26655847Sbostic for (;;) { 26755847Sbostic /* do a bunch of concatenated expressions */ 26855847Sbostic conc = HERE(); 26960201Sbostic while (MORE() && (c = PEEK()) != '|' && c != stop) 27055847Sbostic p_ere_exp(p); 27155847Sbostic REQUIRE(HERE() != conc, REG_EMPTY); /* require nonempty */ 27255847Sbostic 27355847Sbostic if (!EAT('|')) 27455847Sbostic break; /* NOTE BREAK OUT */ 27555847Sbostic 27655847Sbostic if (first) { 27755847Sbostic INSERT(OCH_, conc); /* offset is wrong */ 27855847Sbostic prevfwd = conc; 27955847Sbostic prevback = conc; 28055847Sbostic first = 0; 28155847Sbostic } 28260201Sbostic ASTERN(OOR1, prevback); 28355847Sbostic prevback = THERE(); 28460201Sbostic AHEAD(prevfwd); /* fix previous offset */ 28555847Sbostic prevfwd = HERE(); 28655847Sbostic EMIT(OOR2, 0); /* offset is very wrong */ 28755847Sbostic } 28855847Sbostic 28955847Sbostic if (!first) { /* tail-end fixups */ 29060201Sbostic AHEAD(prevfwd); 29160201Sbostic ASTERN(O_CH, prevback); 29255847Sbostic } 29355847Sbostic 29460201Sbostic assert(!MORE() || SEE(stop)); 29555847Sbostic } 29655847Sbostic 29755847Sbostic /* 29855847Sbostic - p_ere_exp - parse one subERE, an atom possibly followed by a repetition op 29960201Sbostic == static void p_ere_exp(register struct parse *p); 30055847Sbostic */ 30155847Sbostic static void 30255847Sbostic p_ere_exp(p) 30355847Sbostic register struct parse *p; 30455847Sbostic { 30560201Sbostic register char c; 30655847Sbostic register sopno pos; 30755847Sbostic register int count; 30855847Sbostic register int count2; 30955847Sbostic register sopno subno; 31055847Sbostic int wascaret = 0; 31155847Sbostic 31260201Sbostic assert(MORE()); /* caller should have ensured this */ 31355847Sbostic c = GETNEXT(); 31455847Sbostic 31555847Sbostic pos = HERE(); 31655847Sbostic switch (c) { 31755847Sbostic case '(': 31860201Sbostic REQUIRE(MORE(), REG_EPAREN); 31955847Sbostic p->g->nsub++; 32055847Sbostic subno = p->g->nsub; 32155847Sbostic if (subno < NPAREN) 32255847Sbostic p->pbegin[subno] = HERE(); 32355847Sbostic EMIT(OLPAREN, subno); 32455847Sbostic if (!SEE(')')) 32555847Sbostic p_ere(p, ')'); 32655847Sbostic if (subno < NPAREN) { 32755847Sbostic p->pend[subno] = HERE(); 32855847Sbostic assert(p->pend[subno] != 0); 32955847Sbostic } 33055847Sbostic EMIT(ORPAREN, subno); 33155847Sbostic MUSTEAT(')', REG_EPAREN); 33255847Sbostic break; 33355847Sbostic #ifndef POSIX_MISTAKE 33455847Sbostic case ')': /* happens only if no current unmatched ( */ 33555847Sbostic /* 33655847Sbostic * You may ask, why the ifndef? Because I didn't notice 33755847Sbostic * this until slightly too late for 1003.2, and none of the 33855847Sbostic * other 1003.2 regular-expression reviewers noticed it at 33955847Sbostic * all. So an unmatched ) is legal POSIX, at least until 34055847Sbostic * we can get it fixed. 34155847Sbostic */ 34255847Sbostic SETERROR(REG_EPAREN); 34355847Sbostic break; 34455847Sbostic #endif 34555847Sbostic case '^': 34655847Sbostic EMIT(OBOL, 0); 34755847Sbostic p->g->iflags |= USEBOL; 34860201Sbostic p->g->nbol++; 34955847Sbostic wascaret = 1; 35055847Sbostic break; 35155847Sbostic case '$': 35255847Sbostic EMIT(OEOL, 0); 35355847Sbostic p->g->iflags |= USEEOL; 35460201Sbostic p->g->neol++; 35555847Sbostic break; 35655847Sbostic case '|': 35755847Sbostic SETERROR(REG_EMPTY); 35855847Sbostic break; 35955847Sbostic case '*': 36055847Sbostic case '+': 36155847Sbostic case '?': 36255847Sbostic SETERROR(REG_BADRPT); 36355847Sbostic break; 36455847Sbostic case '.': 36555847Sbostic if (p->g->cflags®_NEWLINE) 36655847Sbostic nonnewline(p); 36755847Sbostic else 36855847Sbostic EMIT(OANY, 0); 36955847Sbostic break; 37055847Sbostic case '[': 37155847Sbostic p_bracket(p); 37255847Sbostic break; 37355847Sbostic case '\\': 37460201Sbostic REQUIRE(MORE(), REG_EESCAPE); 37555847Sbostic c = GETNEXT(); 37660201Sbostic ordinary(p, c); 37755847Sbostic break; 37855847Sbostic case '{': /* okay as ordinary except if digit follows */ 37960201Sbostic REQUIRE(!MORE() || !isdigit(PEEK()), REG_BADRPT); 38055847Sbostic /* FALLTHROUGH */ 38155847Sbostic default: 38255847Sbostic ordinary(p, c); 38355847Sbostic break; 38455847Sbostic } 38555847Sbostic 38660201Sbostic if (!MORE()) 38760201Sbostic return; 38855847Sbostic c = PEEK(); 38960201Sbostic /* we call { a repetition if followed by a digit */ 39060201Sbostic if (!( c == '*' || c == '+' || c == '?' || 39160201Sbostic (c == '{' && MORE2() && isdigit(PEEK2())) )) 39255847Sbostic return; /* no repetition, we're done */ 39355847Sbostic NEXT(); 39455847Sbostic 39555847Sbostic REQUIRE(!wascaret, REG_BADRPT); 39655847Sbostic switch (c) { 39755847Sbostic case '*': /* implemented as +? */ 39855847Sbostic INSERT(OPLUS_, pos); 39960201Sbostic ASTERN(O_PLUS, pos); 40055847Sbostic INSERT(OQUEST_, pos); 40160201Sbostic ASTERN(O_QUEST, pos); 40255847Sbostic break; 40355847Sbostic case '+': 40455847Sbostic INSERT(OPLUS_, pos); 40560201Sbostic ASTERN(O_PLUS, pos); 40655847Sbostic break; 40755847Sbostic case '?': 40855847Sbostic INSERT(OQUEST_, pos); 40960201Sbostic ASTERN(O_QUEST, pos); 41055847Sbostic break; 41155847Sbostic case '{': 41255847Sbostic count = p_count(p); 41355847Sbostic if (EAT(',')) { 41455847Sbostic if (isdigit(PEEK())) { 41555847Sbostic count2 = p_count(p); 41655847Sbostic REQUIRE(count <= count2, REG_BADBR); 41755847Sbostic } else /* single number with comma */ 41855847Sbostic count2 = INFINITY; 41955847Sbostic } else /* just a single number */ 42055847Sbostic count2 = count; 42155847Sbostic repeat(p, pos, count, count2); 42255847Sbostic if (!EAT('}')) { /* error heuristics */ 42360201Sbostic while (MORE() && PEEK() != '}') 42455847Sbostic NEXT(); 42560201Sbostic REQUIRE(MORE(), REG_EBRACE); 42660201Sbostic SETERROR(REG_BADBR); 42755847Sbostic } 42855847Sbostic break; 42955847Sbostic } 43055847Sbostic 43160201Sbostic if (!MORE()) 43260201Sbostic return; 43355847Sbostic c = PEEK(); 43460201Sbostic if (!( c == '*' || c == '+' || c == '?' || 43560201Sbostic (c == '{' && MORE2() && isdigit(PEEK2())) ) ) 43660201Sbostic return; 43760201Sbostic SETERROR(REG_BADRPT); 43855847Sbostic } 43955847Sbostic 44055847Sbostic /* 44160201Sbostic - p_str - string (no metacharacters) "parser" 44260201Sbostic == static void p_str(register struct parse *p); 44360201Sbostic */ 44460201Sbostic static void 44560201Sbostic p_str(p) 44660201Sbostic register struct parse *p; 44760201Sbostic { 44860201Sbostic REQUIRE(MORE(), REG_EMPTY); 44960201Sbostic while (MORE()) 45060201Sbostic ordinary(p, GETNEXT()); 45160201Sbostic } 45260201Sbostic 45360201Sbostic /* 45455847Sbostic - p_bre - BRE parser top level, anchoring and concatenation 45560201Sbostic == static void p_bre(register struct parse *p, register int end1, \ 45660201Sbostic == register int end2); 45760201Sbostic * Giving end1 as OUT essentially eliminates the end1/end2 check. 45855847Sbostic * 45956355Sbostic * This implementation is a bit of a kludge, in that a trailing $ is first 46056355Sbostic * taken as an ordinary character and then revised to be an anchor. The 46156355Sbostic * only undesirable side effect is that '$' gets included as a character 46256355Sbostic * category in such cases. This is fairly harmless; not worth fixing. 46360201Sbostic * The amount of lookahead needed to avoid this kludge is excessive. 46455847Sbostic */ 46555847Sbostic static void 46655847Sbostic p_bre(p, end1, end2) 46755847Sbostic register struct parse *p; 46860201Sbostic register int end1; /* first terminating character */ 46960201Sbostic register int end2; /* second terminating character */ 47055847Sbostic { 47155847Sbostic register sopno start = HERE(); 47255847Sbostic register int first = 1; /* first subexpression? */ 47356355Sbostic register int wasdollar = 0; 47455847Sbostic 47555847Sbostic if (EAT('^')) { 47655847Sbostic EMIT(OBOL, 0); 47755847Sbostic p->g->iflags |= USEBOL; 47860201Sbostic p->g->nbol++; 47955847Sbostic } 48060201Sbostic while (MORE() && !SEETWO(end1, end2)) { 48155847Sbostic wasdollar = p_simp_re(p, first); 48255847Sbostic first = 0; 48355847Sbostic } 48455847Sbostic if (wasdollar) { /* oops, that was a trailing anchor */ 48555847Sbostic DROP(1); 48655847Sbostic EMIT(OEOL, 0); 48755847Sbostic p->g->iflags |= USEEOL; 48860201Sbostic p->g->neol++; 48955847Sbostic } 49055847Sbostic 49155847Sbostic REQUIRE(HERE() != start, REG_EMPTY); /* require nonempty */ 49255847Sbostic } 49355847Sbostic 49455847Sbostic /* 49555847Sbostic - p_simp_re - parse a simple RE, an atom possibly followed by a repetition 49660201Sbostic == static int p_simp_re(register struct parse *p, int starordinary); 49755847Sbostic */ 49855847Sbostic static int /* was the simple RE an unbackslashed $? */ 49955847Sbostic p_simp_re(p, starordinary) 50055847Sbostic register struct parse *p; 50155847Sbostic int starordinary; /* is a leading * an ordinary character? */ 50255847Sbostic { 50355847Sbostic register int c; 50455847Sbostic register int count; 50555847Sbostic register int count2; 50655847Sbostic register sopno pos; 50755847Sbostic register int i; 50855847Sbostic register sopno subno; 50955847Sbostic # define BACKSL (1<<CHAR_BIT) 51055847Sbostic 51155847Sbostic pos = HERE(); /* repetion op, if any, covers from here */ 51255847Sbostic 51360201Sbostic assert(MORE()); /* caller should have ensured this */ 51455847Sbostic c = GETNEXT(); 51560201Sbostic if (c == '\\') { 51660201Sbostic REQUIRE(MORE(), REG_EESCAPE); 51760201Sbostic c = BACKSL | (unsigned char)GETNEXT(); 51860201Sbostic } 51955847Sbostic switch (c) { 52055847Sbostic case '.': 52155847Sbostic if (p->g->cflags®_NEWLINE) 52255847Sbostic nonnewline(p); 52355847Sbostic else 52455847Sbostic EMIT(OANY, 0); 52555847Sbostic break; 52655847Sbostic case '[': 52755847Sbostic p_bracket(p); 52855847Sbostic break; 52955847Sbostic case BACKSL|'{': 53055847Sbostic SETERROR(REG_BADRPT); 53155847Sbostic break; 53255847Sbostic case BACKSL|'(': 53355847Sbostic p->g->nsub++; 53455847Sbostic subno = p->g->nsub; 53555847Sbostic if (subno < NPAREN) 53655847Sbostic p->pbegin[subno] = HERE(); 53755847Sbostic EMIT(OLPAREN, subno); 53860201Sbostic /* the MORE here is an error heuristic */ 53960201Sbostic if (MORE() && !SEETWO('\\', ')')) 54055847Sbostic p_bre(p, '\\', ')'); 54155847Sbostic if (subno < NPAREN) { 54255847Sbostic p->pend[subno] = HERE(); 54355847Sbostic assert(p->pend[subno] != 0); 54455847Sbostic } 54555847Sbostic EMIT(ORPAREN, subno); 54655847Sbostic REQUIRE(EATTWO('\\', ')'), REG_EPAREN); 54755847Sbostic break; 54855847Sbostic case BACKSL|')': /* should not get here -- must be user */ 54955847Sbostic case BACKSL|'}': 55055847Sbostic SETERROR(REG_EPAREN); 55155847Sbostic break; 55255847Sbostic case BACKSL|'1': 55355847Sbostic case BACKSL|'2': 55455847Sbostic case BACKSL|'3': 55555847Sbostic case BACKSL|'4': 55655847Sbostic case BACKSL|'5': 55755847Sbostic case BACKSL|'6': 55855847Sbostic case BACKSL|'7': 55955847Sbostic case BACKSL|'8': 56055847Sbostic case BACKSL|'9': 56155847Sbostic i = (c&~BACKSL) - '0'; 56255847Sbostic assert(i < NPAREN); 56355847Sbostic if (p->pend[i] != 0) { 56455847Sbostic assert(i <= p->g->nsub); 56555847Sbostic EMIT(OBACK_, i); 56655847Sbostic assert(p->pbegin[i] != 0); 56755847Sbostic assert(OP(p->strip[p->pbegin[i]]) == OLPAREN); 56855847Sbostic assert(OP(p->strip[p->pend[i]]) == ORPAREN); 56955847Sbostic (void) dupl(p, p->pbegin[i]+1, p->pend[i]); 57055847Sbostic EMIT(O_BACK, i); 57155847Sbostic } else 57255847Sbostic SETERROR(REG_ESUBREG); 57355847Sbostic p->g->backrefs = 1; 57455847Sbostic break; 57555847Sbostic case '*': 57655847Sbostic REQUIRE(starordinary, REG_BADRPT); 57755847Sbostic /* FALLTHROUGH */ 57855847Sbostic default: 57960201Sbostic ordinary(p, c &~ BACKSL); 58055847Sbostic break; 58155847Sbostic } 58255847Sbostic 58355847Sbostic if (EAT('*')) { /* implemented as +? */ 58455847Sbostic INSERT(OPLUS_, pos); 58560201Sbostic ASTERN(O_PLUS, pos); 58655847Sbostic INSERT(OQUEST_, pos); 58760201Sbostic ASTERN(O_QUEST, pos); 58855847Sbostic } else if (EATTWO('\\', '{')) { 58955847Sbostic count = p_count(p); 59055847Sbostic if (EAT(',')) { 59160201Sbostic if (MORE() && isdigit(PEEK())) { 59255847Sbostic count2 = p_count(p); 59355847Sbostic REQUIRE(count <= count2, REG_BADBR); 59455847Sbostic } else /* single number with comma */ 59555847Sbostic count2 = INFINITY; 59655847Sbostic } else /* just a single number */ 59755847Sbostic count2 = count; 59855847Sbostic repeat(p, pos, count, count2); 59955847Sbostic if (!EATTWO('\\', '}')) { /* error heuristics */ 60060201Sbostic while (MORE() && !SEETWO('\\', '}')) 60155847Sbostic NEXT(); 60260201Sbostic REQUIRE(MORE(), REG_EBRACE); 60360201Sbostic SETERROR(REG_BADBR); 60455847Sbostic } 60560201Sbostic } else if (c == (unsigned char)'$') /* $ (but not \$) ends it */ 60655847Sbostic return(1); 60755847Sbostic 60855847Sbostic return(0); 60955847Sbostic } 61055847Sbostic 61155847Sbostic /* 61255847Sbostic - p_count - parse a repetition count 61360201Sbostic == static int p_count(register struct parse *p); 61455847Sbostic */ 61555847Sbostic static int /* the value */ 61655847Sbostic p_count(p) 61755847Sbostic register struct parse *p; 61855847Sbostic { 61955847Sbostic register int count = 0; 62055847Sbostic register int ndigits = 0; 62155847Sbostic 62260201Sbostic while (MORE() && isdigit(PEEK()) && count <= DUPMAX) { 62355847Sbostic count = count*10 + (GETNEXT() - '0'); 62455847Sbostic ndigits++; 62555847Sbostic } 62655847Sbostic 62755847Sbostic REQUIRE(ndigits > 0 && count <= DUPMAX, REG_BADBR); 62855847Sbostic return(count); 62955847Sbostic } 63055847Sbostic 63155847Sbostic /* 63255847Sbostic - p_bracket - parse a bracketed character list 63360201Sbostic == static void p_bracket(register struct parse *p); 63455847Sbostic * 63555847Sbostic * Note a significant property of this code: if the allocset() did SETERROR, 63655847Sbostic * no set operations are done. 63755847Sbostic */ 63855847Sbostic static void 63955847Sbostic p_bracket(p) 64055847Sbostic register struct parse *p; 64155847Sbostic { 64260201Sbostic register char c; 64355847Sbostic register cset *cs = allocset(p); 64455847Sbostic register int invert = 0; 64555847Sbostic 64660201Sbostic /* Dept of Truly Sickening Special-Case Kludges */ 64760201Sbostic if (p->next + 5 < p->end && strncmp(p->next, "[:<:]]", 6) == 0) { 64860201Sbostic EMIT(OBOW, 0); 64960201Sbostic NEXTn(6); 65060201Sbostic return; 65160201Sbostic } 65260201Sbostic if (p->next + 5 < p->end && strncmp(p->next, "[:>:]]", 6) == 0) { 65360201Sbostic EMIT(OEOW, 0); 65460201Sbostic NEXTn(6); 65560201Sbostic return; 65660201Sbostic } 65760201Sbostic 65855847Sbostic if (EAT('^')) 65955847Sbostic invert++; /* make note to invert set at end */ 66055847Sbostic if (EAT(']')) 66155847Sbostic CHadd(cs, ']'); 66256618Sbostic else if (EAT('-')) 66356618Sbostic CHadd(cs, '-'); 66460201Sbostic while (MORE() && PEEK() != ']' && !SEETWO('-', ']')) 66555847Sbostic p_b_term(p, cs); 66655847Sbostic if (EAT('-')) 66755847Sbostic CHadd(cs, '-'); 66855847Sbostic MUSTEAT(']', REG_EBRACK); 66955847Sbostic 67060201Sbostic if (p->error != 0) /* don't mess things up further */ 67160201Sbostic return; 67260201Sbostic 67360201Sbostic if (p->g->cflags®_ICASE) { 67455847Sbostic register int i; 67560201Sbostic register int ci; 67655847Sbostic 67755847Sbostic for (i = p->g->csetsize - 1; i >= 0; i--) 67860201Sbostic if (CHIN(cs, i) && isalpha(i)) { 67960201Sbostic ci = othercase(i); 68060201Sbostic if (ci != i) 68160201Sbostic CHadd(cs, ci); 68260201Sbostic } 68360201Sbostic if (cs->multis != NULL) 68460201Sbostic mccase(p, cs); 68560201Sbostic } 68660201Sbostic if (invert) { 68760201Sbostic register int i; 68860201Sbostic 68960201Sbostic for (i = p->g->csetsize - 1; i >= 0; i--) 69055847Sbostic if (CHIN(cs, i)) 69155847Sbostic CHsub(cs, i); 69255847Sbostic else 69355847Sbostic CHadd(cs, i); 69455847Sbostic if (p->g->cflags®_NEWLINE) 69555847Sbostic CHsub(cs, '\n'); 69655847Sbostic if (cs->multis != NULL) 69755847Sbostic mcinvert(p, cs); 69855847Sbostic } 69960201Sbostic 70055847Sbostic assert(cs->multis == NULL); /* xxx */ 70160201Sbostic 70260201Sbostic if (nch(p, cs) == 1) { /* optimize singleton sets */ 70360201Sbostic ordinary(p, firstch(p, cs)); 70460201Sbostic freeset(p, cs); 70560201Sbostic } else 70660201Sbostic EMIT(OANYOF, freezeset(p, cs)); 70755847Sbostic } 70855847Sbostic 70955847Sbostic /* 71055847Sbostic - p_b_term - parse one term of a bracketed character list 71160201Sbostic == static void p_b_term(register struct parse *p, register cset *cs); 71255847Sbostic */ 71355847Sbostic static void 71455847Sbostic p_b_term(p, cs) 71555847Sbostic register struct parse *p; 71655847Sbostic register cset *cs; 71755847Sbostic { 71860201Sbostic register char c; 71960201Sbostic register char start, finish; 72055847Sbostic register int i; 72155847Sbostic 72255847Sbostic /* classify what we've got */ 72360201Sbostic switch ((MORE()) ? PEEK() : '\0') { 72455847Sbostic case '[': 72560201Sbostic c = (MORE2()) ? PEEK2() : '\0'; 72655847Sbostic break; 72755847Sbostic case '-': 72855847Sbostic SETERROR(REG_ERANGE); 72955847Sbostic return; /* NOTE RETURN */ 73055847Sbostic break; 73155847Sbostic default: 73255847Sbostic c = '\0'; 73355847Sbostic break; 73455847Sbostic } 73555847Sbostic 73655847Sbostic switch (c) { 73755847Sbostic case ':': /* character class */ 73855847Sbostic NEXT2(); 73960201Sbostic REQUIRE(MORE(), REG_EBRACK); 74055847Sbostic c = PEEK(); 74155847Sbostic REQUIRE(c != '-' && c != ']', REG_ECTYPE); 74255847Sbostic p_b_cclass(p, cs); 74360201Sbostic REQUIRE(MORE(), REG_EBRACK); 74455847Sbostic REQUIRE(EATTWO(':', ']'), REG_ECTYPE); 74555847Sbostic break; 74655847Sbostic case '=': /* equivalence class */ 74755847Sbostic NEXT2(); 74860201Sbostic REQUIRE(MORE(), REG_EBRACK); 74955847Sbostic c = PEEK(); 75055847Sbostic REQUIRE(c != '-' && c != ']', REG_ECOLLATE); 75155847Sbostic p_b_eclass(p, cs); 75260201Sbostic REQUIRE(MORE(), REG_EBRACK); 75355847Sbostic REQUIRE(EATTWO('=', ']'), REG_ECOLLATE); 75455847Sbostic break; 75555847Sbostic default: /* symbol, ordinary character, or range */ 75655847Sbostic /* xxx revision needed for multichar stuff */ 75755847Sbostic start = p_b_symbol(p); 75860201Sbostic if (SEE('-') && MORE2() && PEEK2() != ']') { 75955847Sbostic /* range */ 76055847Sbostic NEXT(); 76155847Sbostic if (EAT('-')) 76255847Sbostic finish = '-'; 76355847Sbostic else 76455847Sbostic finish = p_b_symbol(p); 76555847Sbostic } else 76655847Sbostic finish = start; 76760201Sbostic /* xxx what about signed chars here... */ 76855847Sbostic REQUIRE(start <= finish, REG_ERANGE); 76960201Sbostic for (i = start; i <= finish; i++) 77055847Sbostic CHadd(cs, i); 77155847Sbostic break; 77255847Sbostic } 77355847Sbostic } 77455847Sbostic 77555847Sbostic /* 77655847Sbostic - p_b_cclass - parse a character-class name and deal with it 77760201Sbostic == static void p_b_cclass(register struct parse *p, register cset *cs); 77855847Sbostic */ 77955847Sbostic static void 78055847Sbostic p_b_cclass(p, cs) 78155847Sbostic register struct parse *p; 78255847Sbostic register cset *cs; 78355847Sbostic { 78460201Sbostic register char *sp = p->next; 78555847Sbostic register struct cclass *cp; 78660201Sbostic register size_t len; 78760201Sbostic register char *u; 78860201Sbostic register char c; 78955847Sbostic 79060201Sbostic while (MORE() && isalpha(PEEK())) 79160201Sbostic NEXT(); 79260201Sbostic len = p->next - sp; 79355847Sbostic for (cp = cclasses; cp->name != NULL; cp++) 79460201Sbostic if (strncmp(cp->name, sp, len) == 0 && cp->name[len] == '\0') 79555847Sbostic break; 79655847Sbostic if (cp->name == NULL) { 79755847Sbostic /* oops, didn't find it */ 79855847Sbostic SETERROR(REG_ECTYPE); 79955847Sbostic return; 80055847Sbostic } 80155847Sbostic 80260201Sbostic u = cp->chars; 80355847Sbostic while ((c = *u++) != '\0') 80455847Sbostic CHadd(cs, c); 80560201Sbostic for (u = cp->multis; *u != '\0'; u += strlen(u) + 1) 806*66362Sbostic MCadd(p, cs, u); 80755847Sbostic } 80855847Sbostic 80955847Sbostic /* 81055847Sbostic - p_b_eclass - parse an equivalence-class name and deal with it 81160201Sbostic == static void p_b_eclass(register struct parse *p, register cset *cs); 81255847Sbostic * 81355847Sbostic * This implementation is incomplete. xxx 81455847Sbostic */ 81555847Sbostic static void 81655847Sbostic p_b_eclass(p, cs) 81755847Sbostic register struct parse *p; 81855847Sbostic register cset *cs; 81955847Sbostic { 82060201Sbostic register char c; 82155847Sbostic 82255847Sbostic c = p_b_coll_elem(p, '='); 82355847Sbostic CHadd(cs, c); 82455847Sbostic } 82555847Sbostic 82655847Sbostic /* 82755847Sbostic - p_b_symbol - parse a character or [..]ed multicharacter collating symbol 82860201Sbostic == static char p_b_symbol(register struct parse *p); 82955847Sbostic */ 83060201Sbostic static char /* value of symbol */ 83155847Sbostic p_b_symbol(p) 83255847Sbostic register struct parse *p; 83355847Sbostic { 83460201Sbostic register char value; 83555847Sbostic 83660201Sbostic REQUIRE(MORE(), REG_EBRACK); 83760201Sbostic if (!EATTWO('[', '.')) 83855847Sbostic return(GETNEXT()); 83955847Sbostic 84055847Sbostic /* collating symbol */ 84155847Sbostic value = p_b_coll_elem(p, '.'); 84255847Sbostic REQUIRE(EATTWO('.', ']'), REG_ECOLLATE); 84355847Sbostic return(value); 84455847Sbostic } 84555847Sbostic 84655847Sbostic /* 84755847Sbostic - p_b_coll_elem - parse a collating-element name and look it up 84860201Sbostic == static char p_b_coll_elem(register struct parse *p, int endc); 84955847Sbostic */ 85060201Sbostic static char /* value of collating element */ 85155847Sbostic p_b_coll_elem(p, endc) 85255847Sbostic register struct parse *p; 85360201Sbostic int endc; /* name ended by endc,']' */ 85455847Sbostic { 85560201Sbostic register char *sp = p->next; 85655847Sbostic register struct cname *cp; 85755847Sbostic register int len; 85860201Sbostic register char c; 85955847Sbostic 86060201Sbostic while (MORE() && !SEETWO(endc, ']')) 86155847Sbostic NEXT(); 86260201Sbostic if (!MORE()) { 86355847Sbostic SETERROR(REG_EBRACK); 86455847Sbostic return(0); 86555847Sbostic } 86655847Sbostic len = p->next - sp; 86755847Sbostic for (cp = cnames; cp->name != NULL; cp++) 86860201Sbostic if (strncmp(cp->name, sp, len) == 0 && cp->name[len] == '\0') 86955847Sbostic return(cp->code); /* known name */ 87055847Sbostic if (len == 1) 87155847Sbostic return(*sp); /* single character */ 87255847Sbostic SETERROR(REG_ECOLLATE); /* neither */ 87355847Sbostic return(0); 87455847Sbostic } 87555847Sbostic 87655847Sbostic /* 87755847Sbostic - othercase - return the case counterpart of an alphabetic 87860201Sbostic == static char othercase(int ch); 87955847Sbostic */ 88060201Sbostic static char /* if no counterpart, return ch */ 88155847Sbostic othercase(ch) 88260201Sbostic int ch; 88355847Sbostic { 88455847Sbostic assert(isalpha(ch)); 88555847Sbostic if (isupper(ch)) 88655847Sbostic return(tolower(ch)); 88755847Sbostic else if (islower(ch)) 88855847Sbostic return(toupper(ch)); 88955847Sbostic else /* peculiar, but could happen */ 89055847Sbostic return(ch); 89155847Sbostic } 89255847Sbostic 89355847Sbostic /* 89460201Sbostic - bothcases - emit a dualcase version of a two-case character 89560201Sbostic == static void bothcases(register struct parse *p, int ch); 89656355Sbostic * 89755847Sbostic * Boy, is this implementation ever a kludge... 89855847Sbostic */ 89955847Sbostic static void 90055847Sbostic bothcases(p, ch) 90155847Sbostic register struct parse *p; 90260201Sbostic int ch; 90355847Sbostic { 90460201Sbostic register char *oldnext = p->next; 90560201Sbostic register char *oldend = p->end; 90660201Sbostic char bracket[3]; 90755847Sbostic 90860201Sbostic assert(othercase(ch) != ch); /* p_bracket() would recurse */ 90955847Sbostic p->next = bracket; 91060201Sbostic p->end = bracket+2; 91155847Sbostic bracket[0] = ch; 91255847Sbostic bracket[1] = ']'; 91355847Sbostic bracket[2] = '\0'; 91455847Sbostic p_bracket(p); 91555847Sbostic assert(p->next == bracket+2); 91655847Sbostic p->next = oldnext; 91760201Sbostic p->end = oldend; 91855847Sbostic } 91955847Sbostic 92055847Sbostic /* 92155847Sbostic - ordinary - emit an ordinary character 92260201Sbostic == static void ordinary(register struct parse *p, register int ch); 92355847Sbostic */ 92455847Sbostic static void 92555847Sbostic ordinary(p, ch) 92655847Sbostic register struct parse *p; 92760201Sbostic register int ch; 92855847Sbostic { 92960201Sbostic register cat_t *cap = p->g->categories; 93055847Sbostic 93160201Sbostic if ((p->g->cflags®_ICASE) && isalpha(ch) && othercase(ch) != ch) 93255847Sbostic bothcases(p, ch); 93360201Sbostic else { 93460201Sbostic EMIT(OCHAR, (unsigned char)ch); 93560201Sbostic if (cap[ch] == 0) 93660201Sbostic cap[ch] = p->g->ncategories++; 93755847Sbostic } 93855847Sbostic } 93955847Sbostic 94055847Sbostic /* 94155847Sbostic - nonnewline - emit REG_NEWLINE version of OANY 94260201Sbostic == static void nonnewline(register struct parse *p); 94356355Sbostic * 94455847Sbostic * Boy, is this implementation ever a kludge... 94555847Sbostic */ 94655847Sbostic static void 94755847Sbostic nonnewline(p) 94855847Sbostic register struct parse *p; 94955847Sbostic { 95060201Sbostic register char *oldnext = p->next; 95160201Sbostic register char *oldend = p->end; 95260201Sbostic char bracket[4]; 95355847Sbostic 95455847Sbostic p->next = bracket; 95560201Sbostic p->end = bracket+3; 95655847Sbostic bracket[0] = '^'; 95755847Sbostic bracket[1] = '\n'; 95855847Sbostic bracket[2] = ']'; 95955847Sbostic bracket[3] = '\0'; 96055847Sbostic p_bracket(p); 96155847Sbostic assert(p->next == bracket+3); 96255847Sbostic p->next = oldnext; 96360201Sbostic p->end = oldend; 96455847Sbostic } 96555847Sbostic 96655847Sbostic /* 96755847Sbostic - repeat - generate code for a bounded repetition, recursively if needed 96860201Sbostic == static void repeat(register struct parse *p, sopno start, int from, int to); 96955847Sbostic */ 97055847Sbostic static void 97155847Sbostic repeat(p, start, from, to) 97255847Sbostic register struct parse *p; 97355847Sbostic sopno start; /* operand from here to end of strip */ 97455847Sbostic int from; /* repeated from this number */ 97555847Sbostic int to; /* to this number of times (maybe INFINITY) */ 97655847Sbostic { 97755847Sbostic register sopno finish = HERE(); 97855847Sbostic # define N 2 97955847Sbostic # define INF 3 98055847Sbostic # define REP(f, t) ((f)*8 + (t)) 98155847Sbostic # define MAP(n) (((n) <= 1) ? (n) : ((n) == INFINITY) ? INF : N) 98255847Sbostic register sopno copy; 98355847Sbostic 98455847Sbostic if (p->error != 0) /* head off possible runaway recursion */ 98555847Sbostic return; 98655847Sbostic 98755847Sbostic assert(from <= to); 98855847Sbostic 98955847Sbostic switch (REP(MAP(from), MAP(to))) { 99055847Sbostic case REP(0, 0): /* must be user doing this */ 99155847Sbostic DROP(finish-start); /* drop the operand */ 99255847Sbostic break; 99355847Sbostic case REP(0, 1): /* as x{1,1}? */ 99455847Sbostic case REP(0, N): /* as x{1,n}? */ 99555847Sbostic case REP(0, INF): /* as x{1,}? */ 99655847Sbostic INSERT(OQUEST_, start); /* offset is wrong... */ 99755847Sbostic repeat(p, start+1, 1, to); 99860201Sbostic AHEAD(start); /* ... fix it */ 99960201Sbostic ASTERN(O_QUEST, start); 100055847Sbostic break; 100155847Sbostic case REP(1, 1): /* trivial case */ 100255847Sbostic /* done */ 100355847Sbostic break; 100455847Sbostic case REP(1, N): /* as x?x{1,n-1} */ 100555847Sbostic INSERT(OQUEST_, start); 100660201Sbostic ASTERN(O_QUEST, start); 100755847Sbostic copy = dupl(p, start+1, finish+1); 100855847Sbostic assert(copy == finish+2); 100955847Sbostic repeat(p, copy, 1, to-1); 101055847Sbostic break; 101155847Sbostic case REP(1, INF): /* as x+ */ 101255847Sbostic INSERT(OPLUS_, start); 101360201Sbostic ASTERN(O_PLUS, start); 101455847Sbostic break; 101555847Sbostic case REP(N, N): /* as xx{m-1,n-1} */ 101655847Sbostic copy = dupl(p, start, finish); 101755847Sbostic repeat(p, copy, from-1, to-1); 101855847Sbostic break; 101955847Sbostic case REP(N, INF): /* as xx{n-1,INF} */ 102055847Sbostic copy = dupl(p, start, finish); 102155847Sbostic repeat(p, copy, from-1, to); 102255847Sbostic break; 102355847Sbostic default: /* "can't happen" */ 102455847Sbostic SETERROR(REG_ASSERT); /* just in case */ 102555847Sbostic break; 102655847Sbostic } 102755847Sbostic } 102855847Sbostic 102955847Sbostic /* 103055847Sbostic - seterr - set an error condition 103160201Sbostic == static int seterr(register struct parse *p, int e); 103255847Sbostic */ 103355847Sbostic static int /* useless but makes type checking happy */ 103455847Sbostic seterr(p, e) 103555847Sbostic register struct parse *p; 103655847Sbostic int e; 103755847Sbostic { 103855847Sbostic if (p->error == 0) /* keep earliest error condition */ 103955847Sbostic p->error = e; 104055847Sbostic p->next = nuls; /* try to bring things to a halt */ 104160201Sbostic p->end = nuls; 104255847Sbostic return(0); /* make the return value well-defined */ 104355847Sbostic } 104455847Sbostic 104555847Sbostic /* 104655847Sbostic - allocset - allocate a set of characters for [] 104760201Sbostic == static cset *allocset(register struct parse *p); 104855847Sbostic */ 104955847Sbostic static cset * 105055847Sbostic allocset(p) 105155847Sbostic register struct parse *p; 105255847Sbostic { 105355847Sbostic register int no = p->g->ncsets++; 105455847Sbostic register size_t nc; 105555847Sbostic register size_t nbytes; 105655847Sbostic register cset *cs; 105755847Sbostic register size_t css = (size_t)p->g->csetsize; 1058*66362Sbostic register int i; 105955847Sbostic 106055847Sbostic if (no >= p->ncsalloc) { /* need another column of space */ 106155847Sbostic p->ncsalloc += CHAR_BIT; 106255847Sbostic nc = p->ncsalloc; 106355847Sbostic assert(nc % CHAR_BIT == 0); 106455847Sbostic nbytes = nc / CHAR_BIT * css; 106555847Sbostic if (p->g->sets == NULL) 106655847Sbostic p->g->sets = (cset *)malloc(nc * sizeof(cset)); 106755847Sbostic else 106855847Sbostic p->g->sets = (cset *)realloc((char *)p->g->sets, 106955847Sbostic nc * sizeof(cset)); 107055847Sbostic if (p->g->setbits == NULL) 1071*66362Sbostic p->g->setbits = (uch *)malloc(nbytes); 1072*66362Sbostic else { 1073*66362Sbostic p->g->setbits = (uch *)realloc((char *)p->g->setbits, 107455847Sbostic nbytes); 1075*66362Sbostic /* xxx this isn't right if setbits is now NULL */ 1076*66362Sbostic for (i = 0; i < no; i++) 1077*66362Sbostic p->g->sets[i].ptr = p->g->setbits + css*(i/CHAR_BIT); 1078*66362Sbostic } 107955847Sbostic if (p->g->sets != NULL && p->g->setbits != NULL) 108055847Sbostic (void) memset((char *)p->g->setbits + (nbytes - css), 108155847Sbostic 0, css); 108255847Sbostic else { 108355847Sbostic no = 0; 108455847Sbostic SETERROR(REG_ESPACE); 108555847Sbostic /* caller's responsibility not to do set ops */ 108655847Sbostic } 108755847Sbostic } 108855847Sbostic 108955847Sbostic assert(p->g->sets != NULL); /* xxx */ 109055847Sbostic cs = &p->g->sets[no]; 109155847Sbostic cs->ptr = p->g->setbits + css*((no)/CHAR_BIT); 109255847Sbostic cs->mask = 1 << ((no) % CHAR_BIT); 109355847Sbostic cs->hash = 0; 109455847Sbostic cs->smultis = 0; 109555847Sbostic cs->multis = NULL; 109655847Sbostic 109755847Sbostic return(cs); 109855847Sbostic } 109955847Sbostic 110055847Sbostic /* 110160201Sbostic - freeset - free a now-unused set 110260201Sbostic == static void freeset(register struct parse *p, register cset *cs); 110360201Sbostic */ 110460201Sbostic static void 110560201Sbostic freeset(p, cs) 110660201Sbostic register struct parse *p; 110760201Sbostic register cset *cs; 110860201Sbostic { 110960201Sbostic register int i; 111060201Sbostic register cset *top = &p->g->sets[p->g->ncsets]; 111160201Sbostic register size_t css = (size_t)p->g->csetsize; 111260201Sbostic 111360201Sbostic for (i = 0; i < css; i++) 111460201Sbostic CHsub(cs, i); 111560201Sbostic if (cs == top-1) /* recover only the easy case */ 111660201Sbostic p->g->ncsets--; 111760201Sbostic } 111860201Sbostic 111960201Sbostic /* 112055847Sbostic - freezeset - final processing on a set of characters 112160201Sbostic == static int freezeset(register struct parse *p, register cset *cs); 112255847Sbostic * 112355847Sbostic * The main task here is merging identical sets. This is usually a waste 112455847Sbostic * of time (although the hash code minimizes the overhead), but can win 112555847Sbostic * big if REG_ICASE is being used. REG_ICASE, by the way, is why the hash 112655847Sbostic * is done using addition rather than xor -- all ASCII [aA] sets xor to 112755847Sbostic * the same value! 112855847Sbostic */ 112955847Sbostic static int /* set number */ 113055847Sbostic freezeset(p, cs) 113155847Sbostic register struct parse *p; 113255847Sbostic register cset *cs; 113355847Sbostic { 1134*66362Sbostic register uch h = cs->hash; 113555847Sbostic register int i; 113655847Sbostic register cset *top = &p->g->sets[p->g->ncsets]; 113755847Sbostic register cset *cs2; 113855847Sbostic register size_t css = (size_t)p->g->csetsize; 113955847Sbostic 114055847Sbostic /* look for an earlier one which is the same */ 114155847Sbostic for (cs2 = &p->g->sets[0]; cs2 < top; cs2++) 114255847Sbostic if (cs2->hash == h && cs2 != cs) { 114355847Sbostic /* maybe */ 114455847Sbostic for (i = 0; i < css; i++) 114555847Sbostic if (!!CHIN(cs2, i) != !!CHIN(cs, i)) 114655847Sbostic break; /* no */ 114755847Sbostic if (i == css) 114855847Sbostic break; /* yes */ 114955847Sbostic } 115055847Sbostic 115155847Sbostic if (cs2 < top) { /* found one */ 115260201Sbostic freeset(p, cs); 115355847Sbostic cs = cs2; 115455847Sbostic } 115555847Sbostic 115655847Sbostic return((int)(cs - p->g->sets)); 115755847Sbostic } 115855847Sbostic 115955847Sbostic /* 116060201Sbostic - firstch - return first character in a set (which must have at least one) 116160201Sbostic == static int firstch(register struct parse *p, register cset *cs); 116260201Sbostic */ 116360201Sbostic static int /* character; there is no "none" value */ 116460201Sbostic firstch(p, cs) 116560201Sbostic register struct parse *p; 116660201Sbostic register cset *cs; 116760201Sbostic { 116860201Sbostic register int i; 116960201Sbostic register size_t css = (size_t)p->g->csetsize; 117060201Sbostic 117160201Sbostic for (i = 0; i < css; i++) 117260201Sbostic if (CHIN(cs, i)) 117360201Sbostic return((char)i); 117460201Sbostic assert(never); 117560201Sbostic return(0); /* arbitrary */ 117660201Sbostic } 117760201Sbostic 117860201Sbostic /* 117960201Sbostic - nch - number of characters in a set 118060201Sbostic == static int nch(register struct parse *p, register cset *cs); 118160201Sbostic */ 118260201Sbostic static int 118360201Sbostic nch(p, cs) 118460201Sbostic register struct parse *p; 118560201Sbostic register cset *cs; 118660201Sbostic { 118760201Sbostic register int i; 118860201Sbostic register size_t css = (size_t)p->g->csetsize; 118960201Sbostic register int n = 0; 119060201Sbostic 119160201Sbostic for (i = 0; i < css; i++) 119260201Sbostic if (CHIN(cs, i)) 119360201Sbostic n++; 119460201Sbostic return(n); 119560201Sbostic } 119660201Sbostic 119760201Sbostic /* 119855847Sbostic - mcadd - add a collating element to a cset 119960201Sbostic == static void mcadd(register struct parse *p, register cset *cs, \ 120060201Sbostic == register char *cp); 120155847Sbostic */ 120255847Sbostic static void 120355847Sbostic mcadd(p, cs, cp) 120455847Sbostic register struct parse *p; 120555847Sbostic register cset *cs; 120660201Sbostic register char *cp; 120755847Sbostic { 120855847Sbostic register size_t oldend = cs->smultis; 120955847Sbostic 121060201Sbostic cs->smultis += strlen(cp) + 1; 121155847Sbostic if (cs->multis == NULL) 121260201Sbostic cs->multis = malloc(cs->smultis); 121355847Sbostic else 121460201Sbostic cs->multis = realloc(cs->multis, cs->smultis); 121555847Sbostic if (cs->multis == NULL) { 121655847Sbostic SETERROR(REG_ESPACE); 121755847Sbostic return; 121855847Sbostic } 121955847Sbostic 122060201Sbostic (void) strcpy(cs->multis + oldend - 1, cp); 122155847Sbostic cs->multis[cs->smultis - 1] = '\0'; 122255847Sbostic } 122355847Sbostic 122455847Sbostic /* 122555847Sbostic - mcsub - subtract a collating element from a cset 122660201Sbostic == static void mcsub(register cset *cs, register char *cp); 122755847Sbostic */ 122855847Sbostic static void 122960201Sbostic mcsub(cs, cp) 123055847Sbostic register cset *cs; 123160201Sbostic register char *cp; 123255847Sbostic { 123360201Sbostic register char *fp = mcfind(cs, cp); 123460201Sbostic register size_t len = strlen(fp); 123555847Sbostic 123660201Sbostic assert(fp != NULL); 123760201Sbostic (void) memmove(fp, fp + len + 1, 123855847Sbostic cs->smultis - (fp + len + 1 - cs->multis)); 123955847Sbostic cs->smultis -= len; 124055847Sbostic 124155847Sbostic if (cs->smultis == 0) { 124260201Sbostic free(cs->multis); 124355847Sbostic cs->multis = NULL; 124455847Sbostic return; 124555847Sbostic } 124655847Sbostic 124760201Sbostic cs->multis = realloc(cs->multis, cs->smultis); 124855847Sbostic assert(cs->multis != NULL); 124955847Sbostic } 125055847Sbostic 125155847Sbostic /* 125255847Sbostic - mcin - is a collating element in a cset? 125360201Sbostic == static int mcin(register cset *cs, register char *cp); 125455847Sbostic */ 125555847Sbostic static int 125660201Sbostic mcin(cs, cp) 125755847Sbostic register cset *cs; 125860201Sbostic register char *cp; 125955847Sbostic { 126055847Sbostic return(mcfind(cs, cp) != NULL); 126155847Sbostic } 126255847Sbostic 126355847Sbostic /* 126455847Sbostic - mcfind - find a collating element in a cset 126560201Sbostic == static char *mcfind(register cset *cs, register char *cp); 126655847Sbostic */ 126760201Sbostic static char * 126855847Sbostic mcfind(cs, cp) 126955847Sbostic register cset *cs; 127060201Sbostic register char *cp; 127155847Sbostic { 127260201Sbostic register char *p; 127355847Sbostic 127455847Sbostic if (cs->multis == NULL) 127555847Sbostic return(NULL); 127660201Sbostic for (p = cs->multis; *p != '\0'; p += strlen(p) + 1) 127760201Sbostic if (strcmp(cp, p) == 0) 127855847Sbostic return(p); 127955847Sbostic return(NULL); 128055847Sbostic } 128155847Sbostic 128255847Sbostic /* 128355847Sbostic - mcinvert - invert the list of collating elements in a cset 1284*66362Sbostic == static void mcinvert(register struct parse *p, register cset *cs); 128555847Sbostic * 128655847Sbostic * This would have to know the set of possibilities. Implementation 128755847Sbostic * is deferred. 128855847Sbostic */ 128955847Sbostic static void 1290*66362Sbostic mcinvert(p, cs) 1291*66362Sbostic register struct parse *p; 129255847Sbostic register cset *cs; 129355847Sbostic { 129455847Sbostic assert(cs->multis == NULL); /* xxx */ 129555847Sbostic } 129655847Sbostic 129755847Sbostic /* 129860201Sbostic - mccase - add case counterparts of the list of collating elements in a cset 1299*66362Sbostic == static void mccase(register struct parse *p, register cset *cs); 130060201Sbostic * 130160201Sbostic * This would have to know the set of possibilities. Implementation 130260201Sbostic * is deferred. 130360201Sbostic */ 130460201Sbostic static void 1305*66362Sbostic mccase(p, cs) 1306*66362Sbostic register struct parse *p; 130760201Sbostic register cset *cs; 130860201Sbostic { 130960201Sbostic assert(cs->multis == NULL); /* xxx */ 131060201Sbostic } 131160201Sbostic 131260201Sbostic /* 131355847Sbostic - isinsets - is this character in any sets? 131460201Sbostic == static int isinsets(register struct re_guts *g, int c); 131555847Sbostic */ 131655847Sbostic static int /* predicate */ 131755847Sbostic isinsets(g, c) 131855847Sbostic register struct re_guts *g; 131960201Sbostic int c; 132055847Sbostic { 1321*66362Sbostic register uch *col; 132255847Sbostic register int i; 132355847Sbostic register int ncols = (g->ncsets+(CHAR_BIT-1)) / CHAR_BIT; 132460201Sbostic register unsigned uc = (unsigned char)c; 132555847Sbostic 132655847Sbostic for (i = 0, col = g->setbits; i < ncols; i++, col += g->csetsize) 132760201Sbostic if (col[uc] != 0) 132855847Sbostic return(1); 132955847Sbostic return(0); 133055847Sbostic } 133155847Sbostic 133255847Sbostic /* 133355847Sbostic - samesets - are these two characters in exactly the same sets? 133460201Sbostic == static int samesets(register struct re_guts *g, int c1, int c2); 133555847Sbostic */ 133655847Sbostic static int /* predicate */ 133755847Sbostic samesets(g, c1, c2) 133855847Sbostic register struct re_guts *g; 133960201Sbostic int c1; 134060201Sbostic int c2; 134155847Sbostic { 1342*66362Sbostic register uch *col; 134355847Sbostic register int i; 134455847Sbostic register int ncols = (g->ncsets+(CHAR_BIT-1)) / CHAR_BIT; 134560201Sbostic register unsigned uc1 = (unsigned char)c1; 134660201Sbostic register unsigned uc2 = (unsigned char)c2; 134755847Sbostic 134855847Sbostic for (i = 0, col = g->setbits; i < ncols; i++, col += g->csetsize) 134960201Sbostic if (col[uc1] != col[uc2]) 135055847Sbostic return(0); 135155847Sbostic return(1); 135255847Sbostic } 135355847Sbostic 135455847Sbostic /* 135555847Sbostic - categorize - sort out character categories 135660201Sbostic == static void categorize(struct parse *p, register struct re_guts *g); 135755847Sbostic */ 135855847Sbostic static void 135955847Sbostic categorize(p, g) 136055847Sbostic struct parse *p; 136155847Sbostic register struct re_guts *g; 136255847Sbostic { 136360201Sbostic register cat_t *cats = g->categories; 136460201Sbostic register int c; 136560201Sbostic register int c2; 136660201Sbostic register cat_t cat; 136755847Sbostic 136855847Sbostic /* avoid making error situations worse */ 136955847Sbostic if (p->error != 0) 137055847Sbostic return; 137155847Sbostic 137260201Sbostic for (c = CHAR_MIN; c <= CHAR_MAX; c++) 137355847Sbostic if (cats[c] == 0 && isinsets(g, c)) { 137455847Sbostic cat = g->ncategories++; 137555847Sbostic cats[c] = cat; 137660201Sbostic for (c2 = c+1; c2 <= CHAR_MAX; c2++) 137755847Sbostic if (cats[c2] == 0 && samesets(g, c, c2)) 137855847Sbostic cats[c2] = cat; 137955847Sbostic } 138055847Sbostic } 138155847Sbostic 138255847Sbostic /* 138355847Sbostic - dupl - emit a duplicate of a bunch of sops 138460201Sbostic == static sopno dupl(register struct parse *p, sopno start, sopno finish); 138555847Sbostic */ 138655847Sbostic static sopno /* start of duplicate */ 138755847Sbostic dupl(p, start, finish) 138855847Sbostic register struct parse *p; 138955847Sbostic sopno start; /* from here */ 139055847Sbostic sopno finish; /* to this less one */ 139155847Sbostic { 139255847Sbostic register sopno ret = HERE(); 139355847Sbostic register sopno len = finish - start; 139455847Sbostic 139555847Sbostic assert(finish >= start); 139655847Sbostic if (len == 0) 139755847Sbostic return(ret); 139855847Sbostic enlarge(p, p->ssize + len); /* this many unexpected additions */ 139955847Sbostic assert(p->ssize >= p->slen + len); 140055847Sbostic (void) memcpy((char *)(p->strip + p->slen), 140155847Sbostic (char *)(p->strip + start), (size_t)len*sizeof(sop)); 140255847Sbostic p->slen += len; 140355847Sbostic return(ret); 140455847Sbostic } 140555847Sbostic 140655847Sbostic /* 140755847Sbostic - doemit - emit a strip operator 140860201Sbostic == static void doemit(register struct parse *p, sop op, size_t opnd); 140955847Sbostic * 141055847Sbostic * It might seem better to implement this as a macro with a function as 141155847Sbostic * hard-case backup, but it's just too big and messy unless there are 141255847Sbostic * some changes to the data structures. Maybe later. 141355847Sbostic */ 141455847Sbostic static void 141555847Sbostic doemit(p, op, opnd) 141655847Sbostic register struct parse *p; 141755847Sbostic sop op; 141855847Sbostic size_t opnd; 141955847Sbostic { 142055847Sbostic /* avoid making error situations worse */ 142155847Sbostic if (p->error != 0) 142255847Sbostic return; 142355847Sbostic 142455847Sbostic /* deal with oversize operands ("can't happen", more or less) */ 142555847Sbostic assert(opnd < 1<<OPSHIFT); 142655847Sbostic 142755847Sbostic /* deal with undersized strip */ 142855847Sbostic if (p->slen >= p->ssize) 142955847Sbostic enlarge(p, (p->ssize+1) / 2 * 3); /* +50% */ 143055847Sbostic assert(p->slen < p->ssize); 143155847Sbostic 143255847Sbostic /* finally, it's all reduced to the easy case */ 143355847Sbostic p->strip[p->slen++] = SOP(op, opnd); 143455847Sbostic } 143555847Sbostic 143655847Sbostic /* 143755847Sbostic - doinsert - insert a sop into the strip 143860201Sbostic == static void doinsert(register struct parse *p, sop op, size_t opnd, sopno pos); 143955847Sbostic */ 144055847Sbostic static void 144155847Sbostic doinsert(p, op, opnd, pos) 144255847Sbostic register struct parse *p; 144355847Sbostic sop op; 144455847Sbostic size_t opnd; 144555847Sbostic sopno pos; 144655847Sbostic { 144755847Sbostic register sopno sn; 144855847Sbostic register sop s; 144955847Sbostic register int i; 145055847Sbostic 145155847Sbostic /* avoid making error situations worse */ 145255847Sbostic if (p->error != 0) 145355847Sbostic return; 145455847Sbostic 145555847Sbostic sn = HERE(); 145655847Sbostic EMIT(op, opnd); /* do checks, ensure space */ 145755847Sbostic assert(HERE() == sn+1); 145855847Sbostic s = p->strip[sn]; 145955847Sbostic 146055847Sbostic /* adjust paren pointers */ 146155847Sbostic assert(pos > 0); 146255847Sbostic for (i = 1; i < NPAREN; i++) { 146355847Sbostic if (p->pbegin[i] >= pos) { 146455847Sbostic p->pbegin[i]++; 146555847Sbostic } 146655847Sbostic if (p->pend[i] >= pos) { 146755847Sbostic p->pend[i]++; 146855847Sbostic } 146955847Sbostic } 147055847Sbostic 147155847Sbostic memmove((char *)&p->strip[pos+1], (char *)&p->strip[pos], 147255847Sbostic (HERE()-pos-1)*sizeof(sop)); 147355847Sbostic p->strip[pos] = s; 147455847Sbostic } 147555847Sbostic 147655847Sbostic /* 147755847Sbostic - dofwd - complete a forward reference 147860201Sbostic == static void dofwd(register struct parse *p, sopno pos, sop value); 147955847Sbostic */ 148055847Sbostic static void 148155847Sbostic dofwd(p, pos, value) 148255847Sbostic register struct parse *p; 148355847Sbostic register sopno pos; 148455847Sbostic sop value; 148555847Sbostic { 148655847Sbostic /* avoid making error situations worse */ 148755847Sbostic if (p->error != 0) 148855847Sbostic return; 148955847Sbostic 149055847Sbostic assert(value < 1<<OPSHIFT); 149155847Sbostic p->strip[pos] = OP(p->strip[pos]) | value; 149255847Sbostic } 149355847Sbostic 149455847Sbostic /* 149555847Sbostic - enlarge - enlarge the strip 149660201Sbostic == static void enlarge(register struct parse *p, sopno size); 149755847Sbostic */ 149855847Sbostic static void 149955847Sbostic enlarge(p, size) 150055847Sbostic register struct parse *p; 150155847Sbostic register sopno size; 150255847Sbostic { 150355847Sbostic register sop *sp; 150455847Sbostic 150555847Sbostic if (p->ssize >= size) 150655847Sbostic return; 150755847Sbostic 150855847Sbostic sp = (sop *)realloc(p->strip, size*sizeof(sop)); 150955847Sbostic if (sp == NULL) { 151055847Sbostic SETERROR(REG_ESPACE); 151155847Sbostic return; 151255847Sbostic } 151355847Sbostic p->strip = sp; 151455847Sbostic p->ssize = size; 151555847Sbostic } 151655847Sbostic 151755847Sbostic /* 151855847Sbostic - stripsnug - compact the strip 151960201Sbostic == static void stripsnug(register struct parse *p, register struct re_guts *g); 152055847Sbostic */ 152155847Sbostic static void 152255847Sbostic stripsnug(p, g) 152355847Sbostic register struct parse *p; 152455847Sbostic register struct re_guts *g; 152555847Sbostic { 152655847Sbostic g->nstates = p->slen; 1527*66362Sbostic g->strip = (sop *)realloc((char *)p->strip, p->slen * sizeof(sop)); 152855847Sbostic if (g->strip == NULL) { 152955847Sbostic SETERROR(REG_ESPACE); 153055847Sbostic g->strip = p->strip; 153155847Sbostic } 153255847Sbostic } 153355847Sbostic 153455847Sbostic /* 153555847Sbostic - findmust - fill in must and mlen with longest mandatory literal string 153660201Sbostic == static void findmust(register struct parse *p, register struct re_guts *g); 153755847Sbostic * 153855847Sbostic * This algorithm could do fancy things like analyzing the operands of | 153955847Sbostic * for common subsequences. Someday. This code is simple and finds most 154055847Sbostic * of the interesting cases. 154155847Sbostic * 154255847Sbostic * Note that must and mlen got initialized during setup. 154355847Sbostic */ 154456355Sbostic static void 154555847Sbostic findmust(p, g) 154655847Sbostic struct parse *p; 154755847Sbostic register struct re_guts *g; 154855847Sbostic { 154955847Sbostic register sop *scan; 155055847Sbostic sop *start; 155155847Sbostic register sop *newstart; 155255847Sbostic register sopno newlen; 155355847Sbostic register sop s; 155455847Sbostic register char *cp; 155555847Sbostic register sopno i; 155655847Sbostic 155755847Sbostic /* avoid making error situations worse */ 155855847Sbostic if (p->error != 0) 155955847Sbostic return; 156055847Sbostic 156155847Sbostic /* find the longest OCHAR sequence in strip */ 156255847Sbostic newlen = 0; 156355847Sbostic scan = g->strip + 1; 156455847Sbostic do { 156555847Sbostic s = *scan++; 156655847Sbostic switch (OP(s)) { 156755847Sbostic case OCHAR: /* sequence member */ 156855847Sbostic if (newlen == 0) /* new sequence */ 156955847Sbostic newstart = scan - 1; 157055847Sbostic newlen++; 157155847Sbostic break; 157255847Sbostic case OPLUS_: /* things that don't break one */ 157355847Sbostic case OLPAREN: 157455847Sbostic case ORPAREN: 157555847Sbostic break; 157655847Sbostic case OQUEST_: /* things that must be skipped */ 157755847Sbostic case OCH_: 157855847Sbostic scan--; 157955847Sbostic do { 158055847Sbostic scan += OPND(s); 158155847Sbostic s = *scan; 158255847Sbostic /* assert() interferes w debug printouts */ 158355847Sbostic if (OP(s) != O_QUEST && OP(s) != O_CH && 158455847Sbostic OP(s) != OOR2) { 158555847Sbostic g->iflags |= BAD; 158655847Sbostic return; 158755847Sbostic } 158855847Sbostic } while (OP(s) != O_QUEST && OP(s) != O_CH); 158955847Sbostic /* fallthrough */ 159055847Sbostic default: /* things that break a sequence */ 159155847Sbostic if (newlen > g->mlen) { /* ends one */ 159255847Sbostic start = newstart; 159355847Sbostic g->mlen = newlen; 159455847Sbostic } 159555847Sbostic newlen = 0; 159655847Sbostic break; 159755847Sbostic } 159855847Sbostic } while (OP(s) != OEND); 159955847Sbostic 160055847Sbostic if (g->mlen == 0) /* there isn't one */ 160155847Sbostic return; 160255847Sbostic 160355847Sbostic /* turn it into a character string */ 160455847Sbostic g->must = malloc((size_t)g->mlen + 1); 160555847Sbostic if (g->must == NULL) { /* argh; just forget it */ 160655847Sbostic g->mlen = 0; 160755847Sbostic return; 160855847Sbostic } 160955847Sbostic cp = g->must; 161055847Sbostic scan = start; 161155847Sbostic for (i = g->mlen; i > 0; i--) { 161255847Sbostic while (OP(s = *scan++) != OCHAR) 161355847Sbostic continue; 1614*66362Sbostic assert(cp < g->must + g->mlen); 161560201Sbostic *cp++ = (char)OPND(s); 161655847Sbostic } 1617*66362Sbostic assert(cp == g->must + g->mlen); 161855847Sbostic *cp++ = '\0'; /* just on general principles */ 161955847Sbostic } 162055847Sbostic 162155847Sbostic /* 162255847Sbostic - pluscount - count + nesting 162360201Sbostic == static sopno pluscount(register struct parse *p, register struct re_guts *g); 162455847Sbostic */ 162556355Sbostic static sopno /* nesting depth */ 162655847Sbostic pluscount(p, g) 162755847Sbostic struct parse *p; 162855847Sbostic register struct re_guts *g; 162955847Sbostic { 163055847Sbostic register sop *scan; 163155847Sbostic register sop s; 163255847Sbostic register sopno plusnest = 0; 163355847Sbostic register sopno maxnest = 0; 163455847Sbostic 163555847Sbostic if (p->error != 0) 163655847Sbostic return(0); /* there may not be an OEND */ 163755847Sbostic 163855847Sbostic scan = g->strip + 1; 163955847Sbostic do { 164055847Sbostic s = *scan++; 164155847Sbostic switch (OP(s)) { 164255847Sbostic case OPLUS_: 164355847Sbostic plusnest++; 164455847Sbostic break; 164555847Sbostic case O_PLUS: 164655847Sbostic if (plusnest > maxnest) 164755847Sbostic maxnest = plusnest; 164855847Sbostic plusnest--; 164955847Sbostic break; 165055847Sbostic } 165155847Sbostic } while (OP(s) != OEND); 165255847Sbostic if (plusnest != 0) 165355847Sbostic g->iflags |= BAD; 165455847Sbostic return(maxnest); 165555847Sbostic } 1656