155847Sbostic /*- 255847Sbostic * Copyright (c) 1992 Henry Spencer. 355847Sbostic * Copyright (c) 1992 The Regents of the University of California. 455847Sbostic * All rights reserved. 555847Sbostic * 655847Sbostic * This code is derived from software contributed to Berkeley by 755847Sbostic * Henry Spencer of the University of Toronto. 855847Sbostic * 955847Sbostic * %sccs.include.redist.c% 1055847Sbostic * 11*60610Sbostic * @(#)regcomp.c 5.7 (Berkeley) 05/30/93 1255847Sbostic */ 1355847Sbostic 1455847Sbostic #if defined(LIBC_SCCS) && !defined(lint) 15*60610Sbostic static char sccsid[] = "@(#)regcomp.c 5.7 (Berkeley) 05/30/93"; 1655847Sbostic #endif /* LIBC_SCCS and not lint */ 1755847Sbostic 1855847Sbostic #include <sys/types.h> 1955847Sbostic #include <stdio.h> 2055847Sbostic #include <string.h> 2155847Sbostic #include <ctype.h> 2255847Sbostic #include <limits.h> 2355847Sbostic #include <stdlib.h> 2455847Sbostic #include <regex.h> 2555847Sbostic 2655847Sbostic #include "utils.h" 2755847Sbostic #include "regex2.h" 2855847Sbostic 2955847Sbostic #include "cclass.h" 3055847Sbostic #include "cname.h" 3155847Sbostic 3255847Sbostic /* 3355847Sbostic * parse structure, passed up and down to avoid global variables and 3455847Sbostic * other clumsinesses 3555847Sbostic */ 3655847Sbostic struct parse { 3760201Sbostic char *next; /* next character in RE */ 3860201Sbostic char *end; /* end of string (-> NUL normally) */ 3955847Sbostic int error; /* has an error been seen? */ 4055847Sbostic sop *strip; /* malloced strip */ 4155847Sbostic sopno ssize; /* malloced strip size (allocated) */ 4255847Sbostic sopno slen; /* malloced strip length (used) */ 4355847Sbostic int ncsalloc; /* number of csets allocated */ 4455847Sbostic struct re_guts *g; 4555847Sbostic # define NPAREN 10 /* we need to remember () 1-9 for back refs */ 4655847Sbostic sopno pbegin[NPAREN]; /* -> ( ([0] unused) */ 4755847Sbostic sopno pend[NPAREN]; /* -> ) ([0] unused) */ 4855847Sbostic }; 4955847Sbostic 5056355Sbostic 5160201Sbostic static void p_ere(/*register struct parse *p, int stop*/); 5260201Sbostic static void p_ere_exp(/*register struct parse *p*/); 5360201Sbostic static void p_str(/*register struct parse *p*/); 5460201Sbostic static void p_bre(/*register struct parse *p, register int end1, register int end2*/); 5560201Sbostic static int p_simp_re(/*register struct parse *p, int starordinary*/); 5660201Sbostic static int p_count(/*register struct parse *p*/); 5760201Sbostic static void p_bracket(/*register struct parse *p*/); 5860201Sbostic static void p_b_term(/*register struct parse *p, register cset *cs*/); 5960201Sbostic static void p_b_cclass(/*register struct parse *p, register cset *cs*/); 6060201Sbostic static void p_b_eclass(/*register struct parse *p, register cset *cs*/); 6160201Sbostic static char p_b_symbol(/*register struct parse *p*/); 6260201Sbostic static char p_b_coll_elem(/*register struct parse *p, int endc*/); 6360201Sbostic static char othercase(/*int ch*/); 6460201Sbostic static void bothcases(/*register struct parse *p, int ch*/); 6560201Sbostic static void ordinary(/*register struct parse *p, register int ch*/); 6660201Sbostic static void nonnewline(/*register struct parse *p*/); 6760201Sbostic static void repeat(/*register struct parse *p, sopno start, int from, int to*/); 6860201Sbostic static int seterr(/*register struct parse *p, int e*/); 6960201Sbostic static cset *allocset(/*register struct parse *p*/); 7060201Sbostic static void freeset(/*register struct parse *p, register cset *cs*/); 7160201Sbostic static int freezeset(/*register struct parse *p, register cset *cs*/); 7260201Sbostic static int firstch(/*register struct parse *p, register cset *cs*/); 7360201Sbostic static int nch(/*register struct parse *p, register cset *cs*/); 7460201Sbostic static void mcadd(/*register struct parse *p, register cset *cs, register char *cp*/); 7560201Sbostic static void mcsub(/*register cset *cs, register char *cp*/); 7660201Sbostic static int mcin(/*register cset *cs, register char *cp*/); 7760201Sbostic static char *mcfind(/*register cset *cs, register char *cp*/); 7860201Sbostic static void mcinvert(/*register cset *cs*/); 7960201Sbostic static void mccase(/*register cset *cs*/); 8060201Sbostic static int isinsets(/*register struct re_guts *g, int c*/); 8160201Sbostic static int samesets(/*register struct re_guts *g, int c1, int c2*/); 8260201Sbostic static void categorize(/*struct parse *p, register struct re_guts *g*/); 8360201Sbostic static sopno dupl(/*register struct parse *p, sopno start, sopno finish*/); 8460201Sbostic static void doemit(/*register struct parse *p, sop op, size_t opnd*/); 8560201Sbostic static void doinsert(/*register struct parse *p, sop op, size_t opnd, sopno pos*/); 8660201Sbostic static void dofwd(/*register struct parse *p, sopno pos, sop value*/); 8760201Sbostic static void enlarge(/*register struct parse *p, sopno size*/); 8860201Sbostic static void stripsnug(/*register struct parse *p, register struct re_guts *g*/); 8960201Sbostic static void findmust(/*register struct parse *p, register struct re_guts *g*/); 9060201Sbostic static sopno pluscount(/*register struct parse *p, register struct re_guts *g*/); 9160201Sbostic 9260201Sbostic static char nuls[10]; /* place to point scanner in event of error */ 9360201Sbostic 9455847Sbostic /* 9555847Sbostic * macros for use with parse structure 9655847Sbostic * BEWARE: these know that the parse structure is named `p' !!! 9755847Sbostic */ 9860201Sbostic #define PEEK() (*p->next) 9960201Sbostic #define PEEK2() (*(p->next+1)) 10060201Sbostic #define MORE() (p->next < p->end) 10160201Sbostic #define MORE2() (p->next+1 < p->end) 10260201Sbostic #define SEE(c) (MORE() && PEEK() == (c)) 10360201Sbostic #define SEETWO(a, b) (MORE() && MORE2() && PEEK() == (a) && PEEK2() == (b)) 10455847Sbostic #define EAT(c) ((SEE(c)) ? (NEXT(), 1) : 0) 10555847Sbostic #define EATTWO(a, b) ((SEETWO(a, b)) ? (NEXT2(), 1) : 0) 10655847Sbostic #define NEXT() (p->next++) 10755847Sbostic #define NEXT2() (p->next += 2) 10855847Sbostic #define NEXTn(n) (p->next += (n)) 10960201Sbostic #define GETNEXT() (*p->next++) 11055847Sbostic #define SETERROR(e) seterr(p, (e)) 11155847Sbostic #define REQUIRE(co, e) ((co) || SETERROR(e)) 11260201Sbostic #define MUSTSEE(c, e) (REQUIRE(MORE() && PEEK() == (c), e)) 11360201Sbostic #define MUSTEAT(c, e) (REQUIRE(MORE() && GETNEXT() == (c), e)) 11460201Sbostic #define MUSTNOTSEE(c, e) (REQUIRE(!MORE() || PEEK() != (c), e)) 11555847Sbostic #define EMIT(sop, sopnd) doemit(p, sop, (size_t)(sopnd)) 11655847Sbostic #define INSERT(sop, pos) doinsert(p, sop, HERE()-(pos)+1, pos) 11760201Sbostic #define AHEAD(pos) dofwd(p, pos, HERE()-(pos)) 11860201Sbostic #define ASTERN(sop, pos) EMIT(sop, HERE()-pos) 11955847Sbostic #define HERE() (p->slen) 12055847Sbostic #define THERE() (p->slen - 1) 12155847Sbostic #define DROP(n) (p->slen -= (n)) 12255847Sbostic 12360201Sbostic #ifndef NDEBUG 12460201Sbostic static int never = 0; /* for use in asserts; shuts lint up */ 12560201Sbostic #endif 12656357Sbostic 12755847Sbostic /* 12855847Sbostic - regcomp - interface for parser and compilation 12960201Sbostic = extern int regcomp(regex_t *preg, const char *pattern, int cflags); 13060201Sbostic = #define REG_BASIC 0000 13160201Sbostic = #define REG_EXTENDED 0001 13260201Sbostic = #define REG_ICASE 0002 13360201Sbostic = #define REG_NOSUB 0004 13460201Sbostic = #define REG_NEWLINE 0010 13560201Sbostic = #define REG_NOSPEC 0020 13660201Sbostic = #define REG_PEND 0040 13760201Sbostic = #define REG_DUMP 0200 13855847Sbostic */ 13955847Sbostic int /* 0 success, otherwise REG_something */ 14055847Sbostic regcomp(preg, pattern, cflags) 14155847Sbostic regex_t *preg; 14255847Sbostic const char *pattern; 14355847Sbostic int cflags; 14455847Sbostic { 14555847Sbostic struct parse pa; 14655847Sbostic register struct re_guts *g; 14755847Sbostic register struct parse *p = &pa; 14855847Sbostic register int i; 14960201Sbostic register size_t len; 15055847Sbostic 15160201Sbostic if ((cflags®_EXTENDED) && (cflags®_NOSPEC)) 15260201Sbostic return(REG_INVARG); 15360201Sbostic 15460201Sbostic if (cflags®_PEND) { 15560201Sbostic if (preg->re_endp < pattern) 15660201Sbostic return(REG_INVARG); 15760201Sbostic len = preg->re_endp - pattern; 15860201Sbostic } else 15960201Sbostic len = strlen((char *)pattern); 16060201Sbostic 16155847Sbostic /* do the mallocs early so failure handling is easy */ 16260201Sbostic g = (struct re_guts *)malloc(sizeof(struct re_guts) + 16360201Sbostic (NC-1)*sizeof(cat_t)); 16455847Sbostic if (g == NULL) 16555847Sbostic return(REG_ESPACE); 16660201Sbostic p->ssize = len/(size_t)2*(size_t)3 + (size_t)1; /* ugh */ 16755847Sbostic p->strip = (sop *)malloc(p->ssize * sizeof(sop)); 16855847Sbostic p->slen = 0; 16955847Sbostic if (p->strip == NULL) { 17055847Sbostic free((char *)g); 17155847Sbostic return(REG_ESPACE); 17255847Sbostic } 17355847Sbostic 17455847Sbostic /* set things up */ 17555847Sbostic p->g = g; 17660201Sbostic p->next = (char *)pattern; /* convenience; we do not modify it */ 17760201Sbostic p->end = p->next + len; 17855847Sbostic p->error = 0; 17955847Sbostic p->ncsalloc = 0; 18055847Sbostic for (i = 0; i < NPAREN; i++) { 18155847Sbostic p->pbegin[i] = 0; 18255847Sbostic p->pend[i] = 0; 18355847Sbostic } 18460201Sbostic g->csetsize = NC; 18555847Sbostic g->sets = NULL; 18655847Sbostic g->setbits = NULL; 18755847Sbostic g->ncsets = 0; 18855847Sbostic g->cflags = cflags; 18955847Sbostic g->iflags = 0; 19060201Sbostic g->nbol = 0; 19160201Sbostic g->neol = 0; 19255847Sbostic g->must = NULL; 19355847Sbostic g->mlen = 0; 19455847Sbostic g->nsub = 0; 19555847Sbostic g->ncategories = 1; /* category 0 is "everything else" */ 196*60610Sbostic g->categories = &g->catspace[-(CHAR_MIN)]; 19760201Sbostic (void) memset((char *)g->catspace, 0, NC*sizeof(cat_t)); 19855847Sbostic g->backrefs = 0; 19955847Sbostic 20055847Sbostic /* do it */ 20155847Sbostic EMIT(OEND, 0); 20255847Sbostic g->firststate = THERE(); 20355847Sbostic if (cflags®_EXTENDED) 20460201Sbostic p_ere(p, OUT); 20560201Sbostic else if (cflags®_NOSPEC) 20660201Sbostic p_str(p); 20755847Sbostic else 20860201Sbostic p_bre(p, OUT, OUT); 20955847Sbostic EMIT(OEND, 0); 21055847Sbostic g->laststate = THERE(); 21155847Sbostic 21255847Sbostic /* tidy up loose ends and fill things in */ 21355847Sbostic categorize(p, g); 21455847Sbostic stripsnug(p, g); 21555847Sbostic findmust(p, g); 21655847Sbostic g->nplus = pluscount(p, g); 21755847Sbostic g->magic = MAGIC2; 21855847Sbostic preg->re_nsub = g->nsub; 21955847Sbostic preg->re_g = g; 22055847Sbostic preg->re_magic = MAGIC1; 22156355Sbostic #ifndef REDEBUG 22255847Sbostic /* not debugging, so can't rely on the assert() in regexec() */ 22355847Sbostic if (g->iflags&BAD) 22455847Sbostic SETERROR(REG_ASSERT); 22555847Sbostic #endif 22655847Sbostic 22755847Sbostic /* win or lose, we're done */ 22855847Sbostic if (p->error != 0) /* lose */ 22955847Sbostic regfree(preg); 23055847Sbostic return(p->error); 23155847Sbostic } 23255847Sbostic 23355847Sbostic /* 23455847Sbostic - p_ere - ERE parser top level, concatenation and alternation 23560201Sbostic == static void p_ere(register struct parse *p, int stop); 23655847Sbostic */ 23755847Sbostic static void 23855847Sbostic p_ere(p, stop) 23955847Sbostic register struct parse *p; 24060201Sbostic int stop; /* character this ERE should end at */ 24155847Sbostic { 24260201Sbostic register char c; 24355847Sbostic register sopno prevback; 24455847Sbostic register sopno prevfwd; 24555847Sbostic register sopno conc; 24655847Sbostic register int first = 1; /* is this the first alternative? */ 24755847Sbostic 24855847Sbostic for (;;) { 24955847Sbostic /* do a bunch of concatenated expressions */ 25055847Sbostic conc = HERE(); 25160201Sbostic while (MORE() && (c = PEEK()) != '|' && c != stop) 25255847Sbostic p_ere_exp(p); 25355847Sbostic REQUIRE(HERE() != conc, REG_EMPTY); /* require nonempty */ 25455847Sbostic 25555847Sbostic if (!EAT('|')) 25655847Sbostic break; /* NOTE BREAK OUT */ 25755847Sbostic 25855847Sbostic if (first) { 25955847Sbostic INSERT(OCH_, conc); /* offset is wrong */ 26055847Sbostic prevfwd = conc; 26155847Sbostic prevback = conc; 26255847Sbostic first = 0; 26355847Sbostic } 26460201Sbostic ASTERN(OOR1, prevback); 26555847Sbostic prevback = THERE(); 26660201Sbostic AHEAD(prevfwd); /* fix previous offset */ 26755847Sbostic prevfwd = HERE(); 26855847Sbostic EMIT(OOR2, 0); /* offset is very wrong */ 26955847Sbostic } 27055847Sbostic 27155847Sbostic if (!first) { /* tail-end fixups */ 27260201Sbostic AHEAD(prevfwd); 27360201Sbostic ASTERN(O_CH, prevback); 27455847Sbostic } 27555847Sbostic 27660201Sbostic assert(!MORE() || SEE(stop)); 27755847Sbostic } 27855847Sbostic 27955847Sbostic /* 28055847Sbostic - p_ere_exp - parse one subERE, an atom possibly followed by a repetition op 28160201Sbostic == static void p_ere_exp(register struct parse *p); 28255847Sbostic */ 28355847Sbostic static void 28455847Sbostic p_ere_exp(p) 28555847Sbostic register struct parse *p; 28655847Sbostic { 28760201Sbostic register char c; 28855847Sbostic register sopno pos; 28955847Sbostic register int count; 29055847Sbostic register int count2; 29155847Sbostic register sopno subno; 29255847Sbostic int wascaret = 0; 29355847Sbostic 29460201Sbostic assert(MORE()); /* caller should have ensured this */ 29555847Sbostic c = GETNEXT(); 29655847Sbostic 29755847Sbostic pos = HERE(); 29855847Sbostic switch (c) { 29955847Sbostic case '(': 30060201Sbostic REQUIRE(MORE(), REG_EPAREN); 30155847Sbostic p->g->nsub++; 30255847Sbostic subno = p->g->nsub; 30355847Sbostic if (subno < NPAREN) 30455847Sbostic p->pbegin[subno] = HERE(); 30555847Sbostic EMIT(OLPAREN, subno); 30655847Sbostic if (!SEE(')')) 30755847Sbostic p_ere(p, ')'); 30855847Sbostic if (subno < NPAREN) { 30955847Sbostic p->pend[subno] = HERE(); 31055847Sbostic assert(p->pend[subno] != 0); 31155847Sbostic } 31255847Sbostic EMIT(ORPAREN, subno); 31355847Sbostic MUSTEAT(')', REG_EPAREN); 31455847Sbostic break; 31555847Sbostic #ifndef POSIX_MISTAKE 31655847Sbostic case ')': /* happens only if no current unmatched ( */ 31755847Sbostic /* 31855847Sbostic * You may ask, why the ifndef? Because I didn't notice 31955847Sbostic * this until slightly too late for 1003.2, and none of the 32055847Sbostic * other 1003.2 regular-expression reviewers noticed it at 32155847Sbostic * all. So an unmatched ) is legal POSIX, at least until 32255847Sbostic * we can get it fixed. 32355847Sbostic */ 32455847Sbostic SETERROR(REG_EPAREN); 32555847Sbostic break; 32655847Sbostic #endif 32755847Sbostic case '^': 32855847Sbostic EMIT(OBOL, 0); 32955847Sbostic p->g->iflags |= USEBOL; 33060201Sbostic p->g->nbol++; 33155847Sbostic wascaret = 1; 33255847Sbostic break; 33355847Sbostic case '$': 33455847Sbostic EMIT(OEOL, 0); 33555847Sbostic p->g->iflags |= USEEOL; 33660201Sbostic p->g->neol++; 33755847Sbostic break; 33855847Sbostic case '|': 33955847Sbostic SETERROR(REG_EMPTY); 34055847Sbostic break; 34155847Sbostic case '*': 34255847Sbostic case '+': 34355847Sbostic case '?': 34455847Sbostic SETERROR(REG_BADRPT); 34555847Sbostic break; 34655847Sbostic case '.': 34755847Sbostic if (p->g->cflags®_NEWLINE) 34855847Sbostic nonnewline(p); 34955847Sbostic else 35055847Sbostic EMIT(OANY, 0); 35155847Sbostic break; 35255847Sbostic case '[': 35355847Sbostic p_bracket(p); 35455847Sbostic break; 35555847Sbostic case '\\': 35660201Sbostic REQUIRE(MORE(), REG_EESCAPE); 35755847Sbostic c = GETNEXT(); 35860201Sbostic ordinary(p, c); 35955847Sbostic break; 36055847Sbostic case '{': /* okay as ordinary except if digit follows */ 36160201Sbostic REQUIRE(!MORE() || !isdigit(PEEK()), REG_BADRPT); 36255847Sbostic /* FALLTHROUGH */ 36355847Sbostic default: 36455847Sbostic ordinary(p, c); 36555847Sbostic break; 36655847Sbostic } 36755847Sbostic 36860201Sbostic if (!MORE()) 36960201Sbostic return; 37055847Sbostic c = PEEK(); 37160201Sbostic /* we call { a repetition if followed by a digit */ 37260201Sbostic if (!( c == '*' || c == '+' || c == '?' || 37360201Sbostic (c == '{' && MORE2() && isdigit(PEEK2())) )) 37455847Sbostic return; /* no repetition, we're done */ 37555847Sbostic NEXT(); 37655847Sbostic 37755847Sbostic REQUIRE(!wascaret, REG_BADRPT); 37855847Sbostic switch (c) { 37955847Sbostic case '*': /* implemented as +? */ 38055847Sbostic INSERT(OPLUS_, pos); 38160201Sbostic ASTERN(O_PLUS, pos); 38255847Sbostic INSERT(OQUEST_, pos); 38360201Sbostic ASTERN(O_QUEST, pos); 38455847Sbostic break; 38555847Sbostic case '+': 38655847Sbostic INSERT(OPLUS_, pos); 38760201Sbostic ASTERN(O_PLUS, pos); 38855847Sbostic break; 38955847Sbostic case '?': 39055847Sbostic INSERT(OQUEST_, pos); 39160201Sbostic ASTERN(O_QUEST, pos); 39255847Sbostic break; 39355847Sbostic case '{': 39455847Sbostic count = p_count(p); 39555847Sbostic if (EAT(',')) { 39655847Sbostic if (isdigit(PEEK())) { 39755847Sbostic count2 = p_count(p); 39855847Sbostic REQUIRE(count <= count2, REG_BADBR); 39955847Sbostic } else /* single number with comma */ 40055847Sbostic count2 = INFINITY; 40155847Sbostic } else /* just a single number */ 40255847Sbostic count2 = count; 40355847Sbostic repeat(p, pos, count, count2); 40455847Sbostic if (!EAT('}')) { /* error heuristics */ 40560201Sbostic while (MORE() && PEEK() != '}') 40655847Sbostic NEXT(); 40760201Sbostic REQUIRE(MORE(), REG_EBRACE); 40860201Sbostic SETERROR(REG_BADBR); 40955847Sbostic } 41055847Sbostic break; 41155847Sbostic } 41255847Sbostic 41360201Sbostic if (!MORE()) 41460201Sbostic return; 41555847Sbostic c = PEEK(); 41660201Sbostic if (!( c == '*' || c == '+' || c == '?' || 41760201Sbostic (c == '{' && MORE2() && isdigit(PEEK2())) ) ) 41860201Sbostic return; 41960201Sbostic SETERROR(REG_BADRPT); 42055847Sbostic } 42155847Sbostic 42255847Sbostic /* 42360201Sbostic - p_str - string (no metacharacters) "parser" 42460201Sbostic == static void p_str(register struct parse *p); 42560201Sbostic */ 42660201Sbostic static void 42760201Sbostic p_str(p) 42860201Sbostic register struct parse *p; 42960201Sbostic { 43060201Sbostic REQUIRE(MORE(), REG_EMPTY); 43160201Sbostic while (MORE()) 43260201Sbostic ordinary(p, GETNEXT()); 43360201Sbostic } 43460201Sbostic 43560201Sbostic /* 43655847Sbostic - p_bre - BRE parser top level, anchoring and concatenation 43760201Sbostic == static void p_bre(register struct parse *p, register int end1, \ 43860201Sbostic == register int end2); 43960201Sbostic * Giving end1 as OUT essentially eliminates the end1/end2 check. 44055847Sbostic * 44156355Sbostic * This implementation is a bit of a kludge, in that a trailing $ is first 44256355Sbostic * taken as an ordinary character and then revised to be an anchor. The 44356355Sbostic * only undesirable side effect is that '$' gets included as a character 44456355Sbostic * category in such cases. This is fairly harmless; not worth fixing. 44560201Sbostic * The amount of lookahead needed to avoid this kludge is excessive. 44655847Sbostic */ 44755847Sbostic static void 44855847Sbostic p_bre(p, end1, end2) 44955847Sbostic register struct parse *p; 45060201Sbostic register int end1; /* first terminating character */ 45160201Sbostic register int end2; /* second terminating character */ 45255847Sbostic { 45355847Sbostic register sopno start = HERE(); 45455847Sbostic register int first = 1; /* first subexpression? */ 45556355Sbostic register int wasdollar = 0; 45655847Sbostic 45755847Sbostic if (EAT('^')) { 45855847Sbostic EMIT(OBOL, 0); 45955847Sbostic p->g->iflags |= USEBOL; 46060201Sbostic p->g->nbol++; 46155847Sbostic } 46260201Sbostic while (MORE() && !SEETWO(end1, end2)) { 46355847Sbostic wasdollar = p_simp_re(p, first); 46455847Sbostic first = 0; 46555847Sbostic } 46655847Sbostic if (wasdollar) { /* oops, that was a trailing anchor */ 46755847Sbostic DROP(1); 46855847Sbostic EMIT(OEOL, 0); 46955847Sbostic p->g->iflags |= USEEOL; 47060201Sbostic p->g->neol++; 47155847Sbostic } 47255847Sbostic 47355847Sbostic REQUIRE(HERE() != start, REG_EMPTY); /* require nonempty */ 47455847Sbostic } 47555847Sbostic 47655847Sbostic /* 47755847Sbostic - p_simp_re - parse a simple RE, an atom possibly followed by a repetition 47860201Sbostic == static int p_simp_re(register struct parse *p, int starordinary); 47955847Sbostic */ 48055847Sbostic static int /* was the simple RE an unbackslashed $? */ 48155847Sbostic p_simp_re(p, starordinary) 48255847Sbostic register struct parse *p; 48355847Sbostic int starordinary; /* is a leading * an ordinary character? */ 48455847Sbostic { 48555847Sbostic register int c; 48655847Sbostic register int count; 48755847Sbostic register int count2; 48855847Sbostic register sopno pos; 48955847Sbostic register int i; 49055847Sbostic register sopno subno; 49155847Sbostic # define BACKSL (1<<CHAR_BIT) 49255847Sbostic 49355847Sbostic pos = HERE(); /* repetion op, if any, covers from here */ 49455847Sbostic 49560201Sbostic assert(MORE()); /* caller should have ensured this */ 49655847Sbostic c = GETNEXT(); 49760201Sbostic if (c == '\\') { 49860201Sbostic REQUIRE(MORE(), REG_EESCAPE); 49960201Sbostic c = BACKSL | (unsigned char)GETNEXT(); 50060201Sbostic } 50155847Sbostic switch (c) { 50255847Sbostic case '.': 50355847Sbostic if (p->g->cflags®_NEWLINE) 50455847Sbostic nonnewline(p); 50555847Sbostic else 50655847Sbostic EMIT(OANY, 0); 50755847Sbostic break; 50855847Sbostic case '[': 50955847Sbostic p_bracket(p); 51055847Sbostic break; 51155847Sbostic case BACKSL|'{': 51255847Sbostic SETERROR(REG_BADRPT); 51355847Sbostic break; 51455847Sbostic case BACKSL|'(': 51555847Sbostic p->g->nsub++; 51655847Sbostic subno = p->g->nsub; 51755847Sbostic if (subno < NPAREN) 51855847Sbostic p->pbegin[subno] = HERE(); 51955847Sbostic EMIT(OLPAREN, subno); 52060201Sbostic /* the MORE here is an error heuristic */ 52160201Sbostic if (MORE() && !SEETWO('\\', ')')) 52255847Sbostic p_bre(p, '\\', ')'); 52355847Sbostic if (subno < NPAREN) { 52455847Sbostic p->pend[subno] = HERE(); 52555847Sbostic assert(p->pend[subno] != 0); 52655847Sbostic } 52755847Sbostic EMIT(ORPAREN, subno); 52855847Sbostic REQUIRE(EATTWO('\\', ')'), REG_EPAREN); 52955847Sbostic break; 53055847Sbostic case BACKSL|')': /* should not get here -- must be user */ 53155847Sbostic case BACKSL|'}': 53255847Sbostic SETERROR(REG_EPAREN); 53355847Sbostic break; 53455847Sbostic case BACKSL|'1': 53555847Sbostic case BACKSL|'2': 53655847Sbostic case BACKSL|'3': 53755847Sbostic case BACKSL|'4': 53855847Sbostic case BACKSL|'5': 53955847Sbostic case BACKSL|'6': 54055847Sbostic case BACKSL|'7': 54155847Sbostic case BACKSL|'8': 54255847Sbostic case BACKSL|'9': 54355847Sbostic i = (c&~BACKSL) - '0'; 54455847Sbostic assert(i < NPAREN); 54555847Sbostic if (p->pend[i] != 0) { 54655847Sbostic assert(i <= p->g->nsub); 54755847Sbostic EMIT(OBACK_, i); 54855847Sbostic assert(p->pbegin[i] != 0); 54955847Sbostic assert(OP(p->strip[p->pbegin[i]]) == OLPAREN); 55055847Sbostic assert(OP(p->strip[p->pend[i]]) == ORPAREN); 55155847Sbostic (void) dupl(p, p->pbegin[i]+1, p->pend[i]); 55255847Sbostic EMIT(O_BACK, i); 55355847Sbostic } else 55455847Sbostic SETERROR(REG_ESUBREG); 55555847Sbostic p->g->backrefs = 1; 55655847Sbostic break; 55755847Sbostic case '*': 55855847Sbostic REQUIRE(starordinary, REG_BADRPT); 55955847Sbostic /* FALLTHROUGH */ 56055847Sbostic default: 56160201Sbostic ordinary(p, c &~ BACKSL); 56255847Sbostic break; 56355847Sbostic } 56455847Sbostic 56555847Sbostic if (EAT('*')) { /* implemented as +? */ 56655847Sbostic INSERT(OPLUS_, pos); 56760201Sbostic ASTERN(O_PLUS, pos); 56855847Sbostic INSERT(OQUEST_, pos); 56960201Sbostic ASTERN(O_QUEST, pos); 57055847Sbostic } else if (EATTWO('\\', '{')) { 57155847Sbostic count = p_count(p); 57255847Sbostic if (EAT(',')) { 57360201Sbostic if (MORE() && isdigit(PEEK())) { 57455847Sbostic count2 = p_count(p); 57555847Sbostic REQUIRE(count <= count2, REG_BADBR); 57655847Sbostic } else /* single number with comma */ 57755847Sbostic count2 = INFINITY; 57855847Sbostic } else /* just a single number */ 57955847Sbostic count2 = count; 58055847Sbostic repeat(p, pos, count, count2); 58155847Sbostic if (!EATTWO('\\', '}')) { /* error heuristics */ 58260201Sbostic while (MORE() && !SEETWO('\\', '}')) 58355847Sbostic NEXT(); 58460201Sbostic REQUIRE(MORE(), REG_EBRACE); 58560201Sbostic SETERROR(REG_BADBR); 58655847Sbostic } 58760201Sbostic } else if (c == (unsigned char)'$') /* $ (but not \$) ends it */ 58855847Sbostic return(1); 58955847Sbostic 59055847Sbostic return(0); 59155847Sbostic } 59255847Sbostic 59355847Sbostic /* 59455847Sbostic - p_count - parse a repetition count 59560201Sbostic == static int p_count(register struct parse *p); 59655847Sbostic */ 59755847Sbostic static int /* the value */ 59855847Sbostic p_count(p) 59955847Sbostic register struct parse *p; 60055847Sbostic { 60155847Sbostic register int count = 0; 60255847Sbostic register int ndigits = 0; 60355847Sbostic 60460201Sbostic while (MORE() && isdigit(PEEK()) && count <= DUPMAX) { 60555847Sbostic count = count*10 + (GETNEXT() - '0'); 60655847Sbostic ndigits++; 60755847Sbostic } 60855847Sbostic 60955847Sbostic REQUIRE(ndigits > 0 && count <= DUPMAX, REG_BADBR); 61055847Sbostic return(count); 61155847Sbostic } 61255847Sbostic 61355847Sbostic /* 61455847Sbostic - p_bracket - parse a bracketed character list 61560201Sbostic == static void p_bracket(register struct parse *p); 61655847Sbostic * 61755847Sbostic * Note a significant property of this code: if the allocset() did SETERROR, 61855847Sbostic * no set operations are done. 61955847Sbostic */ 62055847Sbostic static void 62155847Sbostic p_bracket(p) 62255847Sbostic register struct parse *p; 62355847Sbostic { 62460201Sbostic register char c; 62555847Sbostic register cset *cs = allocset(p); 62655847Sbostic register int invert = 0; 62755847Sbostic 62860201Sbostic /* Dept of Truly Sickening Special-Case Kludges */ 62960201Sbostic if (p->next + 5 < p->end && strncmp(p->next, "[:<:]]", 6) == 0) { 63060201Sbostic EMIT(OBOW, 0); 63160201Sbostic NEXTn(6); 63260201Sbostic return; 63360201Sbostic } 63460201Sbostic if (p->next + 5 < p->end && strncmp(p->next, "[:>:]]", 6) == 0) { 63560201Sbostic EMIT(OEOW, 0); 63660201Sbostic NEXTn(6); 63760201Sbostic return; 63860201Sbostic } 63960201Sbostic 64055847Sbostic if (EAT('^')) 64155847Sbostic invert++; /* make note to invert set at end */ 64255847Sbostic if (EAT(']')) 64355847Sbostic CHadd(cs, ']'); 64456618Sbostic else if (EAT('-')) 64556618Sbostic CHadd(cs, '-'); 64660201Sbostic while (MORE() && PEEK() != ']' && !SEETWO('-', ']')) 64755847Sbostic p_b_term(p, cs); 64855847Sbostic if (EAT('-')) 64955847Sbostic CHadd(cs, '-'); 65055847Sbostic MUSTEAT(']', REG_EBRACK); 65155847Sbostic 65260201Sbostic if (p->error != 0) /* don't mess things up further */ 65360201Sbostic return; 65460201Sbostic 65560201Sbostic if (p->g->cflags®_ICASE) { 65655847Sbostic register int i; 65760201Sbostic register int ci; 65855847Sbostic 65955847Sbostic for (i = p->g->csetsize - 1; i >= 0; i--) 66060201Sbostic if (CHIN(cs, i) && isalpha(i)) { 66160201Sbostic ci = othercase(i); 66260201Sbostic if (ci != i) 66360201Sbostic CHadd(cs, ci); 66460201Sbostic } 66560201Sbostic if (cs->multis != NULL) 66660201Sbostic mccase(p, cs); 66760201Sbostic } 66860201Sbostic if (invert) { 66960201Sbostic register int i; 67060201Sbostic 67160201Sbostic for (i = p->g->csetsize - 1; i >= 0; i--) 67255847Sbostic if (CHIN(cs, i)) 67355847Sbostic CHsub(cs, i); 67455847Sbostic else 67555847Sbostic CHadd(cs, i); 67655847Sbostic if (p->g->cflags®_NEWLINE) 67755847Sbostic CHsub(cs, '\n'); 67855847Sbostic if (cs->multis != NULL) 67955847Sbostic mcinvert(p, cs); 68055847Sbostic } 68160201Sbostic 68255847Sbostic assert(cs->multis == NULL); /* xxx */ 68360201Sbostic 68460201Sbostic if (nch(p, cs) == 1) { /* optimize singleton sets */ 68560201Sbostic ordinary(p, firstch(p, cs)); 68660201Sbostic freeset(p, cs); 68760201Sbostic } else 68860201Sbostic EMIT(OANYOF, freezeset(p, cs)); 68955847Sbostic } 69055847Sbostic 69155847Sbostic /* 69255847Sbostic - p_b_term - parse one term of a bracketed character list 69360201Sbostic == static void p_b_term(register struct parse *p, register cset *cs); 69455847Sbostic */ 69555847Sbostic static void 69655847Sbostic p_b_term(p, cs) 69755847Sbostic register struct parse *p; 69855847Sbostic register cset *cs; 69955847Sbostic { 70060201Sbostic register char c; 70160201Sbostic register char start, finish; 70255847Sbostic register int i; 70355847Sbostic 70455847Sbostic /* classify what we've got */ 70560201Sbostic switch ((MORE()) ? PEEK() : '\0') { 70655847Sbostic case '[': 70760201Sbostic c = (MORE2()) ? PEEK2() : '\0'; 70855847Sbostic break; 70955847Sbostic case '-': 71055847Sbostic SETERROR(REG_ERANGE); 71155847Sbostic return; /* NOTE RETURN */ 71255847Sbostic break; 71355847Sbostic default: 71455847Sbostic c = '\0'; 71555847Sbostic break; 71655847Sbostic } 71755847Sbostic 71855847Sbostic switch (c) { 71955847Sbostic case ':': /* character class */ 72055847Sbostic NEXT2(); 72160201Sbostic REQUIRE(MORE(), REG_EBRACK); 72255847Sbostic c = PEEK(); 72355847Sbostic REQUIRE(c != '-' && c != ']', REG_ECTYPE); 72455847Sbostic p_b_cclass(p, cs); 72560201Sbostic REQUIRE(MORE(), REG_EBRACK); 72655847Sbostic REQUIRE(EATTWO(':', ']'), REG_ECTYPE); 72755847Sbostic break; 72855847Sbostic case '=': /* equivalence class */ 72955847Sbostic NEXT2(); 73060201Sbostic REQUIRE(MORE(), REG_EBRACK); 73155847Sbostic c = PEEK(); 73255847Sbostic REQUIRE(c != '-' && c != ']', REG_ECOLLATE); 73355847Sbostic p_b_eclass(p, cs); 73460201Sbostic REQUIRE(MORE(), REG_EBRACK); 73555847Sbostic REQUIRE(EATTWO('=', ']'), REG_ECOLLATE); 73655847Sbostic break; 73755847Sbostic default: /* symbol, ordinary character, or range */ 73855847Sbostic /* xxx revision needed for multichar stuff */ 73955847Sbostic start = p_b_symbol(p); 74060201Sbostic if (SEE('-') && MORE2() && PEEK2() != ']') { 74155847Sbostic /* range */ 74255847Sbostic NEXT(); 74355847Sbostic if (EAT('-')) 74455847Sbostic finish = '-'; 74555847Sbostic else 74655847Sbostic finish = p_b_symbol(p); 74755847Sbostic } else 74855847Sbostic finish = start; 74960201Sbostic /* xxx what about signed chars here... */ 75055847Sbostic REQUIRE(start <= finish, REG_ERANGE); 75160201Sbostic for (i = start; i <= finish; i++) 75255847Sbostic CHadd(cs, i); 75355847Sbostic break; 75455847Sbostic } 75555847Sbostic } 75655847Sbostic 75755847Sbostic /* 75855847Sbostic - p_b_cclass - parse a character-class name and deal with it 75960201Sbostic == static void p_b_cclass(register struct parse *p, register cset *cs); 76055847Sbostic */ 76155847Sbostic static void 76255847Sbostic p_b_cclass(p, cs) 76355847Sbostic register struct parse *p; 76455847Sbostic register cset *cs; 76555847Sbostic { 76660201Sbostic register char *sp = p->next; 76755847Sbostic register struct cclass *cp; 76860201Sbostic register size_t len; 76960201Sbostic register char *u; 77060201Sbostic register char c; 77155847Sbostic 77260201Sbostic while (MORE() && isalpha(PEEK())) 77360201Sbostic NEXT(); 77460201Sbostic len = p->next - sp; 77555847Sbostic for (cp = cclasses; cp->name != NULL; cp++) 77660201Sbostic if (strncmp(cp->name, sp, len) == 0 && cp->name[len] == '\0') 77755847Sbostic break; 77855847Sbostic if (cp->name == NULL) { 77955847Sbostic /* oops, didn't find it */ 78055847Sbostic SETERROR(REG_ECTYPE); 78155847Sbostic return; 78255847Sbostic } 78355847Sbostic 78460201Sbostic u = cp->chars; 78555847Sbostic while ((c = *u++) != '\0') 78655847Sbostic CHadd(cs, c); 78760201Sbostic for (u = cp->multis; *u != '\0'; u += strlen(u) + 1) 78855847Sbostic MCadd(cs, u); 78955847Sbostic } 79055847Sbostic 79155847Sbostic /* 79255847Sbostic - p_b_eclass - parse an equivalence-class name and deal with it 79360201Sbostic == static void p_b_eclass(register struct parse *p, register cset *cs); 79455847Sbostic * 79555847Sbostic * This implementation is incomplete. xxx 79655847Sbostic */ 79755847Sbostic static void 79855847Sbostic p_b_eclass(p, cs) 79955847Sbostic register struct parse *p; 80055847Sbostic register cset *cs; 80155847Sbostic { 80260201Sbostic register char c; 80355847Sbostic 80455847Sbostic c = p_b_coll_elem(p, '='); 80555847Sbostic CHadd(cs, c); 80655847Sbostic } 80755847Sbostic 80855847Sbostic /* 80955847Sbostic - p_b_symbol - parse a character or [..]ed multicharacter collating symbol 81060201Sbostic == static char p_b_symbol(register struct parse *p); 81155847Sbostic */ 81260201Sbostic static char /* value of symbol */ 81355847Sbostic p_b_symbol(p) 81455847Sbostic register struct parse *p; 81555847Sbostic { 81660201Sbostic register char value; 81755847Sbostic 81860201Sbostic REQUIRE(MORE(), REG_EBRACK); 81960201Sbostic if (!EATTWO('[', '.')) 82055847Sbostic return(GETNEXT()); 82155847Sbostic 82255847Sbostic /* collating symbol */ 82355847Sbostic value = p_b_coll_elem(p, '.'); 82455847Sbostic REQUIRE(EATTWO('.', ']'), REG_ECOLLATE); 82555847Sbostic return(value); 82655847Sbostic } 82755847Sbostic 82855847Sbostic /* 82955847Sbostic - p_b_coll_elem - parse a collating-element name and look it up 83060201Sbostic == static char p_b_coll_elem(register struct parse *p, int endc); 83155847Sbostic */ 83260201Sbostic static char /* value of collating element */ 83355847Sbostic p_b_coll_elem(p, endc) 83455847Sbostic register struct parse *p; 83560201Sbostic int endc; /* name ended by endc,']' */ 83655847Sbostic { 83760201Sbostic register char *sp = p->next; 83855847Sbostic register struct cname *cp; 83955847Sbostic register int len; 84060201Sbostic register char c; 84155847Sbostic 84260201Sbostic while (MORE() && !SEETWO(endc, ']')) 84355847Sbostic NEXT(); 84460201Sbostic if (!MORE()) { 84555847Sbostic SETERROR(REG_EBRACK); 84655847Sbostic return(0); 84755847Sbostic } 84855847Sbostic len = p->next - sp; 84955847Sbostic for (cp = cnames; cp->name != NULL; cp++) 85060201Sbostic if (strncmp(cp->name, sp, len) == 0 && cp->name[len] == '\0') 85155847Sbostic return(cp->code); /* known name */ 85255847Sbostic if (len == 1) 85355847Sbostic return(*sp); /* single character */ 85455847Sbostic SETERROR(REG_ECOLLATE); /* neither */ 85555847Sbostic return(0); 85655847Sbostic } 85755847Sbostic 85855847Sbostic /* 85955847Sbostic - othercase - return the case counterpart of an alphabetic 86060201Sbostic == static char othercase(int ch); 86155847Sbostic */ 86260201Sbostic static char /* if no counterpart, return ch */ 86355847Sbostic othercase(ch) 86460201Sbostic int ch; 86555847Sbostic { 86655847Sbostic assert(isalpha(ch)); 86755847Sbostic if (isupper(ch)) 86855847Sbostic return(tolower(ch)); 86955847Sbostic else if (islower(ch)) 87055847Sbostic return(toupper(ch)); 87155847Sbostic else /* peculiar, but could happen */ 87255847Sbostic return(ch); 87355847Sbostic } 87455847Sbostic 87555847Sbostic /* 87660201Sbostic - bothcases - emit a dualcase version of a two-case character 87760201Sbostic == static void bothcases(register struct parse *p, int ch); 87856355Sbostic * 87955847Sbostic * Boy, is this implementation ever a kludge... 88055847Sbostic */ 88155847Sbostic static void 88255847Sbostic bothcases(p, ch) 88355847Sbostic register struct parse *p; 88460201Sbostic int ch; 88555847Sbostic { 88660201Sbostic register char *oldnext = p->next; 88760201Sbostic register char *oldend = p->end; 88860201Sbostic char bracket[3]; 88955847Sbostic 89060201Sbostic assert(othercase(ch) != ch); /* p_bracket() would recurse */ 89155847Sbostic p->next = bracket; 89260201Sbostic p->end = bracket+2; 89355847Sbostic bracket[0] = ch; 89455847Sbostic bracket[1] = ']'; 89555847Sbostic bracket[2] = '\0'; 89655847Sbostic p_bracket(p); 89755847Sbostic assert(p->next == bracket+2); 89855847Sbostic p->next = oldnext; 89960201Sbostic p->end = oldend; 90055847Sbostic } 90155847Sbostic 90255847Sbostic /* 90355847Sbostic - ordinary - emit an ordinary character 90460201Sbostic == static void ordinary(register struct parse *p, register int ch); 90555847Sbostic */ 90655847Sbostic static void 90755847Sbostic ordinary(p, ch) 90855847Sbostic register struct parse *p; 90960201Sbostic register int ch; 91055847Sbostic { 91160201Sbostic register cat_t *cap = p->g->categories; 91255847Sbostic 91360201Sbostic if ((p->g->cflags®_ICASE) && isalpha(ch) && othercase(ch) != ch) 91455847Sbostic bothcases(p, ch); 91560201Sbostic else { 91660201Sbostic EMIT(OCHAR, (unsigned char)ch); 91760201Sbostic if (cap[ch] == 0) 91860201Sbostic cap[ch] = p->g->ncategories++; 91955847Sbostic } 92055847Sbostic } 92155847Sbostic 92255847Sbostic /* 92355847Sbostic - nonnewline - emit REG_NEWLINE version of OANY 92460201Sbostic == static void nonnewline(register struct parse *p); 92556355Sbostic * 92655847Sbostic * Boy, is this implementation ever a kludge... 92755847Sbostic */ 92855847Sbostic static void 92955847Sbostic nonnewline(p) 93055847Sbostic register struct parse *p; 93155847Sbostic { 93260201Sbostic register char *oldnext = p->next; 93360201Sbostic register char *oldend = p->end; 93460201Sbostic char bracket[4]; 93555847Sbostic 93655847Sbostic p->next = bracket; 93760201Sbostic p->end = bracket+3; 93855847Sbostic bracket[0] = '^'; 93955847Sbostic bracket[1] = '\n'; 94055847Sbostic bracket[2] = ']'; 94155847Sbostic bracket[3] = '\0'; 94255847Sbostic p_bracket(p); 94355847Sbostic assert(p->next == bracket+3); 94455847Sbostic p->next = oldnext; 94560201Sbostic p->end = oldend; 94655847Sbostic } 94755847Sbostic 94855847Sbostic /* 94955847Sbostic - repeat - generate code for a bounded repetition, recursively if needed 95060201Sbostic == static void repeat(register struct parse *p, sopno start, int from, int to); 95155847Sbostic */ 95255847Sbostic static void 95355847Sbostic repeat(p, start, from, to) 95455847Sbostic register struct parse *p; 95555847Sbostic sopno start; /* operand from here to end of strip */ 95655847Sbostic int from; /* repeated from this number */ 95755847Sbostic int to; /* to this number of times (maybe INFINITY) */ 95855847Sbostic { 95955847Sbostic register sopno finish = HERE(); 96055847Sbostic # define N 2 96155847Sbostic # define INF 3 96255847Sbostic # define REP(f, t) ((f)*8 + (t)) 96355847Sbostic # define MAP(n) (((n) <= 1) ? (n) : ((n) == INFINITY) ? INF : N) 96455847Sbostic register sopno copy; 96555847Sbostic 96655847Sbostic if (p->error != 0) /* head off possible runaway recursion */ 96755847Sbostic return; 96855847Sbostic 96955847Sbostic assert(from <= to); 97055847Sbostic 97155847Sbostic switch (REP(MAP(from), MAP(to))) { 97255847Sbostic case REP(0, 0): /* must be user doing this */ 97355847Sbostic DROP(finish-start); /* drop the operand */ 97455847Sbostic break; 97555847Sbostic case REP(0, 1): /* as x{1,1}? */ 97655847Sbostic case REP(0, N): /* as x{1,n}? */ 97755847Sbostic case REP(0, INF): /* as x{1,}? */ 97855847Sbostic INSERT(OQUEST_, start); /* offset is wrong... */ 97955847Sbostic repeat(p, start+1, 1, to); 98060201Sbostic AHEAD(start); /* ... fix it */ 98160201Sbostic ASTERN(O_QUEST, start); 98255847Sbostic break; 98355847Sbostic case REP(1, 1): /* trivial case */ 98455847Sbostic /* done */ 98555847Sbostic break; 98655847Sbostic case REP(1, N): /* as x?x{1,n-1} */ 98755847Sbostic INSERT(OQUEST_, start); 98860201Sbostic ASTERN(O_QUEST, start); 98955847Sbostic copy = dupl(p, start+1, finish+1); 99055847Sbostic assert(copy == finish+2); 99155847Sbostic repeat(p, copy, 1, to-1); 99255847Sbostic break; 99355847Sbostic case REP(1, INF): /* as x+ */ 99455847Sbostic INSERT(OPLUS_, start); 99560201Sbostic ASTERN(O_PLUS, start); 99655847Sbostic break; 99755847Sbostic case REP(N, N): /* as xx{m-1,n-1} */ 99855847Sbostic copy = dupl(p, start, finish); 99955847Sbostic repeat(p, copy, from-1, to-1); 100055847Sbostic break; 100155847Sbostic case REP(N, INF): /* as xx{n-1,INF} */ 100255847Sbostic copy = dupl(p, start, finish); 100355847Sbostic repeat(p, copy, from-1, to); 100455847Sbostic break; 100555847Sbostic default: /* "can't happen" */ 100655847Sbostic SETERROR(REG_ASSERT); /* just in case */ 100755847Sbostic break; 100855847Sbostic } 100955847Sbostic } 101055847Sbostic 101155847Sbostic /* 101255847Sbostic - seterr - set an error condition 101360201Sbostic == static int seterr(register struct parse *p, int e); 101455847Sbostic */ 101555847Sbostic static int /* useless but makes type checking happy */ 101655847Sbostic seterr(p, e) 101755847Sbostic register struct parse *p; 101855847Sbostic int e; 101955847Sbostic { 102055847Sbostic if (p->error == 0) /* keep earliest error condition */ 102155847Sbostic p->error = e; 102255847Sbostic p->next = nuls; /* try to bring things to a halt */ 102360201Sbostic p->end = nuls; 102455847Sbostic return(0); /* make the return value well-defined */ 102555847Sbostic } 102655847Sbostic 102755847Sbostic /* 102855847Sbostic - allocset - allocate a set of characters for [] 102960201Sbostic == static cset *allocset(register struct parse *p); 103055847Sbostic */ 103155847Sbostic static cset * 103255847Sbostic allocset(p) 103355847Sbostic register struct parse *p; 103455847Sbostic { 103555847Sbostic register int no = p->g->ncsets++; 103655847Sbostic register size_t nc; 103755847Sbostic register size_t nbytes; 103855847Sbostic register cset *cs; 103955847Sbostic register size_t css = (size_t)p->g->csetsize; 104055847Sbostic 104155847Sbostic if (no >= p->ncsalloc) { /* need another column of space */ 104255847Sbostic p->ncsalloc += CHAR_BIT; 104355847Sbostic nc = p->ncsalloc; 104455847Sbostic assert(nc % CHAR_BIT == 0); 104555847Sbostic nbytes = nc / CHAR_BIT * css; 104655847Sbostic if (p->g->sets == NULL) 104755847Sbostic p->g->sets = (cset *)malloc(nc * sizeof(cset)); 104855847Sbostic else 104955847Sbostic p->g->sets = (cset *)realloc((char *)p->g->sets, 105055847Sbostic nc * sizeof(cset)); 105155847Sbostic if (p->g->setbits == NULL) 105255847Sbostic p->g->setbits = (uchar *)malloc(nbytes); 105355847Sbostic else 105455847Sbostic p->g->setbits = (uchar *)realloc((char *)p->g->setbits, 105555847Sbostic nbytes); 105655847Sbostic if (p->g->sets != NULL && p->g->setbits != NULL) 105755847Sbostic (void) memset((char *)p->g->setbits + (nbytes - css), 105855847Sbostic 0, css); 105955847Sbostic else { 106055847Sbostic no = 0; 106155847Sbostic SETERROR(REG_ESPACE); 106255847Sbostic /* caller's responsibility not to do set ops */ 106355847Sbostic } 106455847Sbostic } 106555847Sbostic 106655847Sbostic assert(p->g->sets != NULL); /* xxx */ 106755847Sbostic cs = &p->g->sets[no]; 106855847Sbostic cs->ptr = p->g->setbits + css*((no)/CHAR_BIT); 106955847Sbostic cs->mask = 1 << ((no) % CHAR_BIT); 107055847Sbostic cs->hash = 0; 107155847Sbostic cs->smultis = 0; 107255847Sbostic cs->multis = NULL; 107355847Sbostic 107455847Sbostic return(cs); 107555847Sbostic } 107655847Sbostic 107755847Sbostic /* 107860201Sbostic - freeset - free a now-unused set 107960201Sbostic == static void freeset(register struct parse *p, register cset *cs); 108060201Sbostic */ 108160201Sbostic static void 108260201Sbostic freeset(p, cs) 108360201Sbostic register struct parse *p; 108460201Sbostic register cset *cs; 108560201Sbostic { 108660201Sbostic register int i; 108760201Sbostic register cset *top = &p->g->sets[p->g->ncsets]; 108860201Sbostic register size_t css = (size_t)p->g->csetsize; 108960201Sbostic 109060201Sbostic for (i = 0; i < css; i++) 109160201Sbostic CHsub(cs, i); 109260201Sbostic if (cs == top-1) /* recover only the easy case */ 109360201Sbostic p->g->ncsets--; 109460201Sbostic } 109560201Sbostic 109660201Sbostic /* 109755847Sbostic - freezeset - final processing on a set of characters 109860201Sbostic == static int freezeset(register struct parse *p, register cset *cs); 109955847Sbostic * 110055847Sbostic * The main task here is merging identical sets. This is usually a waste 110155847Sbostic * of time (although the hash code minimizes the overhead), but can win 110255847Sbostic * big if REG_ICASE is being used. REG_ICASE, by the way, is why the hash 110355847Sbostic * is done using addition rather than xor -- all ASCII [aA] sets xor to 110455847Sbostic * the same value! 110555847Sbostic */ 110655847Sbostic static int /* set number */ 110755847Sbostic freezeset(p, cs) 110855847Sbostic register struct parse *p; 110955847Sbostic register cset *cs; 111055847Sbostic { 111155847Sbostic register uchar h = cs->hash; 111255847Sbostic register int i; 111355847Sbostic register cset *top = &p->g->sets[p->g->ncsets]; 111455847Sbostic register cset *cs2; 111555847Sbostic register size_t css = (size_t)p->g->csetsize; 111655847Sbostic 111755847Sbostic /* look for an earlier one which is the same */ 111855847Sbostic for (cs2 = &p->g->sets[0]; cs2 < top; cs2++) 111955847Sbostic if (cs2->hash == h && cs2 != cs) { 112055847Sbostic /* maybe */ 112155847Sbostic for (i = 0; i < css; i++) 112255847Sbostic if (!!CHIN(cs2, i) != !!CHIN(cs, i)) 112355847Sbostic break; /* no */ 112455847Sbostic if (i == css) 112555847Sbostic break; /* yes */ 112655847Sbostic } 112755847Sbostic 112855847Sbostic if (cs2 < top) { /* found one */ 112960201Sbostic freeset(p, cs); 113055847Sbostic cs = cs2; 113155847Sbostic } 113255847Sbostic 113355847Sbostic return((int)(cs - p->g->sets)); 113455847Sbostic } 113555847Sbostic 113655847Sbostic /* 113760201Sbostic - firstch - return first character in a set (which must have at least one) 113860201Sbostic == static int firstch(register struct parse *p, register cset *cs); 113960201Sbostic */ 114060201Sbostic static int /* character; there is no "none" value */ 114160201Sbostic firstch(p, cs) 114260201Sbostic register struct parse *p; 114360201Sbostic register cset *cs; 114460201Sbostic { 114560201Sbostic register int i; 114660201Sbostic register size_t css = (size_t)p->g->csetsize; 114760201Sbostic 114860201Sbostic for (i = 0; i < css; i++) 114960201Sbostic if (CHIN(cs, i)) 115060201Sbostic return((char)i); 115160201Sbostic assert(never); 115260201Sbostic return(0); /* arbitrary */ 115360201Sbostic } 115460201Sbostic 115560201Sbostic /* 115660201Sbostic - nch - number of characters in a set 115760201Sbostic == static int nch(register struct parse *p, register cset *cs); 115860201Sbostic */ 115960201Sbostic static int 116060201Sbostic nch(p, cs) 116160201Sbostic register struct parse *p; 116260201Sbostic register cset *cs; 116360201Sbostic { 116460201Sbostic register int i; 116560201Sbostic register size_t css = (size_t)p->g->csetsize; 116660201Sbostic register int n = 0; 116760201Sbostic 116860201Sbostic for (i = 0; i < css; i++) 116960201Sbostic if (CHIN(cs, i)) 117060201Sbostic n++; 117160201Sbostic return(n); 117260201Sbostic } 117360201Sbostic 117460201Sbostic /* 117555847Sbostic - mcadd - add a collating element to a cset 117660201Sbostic == static void mcadd(register struct parse *p, register cset *cs, \ 117760201Sbostic == register char *cp); 117855847Sbostic */ 117955847Sbostic static void 118055847Sbostic mcadd(p, cs, cp) 118155847Sbostic register struct parse *p; 118255847Sbostic register cset *cs; 118360201Sbostic register char *cp; 118455847Sbostic { 118555847Sbostic register size_t oldend = cs->smultis; 118655847Sbostic 118760201Sbostic cs->smultis += strlen(cp) + 1; 118855847Sbostic if (cs->multis == NULL) 118960201Sbostic cs->multis = malloc(cs->smultis); 119055847Sbostic else 119160201Sbostic cs->multis = realloc(cs->multis, cs->smultis); 119255847Sbostic if (cs->multis == NULL) { 119355847Sbostic SETERROR(REG_ESPACE); 119455847Sbostic return; 119555847Sbostic } 119655847Sbostic 119760201Sbostic (void) strcpy(cs->multis + oldend - 1, cp); 119855847Sbostic cs->multis[cs->smultis - 1] = '\0'; 119955847Sbostic } 120055847Sbostic 120155847Sbostic /* 120255847Sbostic - mcsub - subtract a collating element from a cset 120360201Sbostic == static void mcsub(register cset *cs, register char *cp); 120455847Sbostic */ 120555847Sbostic static void 120660201Sbostic mcsub(cs, cp) 120755847Sbostic register cset *cs; 120860201Sbostic register char *cp; 120955847Sbostic { 121060201Sbostic register char *fp = mcfind(cs, cp); 121160201Sbostic register size_t len = strlen(fp); 121255847Sbostic 121360201Sbostic assert(fp != NULL); 121460201Sbostic (void) memmove(fp, fp + len + 1, 121555847Sbostic cs->smultis - (fp + len + 1 - cs->multis)); 121655847Sbostic cs->smultis -= len; 121755847Sbostic 121855847Sbostic if (cs->smultis == 0) { 121960201Sbostic free(cs->multis); 122055847Sbostic cs->multis = NULL; 122155847Sbostic return; 122255847Sbostic } 122355847Sbostic 122460201Sbostic cs->multis = realloc(cs->multis, cs->smultis); 122555847Sbostic assert(cs->multis != NULL); 122655847Sbostic } 122755847Sbostic 122855847Sbostic /* 122955847Sbostic - mcin - is a collating element in a cset? 123060201Sbostic == static int mcin(register cset *cs, register char *cp); 123155847Sbostic */ 123255847Sbostic static int 123360201Sbostic mcin(cs, cp) 123455847Sbostic register cset *cs; 123560201Sbostic register char *cp; 123655847Sbostic { 123755847Sbostic return(mcfind(cs, cp) != NULL); 123855847Sbostic } 123955847Sbostic 124055847Sbostic /* 124155847Sbostic - mcfind - find a collating element in a cset 124260201Sbostic == static char *mcfind(register cset *cs, register char *cp); 124355847Sbostic */ 124460201Sbostic static char * 124555847Sbostic mcfind(cs, cp) 124655847Sbostic register cset *cs; 124760201Sbostic register char *cp; 124855847Sbostic { 124960201Sbostic register char *p; 125055847Sbostic 125155847Sbostic if (cs->multis == NULL) 125255847Sbostic return(NULL); 125360201Sbostic for (p = cs->multis; *p != '\0'; p += strlen(p) + 1) 125460201Sbostic if (strcmp(cp, p) == 0) 125555847Sbostic return(p); 125655847Sbostic return(NULL); 125755847Sbostic } 125855847Sbostic 125955847Sbostic /* 126055847Sbostic - mcinvert - invert the list of collating elements in a cset 126160201Sbostic == static void mcinvert(register cset *cs); 126255847Sbostic * 126355847Sbostic * This would have to know the set of possibilities. Implementation 126455847Sbostic * is deferred. 126555847Sbostic */ 126655847Sbostic static void 126760201Sbostic mcinvert(cs) 126855847Sbostic register cset *cs; 126955847Sbostic { 127055847Sbostic assert(cs->multis == NULL); /* xxx */ 127155847Sbostic } 127255847Sbostic 127355847Sbostic /* 127460201Sbostic - mccase - add case counterparts of the list of collating elements in a cset 127560201Sbostic == static void mccase(register cset *cs); 127660201Sbostic * 127760201Sbostic * This would have to know the set of possibilities. Implementation 127860201Sbostic * is deferred. 127960201Sbostic */ 128060201Sbostic static void 128160201Sbostic mccase(cs) 128260201Sbostic register cset *cs; 128360201Sbostic { 128460201Sbostic assert(cs->multis == NULL); /* xxx */ 128560201Sbostic } 128660201Sbostic 128760201Sbostic /* 128855847Sbostic - isinsets - is this character in any sets? 128960201Sbostic == static int isinsets(register struct re_guts *g, int c); 129055847Sbostic */ 129155847Sbostic static int /* predicate */ 129255847Sbostic isinsets(g, c) 129355847Sbostic register struct re_guts *g; 129460201Sbostic int c; 129555847Sbostic { 129655847Sbostic register uchar *col; 129755847Sbostic register int i; 129855847Sbostic register int ncols = (g->ncsets+(CHAR_BIT-1)) / CHAR_BIT; 129960201Sbostic register unsigned uc = (unsigned char)c; 130055847Sbostic 130155847Sbostic for (i = 0, col = g->setbits; i < ncols; i++, col += g->csetsize) 130260201Sbostic if (col[uc] != 0) 130355847Sbostic return(1); 130455847Sbostic return(0); 130555847Sbostic } 130655847Sbostic 130755847Sbostic /* 130855847Sbostic - samesets - are these two characters in exactly the same sets? 130960201Sbostic == static int samesets(register struct re_guts *g, int c1, int c2); 131055847Sbostic */ 131155847Sbostic static int /* predicate */ 131255847Sbostic samesets(g, c1, c2) 131355847Sbostic register struct re_guts *g; 131460201Sbostic int c1; 131560201Sbostic int c2; 131655847Sbostic { 131755847Sbostic register uchar *col; 131855847Sbostic register int i; 131955847Sbostic register int ncols = (g->ncsets+(CHAR_BIT-1)) / CHAR_BIT; 132060201Sbostic register unsigned uc1 = (unsigned char)c1; 132160201Sbostic register unsigned uc2 = (unsigned char)c2; 132255847Sbostic 132355847Sbostic for (i = 0, col = g->setbits; i < ncols; i++, col += g->csetsize) 132460201Sbostic if (col[uc1] != col[uc2]) 132555847Sbostic return(0); 132655847Sbostic return(1); 132755847Sbostic } 132855847Sbostic 132955847Sbostic /* 133055847Sbostic - categorize - sort out character categories 133160201Sbostic == static void categorize(struct parse *p, register struct re_guts *g); 133255847Sbostic */ 133355847Sbostic static void 133455847Sbostic categorize(p, g) 133555847Sbostic struct parse *p; 133655847Sbostic register struct re_guts *g; 133755847Sbostic { 133860201Sbostic register cat_t *cats = g->categories; 133960201Sbostic register int c; 134060201Sbostic register int c2; 134160201Sbostic register cat_t cat; 134255847Sbostic 134355847Sbostic /* avoid making error situations worse */ 134455847Sbostic if (p->error != 0) 134555847Sbostic return; 134655847Sbostic 134760201Sbostic for (c = CHAR_MIN; c <= CHAR_MAX; c++) 134855847Sbostic if (cats[c] == 0 && isinsets(g, c)) { 134955847Sbostic cat = g->ncategories++; 135055847Sbostic cats[c] = cat; 135160201Sbostic for (c2 = c+1; c2 <= CHAR_MAX; c2++) 135255847Sbostic if (cats[c2] == 0 && samesets(g, c, c2)) 135355847Sbostic cats[c2] = cat; 135455847Sbostic } 135555847Sbostic } 135655847Sbostic 135755847Sbostic /* 135855847Sbostic - dupl - emit a duplicate of a bunch of sops 135960201Sbostic == static sopno dupl(register struct parse *p, sopno start, sopno finish); 136055847Sbostic */ 136155847Sbostic static sopno /* start of duplicate */ 136255847Sbostic dupl(p, start, finish) 136355847Sbostic register struct parse *p; 136455847Sbostic sopno start; /* from here */ 136555847Sbostic sopno finish; /* to this less one */ 136655847Sbostic { 136755847Sbostic register sopno ret = HERE(); 136855847Sbostic register sopno len = finish - start; 136955847Sbostic 137055847Sbostic assert(finish >= start); 137155847Sbostic if (len == 0) 137255847Sbostic return(ret); 137355847Sbostic enlarge(p, p->ssize + len); /* this many unexpected additions */ 137455847Sbostic assert(p->ssize >= p->slen + len); 137555847Sbostic (void) memcpy((char *)(p->strip + p->slen), 137655847Sbostic (char *)(p->strip + start), (size_t)len*sizeof(sop)); 137755847Sbostic p->slen += len; 137855847Sbostic return(ret); 137955847Sbostic } 138055847Sbostic 138155847Sbostic /* 138255847Sbostic - doemit - emit a strip operator 138360201Sbostic == static void doemit(register struct parse *p, sop op, size_t opnd); 138455847Sbostic * 138555847Sbostic * It might seem better to implement this as a macro with a function as 138655847Sbostic * hard-case backup, but it's just too big and messy unless there are 138755847Sbostic * some changes to the data structures. Maybe later. 138855847Sbostic */ 138955847Sbostic static void 139055847Sbostic doemit(p, op, opnd) 139155847Sbostic register struct parse *p; 139255847Sbostic sop op; 139355847Sbostic size_t opnd; 139455847Sbostic { 139555847Sbostic /* avoid making error situations worse */ 139655847Sbostic if (p->error != 0) 139755847Sbostic return; 139855847Sbostic 139955847Sbostic /* deal with oversize operands ("can't happen", more or less) */ 140055847Sbostic assert(opnd < 1<<OPSHIFT); 140155847Sbostic 140255847Sbostic /* deal with undersized strip */ 140355847Sbostic if (p->slen >= p->ssize) 140455847Sbostic enlarge(p, (p->ssize+1) / 2 * 3); /* +50% */ 140555847Sbostic assert(p->slen < p->ssize); 140655847Sbostic 140755847Sbostic /* finally, it's all reduced to the easy case */ 140855847Sbostic p->strip[p->slen++] = SOP(op, opnd); 140955847Sbostic } 141055847Sbostic 141155847Sbostic /* 141255847Sbostic - doinsert - insert a sop into the strip 141360201Sbostic == static void doinsert(register struct parse *p, sop op, size_t opnd, sopno pos); 141455847Sbostic */ 141555847Sbostic static void 141655847Sbostic doinsert(p, op, opnd, pos) 141755847Sbostic register struct parse *p; 141855847Sbostic sop op; 141955847Sbostic size_t opnd; 142055847Sbostic sopno pos; 142155847Sbostic { 142255847Sbostic register sopno sn; 142355847Sbostic register sop s; 142455847Sbostic register int i; 142555847Sbostic 142655847Sbostic /* avoid making error situations worse */ 142755847Sbostic if (p->error != 0) 142855847Sbostic return; 142955847Sbostic 143055847Sbostic sn = HERE(); 143155847Sbostic EMIT(op, opnd); /* do checks, ensure space */ 143255847Sbostic assert(HERE() == sn+1); 143355847Sbostic s = p->strip[sn]; 143455847Sbostic 143555847Sbostic /* adjust paren pointers */ 143655847Sbostic assert(pos > 0); 143755847Sbostic for (i = 1; i < NPAREN; i++) { 143855847Sbostic if (p->pbegin[i] >= pos) { 143955847Sbostic p->pbegin[i]++; 144055847Sbostic } 144155847Sbostic if (p->pend[i] >= pos) { 144255847Sbostic p->pend[i]++; 144355847Sbostic } 144455847Sbostic } 144555847Sbostic 144655847Sbostic memmove((char *)&p->strip[pos+1], (char *)&p->strip[pos], 144755847Sbostic (HERE()-pos-1)*sizeof(sop)); 144855847Sbostic p->strip[pos] = s; 144955847Sbostic } 145055847Sbostic 145155847Sbostic /* 145255847Sbostic - dofwd - complete a forward reference 145360201Sbostic == static void dofwd(register struct parse *p, sopno pos, sop value); 145455847Sbostic */ 145555847Sbostic static void 145655847Sbostic dofwd(p, pos, value) 145755847Sbostic register struct parse *p; 145855847Sbostic register sopno pos; 145955847Sbostic sop value; 146055847Sbostic { 146155847Sbostic /* avoid making error situations worse */ 146255847Sbostic if (p->error != 0) 146355847Sbostic return; 146455847Sbostic 146555847Sbostic assert(value < 1<<OPSHIFT); 146655847Sbostic p->strip[pos] = OP(p->strip[pos]) | value; 146755847Sbostic } 146855847Sbostic 146955847Sbostic /* 147055847Sbostic - enlarge - enlarge the strip 147160201Sbostic == static void enlarge(register struct parse *p, sopno size); 147255847Sbostic */ 147355847Sbostic static void 147455847Sbostic enlarge(p, size) 147555847Sbostic register struct parse *p; 147655847Sbostic register sopno size; 147755847Sbostic { 147855847Sbostic register sop *sp; 147955847Sbostic 148055847Sbostic if (p->ssize >= size) 148155847Sbostic return; 148255847Sbostic 148355847Sbostic sp = (sop *)realloc(p->strip, size*sizeof(sop)); 148455847Sbostic if (sp == NULL) { 148555847Sbostic SETERROR(REG_ESPACE); 148655847Sbostic return; 148755847Sbostic } 148855847Sbostic p->strip = sp; 148955847Sbostic p->ssize = size; 149055847Sbostic } 149155847Sbostic 149255847Sbostic /* 149355847Sbostic - stripsnug - compact the strip 149460201Sbostic == static void stripsnug(register struct parse *p, register struct re_guts *g); 149555847Sbostic */ 149655847Sbostic static void 149755847Sbostic stripsnug(p, g) 149855847Sbostic register struct parse *p; 149955847Sbostic register struct re_guts *g; 150055847Sbostic { 150155847Sbostic g->nstates = p->slen; 150255847Sbostic g->strip = (sop *)realloc((sop *)p->strip, p->slen * sizeof(sop)); 150355847Sbostic if (g->strip == NULL) { 150455847Sbostic SETERROR(REG_ESPACE); 150555847Sbostic g->strip = p->strip; 150655847Sbostic } 150755847Sbostic } 150855847Sbostic 150955847Sbostic /* 151055847Sbostic - findmust - fill in must and mlen with longest mandatory literal string 151160201Sbostic == static void findmust(register struct parse *p, register struct re_guts *g); 151255847Sbostic * 151355847Sbostic * This algorithm could do fancy things like analyzing the operands of | 151455847Sbostic * for common subsequences. Someday. This code is simple and finds most 151555847Sbostic * of the interesting cases. 151655847Sbostic * 151755847Sbostic * Note that must and mlen got initialized during setup. 151855847Sbostic */ 151956355Sbostic static void 152055847Sbostic findmust(p, g) 152155847Sbostic struct parse *p; 152255847Sbostic register struct re_guts *g; 152355847Sbostic { 152455847Sbostic register sop *scan; 152555847Sbostic sop *start; 152655847Sbostic register sop *newstart; 152755847Sbostic register sopno newlen; 152855847Sbostic register sop s; 152955847Sbostic register char *cp; 153055847Sbostic register sopno i; 153155847Sbostic 153255847Sbostic /* avoid making error situations worse */ 153355847Sbostic if (p->error != 0) 153455847Sbostic return; 153555847Sbostic 153655847Sbostic /* find the longest OCHAR sequence in strip */ 153755847Sbostic newlen = 0; 153855847Sbostic scan = g->strip + 1; 153955847Sbostic do { 154055847Sbostic s = *scan++; 154155847Sbostic switch (OP(s)) { 154255847Sbostic case OCHAR: /* sequence member */ 154355847Sbostic if (newlen == 0) /* new sequence */ 154455847Sbostic newstart = scan - 1; 154555847Sbostic newlen++; 154655847Sbostic break; 154755847Sbostic case OPLUS_: /* things that don't break one */ 154855847Sbostic case OLPAREN: 154955847Sbostic case ORPAREN: 155055847Sbostic break; 155155847Sbostic case OQUEST_: /* things that must be skipped */ 155255847Sbostic case OCH_: 155355847Sbostic scan--; 155455847Sbostic do { 155555847Sbostic scan += OPND(s); 155655847Sbostic s = *scan; 155755847Sbostic /* assert() interferes w debug printouts */ 155855847Sbostic if (OP(s) != O_QUEST && OP(s) != O_CH && 155955847Sbostic OP(s) != OOR2) { 156055847Sbostic g->iflags |= BAD; 156155847Sbostic return; 156255847Sbostic } 156355847Sbostic } while (OP(s) != O_QUEST && OP(s) != O_CH); 156455847Sbostic /* fallthrough */ 156555847Sbostic default: /* things that break a sequence */ 156655847Sbostic if (newlen > g->mlen) { /* ends one */ 156755847Sbostic start = newstart; 156855847Sbostic g->mlen = newlen; 156955847Sbostic } 157055847Sbostic newlen = 0; 157155847Sbostic break; 157255847Sbostic } 157355847Sbostic } while (OP(s) != OEND); 157455847Sbostic 157555847Sbostic if (g->mlen == 0) /* there isn't one */ 157655847Sbostic return; 157755847Sbostic 157855847Sbostic /* turn it into a character string */ 157955847Sbostic g->must = malloc((size_t)g->mlen + 1); 158055847Sbostic if (g->must == NULL) { /* argh; just forget it */ 158155847Sbostic g->mlen = 0; 158255847Sbostic return; 158355847Sbostic } 158455847Sbostic cp = g->must; 158555847Sbostic scan = start; 158655847Sbostic for (i = g->mlen; i > 0; i--) { 158755847Sbostic while (OP(s = *scan++) != OCHAR) 158855847Sbostic continue; 158960201Sbostic *cp++ = (char)OPND(s); 159055847Sbostic } 159155847Sbostic *cp++ = '\0'; /* just on general principles */ 159255847Sbostic } 159355847Sbostic 159455847Sbostic /* 159555847Sbostic - pluscount - count + nesting 159660201Sbostic == static sopno pluscount(register struct parse *p, register struct re_guts *g); 159755847Sbostic */ 159856355Sbostic static sopno /* nesting depth */ 159955847Sbostic pluscount(p, g) 160055847Sbostic struct parse *p; 160155847Sbostic register struct re_guts *g; 160255847Sbostic { 160355847Sbostic register sop *scan; 160455847Sbostic register sop s; 160555847Sbostic register sopno plusnest = 0; 160655847Sbostic register sopno maxnest = 0; 160755847Sbostic 160855847Sbostic if (p->error != 0) 160955847Sbostic return(0); /* there may not be an OEND */ 161055847Sbostic 161155847Sbostic scan = g->strip + 1; 161255847Sbostic do { 161355847Sbostic s = *scan++; 161455847Sbostic switch (OP(s)) { 161555847Sbostic case OPLUS_: 161655847Sbostic plusnest++; 161755847Sbostic break; 161855847Sbostic case O_PLUS: 161955847Sbostic if (plusnest > maxnest) 162055847Sbostic maxnest = plusnest; 162155847Sbostic plusnest--; 162255847Sbostic break; 162355847Sbostic } 162455847Sbostic } while (OP(s) != OEND); 162555847Sbostic if (plusnest != 0) 162655847Sbostic g->iflags |= BAD; 162755847Sbostic return(maxnest); 162855847Sbostic } 1629