155851Sbostic /*- 2*66362Sbostic * Copyright (c) 1992, 1993, 1994 Henry Spencer. 3*66362Sbostic * Copyright (c) 1992, 1993, 1994 461164Sbostic * The Regents of the University of California. All rights reserved. 555851Sbostic * 655851Sbostic * This code is derived from software contributed to Berkeley by 755851Sbostic * Henry Spencer of the University of Toronto. 855851Sbostic * 955851Sbostic * %sccs.include.redist.c% 1055851Sbostic * 11*66362Sbostic * @(#)regexec.c 8.2 (Berkeley) 03/16/94 1255851Sbostic */ 1355851Sbostic 1455851Sbostic #if defined(LIBC_SCCS) && !defined(lint) 15*66362Sbostic static char sccsid[] = "@(#)regexec.c 8.2 (Berkeley) 03/16/94"; 1655851Sbostic #endif /* LIBC_SCCS and not lint */ 1755851Sbostic 1856355Sbostic /* 1956355Sbostic * the outer shell of regexec() 2056355Sbostic * 2156355Sbostic * This file includes engine.c *twice*, after muchos fiddling with the 2256355Sbostic * macros that code uses. This lets the same code operate on two different 2356355Sbostic * representations for state sets. 2456355Sbostic */ 2555851Sbostic #include <sys/types.h> 2655851Sbostic #include <stdio.h> 2755851Sbostic #include <stdlib.h> 2855851Sbostic #include <string.h> 2955851Sbostic #include <limits.h> 3056355Sbostic #include <ctype.h> 3155851Sbostic #include <regex.h> 3255851Sbostic 3355851Sbostic #include "utils.h" 3455851Sbostic #include "regex2.h" 3555851Sbostic 3660201Sbostic static int nope = 0; /* for use in asserts; shuts lint up */ 3760201Sbostic 3855851Sbostic /* macros for manipulating states, small version */ 3955851Sbostic #define states long 4056355Sbostic #define states1 states /* for later use in regexec() decision */ 4155851Sbostic #define CLEAR(v) ((v) = 0) 4255851Sbostic #define SET0(v, n) ((v) &= ~(1 << (n))) 4355851Sbostic #define SET1(v, n) ((v) |= 1 << (n)) 4455851Sbostic #define ISSET(v, n) ((v) & (1 << (n))) 4555851Sbostic #define ASSIGN(d, s) ((d) = (s)) 4655851Sbostic #define EQ(a, b) ((a) == (b)) 4755851Sbostic #define STATEVARS int dummy /* dummy version */ 4855851Sbostic #define STATESETUP(m, n) /* nothing */ 4955851Sbostic #define STATETEARDOWN(m) /* nothing */ 5056355Sbostic #define SETUP(v) ((v) = 0) 5155851Sbostic #define onestate int 5260201Sbostic #define INIT(o, n) ((o) = (unsigned)1 << (n)) 5355851Sbostic #define INC(o) ((o) <<= 1) 5455851Sbostic #define ISSTATEIN(v, o) ((v) & (o)) 5555851Sbostic /* some abbreviations; note that some of these know variable names! */ 5655851Sbostic /* do "if I'm here, I can also be there" etc without branches */ 5760201Sbostic #define FWD(dst, src, n) ((dst) |= ((unsigned)(src)&(here)) << (n)) 5860201Sbostic #define BACK(dst, src, n) ((dst) |= ((unsigned)(src)&(here)) >> (n)) 5960201Sbostic #define ISSETBACK(v, n) ((v) & ((unsigned)here >> (n))) 6055851Sbostic /* function names */ 6155851Sbostic #define SNAMES /* engine.c looks after details */ 6255851Sbostic 6355851Sbostic #include "engine.c" 6455851Sbostic 6555851Sbostic /* now undo things */ 6655851Sbostic #undef states 6755851Sbostic #undef CLEAR 6855851Sbostic #undef SET0 6955851Sbostic #undef SET1 7055851Sbostic #undef ISSET 7155851Sbostic #undef ASSIGN 7255851Sbostic #undef EQ 7355851Sbostic #undef STATEVARS 7455851Sbostic #undef STATESETUP 7555851Sbostic #undef STATETEARDOWN 7655851Sbostic #undef SETUP 7755851Sbostic #undef onestate 7855851Sbostic #undef INIT 7955851Sbostic #undef INC 8055851Sbostic #undef ISSTATEIN 8155851Sbostic #undef FWD 8255851Sbostic #undef BACK 8355851Sbostic #undef ISSETBACK 8455851Sbostic #undef SNAMES 8555851Sbostic 8655851Sbostic /* macros for manipulating states, large version */ 8755851Sbostic #define states char * 8855851Sbostic #define CLEAR(v) memset(v, 0, m->g->nstates) 8955851Sbostic #define SET0(v, n) ((v)[n] = 0) 9055851Sbostic #define SET1(v, n) ((v)[n] = 1) 9155851Sbostic #define ISSET(v, n) ((v)[n]) 9255851Sbostic #define ASSIGN(d, s) memcpy(d, s, m->g->nstates) 9355851Sbostic #define EQ(a, b) (memcmp(a, b, m->g->nstates) == 0) 9455851Sbostic #define STATEVARS int vn; char *space 9555851Sbostic #define STATESETUP(m, nv) { (m)->space = malloc((nv)*(m)->g->nstates); \ 9655851Sbostic if ((m)->space == NULL) return(REG_ESPACE); \ 9755851Sbostic (m)->vn = 0; } 9855851Sbostic #define STATETEARDOWN(m) { free((m)->space); } 9955851Sbostic #define SETUP(v) ((v) = &m->space[m->vn++ * m->g->nstates]) 10055851Sbostic #define onestate int 10155851Sbostic #define INIT(o, n) ((o) = (n)) 10255851Sbostic #define INC(o) ((o)++) 10355851Sbostic #define ISSTATEIN(v, o) ((v)[o]) 10455851Sbostic /* some abbreviations; note that some of these know variable names! */ 10555851Sbostic /* do "if I'm here, I can also be there" etc without branches */ 10655851Sbostic #define FWD(dst, src, n) ((dst)[here+(n)] |= (src)[here]) 10755851Sbostic #define BACK(dst, src, n) ((dst)[here-(n)] |= (src)[here]) 10855851Sbostic #define ISSETBACK(v, n) ((v)[here - (n)]) 10955851Sbostic /* function names */ 11055851Sbostic #define LNAMES /* flag */ 11155851Sbostic 11255851Sbostic #include "engine.c" 11356355Sbostic 11456355Sbostic /* 11556355Sbostic - regexec - interface for matching 116*66362Sbostic = extern int regexec(const regex_t *, const char *, size_t, \ 117*66362Sbostic = regmatch_t [], int); 11860201Sbostic = #define REG_NOTBOL 00001 11960201Sbostic = #define REG_NOTEOL 00002 12060201Sbostic = #define REG_STARTEND 00004 12160201Sbostic = #define REG_TRACE 00400 // tracing of execution 12260201Sbostic = #define REG_LARGE 01000 // force large representation 12360201Sbostic = #define REG_BACKR 02000 // force use of backref code 12456355Sbostic * 12556355Sbostic * We put this here so we can exploit knowledge of the state representation 12656355Sbostic * when choosing which matcher to call. Also, by this point the matchers 12756355Sbostic * have been prototyped. 12856355Sbostic */ 12956355Sbostic int /* 0 success, REG_NOMATCH failure */ 13056355Sbostic regexec(preg, string, nmatch, pmatch, eflags) 13156355Sbostic const regex_t *preg; 13256355Sbostic const char *string; 13356355Sbostic size_t nmatch; 13456355Sbostic regmatch_t pmatch[]; 13556355Sbostic int eflags; 13656355Sbostic { 13756355Sbostic register struct re_guts *g = preg->re_g; 13856355Sbostic #ifdef REDEBUG 13956355Sbostic # define GOODFLAGS(f) (f) 14056355Sbostic #else 14156355Sbostic # define GOODFLAGS(f) ((f)&(REG_NOTBOL|REG_NOTEOL|REG_STARTEND)) 14256355Sbostic #endif 14356355Sbostic 14456355Sbostic if (preg->re_magic != MAGIC1 || g->magic != MAGIC2) 14556355Sbostic return(REG_BADPAT); 14656355Sbostic assert(!(g->iflags&BAD)); 14756355Sbostic if (g->iflags&BAD) /* backstop for no-debug case */ 14856355Sbostic return(REG_BADPAT); 149*66362Sbostic eflags = GOODFLAGS(eflags); 15056355Sbostic 15156355Sbostic if (g->nstates <= CHAR_BIT*sizeof(states1) && !(eflags®_LARGE)) 15260201Sbostic return(smatcher(g, (char *)string, nmatch, pmatch, eflags)); 15356355Sbostic else 15460201Sbostic return(lmatcher(g, (char *)string, nmatch, pmatch, eflags)); 15556355Sbostic } 156