158186Selan /* Extended regular expression matching and search library.
258186Selan    Copyright (C) 1985, 1989-90 Free Software Foundation, Inc.
358186Selan 
458186Selan    This program is free software; you can redistribute it and/or modify
558186Selan    it under the terms of the GNU General Public License as published by
658186Selan    the Free Software Foundation; either version 2, or (at your option)
758186Selan    any later version.
858186Selan 
958186Selan    This program is distributed in the hope that it will be useful,
1058186Selan    but WITHOUT ANY WARRANTY; without even the implied warranty of
1158186Selan    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
1258186Selan    GNU General Public License for more details.
1358186Selan 
1458186Selan    You should have received a copy of the GNU General Public License
1558186Selan    along with this program; if not, write to the Free Software
1658186Selan    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.  */
1758186Selan 
1858186Selan 
1958186Selan // This is a translation into C++ of regex.c, the GNU regexp package.
2058186Selan 
2158186Selan /* To test, compile with -Dtest.  This Dtestable feature turns this into
2258186Selan    a self-contained program which reads a pattern, describes how it
2358186Selan    compiles, then reads a string and searches for it.
2458186Selan 
2558186Selan    On the other hand, if you compile with both -Dtest and -Dcanned you
2658186Selan    can run some tests we've already thought of.  */
2758186Selan 
2858186Selan /* AIX requires the alloca decl to be the first thing in the file. */
2958186Selan #ifdef __GNUC__
30*60385Selan #define alloca alloca
3158186Selan #else
3258186Selan #ifdef sparc
3358186Selan #include <alloca.h>
3458186Selan #else
3558186Selan #ifdef _AIX
3658186Selan #pragma alloca
3758186Selan #else
3858186Selan char *alloca ();
3958186Selan #endif
4058186Selan #endif
4158186Selan #endif
4258186Selan 
4358186Selan #ifdef emacs
4458186Selan 
4558186Selan /* The `emacs' switch turns on certain special matching commands
4658186Selan   that make sense only in emacs. */
4758186Selan 
4858186Selan #include "config.h"
4958186Selan #include "lisp.h"
5058186Selan #include "buffer.h"
5158186Selan #include "syntax.h"
5258186Selan 
5358186Selan #else  /* not emacs */
5458186Selan 
5558186Selan #include <_G_config.h>
5658186Selan #include <string.h>
5758186Selan #include <stdlib.h>
5858186Selan 
5958186Selan /* Define the syntax stuff, so we can do the \<, \>, etc.  */
6058186Selan 
6158186Selan /* This must be nonzero for the wordchar and notwordchar pattern
6258186Selan    commands in re_match_2.  */
6358186Selan #ifndef Sword
6458186Selan #define Sword 1
6558186Selan #endif
6658186Selan 
6758186Selan #define SYNTAX(c) re_syntax_table[c]
6858186Selan 
6958186Selan 
7058186Selan #ifdef SYNTAX_TABLE
7158186Selan 
7258186Selan char *re_syntax_table;
7358186Selan 
7458186Selan #else /* not SYNTAX_TABLE */
7558186Selan 
7658186Selan static char re_syntax_table[256];
7758186Selan 
7858186Selan 
7958186Selan static void
init_syntax_once()8058186Selan init_syntax_once ()
8158186Selan {
8258186Selan    register int c;
8358186Selan    static int done = 0;
8458186Selan 
8558186Selan    if (done)
8658186Selan      return;
8758186Selan 
8858186Selan    memset (re_syntax_table, 0, sizeof re_syntax_table);
8958186Selan 
9058186Selan    for (c = 'a'; c <= 'z'; c++)
9158186Selan      re_syntax_table[c] = Sword;
9258186Selan 
9358186Selan    for (c = 'A'; c <= 'Z'; c++)
9458186Selan      re_syntax_table[c] = Sword;
9558186Selan 
9658186Selan    for (c = '0'; c <= '9'; c++)
9758186Selan      re_syntax_table[c] = Sword;
9858186Selan 
9958186Selan    done = 1;
10058186Selan }
10158186Selan 
10258186Selan #endif /* SYNTAX_TABLE */
10358186Selan #endif /* emacs */
10458186Selan 
10558186Selan /* We write fatal error messages on standard error.  */
10658186Selan #include <stdio.h>
10758186Selan 
10858186Selan /* isalpha(3) etc. are used for the character classes.  */
10958186Selan #include <ctype.h>
11058186Selan /* Sequents are missing isgraph.  */
11158186Selan #ifndef isgraph
11258186Selan #define isgraph(c) (isprint((c)) && !isspace((c)))
11358186Selan #endif
11458186Selan 
11558186Selan /* Get the interface, including the syntax bits.  */
11658186Selan #include "regex.h"
11758186Selan 
11858186Selan 
11958186Selan /* These are the command codes that appear in compiled regular
12058186Selan    expressions, one per byte.  Some command codes are followed by
12158186Selan    argument bytes.  A command code can specify any interpretation
12258186Selan    whatsoever for its arguments.  Zero-bytes may appear in the compiled
12358186Selan    regular expression.
12458186Selan 
12558186Selan    The value of `exactn' is needed in search.c (search_buffer) in emacs.
12658186Selan    So regex.h defines a symbol `RE_EXACTN_VALUE' to be 1; the value of
12758186Selan    `exactn' we use here must also be 1.  */
12858186Selan 
12958186Selan enum regexpcode
13058186Selan   {
13158186Selan     unused=0,
13258186Selan     exactn=1, /* Followed by one byte giving n, then by n literal bytes.  */
13358186Selan     begline,  /* Fail unless at beginning of line.  */
13458186Selan     endline,  /* Fail unless at end of line.  */
13558186Selan     jump,     /* Followed by two bytes giving relative address to jump to.  */
13658186Selan     on_failure_jump,	 /* Followed by two bytes giving relative address of
13758186Selan 			    place to resume at in case of failure.  */
13858186Selan     finalize_jump,	 /* Throw away latest failure point and then jump to
13958186Selan 			    address.  */
14058186Selan     maybe_finalize_jump, /* Like jump but finalize if safe to do so.
14158186Selan 			    This is used to jump back to the beginning
14258186Selan 			    of a repeat.  If the command that follows
14358186Selan 			    this jump is clearly incompatible with the
14458186Selan 			    one at the beginning of the repeat, such that
14558186Selan 			    we can be sure that there is no use backtracking
14658186Selan 			    out of repetitions already completed,
14758186Selan 			    then we finalize.  */
14858186Selan     dummy_failure_jump,  /* Jump, and push a dummy failure point. This
14958186Selan 			    failure point will be thrown away if an attempt
15058186Selan                             is made to use it for a failure. A + construct
15158186Selan                             makes this before the first repeat.  Also
15258186Selan                             use it as an intermediary kind of jump when
15358186Selan                             compiling an or construct.  */
15458186Selan     succeed_n,	 /* Used like on_failure_jump except has to succeed n times;
15558186Selan 		    then gets turned into an on_failure_jump. The relative
15658186Selan                     address following it is useless until then.  The
15758186Selan                     address is followed by two bytes containing n.  */
15858186Selan     jump_n,	 /* Similar to jump, but jump n times only; also the relative
15958186Selan 		    address following is in turn followed by yet two more bytes
16058186Selan                     containing n.  */
16158186Selan     set_number_at,	/* Set the following relative location to the
16258186Selan 			   subsequent number.  */
16358186Selan     anychar,	 /* Matches any (more or less) one character.  */
16458186Selan     charset,     /* Matches any one char belonging to specified set.
16558186Selan 		    First following byte is number of bitmap bytes.
16658186Selan 		    Then come bytes for a bitmap saying which chars are in.
16758186Selan 		    Bits in each byte are ordered low-bit-first.
16858186Selan 		    A character is in the set if its bit is 1.
16958186Selan 		    A character too large to have a bit in the map
17058186Selan 		    is automatically not in the set.  */
17158186Selan     charset_not, /* Same parameters as charset, but match any character
17258186Selan                     that is not one of those specified.  */
17358186Selan     start_memory, /* Start remembering the text that is matched, for
17458186Selan 		    storing in a memory register.  Followed by one
17558186Selan                     byte containing the register number.  Register numbers
17658186Selan                     must be in the range 0 through RE_NREGS.  */
17758186Selan     stop_memory, /* Stop remembering the text that is matched
17858186Selan 		    and store it in a memory register.  Followed by
17958186Selan                     one byte containing the register number. Register
18058186Selan                     numbers must be in the range 0 through RE_NREGS.  */
18158186Selan     duplicate,   /* Match a duplicate of something remembered.
18258186Selan 		    Followed by one byte containing the index of the memory
18358186Selan                     register.  */
18458186Selan #ifdef emacs
18558186Selan     before_dot,	 /* Succeeds if before point.  */
18658186Selan     at_dot,	 /* Succeeds if at point.  */
18758186Selan     after_dot,	 /* Succeeds if after point.  */
18858186Selan #endif
18958186Selan     begbuf,      /* Succeeds if at beginning of buffer.  */
19058186Selan     endbuf,      /* Succeeds if at end of buffer.  */
19158186Selan     wordchar,    /* Matches any word-constituent character.  */
19258186Selan     notwordchar, /* Matches any char that is not a word-constituent.  */
19358186Selan     wordbeg,	 /* Succeeds if at word beginning.  */
19458186Selan     wordend,	 /* Succeeds if at word end.  */
19558186Selan     wordbound,   /* Succeeds if at a word boundary.  */
19658186Selan     notwordbound,/* Succeeds if not at a word boundary.  */
19758186Selan #ifdef emacs
19858186Selan     syntaxspec,  /* Matches any character whose syntax is specified.
19958186Selan 		    followed by a byte which contains a syntax code,
20058186Selan                     e.g., Sword.  */
20158186Selan     notsyntaxspec /* Matches any character whose syntax differs from
20258186Selan                      that specified.  */
20358186Selan #endif
20458186Selan   };
20558186Selan 
20658186Selan 
20758186Selan /* Number of failure points to allocate space for initially,
20858186Selan    when matching.  If this number is exceeded, more space is allocated,
20958186Selan    so it is not a hard limit.  */
21058186Selan 
21158186Selan #ifndef NFAILURES
21258186Selan #define NFAILURES 80
21358186Selan #endif
21458186Selan 
21558186Selan 
21658186Selan #ifndef SIGN_EXTEND_CHAR
21758186Selan #ifdef __STDC__
21858186Selan #define SIGN_EXTEND_CHAR(c) ((signed char)(c))
21958186Selan #else
22058186Selan #define SIGN_EXTEND_CHAR(c) (((c)^128) - 128) /* As in Harbison and Steele.  */
22158186Selan #endif
22258186Selan #endif /* not SIGN_EXTEND_CHAR */
22358186Selan 
22458186Selan /* Store NUMBER in two contiguous bytes starting at DESTINATION.  */
22558186Selan #define STORE_NUMBER(destination, number)				\
22658186Selan   { (destination)[0] = (number) & 0377;					\
22758186Selan     (destination)[1] = (number) >> 8; }
22858186Selan 
22958186Selan /* Same as STORE_NUMBER, except increment the destination pointer to
23058186Selan    the byte after where the number is stored.  Watch out that values for
23158186Selan    DESTINATION such as p + 1 won't work, whereas p will.  */
23258186Selan #define STORE_NUMBER_AND_INCR(destination, number)			\
23358186Selan   { STORE_NUMBER(destination, number);					\
23458186Selan     (destination) += 2; }
23558186Selan 
23658186Selan 
23758186Selan /* Put into DESTINATION a number stored in two contingous bytes starting
23858186Selan    at SOURCE.  */
23958186Selan #define EXTRACT_NUMBER(destination, source)				\
24058186Selan   { (destination) = *(source) & 0377;					\
24158186Selan     (destination) += SIGN_EXTEND_CHAR (*(char *)((source) + 1)) << 8; }
24258186Selan 
24358186Selan /* Same as EXTRACT_NUMBER, except increment the pointer for source to
24458186Selan    point to second byte of SOURCE.  Note that SOURCE has to be a value
24558186Selan    such as p, not, e.g., p + 1. */
24658186Selan #define EXTRACT_NUMBER_AND_INCR(destination, source)			\
24758186Selan   { EXTRACT_NUMBER (destination, source);				\
24858186Selan     (source) += 2; }
24958186Selan 
25058186Selan 
25158186Selan /* Specify the precise syntax of regexps for compilation.  This provides
25258186Selan    for compatibility for various utilities which historically have
25358186Selan    different, incompatible syntaxes.
25458186Selan 
25558186Selan    The argument SYNTAX is a bit-mask comprised of the various bits
25658186Selan    defined in regex.h.  */
25758186Selan 
25858186Selan int
re_set_syntax(int syntax)25958186Selan re_set_syntax (int syntax)
26058186Selan {
26158186Selan   int ret;
26258186Selan 
26358186Selan   ret = obscure_syntax;
26458186Selan   obscure_syntax = syntax;
26558186Selan   return ret;
26658186Selan }
26758186Selan 
26858186Selan /* Set by re_set_syntax to the current regexp syntax to recognize.  */
26958186Selan int obscure_syntax = 0;
27058186Selan 
27158186Selan 
27258186Selan 
27358186Selan /* Macros for re_compile_pattern, which is found below these definitions.  */
27458186Selan 
27558186Selan #define CHAR_CLASS_MAX_LENGTH  6
27658186Selan 
27758186Selan /* Fetch the next character in the uncompiled pattern, translating it if
27858186Selan    necessary.  */
27958186Selan #define PATFETCH(c)							\
28058186Selan   {if (p == pend) goto end_of_pattern;					\
28158186Selan   c = * (const unsigned char *) p++;						\
28258186Selan   if (translate) c = translate[c]; }
28358186Selan 
28458186Selan /* Fetch the next character in the uncompiled pattern, with no
28558186Selan    translation.  */
28658186Selan #define PATFETCH_RAW(c)							\
28758186Selan  {if (p == pend) goto end_of_pattern;					\
28858186Selan   c = * (const unsigned char *) p++; }
28958186Selan 
29058186Selan #define PATUNFETCH p--
29158186Selan 
29258186Selan 
29358186Selan /* If the buffer isn't allocated when it comes in, use this.  */
29458186Selan #define INIT_BUF_SIZE  28
29558186Selan 
29658186Selan /* Make sure we have at least N more bytes of space in buffer.  */
29758186Selan #define GET_BUFFER_SPACE(n)						\
29858186Selan   {								        \
29958186Selan     while (b - bufp->buffer + (n) >= bufp->allocated)			\
30058186Selan       EXTEND_BUFFER;							\
30158186Selan   }
30258186Selan 
30358186Selan /* Make sure we have one more byte of buffer space and then add CH to it.  */
30458186Selan #define BUFPUSH(ch)							\
30558186Selan   {									\
30658186Selan     GET_BUFFER_SPACE (1);						\
30758186Selan     *b++ = (char) (ch);							\
30858186Selan   }
30958186Selan 
31058186Selan /* Extend the buffer by twice its current size via reallociation and
31158186Selan    reset the pointers that pointed into the old allocation to point to
31258186Selan    the correct places in the new allocation.  If extending the buffer
31358186Selan    results in it being larger than 1 << 16, then flag memory exhausted.  */
31458186Selan #define EXTEND_BUFFER							\
31558186Selan   { char *old_buffer = bufp->buffer;					\
31658186Selan     if (bufp->allocated == (1L<<16)) goto too_big;			\
31758186Selan     bufp->allocated *= 2;						\
31858186Selan     if (bufp->allocated > (1L<<16)) bufp->allocated = (1L<<16);		\
31958186Selan     bufp->buffer = (char *) realloc (bufp->buffer, bufp->allocated);	\
32058186Selan     if (bufp->buffer == 0)						\
32158186Selan       goto memory_exhausted;						\
32258186Selan     b = (b - old_buffer) + bufp->buffer;				\
32358186Selan     if (fixup_jump)							\
32458186Selan       fixup_jump = (fixup_jump - old_buffer) + bufp->buffer;		\
32558186Selan     if (laststart)							\
32658186Selan       laststart = (laststart - old_buffer) + bufp->buffer;		\
32758186Selan     begalt = (begalt - old_buffer) + bufp->buffer;			\
32858186Selan     if (pending_exact)							\
32958186Selan       pending_exact = (pending_exact - old_buffer) + bufp->buffer;	\
33058186Selan   }
33158186Selan 
33258186Selan /* Set the bit for character C in a character set list.  */
33358186Selan #define SET_LIST_BIT(c)  (b[(c) / BYTEWIDTH] |= 1 << ((c) % BYTEWIDTH))
33458186Selan 
33558186Selan /* Get the next unsigned number in the uncompiled pattern.  */
33658186Selan #define GET_UNSIGNED_NUMBER(num) 					\
33758186Selan   { if (p != pend) 							\
33858186Selan       { 								\
33958186Selan         PATFETCH (c); 							\
34058186Selan 	while (isdigit (c)) 						\
34158186Selan 	  { 								\
34258186Selan 	    if (num < 0) 						\
34358186Selan 	       num = 0; 						\
34458186Selan             num = num * 10 + c - '0'; 					\
34558186Selan 	    if (p == pend) 						\
34658186Selan 	       break; 							\
34758186Selan 	    PATFETCH (c); 						\
34858186Selan 	  } 								\
34958186Selan         } 								\
35058186Selan   }
35158186Selan 
35258186Selan /* Subroutines for re_compile_pattern.  */
35358186Selan static void store_jump (char *from, char opcode, char *to);
35458186Selan static void insert_jump (char op, char *from, char *to, char *current_end);
35558186Selan static void store_jump_n  (char *from, char opcode, char *to, unsigned n);
35658186Selan static void insert_jump_n (char, char *, char *, char *, unsigned);
35758186Selan static void insert_op_2 (char, char *, char *_end, int, int);
35858186Selan 
35958186Selan 
36058186Selan /* re_compile_pattern takes a regular-expression string
36158186Selan    and converts it into a buffer full of byte commands for matching.
36258186Selan 
36358186Selan    PATTERN   is the address of the pattern string
36458186Selan    SIZE      is the length of it.
36558186Selan    BUFP	    is a  struct re_pattern_buffer *  which points to the info
36658186Selan 	     on where to store the byte commands.
36758186Selan 	     This structure contains a  char *  which points to the
36858186Selan 	     actual space, which should have been obtained with malloc.
36958186Selan 	     re_compile_pattern may use realloc to grow the buffer space.
37058186Selan 
37158186Selan    The number of bytes of commands can be found out by looking in
37258186Selan    the `struct re_pattern_buffer' that bufp pointed to, after
37358186Selan    re_compile_pattern returns. */
37458186Selan 
37558186Selan char *
re_compile_pattern(const char * pattern,int size,struct re_pattern_buffer * bufp)37658186Selan re_compile_pattern (const char *pattern, int size, struct re_pattern_buffer *bufp)
37758186Selan {
37858186Selan   register char *b = bufp->buffer;
37958186Selan   register const char *p = pattern;
38058186Selan   const char *pend = pattern + size;
38158186Selan   register unsigned c, c1;
38258186Selan   const char *p1;
38358186Selan   unsigned char *translate = (unsigned char *) bufp->translate;
38458186Selan 
38558186Selan   /* Address of the count-byte of the most recently inserted `exactn'
38658186Selan      command.  This makes it possible to tell whether a new exact-match
38758186Selan      character can be added to that command or requires a new `exactn'
38858186Selan      command.  */
38958186Selan 
39058186Selan   char *pending_exact = 0;
39158186Selan 
39258186Selan   /* Address of the place where a forward-jump should go to the end of
39358186Selan      the containing expression.  Each alternative of an `or', except the
39458186Selan      last, ends with a forward-jump of this sort.  */
39558186Selan 
39658186Selan   char *fixup_jump = 0;
39758186Selan 
39858186Selan   /* Address of start of the most recently finished expression.
39958186Selan      This tells postfix * where to find the start of its operand.  */
40058186Selan 
40158186Selan   char *laststart = 0;
40258186Selan 
40358186Selan   /* In processing a repeat, 1 means zero matches is allowed.  */
40458186Selan 
40558186Selan   char zero_times_ok;
40658186Selan 
40758186Selan   /* In processing a repeat, 1 means many matches is allowed.  */
40858186Selan 
40958186Selan   char many_times_ok;
41058186Selan 
41158186Selan   /* Address of beginning of regexp, or inside of last \(.  */
41258186Selan 
41358186Selan   char *begalt = b;
41458186Selan 
41558186Selan   /* In processing an interval, at least this many matches must be made.  */
41658186Selan   int lower_bound;
41758186Selan 
41858186Selan   /* In processing an interval, at most this many matches can be made.  */
41958186Selan   int upper_bound;
42058186Selan 
42158186Selan   /* Place in pattern (i.e., the {) to which to go back if the interval
42258186Selan      is invalid.  */
42358186Selan   const char *beg_interval = 0;
42458186Selan 
42558186Selan   /* Stack of information saved by \( and restored by \).
42658186Selan      Four stack elements are pushed by each \(:
42758186Selan        First, the value of b.
42858186Selan        Second, the value of fixup_jump.
42958186Selan        Third, the value of regnum.
43058186Selan        Fourth, the value of begalt.  */
43158186Selan 
43258186Selan   int stackb[40];
43358186Selan   int *stackp = stackb;
43458186Selan   int *stacke = stackb + 40;
43558186Selan   int *stackt;
43658186Selan 
43758186Selan   /* Counts \('s as they are encountered.  Remembered for the matching \),
43858186Selan      where it becomes the register number to put in the stop_memory
43958186Selan      command.  */
44058186Selan 
44158186Selan   unsigned regnum = 1;
44258186Selan 
44358186Selan   bufp->fastmap_accurate = 0;
44458186Selan 
44558186Selan #ifndef emacs
44658186Selan #ifndef SYNTAX_TABLE
44758186Selan   /* Initialize the syntax table.  */
44858186Selan    init_syntax_once();
44958186Selan #endif
45058186Selan #endif
45158186Selan 
45258186Selan   if (bufp->allocated == 0)
45358186Selan     {
45458186Selan       bufp->allocated = INIT_BUF_SIZE;
45558186Selan       if (bufp->buffer)
45658186Selan 	/* EXTEND_BUFFER loses when bufp->allocated is 0.  */
45758186Selan 	bufp->buffer = (char *) realloc (bufp->buffer, INIT_BUF_SIZE);
45858186Selan       else
45958186Selan 	/* Caller did not allocate a buffer.  Do it for them.  */
46058186Selan 	bufp->buffer = (char *) malloc (INIT_BUF_SIZE);
46158186Selan       if (!bufp->buffer) goto memory_exhausted;
46258186Selan       begalt = b = bufp->buffer;
46358186Selan     }
46458186Selan 
46558186Selan   while (p != pend)
46658186Selan     {
46758186Selan       PATFETCH (c);
46858186Selan 
46958186Selan       switch (c)
47058186Selan 	{
47158186Selan 	case '$':
47258186Selan 	  {
47358186Selan 	    const char *p1 = p;
47458186Selan 	    /* When testing what follows the $,
47558186Selan 	       look past the \-constructs that don't consume anything.  */
47658186Selan 	    if (! (obscure_syntax & RE_CONTEXT_INDEP_OPS))
47758186Selan 	      while (p1 != pend)
47858186Selan 		{
47958186Selan 		  if (*p1 == '\\' && p1 + 1 != pend
48058186Selan 		      && (p1[1] == '<' || p1[1] == '>'
48158186Selan 			  || p1[1] == '`' || p1[1] == '\''
48258186Selan #ifdef emacs
48358186Selan 			  || p1[1] == '='
48458186Selan #endif
48558186Selan 			  || p1[1] == 'b' || p1[1] == 'B'))
48658186Selan 		    p1 += 2;
48758186Selan 		  else
48858186Selan 		    break;
48958186Selan 		}
49058186Selan             if (obscure_syntax & RE_TIGHT_VBAR)
49158186Selan 	      {
49258186Selan 		if (! (obscure_syntax & RE_CONTEXT_INDEP_OPS) && p1 != pend)
49358186Selan 		  goto normal_char;
49458186Selan 		/* Make operand of last vbar end before this `$'.  */
49558186Selan 		if (fixup_jump)
49658186Selan 		  store_jump (fixup_jump, jump, b);
49758186Selan 		fixup_jump = 0;
49858186Selan 		BUFPUSH (endline);
49958186Selan 		break;
50058186Selan 	      }
50158186Selan 	    /* $ means succeed if at end of line, but only in special contexts.
50258186Selan 	      If validly in the middle of a pattern, it is a normal character. */
50358186Selan 
50458186Selan             if ((obscure_syntax & RE_CONTEXTUAL_INVALID_OPS) && p1 != pend)
50558186Selan 	      goto invalid_pattern;
50658186Selan 	    if (p1 == pend || *p1 == '\n'
50758186Selan 		|| (obscure_syntax & RE_CONTEXT_INDEP_OPS)
50858186Selan 		|| (obscure_syntax & RE_NO_BK_PARENS
50958186Selan 		    ? *p1 == ')'
51058186Selan 		    : *p1 == '\\' && p1[1] == ')')
51158186Selan 		|| (obscure_syntax & RE_NO_BK_VBAR
51258186Selan 		    ? *p1 == '|'
51358186Selan 		    : *p1 == '\\' && p1[1] == '|'))
51458186Selan 	      {
51558186Selan 		BUFPUSH (endline);
51658186Selan 		break;
51758186Selan 	      }
51858186Selan 	    goto normal_char;
51958186Selan           }
52058186Selan 	case '^':
52158186Selan 	  /* ^ means succeed if at beg of line, but only if no preceding
52258186Selan              pattern.  */
52358186Selan 
52458186Selan           if ((obscure_syntax & RE_CONTEXTUAL_INVALID_OPS) && laststart)
52558186Selan             goto invalid_pattern;
52658186Selan           if (laststart && p - 2 >= pattern && p[-2] != '\n'
52758186Selan 	       && !(obscure_syntax & RE_CONTEXT_INDEP_OPS))
52858186Selan 	    goto normal_char;
52958186Selan 	  if (obscure_syntax & RE_TIGHT_VBAR)
53058186Selan 	    {
53158186Selan 	      if (p != pattern + 1
53258186Selan 		  && ! (obscure_syntax & RE_CONTEXT_INDEP_OPS))
53358186Selan 		goto normal_char;
53458186Selan 	      BUFPUSH (begline);
53558186Selan 	      begalt = b;
53658186Selan 	    }
53758186Selan 	  else
53858186Selan 	    BUFPUSH (begline);
53958186Selan 	  break;
54058186Selan 
54158186Selan 	case '+':
54258186Selan 	case '?':
54358186Selan 	  if ((obscure_syntax & RE_BK_PLUS_QM)
54458186Selan 	      || (obscure_syntax & RE_LIMITED_OPS))
54558186Selan 	    goto normal_char;
54658186Selan 	handle_plus:
54758186Selan 	case '*':
54858186Selan 	  /* If there is no previous pattern, char not special. */
54958186Selan 	  if (!laststart)
55058186Selan             {
55158186Selan               if (obscure_syntax & RE_CONTEXTUAL_INVALID_OPS)
55258186Selan                 goto invalid_pattern;
55358186Selan               else if (! (obscure_syntax & RE_CONTEXT_INDEP_OPS))
55458186Selan 		goto normal_char;
55558186Selan             }
55658186Selan 	  /* If there is a sequence of repetition chars,
55758186Selan 	     collapse it down to just one.  */
55858186Selan 	  zero_times_ok = 0;
55958186Selan 	  many_times_ok = 0;
56058186Selan 	  while (1)
56158186Selan 	    {
56258186Selan 	      zero_times_ok |= c != '+';
56358186Selan 	      many_times_ok |= c != '?';
56458186Selan 	      if (p == pend)
56558186Selan 		break;
56658186Selan 	      PATFETCH (c);
56758186Selan 	      if (c == '*')
56858186Selan 		;
56958186Selan 	      else if (!(obscure_syntax & RE_BK_PLUS_QM)
57058186Selan 		       && (c == '+' || c == '?'))
57158186Selan 		;
57258186Selan 	      else if ((obscure_syntax & RE_BK_PLUS_QM)
57358186Selan 		       && c == '\\')
57458186Selan 		{
57558186Selan 		  int c1;
57658186Selan 		  PATFETCH (c1);
57758186Selan 		  if (!(c1 == '+' || c1 == '?'))
57858186Selan 		    {
57958186Selan 		      PATUNFETCH;
58058186Selan 		      PATUNFETCH;
58158186Selan 		      break;
58258186Selan 		    }
58358186Selan 		  c = c1;
58458186Selan 		}
58558186Selan 	      else
58658186Selan 		{
58758186Selan 		  PATUNFETCH;
58858186Selan 		  break;
58958186Selan 		}
59058186Selan 	    }
59158186Selan 
59258186Selan 	  /* Star, etc. applied to an empty pattern is equivalent
59358186Selan 	     to an empty pattern.  */
59458186Selan 	  if (!laststart)
59558186Selan 	    break;
59658186Selan 
59758186Selan 	  /* Now we know whether or not zero matches is allowed
59858186Selan 	     and also whether or not two or more matches is allowed.  */
59958186Selan 	  if (many_times_ok)
60058186Selan 	    {
60158186Selan 	      /* If more than one repetition is allowed, put in at the
60258186Selan                  end a backward relative jump from b to before the next
60358186Selan                  jump we're going to put in below (which jumps from
60458186Selan                  laststart to after this jump).  */
60558186Selan               GET_BUFFER_SPACE (3);
60658186Selan 	      store_jump (b, maybe_finalize_jump, laststart - 3);
60758186Selan 	      b += 3;  	/* Because store_jump put stuff here.  */
60858186Selan 	    }
60958186Selan           /* On failure, jump from laststart to b + 3, which will be the
61058186Selan              end of the buffer after this jump is inserted.  */
61158186Selan           GET_BUFFER_SPACE (3);
61258186Selan 	  insert_jump (on_failure_jump, laststart, b + 3, b);
61358186Selan 	  pending_exact = 0;
61458186Selan 	  b += 3;
61558186Selan 	  if (!zero_times_ok)
61658186Selan 	    {
61758186Selan 	      /* At least one repetition is required, so insert a
61858186Selan                  dummy-failure before the initial on-failure-jump
61958186Selan                  instruction of the loop. This effects a skip over that
62058186Selan                  instruction the first time we hit that loop.  */
62158186Selan               GET_BUFFER_SPACE (6);
62258186Selan               insert_jump (dummy_failure_jump, laststart, laststart + 6, b);
62358186Selan 	      b += 3;
62458186Selan 	    }
62558186Selan 	  break;
62658186Selan 
62758186Selan 	case '.':
62858186Selan 	  laststart = b;
62958186Selan 	  BUFPUSH (anychar);
63058186Selan 	  break;
63158186Selan 
63258186Selan         case '[':
63358186Selan           if (p == pend)
63458186Selan             goto invalid_pattern;
63558186Selan 	  while (b - bufp->buffer
63658186Selan 		 > bufp->allocated - 3 - (1 << BYTEWIDTH) / BYTEWIDTH)
63758186Selan 	    EXTEND_BUFFER;
63858186Selan 
63958186Selan 	  laststart = b;
64058186Selan 	  if (*p == '^')
64158186Selan 	    {
64258186Selan               BUFPUSH (charset_not);
64358186Selan               p++;
64458186Selan             }
64558186Selan 	  else
64658186Selan 	    BUFPUSH (charset);
64758186Selan 	  p1 = p;
64858186Selan 
64958186Selan 	  BUFPUSH ((1 << BYTEWIDTH) / BYTEWIDTH);
65058186Selan 	  /* Clear the whole map */
65158186Selan 	  memset (b, 0, (1 << BYTEWIDTH) / BYTEWIDTH);
65258186Selan 
65358186Selan 	  if ((obscure_syntax & RE_HAT_NOT_NEWLINE) && b[-2] == charset_not)
65458186Selan             SET_LIST_BIT ('\n');
65558186Selan 
65658186Selan 
65758186Selan 	  /* Read in characters and ranges, setting map bits.  */
65858186Selan 	  while (1)
65958186Selan 	    {
66058186Selan 	      /* Don't translate while fetching, in case it's a range bound.
66158186Selan 		 When we set the bit for the character, we translate it.  */
66258186Selan 	      PATFETCH_RAW (c);
66358186Selan 
66458186Selan 	      /* If set, \ escapes characters when inside [...].  */
66558186Selan 	      if ((obscure_syntax & RE_AWK_CLASS_HACK) && c == '\\')
66658186Selan 	        {
66758186Selan 	          PATFETCH(c1);
66858186Selan                   SET_LIST_BIT (c1);
66958186Selan 	          continue;
67058186Selan 	        }
67158186Selan               if (c == ']')
67258186Selan                 {
67358186Selan                   if (p == p1 + 1)
67458186Selan                     {
67558186Selan 		      /* If this is an empty bracket expression.  */
67658186Selan                       if ((obscure_syntax & RE_NO_EMPTY_BRACKETS)
67758186Selan                           && p == pend)
67858186Selan                         goto invalid_pattern;
67958186Selan                     }
68058186Selan                   else
68158186Selan 		    /* Stop if this isn't merely a ] inside a bracket
68258186Selan                        expression, but rather the end of a bracket
68358186Selan                        expression.  */
68458186Selan                     break;
68558186Selan                 }
68658186Selan               /* Get a range.  */
68758186Selan               if (p[0] == '-' && p[1] != ']')
68858186Selan 		{
68958186Selan                   PATFETCH (c1);
69058186Selan 		  /* Don't translate the range bounds while fetching them.  */
69158186Selan 		  PATFETCH_RAW (c1);
69258186Selan 
69358186Selan 		  if ((obscure_syntax & RE_NO_EMPTY_RANGES) && c > c1)
69458186Selan                     goto invalid_pattern;
69558186Selan 
69658186Selan 		  if ((obscure_syntax & RE_NO_HYPHEN_RANGE_END)
69758186Selan                       && c1 == '-' && *p != ']')
69858186Selan                     goto invalid_pattern;
69958186Selan 
70058186Selan                   while (c <= c1)
70158186Selan 		    {
70258186Selan 		      /* Translate each char that's in the range.  */
70358186Selan 		      if (translate)
70458186Selan 			SET_LIST_BIT (translate[c]);
70558186Selan 		      else
70658186Selan 			SET_LIST_BIT (c);
70758186Selan                       c++;
70858186Selan 		    }
70958186Selan                 }
71058186Selan 	      else if ((obscure_syntax & RE_CHAR_CLASSES)
71158186Selan 			&&  c == '[' && p[0] == ':')
71258186Selan                 {
71358186Selan 		  /* Longest valid character class word has six characters.  */
71458186Selan                   char str[CHAR_CLASS_MAX_LENGTH];
71558186Selan 		  PATFETCH (c);
71658186Selan 		  c1 = 0;
71758186Selan 		  /* If no ] at end.  */
71858186Selan                   if (p == pend)
71958186Selan                     goto invalid_pattern;
72058186Selan 		  while (1)
72158186Selan 		    {
72258186Selan 		      /* Don't translate the ``character class'' characters.  */
72358186Selan                       PATFETCH_RAW (c);
72458186Selan 		      if (c == ':' || c == ']' || p == pend
72558186Selan                           || c1 == CHAR_CLASS_MAX_LENGTH)
72658186Selan 		        break;
72758186Selan 		      str[c1++] = c;
72858186Selan 		    }
72958186Selan 		  str[c1] = '\0';
73058186Selan 		  if (p == pend
73158186Selan 		      || c == ']'	/* End of the bracket expression.  */
73258186Selan                       || p[0] != ']'
73358186Selan 		      || p + 1 == pend
73458186Selan                       || (strcmp (str, "alpha") != 0
73558186Selan                           && strcmp (str, "upper") != 0
73658186Selan 			  && strcmp (str, "lower") != 0
73758186Selan                           && strcmp (str, "digit") != 0
73858186Selan 			  && strcmp (str, "alnum") != 0
73958186Selan                           && strcmp (str, "xdigit") != 0
74058186Selan 			  && strcmp (str, "space") != 0
74158186Selan                           && strcmp (str, "print") != 0
74258186Selan 			  && strcmp (str, "punct") != 0
74358186Selan                           && strcmp (str, "graph") != 0
74458186Selan 			  && strcmp (str, "cntrl") != 0))
74558186Selan 		    {
74658186Selan 		       /* Undo the ending character, the letters, and leave
74758186Selan                           the leading : and [ (but set bits for them).  */
74858186Selan                       c1++;
74958186Selan 		      while (c1--)
75058186Selan 			PATUNFETCH;
75158186Selan 		      SET_LIST_BIT ('[');
75258186Selan 		      SET_LIST_BIT (':');
75358186Selan 	            }
75458186Selan                   else
75558186Selan                     {
75658186Selan                       /* The ] at the end of the character class.  */
75758186Selan                       PATFETCH (c);
75858186Selan                       if (c != ']')
75958186Selan                         goto invalid_pattern;
76058186Selan 		      for (c = 0; c < (1 << BYTEWIDTH); c++)
76158186Selan 			{
76258186Selan 			  if ((strcmp (str, "alpha") == 0  && isalpha (c))
76358186Selan 			       || (strcmp (str, "upper") == 0  && isupper (c))
76458186Selan 			       || (strcmp (str, "lower") == 0  && islower (c))
76558186Selan 			       || (strcmp (str, "digit") == 0  && isdigit (c))
76658186Selan 			       || (strcmp (str, "alnum") == 0  && isalnum (c))
76758186Selan 			       || (strcmp (str, "xdigit") == 0  && isxdigit (c))
76858186Selan 			       || (strcmp (str, "space") == 0  && isspace (c))
76958186Selan 			       || (strcmp (str, "print") == 0  && isprint (c))
77058186Selan 			       || (strcmp (str, "punct") == 0  && ispunct (c))
77158186Selan 			       || (strcmp (str, "graph") == 0  && isgraph (c))
77258186Selan 			       || (strcmp (str, "cntrl") == 0  && iscntrl (c)))
77358186Selan 			    SET_LIST_BIT (c);
77458186Selan 			}
77558186Selan 		    }
77658186Selan                 }
77758186Selan               else if (translate)
77858186Selan 		SET_LIST_BIT (translate[c]);
77958186Selan 	      else
78058186Selan                 SET_LIST_BIT (c);
78158186Selan 	    }
78258186Selan 
78358186Selan           /* Discard any character set/class bitmap bytes that are all
78458186Selan              0 at the end of the map. Decrement the map-length byte too.  */
78558186Selan           while ((int) b[-1] > 0 && b[b[-1] - 1] == 0)
78658186Selan             b[-1]--;
78758186Selan           b += b[-1];
78858186Selan           break;
78958186Selan 
79058186Selan 	case '(':
79158186Selan 	  if (! (obscure_syntax & RE_NO_BK_PARENS))
79258186Selan 	    goto normal_char;
79358186Selan 	  else
79458186Selan 	    goto handle_open;
79558186Selan 
79658186Selan 	case ')':
79758186Selan 	  if (! (obscure_syntax & RE_NO_BK_PARENS))
79858186Selan 	    goto normal_char;
79958186Selan 	  else
80058186Selan 	    goto handle_close;
80158186Selan 
80258186Selan         case '\n':
80358186Selan 	  if (! (obscure_syntax & RE_NEWLINE_OR))
80458186Selan 	    goto normal_char;
80558186Selan 	  else
80658186Selan 	    goto handle_bar;
80758186Selan 
80858186Selan 	case '|':
80958186Selan 	  if ((obscure_syntax & RE_CONTEXTUAL_INVALID_OPS)
81058186Selan               && (! laststart  ||  p == pend))
81158186Selan 	    goto invalid_pattern;
81258186Selan           else if (! (obscure_syntax & RE_NO_BK_VBAR))
81358186Selan 	    goto normal_char;
81458186Selan 	  else
81558186Selan 	    goto handle_bar;
81658186Selan 
81758186Selan 	case '{':
81858186Selan            if (! ((obscure_syntax & RE_NO_BK_CURLY_BRACES)
81958186Selan                   && (obscure_syntax & RE_INTERVALS)))
82058186Selan              goto normal_char;
82158186Selan            else
82258186Selan              goto handle_interval;
82358186Selan 
82458186Selan         case '\\':
82558186Selan 	  if (p == pend) goto invalid_pattern;
82658186Selan 	  PATFETCH_RAW (c);
82758186Selan 	  switch (c)
82858186Selan 	    {
82958186Selan 	    case '(':
83058186Selan 	      if (obscure_syntax & RE_NO_BK_PARENS)
83158186Selan 		goto normal_backsl;
83258186Selan 	    handle_open:
83358186Selan 	      if (stackp == stacke) goto nesting_too_deep;
83458186Selan 
83558186Selan               /* Laststart should point to the start_memory that we are about
83658186Selan                  to push (unless the pattern has RE_NREGS or more ('s).  */
83758186Selan               *stackp++ = b - bufp->buffer;
83858186Selan 	      if (regnum < RE_NREGS)
83958186Selan 	        {
84058186Selan 		  BUFPUSH (start_memory);
84158186Selan 		  BUFPUSH (regnum);
84258186Selan 	        }
84358186Selan 	      *stackp++ = fixup_jump ? fixup_jump - bufp->buffer + 1 : 0;
84458186Selan 	      *stackp++ = regnum++;
84558186Selan 	      *stackp++ = begalt - bufp->buffer;
84658186Selan 	      fixup_jump = 0;
84758186Selan 	      laststart = 0;
84858186Selan 	      begalt = b;
84958186Selan 	      break;
85058186Selan 
85158186Selan 	    case ')':
85258186Selan 	      if (obscure_syntax & RE_NO_BK_PARENS)
85358186Selan 		goto normal_backsl;
85458186Selan 	    handle_close:
85558186Selan 	      if (stackp == stackb) goto unmatched_close;
85658186Selan 	      begalt = *--stackp + bufp->buffer;
85758186Selan 	      if (fixup_jump)
85858186Selan 		store_jump (fixup_jump, jump, b);
85958186Selan 	      if (stackp[-1] < RE_NREGS)
86058186Selan 		{
86158186Selan 		  BUFPUSH (stop_memory);
86258186Selan 		  BUFPUSH (stackp[-1]);
86358186Selan 		}
86458186Selan 	      stackp -= 2;
86558186Selan               fixup_jump = *stackp ? *stackp + bufp->buffer - 1 : 0;
86658186Selan               laststart = *--stackp + bufp->buffer;
86758186Selan 	      break;
86858186Selan 
86958186Selan 	    case '|':
87058186Selan               if ((obscure_syntax & RE_LIMITED_OPS)
87158186Selan 	          || (obscure_syntax & RE_NO_BK_VBAR))
87258186Selan 		goto normal_backsl;
87358186Selan 	    handle_bar:
87458186Selan               if (obscure_syntax & RE_LIMITED_OPS)
87558186Selan                 goto normal_char;
87658186Selan 	      /* Insert before the previous alternative a jump which
87758186Selan                  jumps to this alternative if the former fails.  */
87858186Selan               GET_BUFFER_SPACE (6);
87958186Selan               insert_jump (on_failure_jump, begalt, b + 6, b);
88058186Selan 	      pending_exact = 0;
88158186Selan 	      b += 3;
88258186Selan 	      /* The alternative before the previous alternative has a
88358186Selan                  jump after it which gets executed if it gets matched.
88458186Selan                  Adjust that jump so it will jump to the previous
88558186Selan                  alternative's analogous jump (put in below, which in
88658186Selan                  turn will jump to the next (if any) alternative's such
88758186Selan                  jump, etc.).  The last such jump jumps to the correct
88858186Selan                  final destination.  */
88958186Selan               if (fixup_jump)
89058186Selan 		store_jump (fixup_jump, jump, b);
89158186Selan 
89258186Selan 	      /* Leave space for a jump after previous alternative---to be
89358186Selan                  filled in later.  */
89458186Selan               fixup_jump = b;
89558186Selan               b += 3;
89658186Selan 
89758186Selan               laststart = 0;
89858186Selan 	      begalt = b;
89958186Selan 	      break;
90058186Selan 
90158186Selan             case '{':
90258186Selan               if (! (obscure_syntax & RE_INTERVALS)
90358186Selan 		  /* Let \{ be a literal.  */
90458186Selan                   || ((obscure_syntax & RE_INTERVALS)
90558186Selan                       && (obscure_syntax & RE_NO_BK_CURLY_BRACES))
90658186Selan 		  /* If it's the string "\{".  */
90758186Selan 		  || (p - 2 == pattern  &&  p == pend))
90858186Selan                 goto normal_backsl;
90958186Selan             handle_interval:
91058186Selan 	      beg_interval = p - 1;		/* The {.  */
91158186Selan               /* If there is no previous pattern, this isn't an interval.  */
91258186Selan 	      if (!laststart)
91358186Selan 	        {
91458186Selan                   if (obscure_syntax & RE_CONTEXTUAL_INVALID_OPS)
91558186Selan 		    goto invalid_pattern;
91658186Selan                   else
91758186Selan                     goto normal_backsl;
91858186Selan                 }
91958186Selan               /* It also isn't an interval if not preceded by an re
92058186Selan                  matching a single character or subexpression, or if
92158186Selan                  the current type of intervals can't handle back
92258186Selan                  references and the previous thing is a back reference.  */
92358186Selan               if (! (*laststart == anychar
92458186Selan 		     || *laststart == charset
92558186Selan 		     || *laststart == charset_not
92658186Selan 		     || *laststart == start_memory
92758186Selan 		     || (*laststart == exactn  &&  laststart[1] == 1)
92858186Selan 		     || (! (obscure_syntax & RE_NO_BK_REFS)
92958186Selan                          && *laststart == duplicate)))
93058186Selan                 {
93158186Selan                   if (obscure_syntax & RE_NO_BK_CURLY_BRACES)
93258186Selan                     goto normal_char;
93358186Selan 
93458186Selan 		  /* Posix extended syntax is handled in previous
93558186Selan                      statement; this is for Posix basic syntax.  */
93658186Selan                   if (obscure_syntax & RE_INTERVALS)
93758186Selan                     goto invalid_pattern;
93858186Selan 
93958186Selan                   goto normal_backsl;
94058186Selan 		}
94158186Selan               lower_bound = -1;			/* So can see if are set.  */
94258186Selan 	      upper_bound = -1;
94358186Selan               GET_UNSIGNED_NUMBER (lower_bound);
94458186Selan 	      if (c == ',')
94558186Selan 		{
94658186Selan 		  GET_UNSIGNED_NUMBER (upper_bound);
94758186Selan 		  if (upper_bound < 0)
94858186Selan 		    upper_bound = RE_DUP_MAX;
94958186Selan 		}
95058186Selan 	      if (upper_bound < 0)
95158186Selan 		upper_bound = lower_bound;
95258186Selan               if (! (obscure_syntax & RE_NO_BK_CURLY_BRACES))
95358186Selan                 {
95458186Selan                   if (c != '\\')
95558186Selan                     goto invalid_pattern;
95658186Selan                   PATFETCH (c);
95758186Selan                 }
95858186Selan 	      if (c != '}' || lower_bound < 0 || upper_bound > RE_DUP_MAX
95958186Selan 		  || lower_bound > upper_bound
96058186Selan                   || ((obscure_syntax & RE_NO_BK_CURLY_BRACES)
96158186Selan 		      && p != pend  && *p == '{'))
96258186Selan 	        {
96358186Selan 		  if (obscure_syntax & RE_NO_BK_CURLY_BRACES)
96458186Selan                     goto unfetch_interval;
96558186Selan                   else
96658186Selan                     goto invalid_pattern;
96758186Selan 		}
96858186Selan 
96958186Selan 	      /* If upper_bound is zero, don't want to succeed at all;
97058186Selan  		 jump from laststart to b + 3, which will be the end of
97158186Selan                  the buffer after this jump is inserted.  */
97258186Selan 
97358186Selan                if (upper_bound == 0)
97458186Selan                  {
97558186Selan                    GET_BUFFER_SPACE (3);
97658186Selan                    insert_jump (jump, laststart, b + 3, b);
97758186Selan                    b += 3;
97858186Selan                  }
97958186Selan 
98058186Selan                /* Otherwise, after lower_bound number of succeeds, jump
98158186Selan                   to after the jump_n which will be inserted at the end
98258186Selan                   of the buffer, and insert that jump_n.  */
98358186Selan                else
98458186Selan 		 { /* Set to 5 if only one repetition is allowed and
98558186Selan 	              hence no jump_n is inserted at the current end of
98658186Selan                       the buffer; then only space for the succeed_n is
98758186Selan                       needed.  Otherwise, need space for both the
98858186Selan                       succeed_n and the jump_n.  */
98958186Selan 
99058186Selan                    unsigned slots_needed = upper_bound == 1 ? 5 : 10;
99158186Selan 
99258186Selan                    GET_BUFFER_SPACE ((int) slots_needed);
99358186Selan                    /* Initialize the succeed_n to n, even though it will
99458186Selan                       be set by its attendant set_number_at, because
99558186Selan                       re_compile_fastmap will need to know it.  Jump to
99658186Selan                       what the end of buffer will be after inserting
99758186Selan                       this succeed_n and possibly appending a jump_n.  */
99858186Selan                    insert_jump_n (succeed_n, laststart, b + slots_needed,
99958186Selan 		                  b, lower_bound);
100058186Selan                    b += 5; 	/* Just increment for the succeed_n here.  */
100158186Selan 
100258186Selan 		  /* More than one repetition is allowed, so put in at
100358186Selan 		     the end of the buffer a backward jump from b to the
100458186Selan                      succeed_n we put in above.  By the time we've gotten
100558186Selan                      to this jump when matching, we'll have matched once
100658186Selan                      already, so jump back only upper_bound - 1 times.  */
100758186Selan 
100858186Selan                    if (upper_bound > 1)
100958186Selan                      {
101058186Selan                        store_jump_n (b, jump_n, laststart, upper_bound - 1);
101158186Selan                        b += 5;
101258186Selan                        /* When hit this when matching, reset the
101358186Selan                           preceding jump_n's n to upper_bound - 1.  */
101458186Selan                        BUFPUSH (set_number_at);
101558186Selan 		       GET_BUFFER_SPACE (2);
101658186Selan                        STORE_NUMBER_AND_INCR (b, -5);
101758186Selan                        STORE_NUMBER_AND_INCR (b, upper_bound - 1);
101858186Selan                      }
101958186Selan 		   /* When hit this when matching, set the succeed_n's n.  */
102058186Selan                    GET_BUFFER_SPACE (5);
102158186Selan 		   insert_op_2 (set_number_at, laststart, b, 5, lower_bound);
102258186Selan                    b += 5;
102358186Selan                  }
102458186Selan               pending_exact = 0;
102558186Selan 	      beg_interval = 0;
102658186Selan               break;
102758186Selan 
102858186Selan 
102958186Selan             unfetch_interval:
103058186Selan 	      /* If an invalid interval, match the characters as literals.  */
103158186Selan 	       if (beg_interval)
103258186Selan                  p = beg_interval;
103358186Selan   	       else
103458186Selan                  {
103558186Selan                    fprintf (stderr,
103658186Selan 		      "regex: no interval beginning to which to backtrack.\n");
103758186Selan 		   exit (1);
103858186Selan                  }
103958186Selan 
104058186Selan                beg_interval = 0;
104158186Selan                PATFETCH (c);		/* normal_char expects char in `c'.  */
104258186Selan 	       goto normal_char;
104358186Selan 	       break;
104458186Selan 
104558186Selan #ifdef emacs
104658186Selan 	    case '=':
104758186Selan 	      BUFPUSH (at_dot);
104858186Selan 	      break;
104958186Selan 
105058186Selan 	    case 's':
105158186Selan 	      laststart = b;
105258186Selan 	      BUFPUSH (syntaxspec);
105358186Selan 	      PATFETCH (c);
105458186Selan 	      BUFPUSH (syntax_spec_code[c]);
105558186Selan 	      break;
105658186Selan 
105758186Selan 	    case 'S':
105858186Selan 	      laststart = b;
105958186Selan 	      BUFPUSH (notsyntaxspec);
106058186Selan 	      PATFETCH (c);
106158186Selan 	      BUFPUSH (syntax_spec_code[c]);
106258186Selan 	      break;
106358186Selan #endif /* emacs */
106458186Selan 
106558186Selan 	    case 'w':
106658186Selan 	      laststart = b;
106758186Selan 	      BUFPUSH (wordchar);
106858186Selan 	      break;
106958186Selan 
107058186Selan 	    case 'W':
107158186Selan 	      laststart = b;
107258186Selan 	      BUFPUSH (notwordchar);
107358186Selan 	      break;
107458186Selan 
107558186Selan 	    case '<':
107658186Selan 	      BUFPUSH (wordbeg);
107758186Selan 	      break;
107858186Selan 
107958186Selan 	    case '>':
108058186Selan 	      BUFPUSH (wordend);
108158186Selan 	      break;
108258186Selan 
108358186Selan 	    case 'b':
108458186Selan 	      BUFPUSH (wordbound);
108558186Selan 	      break;
108658186Selan 
108758186Selan 	    case 'B':
108858186Selan 	      BUFPUSH (notwordbound);
108958186Selan 	      break;
109058186Selan 
109158186Selan 	    case '`':
109258186Selan 	      BUFPUSH (begbuf);
109358186Selan 	      break;
109458186Selan 
109558186Selan 	    case '\'':
109658186Selan 	      BUFPUSH (endbuf);
109758186Selan 	      break;
109858186Selan 
109958186Selan 	    case '1':
110058186Selan 	    case '2':
110158186Selan 	    case '3':
110258186Selan 	    case '4':
110358186Selan 	    case '5':
110458186Selan 	    case '6':
110558186Selan 	    case '7':
110658186Selan 	    case '8':
110758186Selan 	    case '9':
110858186Selan 	      if (obscure_syntax & RE_NO_BK_REFS)
110958186Selan                 goto normal_char;
111058186Selan               c1 = c - '0';
111158186Selan 	      if (c1 >= regnum)
111258186Selan 		{
111358186Selan   		  if (obscure_syntax & RE_NO_EMPTY_BK_REF)
111458186Selan                     goto invalid_pattern;
111558186Selan                   else
111658186Selan                     goto normal_char;
111758186Selan                 }
111858186Selan               /* Can't back reference to a subexpression if inside of it.  */
111958186Selan               for (stackt = stackp - 2;  stackt > stackb;  stackt -= 4)
112058186Selan  		if (*stackt == c1)
112158186Selan 		  goto normal_char;
112258186Selan 	      laststart = b;
112358186Selan 	      BUFPUSH (duplicate);
112458186Selan 	      BUFPUSH (c1);
112558186Selan 	      break;
112658186Selan 
112758186Selan 	    case '+':
112858186Selan 	    case '?':
112958186Selan 	      if (obscure_syntax & RE_BK_PLUS_QM)
113058186Selan 		goto handle_plus;
113158186Selan 	      else
113258186Selan                 goto normal_backsl;
113358186Selan               break;
113458186Selan 
113558186Selan             default:
113658186Selan 	    normal_backsl:
113758186Selan 	      /* You might think it would be useful for \ to mean
113858186Selan 		 not to translate; but if we don't translate it
113958186Selan 		 it will never match anything.  */
114058186Selan 	      if (translate) c = translate[c];
114158186Selan 	      goto normal_char;
114258186Selan 	    }
114358186Selan 	  break;
114458186Selan 
114558186Selan 	default:
114658186Selan 	normal_char:		/* Expects the character in `c'.  */
114758186Selan 	  if (!pending_exact || pending_exact + *pending_exact + 1 != b
114858186Selan 	      || *pending_exact == 0177 || *p == '*' || *p == '^'
114958186Selan 	      || ((obscure_syntax & RE_BK_PLUS_QM)
115058186Selan 		  ? *p == '\\' && (p[1] == '+' || p[1] == '?')
115158186Selan 		  : (*p == '+' || *p == '?'))
115258186Selan 	      || ((obscure_syntax & RE_INTERVALS)
115358186Selan                   && ((obscure_syntax & RE_NO_BK_CURLY_BRACES)
115458186Selan 		      ? *p == '{'
115558186Selan                       : (p[0] == '\\' && p[1] == '{'))))
115658186Selan 	    {
115758186Selan 	      laststart = b;
115858186Selan 	      BUFPUSH (exactn);
115958186Selan 	      pending_exact = b;
116058186Selan 	      BUFPUSH (0);
116158186Selan 	    }
116258186Selan 	  BUFPUSH (c);
116358186Selan 	  (*pending_exact)++;
116458186Selan 	}
116558186Selan     }
116658186Selan 
116758186Selan   if (fixup_jump)
116858186Selan     store_jump (fixup_jump, jump, b);
116958186Selan 
117058186Selan   if (stackp != stackb) goto unmatched_open;
117158186Selan 
117258186Selan   bufp->used = b - bufp->buffer;
117358186Selan   return 0;
117458186Selan 
117558186Selan  invalid_pattern:
117658186Selan   return "Invalid regular expression";
117758186Selan 
117858186Selan  unmatched_open:
117958186Selan   return "Unmatched \\(";
118058186Selan 
118158186Selan  unmatched_close:
118258186Selan   return "Unmatched \\)";
118358186Selan 
118458186Selan  end_of_pattern:
118558186Selan   return "Premature end of regular expression";
118658186Selan 
118758186Selan  nesting_too_deep:
118858186Selan   return "Nesting too deep";
118958186Selan 
119058186Selan  too_big:
119158186Selan   return "Regular expression too big";
119258186Selan 
119358186Selan  memory_exhausted:
119458186Selan   return "Memory exhausted";
119558186Selan }
119658186Selan 
119758186Selan 
119858186Selan /* Store a jump of the form <OPCODE> <relative address>.
119958186Selan    Store in the location FROM a jump operation to jump to relative
120058186Selan    address FROM - TO.  OPCODE is the opcode to store.  */
120158186Selan 
120258186Selan static void
store_jump(char * from,char opcode,char * to)120358186Selan store_jump (char *from, char opcode, char *to)
120458186Selan {
120558186Selan   from[0] = opcode;
120658186Selan   STORE_NUMBER(from + 1, to - (from + 3));
120758186Selan }
120858186Selan 
120958186Selan 
121058186Selan /* Open up space before char FROM, and insert there a jump to TO.
121158186Selan    CURRENT_END gives the end of the storage not in use, so we know
121258186Selan    how much data to copy up. OP is the opcode of the jump to insert.
121358186Selan 
121458186Selan    If you call this function, you must zero out pending_exact.  */
121558186Selan 
121658186Selan static void
insert_jump(char op,char * from,char * to,char * current_end)121758186Selan insert_jump (char op, char *from, char *to, char *current_end)
121858186Selan {
121958186Selan   register char *pfrom = current_end;		/* Copy from here...  */
122058186Selan   register char *pto = current_end + 3;		/* ...to here.  */
122158186Selan 
122258186Selan   while (pfrom != from)
122358186Selan     *--pto = *--pfrom;
122458186Selan   store_jump (from, op, to);
122558186Selan }
122658186Selan 
122758186Selan 
122858186Selan /* Store a jump of the form <opcode> <relative address> <n> .
122958186Selan 
123058186Selan    Store in the location FROM a jump operation to jump to relative
123158186Selan    address FROM - TO.  OPCODE is the opcode to store, N is a number the
123258186Selan    jump uses, say, to decide how many times to jump.
123358186Selan 
123458186Selan    If you call this function, you must zero out pending_exact.  */
123558186Selan 
123658186Selan static void
store_jump_n(char * from,char opcode,char * to,unsigned n)123758186Selan store_jump_n (char *from, char opcode, char *to, unsigned n)
123858186Selan {
123958186Selan   from[0] = opcode;
124058186Selan   STORE_NUMBER (from + 1, to - (from + 3));
124158186Selan   STORE_NUMBER (from + 3, n);
124258186Selan }
124358186Selan 
124458186Selan 
124558186Selan /* Similar to insert_jump, but handles a jump which needs an extra
124658186Selan    number to handle minimum and maximum cases.  Open up space at
124758186Selan    location FROM, and insert there a jump to TO.  CURRENT_END gives the
124858186Selan    end of the storage in use, so we know how much data to copy up. OP is
124958186Selan    the opcode of the jump to insert.
125058186Selan 
125158186Selan    If you call this function, you must zero out pending_exact.  */
125258186Selan 
125358186Selan static void
insert_jump_n(char op,char * from,char * to,char * current_end,unsigned n)125458186Selan insert_jump_n (char op, char *from, char *to, char *current_end, unsigned n)
125558186Selan {
125658186Selan   register char *pfrom = current_end;		/* Copy from here...  */
125758186Selan   register char *pto = current_end + 5;		/* ...to here.  */
125858186Selan 
125958186Selan   while (pfrom != from)
126058186Selan     *--pto = *--pfrom;
126158186Selan   store_jump_n (from, op, to, n);
126258186Selan }
126358186Selan 
126458186Selan 
126558186Selan /* Open up space at location THERE, and insert operation OP followed by
126658186Selan    NUM_1 and NUM_2.  CURRENT_END gives the end of the storage in use, so
126758186Selan    we know how much data to copy up.
126858186Selan 
126958186Selan    If you call this function, you must zero out pending_exact.  */
127058186Selan 
127158186Selan static void
insert_op_2(char op,char * there,char * current_end,int num_1,int num_2)127258186Selan insert_op_2 (char op, char *there, char *current_end, int num_1, int num_2)
127358186Selan {
127458186Selan   register char *pfrom = current_end;		/* Copy from here...  */
127558186Selan   register char *pto = current_end + 5;		/* ...to here.  */
127658186Selan 
127758186Selan   while (pfrom != there)
127858186Selan     *--pto = *--pfrom;
127958186Selan 
128058186Selan   there[0] = op;
128158186Selan   STORE_NUMBER (there + 1, num_1);
128258186Selan   STORE_NUMBER (there + 3, num_2);
128358186Selan }
128458186Selan 
128558186Selan 
128658186Selan 
128758186Selan /* Given a pattern, compute a fastmap from it.  The fastmap records
128858186Selan    which of the (1 << BYTEWIDTH) possible characters can start a string
128958186Selan    that matches the pattern.  This fastmap is used by re_search to skip
129058186Selan    quickly over totally implausible text.
129158186Selan 
129258186Selan    The caller must supply the address of a (1 << BYTEWIDTH)-byte data
129358186Selan    area as bufp->fastmap.
129458186Selan    The other components of bufp describe the pattern to be used.  */
129558186Selan 
129658186Selan void
re_compile_fastmap(struct re_pattern_buffer * bufp)129758186Selan re_compile_fastmap (struct re_pattern_buffer *bufp)
129858186Selan {
129958186Selan   unsigned char *pattern = (unsigned char *) bufp->buffer;
130058186Selan   int size = bufp->used;
130158186Selan   register char *fastmap = bufp->fastmap;
130258186Selan   register unsigned char *p = pattern;
130358186Selan   register unsigned char *pend = pattern + size;
130458186Selan   register int j, k;
130558186Selan   unsigned char *translate = (unsigned char *) bufp->translate;
130658186Selan 
130758186Selan   unsigned char *stackb[NFAILURES];
130858186Selan   unsigned char **stackp = stackb;
130958186Selan 
131058186Selan   unsigned is_a_succeed_n;
131158186Selan 
131258186Selan   memset (fastmap, 0, (1 << BYTEWIDTH));
131358186Selan   bufp->fastmap_accurate = 1;
131458186Selan   bufp->can_be_null = 0;
131558186Selan 
131658186Selan   while (p)
131758186Selan     {
131858186Selan       is_a_succeed_n = 0;
131958186Selan       if (p == pend)
132058186Selan 	{
132158186Selan 	  bufp->can_be_null = 1;
132258186Selan 	  break;
132358186Selan 	}
132458186Selan #ifdef SWITCH_ENUM_BUG
132558186Selan       switch ((int) ((enum regexpcode) *p++))
132658186Selan #else
132758186Selan       switch ((enum regexpcode) *p++)
132858186Selan #endif
132958186Selan 	{
133058186Selan 	case exactn:
133158186Selan 	  if (translate)
133258186Selan 	    fastmap[translate[p[1]]] = 1;
133358186Selan 	  else
133458186Selan 	    fastmap[p[1]] = 1;
133558186Selan 	  break;
133658186Selan 
133758186Selan         case unused:
133858186Selan         case begline:
133958186Selan #ifdef emacs
134058186Selan         case before_dot:
134158186Selan 	case at_dot:
134258186Selan 	case after_dot:
134358186Selan #endif
134458186Selan 	case begbuf:
134558186Selan 	case endbuf:
134658186Selan 	case wordbound:
134758186Selan 	case notwordbound:
134858186Selan 	case wordbeg:
134958186Selan 	case wordend:
135058186Selan           continue;
135158186Selan 
135258186Selan 	case endline:
135358186Selan 	  if (translate)
135458186Selan 	    fastmap[translate['\n']] = 1;
135558186Selan 	  else
135658186Selan 	    fastmap['\n'] = 1;
135758186Selan 
135858186Selan 	  if (bufp->can_be_null != 1)
135958186Selan 	    bufp->can_be_null = 2;
136058186Selan 	  break;
136158186Selan 
136258186Selan 	case jump_n:
136358186Selan         case finalize_jump:
136458186Selan 	case maybe_finalize_jump:
136558186Selan 	case jump:
136658186Selan 	case dummy_failure_jump:
136758186Selan           EXTRACT_NUMBER_AND_INCR (j, p);
136858186Selan 	  p += j;
136958186Selan 	  if (j > 0)
137058186Selan 	    continue;
137158186Selan           /* Jump backward reached implies we just went through
137258186Selan 	     the body of a loop and matched nothing.
137358186Selan 	     Opcode jumped to should be an on_failure_jump.
137458186Selan 	     Just treat it like an ordinary jump.
137558186Selan 	     For a * loop, it has pushed its failure point already;
137658186Selan 	     If so, discard that as redundant.  */
137758186Selan 
137858186Selan           if ((enum regexpcode) *p != on_failure_jump
137958186Selan 	      && (enum regexpcode) *p != succeed_n)
138058186Selan 	    continue;
138158186Selan           p++;
138258186Selan           EXTRACT_NUMBER_AND_INCR (j, p);
138358186Selan           p += j;
138458186Selan           if (stackp != stackb && *stackp == p)
138558186Selan             stackp--;
138658186Selan           continue;
138758186Selan 
138858186Selan         case on_failure_jump:
138958186Selan 	handle_on_failure_jump:
139058186Selan           EXTRACT_NUMBER_AND_INCR (j, p);
139158186Selan           *++stackp = p + j;
139258186Selan 	  if (is_a_succeed_n)
139358186Selan             EXTRACT_NUMBER_AND_INCR (k, p);	/* Skip the n.  */
139458186Selan 	  continue;
139558186Selan 
139658186Selan 	case succeed_n:
139758186Selan 	  is_a_succeed_n = 1;
139858186Selan           /* Get to the number of times to succeed.  */
139958186Selan           p += 2;
140058186Selan 	  /* Increment p past the n for when k != 0.  */
140158186Selan           EXTRACT_NUMBER_AND_INCR (k, p);
140258186Selan           if (k == 0)
140358186Selan 	    {
140458186Selan               p -= 4;
140558186Selan               goto handle_on_failure_jump;
140658186Selan             }
140758186Selan           continue;
140858186Selan 
140958186Selan 	case set_number_at:
141058186Selan           p += 4;
141158186Selan           continue;
141258186Selan 
141358186Selan         case start_memory:
141458186Selan 	case stop_memory:
141558186Selan 	  p++;
141658186Selan 	  continue;
141758186Selan 
141858186Selan 	case duplicate:
141958186Selan 	  bufp->can_be_null = 1;
142058186Selan 	  fastmap['\n'] = 1;
142158186Selan 	case anychar:
142258186Selan 	  for (j = 0; j < (1 << BYTEWIDTH); j++)
142358186Selan 	    if (j != '\n')
142458186Selan 	      fastmap[j] = 1;
142558186Selan 	  if (bufp->can_be_null)
142658186Selan 	    return;
142758186Selan 	  /* Don't return; check the alternative paths
142858186Selan 	     so we can set can_be_null if appropriate.  */
142958186Selan 	  break;
143058186Selan 
143158186Selan 	case wordchar:
143258186Selan 	  for (j = 0; j < (1 << BYTEWIDTH); j++)
143358186Selan 	    if (SYNTAX (j) == Sword)
143458186Selan 	      fastmap[j] = 1;
143558186Selan 	  break;
143658186Selan 
143758186Selan 	case notwordchar:
143858186Selan 	  for (j = 0; j < (1 << BYTEWIDTH); j++)
143958186Selan 	    if (SYNTAX (j) != Sword)
144058186Selan 	      fastmap[j] = 1;
144158186Selan 	  break;
144258186Selan 
144358186Selan #ifdef emacs
144458186Selan 	case syntaxspec:
144558186Selan 	  k = *p++;
144658186Selan 	  for (j = 0; j < (1 << BYTEWIDTH); j++)
144758186Selan 	    if (SYNTAX (j) == (enum syntaxcode) k)
144858186Selan 	      fastmap[j] = 1;
144958186Selan 	  break;
145058186Selan 
145158186Selan 	case notsyntaxspec:
145258186Selan 	  k = *p++;
145358186Selan 	  for (j = 0; j < (1 << BYTEWIDTH); j++)
145458186Selan 	    if (SYNTAX (j) != (enum syntaxcode) k)
145558186Selan 	      fastmap[j] = 1;
145658186Selan 	  break;
145758186Selan #endif /* not emacs */
145858186Selan 
145958186Selan 	case charset:
146058186Selan 	  for (j = *p++ * BYTEWIDTH - 1; j >= 0; j--)
146158186Selan 	    if (p[j / BYTEWIDTH] & (1 << (j % BYTEWIDTH)))
146258186Selan 	      {
146358186Selan 		if (translate)
146458186Selan 		  fastmap[translate[j]] = 1;
146558186Selan 		else
146658186Selan 		  fastmap[j] = 1;
146758186Selan 	      }
146858186Selan 	  break;
146958186Selan 
147058186Selan 	case charset_not:
147158186Selan 	  /* Chars beyond end of map must be allowed */
147258186Selan 	  for (j = *p * BYTEWIDTH; j < (1 << BYTEWIDTH); j++)
147358186Selan 	    if (translate)
147458186Selan 	      fastmap[translate[j]] = 1;
147558186Selan 	    else
147658186Selan 	      fastmap[j] = 1;
147758186Selan 
147858186Selan 	  for (j = *p++ * BYTEWIDTH - 1; j >= 0; j--)
147958186Selan 	    if (!(p[j / BYTEWIDTH] & (1 << (j % BYTEWIDTH))))
148058186Selan 	      {
148158186Selan 		if (translate)
148258186Selan 		  fastmap[translate[j]] = 1;
148358186Selan 		else
148458186Selan 		  fastmap[j] = 1;
148558186Selan 	      }
148658186Selan 	  break;
148758186Selan 	}
148858186Selan 
148958186Selan       /* Get here means we have successfully found the possible starting
149058186Selan          characters of one path of the pattern.  We need not follow this
149158186Selan          path any farther.  Instead, look at the next alternative
149258186Selan          remembered in the stack.  */
149358186Selan    if (stackp != stackb)
149458186Selan 	p = *stackp--;
149558186Selan       else
149658186Selan 	break;
149758186Selan     }
149858186Selan }
149958186Selan 
150058186Selan 
150158186Selan 
150258186Selan /* Like re_search_2, below, but only one string is specified, and
150358186Selan    doesn't let you say where to stop matching. */
150458186Selan 
150558186Selan int
re_search(struct re_pattern_buffer * pbufp,char * string,int size,int startpos,int range,struct re_registers * regs)150658186Selan re_search (struct re_pattern_buffer *pbufp,
150758186Selan 	   char *string,
150858186Selan 	   int size,
150958186Selan 	   int startpos,
151058186Selan 	   int range,
151158186Selan 	   struct re_registers *regs)
151258186Selan {
151358186Selan   return re_search_2 (pbufp, (char *) 0, 0, string, size, startpos, range,
151458186Selan 		      regs, size);
151558186Selan }
151658186Selan 
151758186Selan 
151858186Selan /* Using the compiled pattern in PBUFP->buffer, first tries to match the
151958186Selan    virtual concatenation of STRING1 and STRING2, starting first at index
152058186Selan    STARTPOS, then at STARTPOS + 1, and so on.  RANGE is the number of
152158186Selan    places to try before giving up.  If RANGE is negative, it searches
152258186Selan    backwards, i.e., the starting positions tried are STARTPOS, STARTPOS
152358186Selan    - 1, etc.  STRING1 and STRING2 are of SIZE1 and SIZE2, respectively.
152458186Selan    In REGS, return the indices of the virtual concatenation of STRING1
152558186Selan    and STRING2 that matched the entire PBUFP->buffer and its contained
152658186Selan    subexpressions.  Do not consider matching one past the index MSTOP in
152758186Selan    the virtual concatenation of STRING1 and STRING2.
152858186Selan 
152958186Selan    The value returned is the position in the strings at which the match
153058186Selan    was found, or -1 if no match was found, or -2 if error (such as
153158186Selan    failure stack overflow).  */
153258186Selan 
153358186Selan int
re_search_2(struct re_pattern_buffer * pbufp,char * string1,int size1,char * string2,int size2,int startpos,register int range,struct re_registers * regs,int mstop)153458186Selan re_search_2 (struct re_pattern_buffer *pbufp,
153558186Selan 	     char *string1, int size1,
153658186Selan 	     char *string2, int size2,
153758186Selan 	     int startpos,
153858186Selan 	     register int range,
153958186Selan 	     struct re_registers *regs,
154058186Selan 	     int mstop)
154158186Selan {
154258186Selan   register char *fastmap = pbufp->fastmap;
154358186Selan   register unsigned char *translate = (unsigned char *) pbufp->translate;
154458186Selan   int total_size = size1 + size2;
154558186Selan   int endpos = startpos + range;
154658186Selan   int val;
154758186Selan 
154858186Selan   /* Check for out-of-range starting position.  */
154958186Selan   if (startpos < 0  ||  startpos > total_size)
155058186Selan     return -1;
155158186Selan 
155258186Selan   /* Fix up range if it would eventually take startpos outside of the
155358186Selan      virtual concatenation of string1 and string2.  */
155458186Selan   if (endpos < -1)
155558186Selan     range = -1 - startpos;
155658186Selan   else if (endpos > total_size)
155758186Selan     range = total_size - startpos;
155858186Selan 
155958186Selan   /* Update the fastmap now if not correct already.  */
156058186Selan   if (fastmap && !pbufp->fastmap_accurate)
156158186Selan     re_compile_fastmap (pbufp);
156258186Selan 
156358186Selan   /* If the search isn't to be a backwards one, don't waste time in a
156458186Selan      long search for a pattern that says it is anchored.  */
156558186Selan   if (pbufp->used > 0 && (enum regexpcode) pbufp->buffer[0] == begbuf
156658186Selan       && range > 0)
156758186Selan     {
156858186Selan       if (startpos > 0)
156958186Selan 	return -1;
157058186Selan       else
157158186Selan 	range = 1;
157258186Selan     }
157358186Selan 
157458186Selan   while (1)
157558186Selan     {
157658186Selan       /* If a fastmap is supplied, skip quickly over characters that
157758186Selan          cannot possibly be the start of a match.  Note, however, that
157858186Selan          if the pattern can possibly match the null string, we must
157958186Selan          test it at each starting point so that we take the first null
158058186Selan          string we get.  */
158158186Selan 
158258186Selan       if (fastmap && startpos < total_size && pbufp->can_be_null != 1)
158358186Selan 	{
158458186Selan 	  if (range > 0)	/* Searching forwards.  */
158558186Selan 	    {
158658186Selan 	      register int lim = 0;
158758186Selan 	      register unsigned char *p;
158858186Selan 	      int irange = range;
158958186Selan 	      if (startpos < size1 && startpos + range >= size1)
159058186Selan 		lim = range - (size1 - startpos);
159158186Selan 
159258186Selan 	      p = ((unsigned char *)
159358186Selan 		   &(startpos >= size1 ? string2 - size1 : string1)[startpos]);
159458186Selan 
159558186Selan               while (range > lim && !fastmap[translate
159658186Selan                                              ? translate[*p++]
159758186Selan                                              : *p++])
159858186Selan 		    range--;
159958186Selan 	      startpos += irange - range;
160058186Selan 	    }
160158186Selan 	  else				/* Searching backwards.  */
160258186Selan 	    {
160358186Selan 	      register unsigned char c;
160458186Selan 
160558186Selan               if (string1 == 0 || startpos >= size1)
160658186Selan 		c = string2[startpos - size1];
160758186Selan 	      else
160858186Selan 		c = string1[startpos];
160958186Selan 
161058186Selan               c &= 0xff;
161158186Selan 	      if (translate ? !fastmap[translate[c]] : !fastmap[c])
161258186Selan 		goto advance;
161358186Selan 	    }
161458186Selan 	}
161558186Selan 
161658186Selan       if (range >= 0 && startpos == total_size
161758186Selan 	  && fastmap && pbufp->can_be_null == 0)
161858186Selan 	return -1;
161958186Selan 
162058186Selan       val = re_match_2 (pbufp, string1, size1, string2, size2, startpos,
162158186Selan 			regs, mstop);
162258186Selan       if (val >= 0)
162358186Selan 	return startpos;
162458186Selan       if (val == -2)
162558186Selan 	return -2;
162658186Selan 
162758186Selan #ifdef C_ALLOCA
162858186Selan       alloca (0);
162958186Selan #endif /* C_ALLOCA */
163058186Selan 
163158186Selan     advance:
163258186Selan       if (!range)
163358186Selan         break;
163458186Selan       else if (range > 0)
163558186Selan         {
163658186Selan           range--;
163758186Selan           startpos++;
163858186Selan         }
163958186Selan       else
164058186Selan         {
164158186Selan           range++;
164258186Selan           startpos--;
164358186Selan         }
164458186Selan     }
164558186Selan   return -1;
164658186Selan }
164758186Selan 
164858186Selan 
164958186Selan 
165058186Selan #ifndef emacs   /* emacs never uses this.  */
165158186Selan int
re_match(struct re_pattern_buffer * pbufp,char * string,int size,int pos,struct re_registers * regs)165258186Selan re_match (struct re_pattern_buffer *pbufp,
165358186Selan 	  char *string,
165458186Selan 	  int size,
165558186Selan 	  int pos,
165658186Selan 	  struct re_registers *regs)
165758186Selan {
165858186Selan   return re_match_2 (pbufp, (char *) 0, 0, string, size, pos, regs, size);
165958186Selan }
166058186Selan #endif /* not emacs */
166158186Selan 
166258186Selan 
166358186Selan /* The following are used for re_match_2, defined below:  */
166458186Selan 
166558186Selan /* Roughly the maximum number of failure points on the stack.  Would be
166658186Selan    exactly that if always pushed MAX_NUM_FAILURE_ITEMS each time we failed.  */
166758186Selan 
166858186Selan int re_max_failures = 2000;
166958186Selan 
167058186Selan /* Routine used by re_match_2.  */
167158186Selan static int bcmp_translate (char *, char *, int, unsigned char *);
167258186Selan 
167358186Selan 
167458186Selan /* Structure and accessing macros used in re_match_2:  */
167558186Selan 
167658186Selan struct register_info
167758186Selan {
167858186Selan   unsigned is_active : 1;
167958186Selan   unsigned matched_something : 1;
168058186Selan };
168158186Selan 
168258186Selan #define IS_ACTIVE(R)  ((R).is_active)
168358186Selan #define MATCHED_SOMETHING(R)  ((R).matched_something)
168458186Selan 
168558186Selan 
168658186Selan /* Macros used by re_match_2:  */
168758186Selan 
168858186Selan 
168958186Selan /* I.e., regstart, regend, and reg_info.  */
169058186Selan 
169158186Selan #define NUM_REG_ITEMS  3
169258186Selan 
169358186Selan /* We push at most this many things on the stack whenever we
169458186Selan    fail.  The `+ 2' refers to PATTERN_PLACE and STRING_PLACE, which are
169558186Selan    arguments to the PUSH_FAILURE_POINT macro.  */
169658186Selan 
169758186Selan #define MAX_NUM_FAILURE_ITEMS   (RE_NREGS * NUM_REG_ITEMS + 2)
169858186Selan 
169958186Selan 
170058186Selan /* We push this many things on the stack whenever we fail.  */
170158186Selan 
170258186Selan #define NUM_FAILURE_ITEMS  (last_used_reg * NUM_REG_ITEMS + 2)
170358186Selan 
170458186Selan 
170558186Selan /* This pushes most of the information about the current state we will want
170658186Selan    if we ever fail back to it.  */
170758186Selan 
170858186Selan #define PUSH_FAILURE_POINT(pattern_place, string_place)			\
170958186Selan   {									\
171058186Selan     short last_used_reg, this_reg;					\
171158186Selan 									\
171258186Selan     /* Find out how many registers are active or have been matched.	\
171358186Selan        (Aside from register zero, which is only set at the end.)  */	\
171458186Selan     for (last_used_reg = RE_NREGS - 1; last_used_reg > 0; last_used_reg--)\
171558186Selan       if (regstart[last_used_reg] != (unsigned char *) -1)		\
171658186Selan         break;								\
171758186Selan 									\
171858186Selan     if (stacke - stackp < NUM_FAILURE_ITEMS)				\
171958186Selan       {									\
172058186Selan 	unsigned char **stackx;						\
172158186Selan 	int len = stacke - stackb;				\
172258186Selan 	if (len > re_max_failures * MAX_NUM_FAILURE_ITEMS)		\
172358186Selan 	  return -2;							\
172458186Selan 									\
172558186Selan         /* Roughly double the size of the stack.  */			\
172658186Selan         stackx = (unsigned char **) alloca (2 * len			\
172758186Selan                                             * sizeof (unsigned char *));\
172858186Selan 	/* Only copy what is in use.  */				\
172958186Selan         memcpy (stackx, stackb, len * sizeof (char *));			\
173058186Selan 	stackp = stackx + (stackp - stackb);				\
173158186Selan 	stackb = stackx;						\
173258186Selan 	stacke = stackb + 2 * len;					\
173358186Selan       }									\
173458186Selan 									\
173558186Selan     /* Now push the info for each of those registers.  */		\
173658186Selan     for (this_reg = 1; this_reg <= last_used_reg; this_reg++)		\
173758186Selan       {									\
173858186Selan         *stackp++ = regstart[this_reg];					\
173958186Selan         *stackp++ = regend[this_reg];					\
174058186Selan         *stackp++ = (unsigned char *) &reg_info[this_reg];		\
174158186Selan       }									\
174258186Selan 									\
174358186Selan     /* Push how many registers we saved.  */				\
174458186Selan     *stackp++ = (unsigned char *) last_used_reg;			\
174558186Selan 									\
174658186Selan     *stackp++ = pattern_place;                                          \
174758186Selan     *stackp++ = string_place;                                           \
174858186Selan   }
174958186Selan 
175058186Selan 
175158186Selan /* This pops what PUSH_FAILURE_POINT pushes.  */
175258186Selan 
175358186Selan #define POP_FAILURE_POINT()						\
175458186Selan   {									\
175558186Selan     int temp;								\
175658186Selan     stackp -= 2;		/* Remove failure points.  */		\
175758186Selan     temp = (int) *--stackp;	/* How many regs pushed.  */	        \
175858186Selan     temp *= NUM_REG_ITEMS;	/* How much to take off the stack.  */	\
175958186Selan     stackp -= temp; 		/* Remove the register info.  */	\
176058186Selan   }
176158186Selan 
176258186Selan 
176358186Selan #define MATCHING_IN_FIRST_STRING  (dend == end_match_1)
176458186Selan 
176558186Selan /* Is true if there is a first string and if PTR is pointing anywhere
176658186Selan    inside it or just past the end.  */
176758186Selan 
176858186Selan #define IS_IN_FIRST_STRING(ptr) 					\
176958186Selan 	(size1 && string1 <= (ptr) && (ptr) <= string1 + size1)
177058186Selan 
177158186Selan /* Call before fetching a character with *d.  This switches over to
177258186Selan    string2 if necessary.  */
177358186Selan 
177458186Selan #define PREFETCH							\
177558186Selan  while (d == dend)						    	\
177658186Selan   {									\
177758186Selan     /* end of string2 => fail.  */					\
177858186Selan     if (dend == end_match_2) 						\
177958186Selan       goto fail;							\
178058186Selan     /* end of string1 => advance to string2.  */ 			\
178158186Selan     d = string2;						        \
178258186Selan     dend = end_match_2;							\
178358186Selan   }
178458186Selan 
178558186Selan 
178658186Selan /* Call this when have matched something; it sets `matched' flags for the
178758186Selan    registers corresponding to the subexpressions of which we currently
178858186Selan    are inside.  */
178958186Selan #define SET_REGS_MATCHED 						\
179058186Selan   { unsigned this_reg; 							\
179158186Selan     for (this_reg = 0; this_reg < RE_NREGS; this_reg++) 		\
179258186Selan       { 								\
179358186Selan         if (IS_ACTIVE(reg_info[this_reg]))				\
179458186Selan           MATCHED_SOMETHING(reg_info[this_reg]) = 1;			\
179558186Selan         else								\
179658186Selan           MATCHED_SOMETHING(reg_info[this_reg]) = 0;			\
179758186Selan       } 								\
179858186Selan   }
179958186Selan 
180058186Selan /* Test if at very beginning or at very end of the virtual concatenation
180158186Selan    of string1 and string2.  If there is only one string, we've put it in
180258186Selan    string2.  */
180358186Selan 
180458186Selan #define AT_STRINGS_BEG  (d == (size1 ? string1 : string2)  ||  !size2)
180558186Selan #define AT_STRINGS_END  (d == end2)
180658186Selan 
180758186Selan #define AT_WORD_BOUNDARY						\
180858186Selan   (AT_STRINGS_BEG || AT_STRINGS_END || IS_A_LETTER (d - 1) != IS_A_LETTER (d))
180958186Selan 
181058186Selan /* We have two special cases to check for:
181158186Selan      1) if we're past the end of string1, we have to look at the first
181258186Selan         character in string2;
181358186Selan      2) if we're before the beginning of string2, we have to look at the
181458186Selan         last character in string1; we assume there is a string1, so use
181558186Selan         this in conjunction with AT_STRINGS_BEG.  */
181658186Selan #define IS_A_LETTER(d)							\
181758186Selan   (SYNTAX ((d) == end1 ? *string2 : (d) == string2 - 1 ? *(end1 - 1) : *(d))\
181858186Selan    == Sword)
181958186Selan 
182058186Selan 
182158186Selan /* Match the pattern described by PBUFP against the virtual
182258186Selan    concatenation of STRING1 and STRING2, which are of SIZE1 and SIZE2,
182358186Selan    respectively.  Start the match at index POS in the virtual
182458186Selan    concatenation of STRING1 and STRING2.  In REGS, return the indices of
182558186Selan    the virtual concatenation of STRING1 and STRING2 that matched the
182658186Selan    entire PBUFP->buffer and its contained subexpressions.  Do not
182758186Selan    consider matching one past the index MSTOP in the virtual
182858186Selan    concatenation of STRING1 and STRING2.
182958186Selan 
183058186Selan    If pbufp->fastmap is nonzero, then it had better be up to date.
183158186Selan 
183258186Selan    The reason that the data to match are specified as two components
183358186Selan    which are to be regarded as concatenated is so this function can be
183458186Selan    used directly on the contents of an Emacs buffer.
183558186Selan 
183658186Selan    -1 is returned if there is no match.  -2 is returned if there is an
183758186Selan    error (such as match stack overflow).  Otherwise the value is the
183858186Selan    length of the substring which was matched.  */
183958186Selan 
184058186Selan int
re_match_2(struct re_pattern_buffer * pbufp,char * string1_arg,int size1,char * string2_arg,int size2,int pos,struct re_registers * regs,int mstop)184158186Selan re_match_2 (struct re_pattern_buffer *pbufp,
184258186Selan 	    char *string1_arg, int size1,
184358186Selan 	    char *string2_arg, int size2,
184458186Selan 	    int pos,
184558186Selan 	    struct re_registers *regs,
184658186Selan 	    int mstop)
184758186Selan {
184858186Selan   register unsigned char *p = (unsigned char *) pbufp->buffer;
184958186Selan 
185058186Selan   /* Pointer to beyond end of buffer.  */
185158186Selan   register unsigned char *pend = p + pbufp->used;
185258186Selan 
185358186Selan   unsigned char *string1 = (unsigned char *) string1_arg;
185458186Selan   unsigned char *string2 = (unsigned char *) string2_arg;
185558186Selan   unsigned char *end1;		/* Just past end of first string.  */
185658186Selan   unsigned char *end2;		/* Just past end of second string.  */
185758186Selan 
185858186Selan   /* Pointers into string1 and string2, just past the last characters in
185958186Selan      each to consider matching.  */
186058186Selan   unsigned char *end_match_1, *end_match_2;
186158186Selan 
186258186Selan   register unsigned char *d, *dend;
186358186Selan   register int mcnt;			/* Multipurpose.  */
186458186Selan   unsigned char *translate = (unsigned char *) pbufp->translate;
186558186Selan   unsigned is_a_jump_n = 0;
186658186Selan 
186758186Selan  /* Failure point stack.  Each place that can handle a failure further
186858186Selan     down the line pushes a failure point on this stack.  It consists of
186958186Selan     restart, regend, and reg_info for all registers corresponding to the
187058186Selan     subexpressions we're currently inside, plus the number of such
187158186Selan     registers, and, finally, two char *'s.  The first char * is where to
187258186Selan     resume scanning the pattern; the second one is where to resume
187358186Selan     scanning the strings.  If the latter is zero, the failure point is a
187458186Selan     ``dummy''; if a failure happens and the failure point is a dummy, it
187558186Selan     gets discarded and the next next one is tried.  */
187658186Selan 
187758186Selan   unsigned char *initial_stack[MAX_NUM_FAILURE_ITEMS * NFAILURES];
187858186Selan   unsigned char **stackb = initial_stack;
187958186Selan   unsigned char **stackp = stackb;
188058186Selan   unsigned char **stacke = &stackb[MAX_NUM_FAILURE_ITEMS * NFAILURES];
188158186Selan 
188258186Selan 
188358186Selan   /* Information on the contents of registers. These are pointers into
188458186Selan      the input strings; they record just what was matched (on this
188558186Selan      attempt) by a subexpression part of the pattern, that is, the
188658186Selan      regnum-th regstart pointer points to where in the pattern we began
188758186Selan      matching and the regnum-th regend points to right after where we
188858186Selan      stopped matching the regnum-th subexpression.  (The zeroth register
188958186Selan      keeps track of what the whole pattern matches.)  */
189058186Selan 
189158186Selan   unsigned char *regstart[RE_NREGS];
189258186Selan   unsigned char *regend[RE_NREGS];
189358186Selan 
189458186Selan   /* The is_active field of reg_info helps us keep track of which (possibly
189558186Selan      nested) subexpressions we are currently in. The matched_something
189658186Selan      field of reg_info[reg_num] helps us tell whether or not we have
189758186Selan      matched any of the pattern so far this time through the reg_num-th
189858186Selan      subexpression.  These two fields get reset each time through any
189958186Selan      loop their register is in.  */
190058186Selan 
190158186Selan   struct register_info reg_info[RE_NREGS];
190258186Selan 
190358186Selan 
190458186Selan   /* The following record the register info as found in the above
190558186Selan      variables when we find a match better than any we've seen before.
190658186Selan      This happens as we backtrack through the failure points, which in
190758186Selan      turn happens only if we have not yet matched the entire string.  */
190858186Selan 
190958186Selan   unsigned best_regs_set = 0;
191058186Selan   unsigned char *best_regstart[RE_NREGS];
191158186Selan   unsigned char *best_regend[RE_NREGS];
191258186Selan 
191358186Selan   /* Initialize subexpression text positions to -1 to mark ones that no
191458186Selan      \( or ( and \) or ) has been seen for. Also set all registers to
191558186Selan      inactive and mark them as not having matched anything or ever
191658186Selan      failed.  */
191758186Selan   for (mcnt = 0; mcnt < RE_NREGS; mcnt++)
191858186Selan     {
191958186Selan       regstart[mcnt] = regend[mcnt] = (unsigned char *) -1;
192058186Selan       IS_ACTIVE (reg_info[mcnt]) = 0;
192158186Selan       MATCHED_SOMETHING (reg_info[mcnt]) = 0;
192258186Selan     }
192358186Selan 
192458186Selan   if (regs)
192558186Selan     for (mcnt = 0; mcnt < RE_NREGS; mcnt++)
192658186Selan       regs->start[mcnt] = regs->end[mcnt] = -1;
192758186Selan 
192858186Selan   /* Set up pointers to ends of strings.
192958186Selan      Don't allow the second string to be empty unless both are empty.  */
193058186Selan   if (size2 == 0)
193158186Selan     {
193258186Selan       string2 = string1;
193358186Selan       size2 = size1;
193458186Selan       string1 = 0;
193558186Selan       size1 = 0;
193658186Selan     }
193758186Selan   end1 = string1 + size1;
193858186Selan   end2 = string2 + size2;
193958186Selan 
194058186Selan   /* Compute where to stop matching, within the two strings.  */
194158186Selan   if (mstop <= size1)
194258186Selan     {
194358186Selan       end_match_1 = string1 + mstop;
194458186Selan       end_match_2 = string2;
194558186Selan     }
194658186Selan   else
194758186Selan     {
194858186Selan       end_match_1 = end1;
194958186Selan       end_match_2 = string2 + mstop - size1;
195058186Selan     }
195158186Selan 
195258186Selan   /* `p' scans through the pattern as `d' scans through the data. `dend'
195358186Selan      is the end of the input string that `d' points within. `d' is
195458186Selan      advanced into the following input string whenever necessary, but
195558186Selan      this happens before fetching; therefore, at the beginning of the
195658186Selan      loop, `d' can be pointing at the end of a string, but it cannot
195758186Selan      equal string2.  */
195858186Selan 
195958186Selan   if (size1 != 0 && pos <= size1)
196058186Selan     d = string1 + pos, dend = end_match_1;
196158186Selan   else
196258186Selan     d = string2 + pos - size1, dend = end_match_2;
196358186Selan 
196458186Selan 
196558186Selan   /* This loops over pattern commands.  It exits by returning from the
196658186Selan      function if match is complete, or it drops through if match fails
196758186Selan      at this starting point in the input data.  */
196858186Selan 
196958186Selan   while (1)
197058186Selan     {
197158186Selan       is_a_jump_n = 0;
197258186Selan       /* End of pattern means we might have succeeded.  */
197358186Selan       if (p == pend)
197458186Selan 	{
197558186Selan 	  /* If not end of string, try backtracking.  Otherwise done.  */
197658186Selan           if (d != end_match_2)
197758186Selan 	    {
197858186Selan               if (stackp != stackb)
197958186Selan                 {
198058186Selan                   /* More failure points to try.  */
198158186Selan 
198258186Selan                   unsigned in_same_string =
198358186Selan         	          	IS_IN_FIRST_STRING (best_regend[0])
198458186Selan 	        	        == MATCHING_IN_FIRST_STRING;
198558186Selan 
198658186Selan                   /* If exceeds best match so far, save it.  */
198758186Selan                   if (! best_regs_set
198858186Selan                       || (in_same_string && d > best_regend[0])
198958186Selan                       || (! in_same_string && ! MATCHING_IN_FIRST_STRING))
199058186Selan                     {
199158186Selan                       best_regs_set = 1;
199258186Selan                       best_regend[0] = d;	/* Never use regstart[0].  */
199358186Selan 
199458186Selan                       for (mcnt = 1; mcnt < RE_NREGS; mcnt++)
199558186Selan                         {
199658186Selan                           best_regstart[mcnt] = regstart[mcnt];
199758186Selan                           best_regend[mcnt] = regend[mcnt];
199858186Selan                         }
199958186Selan                     }
200058186Selan                   goto fail;
200158186Selan                 }
200258186Selan               /* If no failure points, don't restore garbage.  */
200358186Selan               else if (best_regs_set)
200458186Selan                 {
200558186Selan 	      restore_best_regs:
200658186Selan                   /* Restore best match.  */
200758186Selan                   d = best_regend[0];
200858186Selan 
200958186Selan 		  for (mcnt = 0; mcnt < RE_NREGS; mcnt++)
201058186Selan 		    {
201158186Selan 		      regstart[mcnt] = best_regstart[mcnt];
201258186Selan 		      regend[mcnt] = best_regend[mcnt];
201358186Selan 		    }
201458186Selan                 }
201558186Selan             }
201658186Selan 
201758186Selan 	  /* If caller wants register contents data back, convert it
201858186Selan 	     to indices.  */
201958186Selan 	  if (regs)
202058186Selan 	    {
202158186Selan 	      regs->start[0] = pos;
202258186Selan 	      if (MATCHING_IN_FIRST_STRING)
202358186Selan 		regs->end[0] = d - string1;
202458186Selan 	      else
202558186Selan 		regs->end[0] = d - string2 + size1;
202658186Selan 	      for (mcnt = 1; mcnt < RE_NREGS; mcnt++)
202758186Selan 		{
202858186Selan 		  if (regend[mcnt] == (unsigned char *) -1)
202958186Selan 		    {
203058186Selan 		      regs->start[mcnt] = -1;
203158186Selan 		      regs->end[mcnt] = -1;
203258186Selan 		      continue;
203358186Selan 		    }
203458186Selan 		  if (IS_IN_FIRST_STRING (regstart[mcnt]))
203558186Selan 		    regs->start[mcnt] = regstart[mcnt] - string1;
203658186Selan 		  else
203758186Selan 		    regs->start[mcnt] = regstart[mcnt] - string2 + size1;
203858186Selan 
203958186Selan 		  if (IS_IN_FIRST_STRING (regend[mcnt]))
204058186Selan 		    regs->end[mcnt] = regend[mcnt] - string1;
204158186Selan 		  else
204258186Selan 		    regs->end[mcnt] = regend[mcnt] - string2 + size1;
204358186Selan 		}
204458186Selan 	    }
204558186Selan 	  return d - pos - (MATCHING_IN_FIRST_STRING
204658186Selan 			    ? string1
204758186Selan 			    : string2 - size1);
204858186Selan         }
204958186Selan 
205058186Selan       /* Otherwise match next pattern command.  */
205158186Selan #ifdef SWITCH_ENUM_BUG
205258186Selan       switch ((int) ((enum regexpcode) *p++))
205358186Selan #else
205458186Selan       switch ((enum regexpcode) *p++)
205558186Selan #endif
205658186Selan 	{
205758186Selan 
205858186Selan 	/* \( [or `(', as appropriate] is represented by start_memory,
205958186Selan            \) by stop_memory.  Both of those commands are followed by
206058186Selan            a register number in the next byte.  The text matched
206158186Selan            within the \( and \) is recorded under that number.  */
206258186Selan 	case start_memory:
206358186Selan           regstart[*p] = d;
206458186Selan           IS_ACTIVE (reg_info[*p]) = 1;
206558186Selan           MATCHED_SOMETHING (reg_info[*p]) = 0;
206658186Selan           p++;
206758186Selan           break;
206858186Selan 
206958186Selan 	case stop_memory:
207058186Selan           regend[*p] = d;
207158186Selan           IS_ACTIVE (reg_info[*p]) = 0;
207258186Selan 
207358186Selan           /* If just failed to match something this time around with a sub-
207458186Selan 	     expression that's in a loop, try to force exit from the loop.  */
207558186Selan           if ((! MATCHED_SOMETHING (reg_info[*p])
207658186Selan 	       || (enum regexpcode) p[-3] == start_memory)
207758186Selan 	      && (p + 1) != pend)
207858186Selan             {
207958186Selan 	      register unsigned char *p2 = p + 1;
208058186Selan               mcnt = 0;
208158186Selan               switch (*p2++)
208258186Selan                 {
208358186Selan                   case jump_n:
208458186Selan 		    is_a_jump_n = 1;
208558186Selan                   case finalize_jump:
208658186Selan 		  case maybe_finalize_jump:
208758186Selan 		  case jump:
208858186Selan 		  case dummy_failure_jump:
208958186Selan                     EXTRACT_NUMBER_AND_INCR (mcnt, p2);
209058186Selan 		    if (is_a_jump_n)
209158186Selan 		      p2 += 2;
209258186Selan                     break;
209358186Selan                 }
209458186Selan 	      p2 += mcnt;
209558186Selan 
209658186Selan               /* If the next operation is a jump backwards in the pattern
209758186Selan 	         to an on_failure_jump, exit from the loop by forcing a
209858186Selan                  failure after pushing on the stack the on_failure_jump's
209958186Selan                  jump in the pattern, and d.  */
210058186Selan 	      if (mcnt < 0 && (enum regexpcode) *p2++ == on_failure_jump)
210158186Selan 		{
210258186Selan                   EXTRACT_NUMBER_AND_INCR (mcnt, p2);
210358186Selan                   PUSH_FAILURE_POINT (p2 + mcnt, d);
210458186Selan                   goto fail;
210558186Selan                 }
210658186Selan             }
210758186Selan           p++;
210858186Selan           break;
210958186Selan 
211058186Selan 	/* \<digit> has been turned into a `duplicate' command which is
211158186Selan            followed by the numeric value of <digit> as the register number.  */
211258186Selan         case duplicate:
211358186Selan 	  {
211458186Selan 	    int regno = *p++;   /* Get which register to match against */
211558186Selan 	    register unsigned char *d2, *dend2;
211658186Selan 
211758186Selan 	    /* Where in input to try to start matching.  */
211858186Selan             d2 = regstart[regno];
211958186Selan 
212058186Selan             /* Where to stop matching; if both the place to start and
212158186Selan                the place to stop matching are in the same string, then
212258186Selan                set to the place to stop, otherwise, for now have to use
212358186Selan                the end of the first string.  */
212458186Selan 
212558186Selan             dend2 = ((IS_IN_FIRST_STRING (regstart[regno])
212658186Selan 		      == IS_IN_FIRST_STRING (regend[regno]))
212758186Selan 		     ? regend[regno] : end_match_1);
212858186Selan 	    while (1)
212958186Selan 	      {
213058186Selan 		/* If necessary, advance to next segment in register
213158186Selan                    contents.  */
213258186Selan 		while (d2 == dend2)
213358186Selan 		  {
213458186Selan 		    if (dend2 == end_match_2) break;
213558186Selan 		    if (dend2 == regend[regno]) break;
213658186Selan 		    d2 = string2, dend2 = regend[regno];  /* end of string1 => advance to string2. */
213758186Selan 		  }
213858186Selan 		/* At end of register contents => success */
213958186Selan 		if (d2 == dend2) break;
214058186Selan 
214158186Selan 		/* If necessary, advance to next segment in data.  */
214258186Selan 		PREFETCH;
214358186Selan 
214458186Selan 		/* How many characters left in this segment to match.  */
214558186Selan 		mcnt = dend - d;
214658186Selan 
214758186Selan 		/* Want how many consecutive characters we can match in
214858186Selan                    one shot, so, if necessary, adjust the count.  */
214958186Selan                 if (mcnt > dend2 - d2)
215058186Selan 		  mcnt = dend2 - d2;
215158186Selan 
215258186Selan 		/* Compare that many; failure if mismatch, else move
215358186Selan                    past them.  */
215458186Selan 		if (translate
215558186Selan                     ? bcmp_translate ((char*)d, (char*)d2, mcnt, translate)
215658186Selan                     : memcmp (d, d2, mcnt))
215758186Selan 		  goto fail;
215858186Selan 		d += mcnt, d2 += mcnt;
215958186Selan 	      }
216058186Selan 	  }
216158186Selan 	  break;
216258186Selan 
216358186Selan 	case anychar:
216458186Selan 	  PREFETCH;	  /* Fetch a data character. */
216558186Selan 	  /* Match anything but a newline, maybe even a null.  */
216658186Selan 	  if ((translate ? translate[*d] : *d) == '\n'
216758186Selan               || ((obscure_syntax & RE_DOT_NOT_NULL)
216858186Selan                   && (translate ? translate[*d] : *d) == '\000'))
216958186Selan 	    goto fail;
217058186Selan 	  SET_REGS_MATCHED;
217158186Selan           d++;
217258186Selan 	  break;
217358186Selan 
217458186Selan 	case charset:
217558186Selan 	case charset_not:
217658186Selan 	  {
217758186Selan 	    int not = 0;	    /* Nonzero for charset_not.  */
217858186Selan 	    register int c;
217958186Selan 	    if (*(p - 1) == (unsigned char) charset_not)
218058186Selan 	      not = 1;
218158186Selan 
218258186Selan 	    PREFETCH;	    /* Fetch a data character. */
218358186Selan 
218458186Selan 	    if (translate)
218558186Selan 	      c = translate[*d];
218658186Selan 	    else
218758186Selan 	      c = *d;
218858186Selan 
218958186Selan 	    if (c < *p * BYTEWIDTH
219058186Selan 		&& p[1 + c / BYTEWIDTH] & (1 << (c % BYTEWIDTH)))
219158186Selan 	      not = !not;
219258186Selan 
219358186Selan 	    p += 1 + *p;
219458186Selan 
219558186Selan 	    if (!not) goto fail;
219658186Selan 	    SET_REGS_MATCHED;
219758186Selan             d++;
219858186Selan 	    break;
219958186Selan 	  }
220058186Selan 
220158186Selan 	case begline:
220258186Selan           if ((size1 != 0 && d == string1)
220358186Selan               || (size1 == 0 && size2 != 0 && d == string2)
220458186Selan               || (d && d[-1] == '\n')
220558186Selan               || (size1 == 0 && size2 == 0))
220658186Selan             break;
220758186Selan           else
220858186Selan             goto fail;
220958186Selan 
221058186Selan 	case endline:
221158186Selan 	  if (d == end2
221258186Selan 	      || (d == end1 ? (size2 == 0 || *string2 == '\n') : *d == '\n'))
221358186Selan 	    break;
221458186Selan 	  goto fail;
221558186Selan 
221658186Selan 	/* `or' constructs are handled by starting each alternative with
221758186Selan            an on_failure_jump that points to the start of the next
221858186Selan            alternative.  Each alternative except the last ends with a
221958186Selan            jump to the joining point.  (Actually, each jump except for
222058186Selan            the last one really jumps to the following jump, because
222158186Selan            tensioning the jumps is a hassle.)  */
222258186Selan 
222358186Selan 	/* The start of a stupid repeat has an on_failure_jump that points
222458186Selan 	   past the end of the repeat text. This makes a failure point so
222558186Selan            that on failure to match a repetition, matching restarts past
222658186Selan            as many repetitions have been found with no way to fail and
222758186Selan            look for another one.  */
222858186Selan 
222958186Selan 	/* A smart repeat is similar but loops back to the on_failure_jump
223058186Selan 	   so that each repetition makes another failure point.  */
223158186Selan 
223258186Selan 	case on_failure_jump:
223358186Selan         on_failure:
223458186Selan           EXTRACT_NUMBER_AND_INCR (mcnt, p);
223558186Selan           PUSH_FAILURE_POINT (p + mcnt, d);
223658186Selan           break;
223758186Selan 
223858186Selan 	/* The end of a smart repeat has a maybe_finalize_jump back.
223958186Selan 	   Change it either to a finalize_jump or an ordinary jump.  */
224058186Selan 	case maybe_finalize_jump:
224158186Selan           EXTRACT_NUMBER_AND_INCR (mcnt, p);
224258186Selan 	  {
224358186Selan 	    register unsigned char *p2 = p;
224458186Selan 	    /* Compare what follows with the beginning of the repeat.
224558186Selan 	       If we can establish that there is nothing that they would
224658186Selan 	       both match, we can change to finalize_jump.  */
224758186Selan 	    while (p2 + 1 != pend
224858186Selan 		   && (*p2 == (unsigned char) stop_memory
224958186Selan 		       || *p2 == (unsigned char) start_memory))
225058186Selan 	      p2 += 2;				/* Skip over reg number.  */
225158186Selan 	    if (p2 == pend)
225258186Selan 	      p[-3] = (unsigned char) finalize_jump;
225358186Selan 	    else if (*p2 == (unsigned char) exactn
225458186Selan 		     || *p2 == (unsigned char) endline)
225558186Selan 	      {
225658186Selan 		register int c = *p2 == (unsigned char) endline ? '\n' : p2[2];
225758186Selan 		register unsigned char *p1 = p + mcnt;
225858186Selan 		/* p1[0] ... p1[2] are an on_failure_jump.
225958186Selan 		   Examine what follows that.  */
226058186Selan 		if (p1[3] == (unsigned char) exactn && p1[5] != c)
226158186Selan 		  p[-3] = (unsigned char) finalize_jump;
226258186Selan 		else if (p1[3] == (unsigned char) charset
226358186Selan 			 || p1[3] == (unsigned char) charset_not)
226458186Selan 		  {
226558186Selan 		    int not = p1[3] == (unsigned char) charset_not;
226658186Selan 		    if (c < p1[4] * BYTEWIDTH
226758186Selan 			&& p1[5 + c / BYTEWIDTH] & (1 << (c % BYTEWIDTH)))
226858186Selan 		      not = !not;
226958186Selan 		    /* `not' is 1 if c would match.  */
227058186Selan 		    /* That means it is not safe to finalize.  */
227158186Selan 		    if (!not)
227258186Selan 		      p[-3] = (unsigned char) finalize_jump;
227358186Selan 		  }
227458186Selan 	      }
227558186Selan 	  }
227658186Selan 	  p -= 2;		/* Point at relative address again.  */
227758186Selan 	  if (p[-1] != (unsigned char) finalize_jump)
227858186Selan 	    {
227958186Selan 	      p[-1] = (unsigned char) jump;
228058186Selan 	      goto nofinalize;
228158186Selan 	    }
228258186Selan         /* Note fall through.  */
228358186Selan 
228458186Selan 	/* The end of a stupid repeat has a finalize_jump back to the
228558186Selan            start, where another failure point will be made which will
228658186Selan            point to after all the repetitions found so far.  */
228758186Selan 
228858186Selan         /* Take off failure points put on by matching on_failure_jump
228958186Selan            because didn't fail.  Also remove the register information
229058186Selan            put on by the on_failure_jump.  */
229158186Selan         case finalize_jump:
229258186Selan           POP_FAILURE_POINT ();
229358186Selan         /* Note fall through.  */
229458186Selan 
229558186Selan 	/* Jump without taking off any failure points.  */
229658186Selan         case jump:
229758186Selan 	nofinalize:
229858186Selan 	  EXTRACT_NUMBER_AND_INCR (mcnt, p);
229958186Selan 	  p += mcnt;
230058186Selan 	  break;
230158186Selan 
230258186Selan         case dummy_failure_jump:
230358186Selan           /* Normally, the on_failure_jump pushes a failure point, which
230458186Selan              then gets popped at finalize_jump.  We will end up at
230558186Selan              finalize_jump, also, and with a pattern of, say, `a+', we
230658186Selan              are skipping over the on_failure_jump, so we have to push
230758186Selan              something meaningless for finalize_jump to pop.  */
230858186Selan           PUSH_FAILURE_POINT (0, 0);
230958186Selan           goto nofinalize;
231058186Selan 
231158186Selan 
231258186Selan         /* Have to succeed matching what follows at least n times.  Then
231358186Selan           just handle like an on_failure_jump.  */
231458186Selan         case succeed_n:
231558186Selan           EXTRACT_NUMBER (mcnt, p + 2);
231658186Selan           /* Originally, this is how many times we HAVE to succeed.  */
231758186Selan           if (mcnt)
231858186Selan             {
231958186Selan                mcnt--;
232058186Selan 	       p += 2;
232158186Selan                STORE_NUMBER_AND_INCR (p, mcnt);
232258186Selan             }
232358186Selan 	  else if (mcnt == 0)
232458186Selan             {
232558186Selan 	      p[2] = unused;
232658186Selan               p[3] = unused;
232758186Selan               goto on_failure;
232858186Selan             }
232958186Selan           else
233058186Selan 	    {
233158186Selan               fprintf (stderr, "regex: the succeed_n's n is not set.\n");
233258186Selan               exit (1);
233358186Selan 	    }
233458186Selan           break;
233558186Selan 
233658186Selan         case jump_n:
233758186Selan           EXTRACT_NUMBER (mcnt, p + 2);
233858186Selan           /* Originally, this is how many times we CAN jump.  */
233958186Selan           if (mcnt)
234058186Selan             {
234158186Selan                mcnt--;
234258186Selan                STORE_NUMBER(p + 2, mcnt);
234358186Selan 	       goto nofinalize;	     /* Do the jump without taking off
234458186Selan 			                any failure points.  */
234558186Selan             }
234658186Selan           /* If don't have to jump any more, skip over the rest of command.  */
234758186Selan 	  else
234858186Selan 	    p += 4;
234958186Selan           break;
235058186Selan 
235158186Selan 	case set_number_at:
235258186Selan 	  {
235358186Selan   	    register unsigned char *p1;
235458186Selan 
235558186Selan             EXTRACT_NUMBER_AND_INCR (mcnt, p);
235658186Selan             p1 = p + mcnt;
235758186Selan             EXTRACT_NUMBER_AND_INCR (mcnt, p);
235858186Selan 	    STORE_NUMBER (p1, mcnt);
235958186Selan             break;
236058186Selan           }
236158186Selan 
236258186Selan         /* Ignore these.  Used to ignore the n of succeed_n's which
236358186Selan            currently have n == 0.  */
236458186Selan         case unused:
236558186Selan           break;
236658186Selan 
236758186Selan         case wordbound:
236858186Selan 	  if (AT_WORD_BOUNDARY)
236958186Selan 	    break;
237058186Selan 	  goto fail;
237158186Selan 
237258186Selan 	case notwordbound:
237358186Selan 	  if (AT_WORD_BOUNDARY)
237458186Selan 	    goto fail;
237558186Selan 	  break;
237658186Selan 
237758186Selan 	case wordbeg:
237858186Selan           /* Have to check if AT_STRINGS_BEG before looking at d - 1.  */
237958186Selan 	  if (IS_A_LETTER (d) && (AT_STRINGS_BEG || !IS_A_LETTER (d - 1)))
238058186Selan 	    break;
238158186Selan 	  goto fail;
238258186Selan 
238358186Selan 	case wordend:
238458186Selan           /* Have to check if AT_STRINGS_BEG before looking at d - 1.  */
238558186Selan 	  if (!AT_STRINGS_BEG && IS_A_LETTER (d - 1)
238658186Selan               && (!IS_A_LETTER (d) || AT_STRINGS_END))
238758186Selan 	    break;
238858186Selan 	  goto fail;
238958186Selan 
239058186Selan #ifdef emacs
239158186Selan 	case before_dot:
239258186Selan 	  if (PTR_CHAR_POS (d) >= point)
239358186Selan 	    goto fail;
239458186Selan 	  break;
239558186Selan 
239658186Selan 	case at_dot:
239758186Selan 	  if (PTR_CHAR_POS (d) != point)
239858186Selan 	    goto fail;
239958186Selan 	  break;
240058186Selan 
240158186Selan 	case after_dot:
240258186Selan 	  if (PTR_CHAR_POS (d) <= point)
240358186Selan 	    goto fail;
240458186Selan 	  break;
240558186Selan 
240658186Selan 	case wordchar:
240758186Selan 	  mcnt = (int) Sword;
240858186Selan 	  goto matchsyntax;
240958186Selan 
241058186Selan 	case syntaxspec:
241158186Selan 	  mcnt = *p++;
241258186Selan 	matchsyntax:
241358186Selan 	  PREFETCH;
241458186Selan 	  if (SYNTAX (*d++) != (enum syntaxcode) mcnt) goto fail;
241558186Selan           SET_REGS_MATCHED;
241658186Selan 	  break;
241758186Selan 
241858186Selan 	case notwordchar:
241958186Selan 	  mcnt = (int) Sword;
242058186Selan 	  goto matchnotsyntax;
242158186Selan 
242258186Selan 	case notsyntaxspec:
242358186Selan 	  mcnt = *p++;
242458186Selan 	matchnotsyntax:
242558186Selan 	  PREFETCH;
242658186Selan 	  if (SYNTAX (*d++) == (enum syntaxcode) mcnt) goto fail;
242758186Selan 	  SET_REGS_MATCHED;
242858186Selan           break;
242958186Selan 
243058186Selan #else /* not emacs */
243158186Selan 
243258186Selan 	case wordchar:
243358186Selan 	  PREFETCH;
243458186Selan           if (!IS_A_LETTER (d))
243558186Selan             goto fail;
243658186Selan 	  SET_REGS_MATCHED;
243758186Selan 	  break;
243858186Selan 
243958186Selan 	case notwordchar:
244058186Selan 	  PREFETCH;
244158186Selan 	  if (IS_A_LETTER (d))
244258186Selan             goto fail;
244358186Selan           SET_REGS_MATCHED;
244458186Selan 	  break;
244558186Selan 
244658186Selan #endif /* not emacs */
244758186Selan 
244858186Selan 	case begbuf:
244958186Selan           if (AT_STRINGS_BEG)
245058186Selan             break;
245158186Selan           goto fail;
245258186Selan 
245358186Selan         case endbuf:
245458186Selan 	  if (AT_STRINGS_END)
245558186Selan 	    break;
245658186Selan 	  goto fail;
245758186Selan 
245858186Selan 	case exactn:
245958186Selan 	  /* Match the next few pattern characters exactly.
246058186Selan 	     mcnt is how many characters to match.  */
246158186Selan 	  mcnt = *p++;
246258186Selan 	  /* This is written out as an if-else so we don't waste time
246358186Selan              testing `translate' inside the loop.  */
246458186Selan           if (translate)
246558186Selan 	    {
246658186Selan 	      do
246758186Selan 		{
246858186Selan 		  PREFETCH;
246958186Selan 		  if (translate[*d++] != *p++) goto fail;
247058186Selan 		}
247158186Selan 	      while (--mcnt);
247258186Selan 	    }
247358186Selan 	  else
247458186Selan 	    {
247558186Selan 	      do
247658186Selan 		{
247758186Selan 		  PREFETCH;
247858186Selan 		  if (*d++ != *p++) goto fail;
247958186Selan 		}
248058186Selan 	      while (--mcnt);
248158186Selan 	    }
248258186Selan 	  SET_REGS_MATCHED;
248358186Selan           break;
248458186Selan 	}
248558186Selan       continue;  /* Successfully executed one pattern command; keep going.  */
248658186Selan 
248758186Selan     /* Jump here if any matching operation fails. */
248858186Selan     fail:
248958186Selan       if (stackp != stackb)
249058186Selan 	/* A restart point is known.  Restart there and pop it. */
249158186Selan 	{
249258186Selan           short last_used_reg, this_reg;
249358186Selan 
249458186Selan           /* If this failure point is from a dummy_failure_point, just
249558186Selan              skip it.  */
249658186Selan 	  if (!stackp[-2])
249758186Selan             {
249858186Selan               POP_FAILURE_POINT ();
249958186Selan               goto fail;
250058186Selan             }
250158186Selan 
250258186Selan           d = *--stackp;
250358186Selan 	  p = *--stackp;
250458186Selan           if (d >= string1 && d <= end1)
250558186Selan 	    dend = end_match_1;
250658186Selan           /* Restore register info.  */
250758186Selan           last_used_reg = (short) *--stackp;
250858186Selan 
250958186Selan           /* Make the ones that weren't saved -1 or 0 again.  */
251058186Selan           for (this_reg = RE_NREGS - 1; this_reg > last_used_reg; this_reg--)
251158186Selan             {
251258186Selan               regend[this_reg] = (unsigned char *) -1;
251358186Selan               regstart[this_reg] = (unsigned char *) -1;
251458186Selan               IS_ACTIVE (reg_info[this_reg]) = 0;
251558186Selan               MATCHED_SOMETHING (reg_info[this_reg]) = 0;
251658186Selan             }
251758186Selan 
251858186Selan           /* And restore the rest from the stack.  */
251958186Selan           for ( ; this_reg > 0; this_reg--)
252058186Selan             {
252158186Selan               reg_info[this_reg] = *(struct register_info *) *--stackp;
252258186Selan               regend[this_reg] = *--stackp;
252358186Selan               regstart[this_reg] = *--stackp;
252458186Selan             }
252558186Selan 	}
252658186Selan       else
252758186Selan         break;   /* Matching at this starting point really fails.  */
252858186Selan     }
252958186Selan 
253058186Selan   if (best_regs_set)
253158186Selan     goto restore_best_regs;
253258186Selan   return -1;         			/* Failure to match.  */
253358186Selan }
253458186Selan 
253558186Selan 
253658186Selan static int
bcmp_translate(char * s1,char * s2,int len,unsigned char * translate)253758186Selan bcmp_translate (char *s1, char *s2, int len, unsigned char *translate)
253858186Selan {
253958186Selan   register unsigned char *p1 = (unsigned char*)s1;
254058186Selan   register unsigned char *p2 = (unsigned char*)s2;
254158186Selan   while (len)
254258186Selan     {
254358186Selan       if (translate [*p1++] != translate [*p2++]) return 1;
254458186Selan       len--;
254558186Selan     }
254658186Selan   return 0;
254758186Selan }
254858186Selan 
254958186Selan 
255058186Selan 
255158186Selan /* Entry points compatible with 4.2 BSD regex library.  */
255258186Selan 
255358186Selan #ifndef emacs
255458186Selan 
255558186Selan static struct re_pattern_buffer re_comp_buf;
255658186Selan 
255758186Selan char *
re_comp(const char * s)2558*60385Selan re_comp (const char *s)
255958186Selan {
256058186Selan   if (!s)
256158186Selan     {
256258186Selan       if (!re_comp_buf.buffer)
256358186Selan 	return "No previous regular expression";
256458186Selan       return 0;
256558186Selan     }
256658186Selan 
256758186Selan   if (!re_comp_buf.buffer)
256858186Selan     {
256958186Selan       if (!(re_comp_buf.buffer = (char *) malloc (200)))
257058186Selan 	return "Memory exhausted";
257158186Selan       re_comp_buf.allocated = 200;
257258186Selan       if (!(re_comp_buf.fastmap = (char *) malloc (1 << BYTEWIDTH)))
257358186Selan 	return "Memory exhausted";
257458186Selan     }
257558186Selan   return re_compile_pattern (s, strlen (s), &re_comp_buf);
257658186Selan }
257758186Selan 
257858186Selan int
re_exec(const char * s)2579*60385Selan re_exec (const char *s)
258058186Selan {
258158186Selan   int len = strlen (s);
258258186Selan   return 0 <= re_search (&re_comp_buf, s, len, 0, len,
258358186Selan 			 (struct re_registers *) 0);
258458186Selan }
258558186Selan #endif /* not emacs */
258658186Selan 
258758186Selan 
258858186Selan 
258958186Selan #ifdef test
259058186Selan 
259158186Selan #include <stdio.h>
259258186Selan 
259358186Selan /* Indexed by a character, gives the upper case equivalent of the
259458186Selan    character.  */
259558186Selan 
259658186Selan char upcase[0400] =
259758186Selan   { 000, 001, 002, 003, 004, 005, 006, 007,
259858186Selan     010, 011, 012, 013, 014, 015, 016, 017,
259958186Selan     020, 021, 022, 023, 024, 025, 026, 027,
260058186Selan     030, 031, 032, 033, 034, 035, 036, 037,
260158186Selan     040, 041, 042, 043, 044, 045, 046, 047,
260258186Selan     050, 051, 052, 053, 054, 055, 056, 057,
260358186Selan     060, 061, 062, 063, 064, 065, 066, 067,
260458186Selan     070, 071, 072, 073, 074, 075, 076, 077,
260558186Selan     0100, 0101, 0102, 0103, 0104, 0105, 0106, 0107,
260658186Selan     0110, 0111, 0112, 0113, 0114, 0115, 0116, 0117,
260758186Selan     0120, 0121, 0122, 0123, 0124, 0125, 0126, 0127,
260858186Selan     0130, 0131, 0132, 0133, 0134, 0135, 0136, 0137,
260958186Selan     0140, 0101, 0102, 0103, 0104, 0105, 0106, 0107,
261058186Selan     0110, 0111, 0112, 0113, 0114, 0115, 0116, 0117,
261158186Selan     0120, 0121, 0122, 0123, 0124, 0125, 0126, 0127,
261258186Selan     0130, 0131, 0132, 0173, 0174, 0175, 0176, 0177,
261358186Selan     0200, 0201, 0202, 0203, 0204, 0205, 0206, 0207,
261458186Selan     0210, 0211, 0212, 0213, 0214, 0215, 0216, 0217,
261558186Selan     0220, 0221, 0222, 0223, 0224, 0225, 0226, 0227,
261658186Selan     0230, 0231, 0232, 0233, 0234, 0235, 0236, 0237,
261758186Selan     0240, 0241, 0242, 0243, 0244, 0245, 0246, 0247,
261858186Selan     0250, 0251, 0252, 0253, 0254, 0255, 0256, 0257,
261958186Selan     0260, 0261, 0262, 0263, 0264, 0265, 0266, 0267,
262058186Selan     0270, 0271, 0272, 0273, 0274, 0275, 0276, 0277,
262158186Selan     0300, 0301, 0302, 0303, 0304, 0305, 0306, 0307,
262258186Selan     0310, 0311, 0312, 0313, 0314, 0315, 0316, 0317,
262358186Selan     0320, 0321, 0322, 0323, 0324, 0325, 0326, 0327,
262458186Selan     0330, 0331, 0332, 0333, 0334, 0335, 0336, 0337,
262558186Selan     0340, 0341, 0342, 0343, 0344, 0345, 0346, 0347,
262658186Selan     0350, 0351, 0352, 0353, 0354, 0355, 0356, 0357,
262758186Selan     0360, 0361, 0362, 0363, 0364, 0365, 0366, 0367,
262858186Selan     0370, 0371, 0372, 0373, 0374, 0375, 0376, 0377
262958186Selan   };
263058186Selan 
263158186Selan #ifdef canned
263258186Selan 
263358186Selan #include "tests.h"
263458186Selan 
263558186Selan typedef enum { extended_test, basic_test } test_type;
263658186Selan 
263758186Selan /* Use this to run the tests we've thought of.  */
263858186Selan 
263958186Selan void
main()264058186Selan main ()
264158186Selan {
264258186Selan   test_type t = extended_test;
264358186Selan 
264458186Selan   if (t == basic_test)
264558186Selan     {
264658186Selan       printf ("Running basic tests:\n\n");
264758186Selan       test_posix_basic ();
264858186Selan     }
264958186Selan   else if (t == extended_test)
265058186Selan     {
265158186Selan       printf ("Running extended tests:\n\n");
265258186Selan       test_posix_extended ();
265358186Selan     }
265458186Selan }
265558186Selan 
265658186Selan #else /* not canned */
265758186Selan 
265858186Selan /* Use this to run interactive tests.  */
265958186Selan 
266058186Selan void
main(int argc,char ** argv)266158186Selan main (int argc, char **argv)
266258186Selan {
266358186Selan   char pat[80];
266458186Selan   struct re_pattern_buffer buf;
266558186Selan   int i;
266658186Selan   char c;
266758186Selan   char fastmap[(1 << BYTEWIDTH)];
266858186Selan 
266958186Selan   /* Allow a command argument to specify the style of syntax.  */
267058186Selan   if (argc > 1)
267158186Selan     obscure_syntax = atoi (argv[1]);
267258186Selan 
267358186Selan   buf.allocated = 40;
267458186Selan   buf.buffer = (char *) malloc (buf.allocated);
267558186Selan   buf.fastmap = fastmap;
267658186Selan   buf.translate = upcase;
267758186Selan 
267858186Selan   while (1)
267958186Selan     {
268058186Selan       gets (pat);
268158186Selan 
268258186Selan       if (*pat)
268358186Selan 	{
268458186Selan           re_compile_pattern (pat, strlen(pat), &buf);
268558186Selan 
268658186Selan 	  for (i = 0; i < buf.used; i++)
268758186Selan 	    printchar (buf.buffer[i]);
268858186Selan 
268958186Selan 	  putchar ('\n');
269058186Selan 
269158186Selan 	  printf ("%d allocated, %d used.\n", buf.allocated, buf.used);
269258186Selan 
269358186Selan 	  re_compile_fastmap (&buf);
269458186Selan 	  printf ("Allowed by fastmap: ");
269558186Selan 	  for (i = 0; i < (1 << BYTEWIDTH); i++)
269658186Selan 	    if (fastmap[i]) printchar (i);
269758186Selan 	  putchar ('\n');
269858186Selan 	}
269958186Selan 
270058186Selan       gets (pat);	/* Now read the string to match against */
270158186Selan 
270258186Selan       i = re_match (&buf, pat, strlen (pat), 0, 0);
270358186Selan       printf ("Match value %d.\n", i);
270458186Selan     }
270558186Selan }
270658186Selan 
270758186Selan #endif
270858186Selan 
270958186Selan 
271058186Selan #ifdef NOTDEF
271158186Selan void
print_buf(struct re_pattern_buffer * bufpbufp)271258186Selan print_buf (struct re_pattern_buffer *bufpbufp)
271358186Selan {
271458186Selan   int i;
271558186Selan 
271658186Selan   printf ("buf is :\n----------------\n");
271758186Selan   for (i = 0; i < bufp->used; i++)
271858186Selan     printchar (bufp->buffer[i]);
271958186Selan 
272058186Selan   printf ("\n%d allocated, %d used.\n", bufp->allocated, bufp->used);
272158186Selan 
272258186Selan   printf ("Allowed by fastmap: ");
272358186Selan   for (i = 0; i < (1 << BYTEWIDTH); i++)
272458186Selan     if (bufp->fastmap[i])
272558186Selan       printchar (i);
272658186Selan   printf ("\nAllowed by translate: ");
272758186Selan   if (bufp->translate)
272858186Selan     for (i = 0; i < (1 << BYTEWIDTH); i++)
272958186Selan       if (bufp->translate[i])
273058186Selan 	printchar (i);
273158186Selan   printf ("\nfastmap is%s accurate\n", bufp->fastmap_accurate ? "" : "n't");
273258186Selan   printf ("can %s be null\n----------", bufp->can_be_null ? "" : "not");
273358186Selan }
273458186Selan #endif /* NOTDEF */
273558186Selan 
273658186Selan void
printchar(char c)273758186Selan printchar (char c)
273858186Selan {
273958186Selan   if (c < 040 || c >= 0177)
274058186Selan     {
274158186Selan       putchar ('\\');
274258186Selan       putchar (((c >> 6) & 3) + '0');
274358186Selan       putchar (((c >> 3) & 7) + '0');
274458186Selan       putchar ((c & 7) + '0');
274558186Selan     }
274658186Selan   else
274758186Selan     putchar (c);
274858186Selan }
274958186Selan 
275058186Selan void
error(char * string)275158186Selan error (char *string)
275258186Selan {
275358186Selan   puts (string);
275458186Selan   exit (1);
275558186Selan }
275658186Selan #endif /* test */
2757