158186Selan /* Extended regular expression matching and search library.
258186Selan Copyright (C) 1985, 1989-90 Free Software Foundation, Inc.
358186Selan
458186Selan This program is free software; you can redistribute it and/or modify
558186Selan it under the terms of the GNU General Public License as published by
658186Selan the Free Software Foundation; either version 2, or (at your option)
758186Selan any later version.
858186Selan
958186Selan This program is distributed in the hope that it will be useful,
1058186Selan but WITHOUT ANY WARRANTY; without even the implied warranty of
1158186Selan MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
1258186Selan GNU General Public License for more details.
1358186Selan
1458186Selan You should have received a copy of the GNU General Public License
1558186Selan along with this program; if not, write to the Free Software
1658186Selan Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */
1758186Selan
1858186Selan
1958186Selan // This is a translation into C++ of regex.c, the GNU regexp package.
2058186Selan
2158186Selan /* To test, compile with -Dtest. This Dtestable feature turns this into
2258186Selan a self-contained program which reads a pattern, describes how it
2358186Selan compiles, then reads a string and searches for it.
2458186Selan
2558186Selan On the other hand, if you compile with both -Dtest and -Dcanned you
2658186Selan can run some tests we've already thought of. */
2758186Selan
2858186Selan /* AIX requires the alloca decl to be the first thing in the file. */
2958186Selan #ifdef __GNUC__
30*60385Selan #define alloca alloca
3158186Selan #else
3258186Selan #ifdef sparc
3358186Selan #include <alloca.h>
3458186Selan #else
3558186Selan #ifdef _AIX
3658186Selan #pragma alloca
3758186Selan #else
3858186Selan char *alloca ();
3958186Selan #endif
4058186Selan #endif
4158186Selan #endif
4258186Selan
4358186Selan #ifdef emacs
4458186Selan
4558186Selan /* The `emacs' switch turns on certain special matching commands
4658186Selan that make sense only in emacs. */
4758186Selan
4858186Selan #include "config.h"
4958186Selan #include "lisp.h"
5058186Selan #include "buffer.h"
5158186Selan #include "syntax.h"
5258186Selan
5358186Selan #else /* not emacs */
5458186Selan
5558186Selan #include <_G_config.h>
5658186Selan #include <string.h>
5758186Selan #include <stdlib.h>
5858186Selan
5958186Selan /* Define the syntax stuff, so we can do the \<, \>, etc. */
6058186Selan
6158186Selan /* This must be nonzero for the wordchar and notwordchar pattern
6258186Selan commands in re_match_2. */
6358186Selan #ifndef Sword
6458186Selan #define Sword 1
6558186Selan #endif
6658186Selan
6758186Selan #define SYNTAX(c) re_syntax_table[c]
6858186Selan
6958186Selan
7058186Selan #ifdef SYNTAX_TABLE
7158186Selan
7258186Selan char *re_syntax_table;
7358186Selan
7458186Selan #else /* not SYNTAX_TABLE */
7558186Selan
7658186Selan static char re_syntax_table[256];
7758186Selan
7858186Selan
7958186Selan static void
init_syntax_once()8058186Selan init_syntax_once ()
8158186Selan {
8258186Selan register int c;
8358186Selan static int done = 0;
8458186Selan
8558186Selan if (done)
8658186Selan return;
8758186Selan
8858186Selan memset (re_syntax_table, 0, sizeof re_syntax_table);
8958186Selan
9058186Selan for (c = 'a'; c <= 'z'; c++)
9158186Selan re_syntax_table[c] = Sword;
9258186Selan
9358186Selan for (c = 'A'; c <= 'Z'; c++)
9458186Selan re_syntax_table[c] = Sword;
9558186Selan
9658186Selan for (c = '0'; c <= '9'; c++)
9758186Selan re_syntax_table[c] = Sword;
9858186Selan
9958186Selan done = 1;
10058186Selan }
10158186Selan
10258186Selan #endif /* SYNTAX_TABLE */
10358186Selan #endif /* emacs */
10458186Selan
10558186Selan /* We write fatal error messages on standard error. */
10658186Selan #include <stdio.h>
10758186Selan
10858186Selan /* isalpha(3) etc. are used for the character classes. */
10958186Selan #include <ctype.h>
11058186Selan /* Sequents are missing isgraph. */
11158186Selan #ifndef isgraph
11258186Selan #define isgraph(c) (isprint((c)) && !isspace((c)))
11358186Selan #endif
11458186Selan
11558186Selan /* Get the interface, including the syntax bits. */
11658186Selan #include "regex.h"
11758186Selan
11858186Selan
11958186Selan /* These are the command codes that appear in compiled regular
12058186Selan expressions, one per byte. Some command codes are followed by
12158186Selan argument bytes. A command code can specify any interpretation
12258186Selan whatsoever for its arguments. Zero-bytes may appear in the compiled
12358186Selan regular expression.
12458186Selan
12558186Selan The value of `exactn' is needed in search.c (search_buffer) in emacs.
12658186Selan So regex.h defines a symbol `RE_EXACTN_VALUE' to be 1; the value of
12758186Selan `exactn' we use here must also be 1. */
12858186Selan
12958186Selan enum regexpcode
13058186Selan {
13158186Selan unused=0,
13258186Selan exactn=1, /* Followed by one byte giving n, then by n literal bytes. */
13358186Selan begline, /* Fail unless at beginning of line. */
13458186Selan endline, /* Fail unless at end of line. */
13558186Selan jump, /* Followed by two bytes giving relative address to jump to. */
13658186Selan on_failure_jump, /* Followed by two bytes giving relative address of
13758186Selan place to resume at in case of failure. */
13858186Selan finalize_jump, /* Throw away latest failure point and then jump to
13958186Selan address. */
14058186Selan maybe_finalize_jump, /* Like jump but finalize if safe to do so.
14158186Selan This is used to jump back to the beginning
14258186Selan of a repeat. If the command that follows
14358186Selan this jump is clearly incompatible with the
14458186Selan one at the beginning of the repeat, such that
14558186Selan we can be sure that there is no use backtracking
14658186Selan out of repetitions already completed,
14758186Selan then we finalize. */
14858186Selan dummy_failure_jump, /* Jump, and push a dummy failure point. This
14958186Selan failure point will be thrown away if an attempt
15058186Selan is made to use it for a failure. A + construct
15158186Selan makes this before the first repeat. Also
15258186Selan use it as an intermediary kind of jump when
15358186Selan compiling an or construct. */
15458186Selan succeed_n, /* Used like on_failure_jump except has to succeed n times;
15558186Selan then gets turned into an on_failure_jump. The relative
15658186Selan address following it is useless until then. The
15758186Selan address is followed by two bytes containing n. */
15858186Selan jump_n, /* Similar to jump, but jump n times only; also the relative
15958186Selan address following is in turn followed by yet two more bytes
16058186Selan containing n. */
16158186Selan set_number_at, /* Set the following relative location to the
16258186Selan subsequent number. */
16358186Selan anychar, /* Matches any (more or less) one character. */
16458186Selan charset, /* Matches any one char belonging to specified set.
16558186Selan First following byte is number of bitmap bytes.
16658186Selan Then come bytes for a bitmap saying which chars are in.
16758186Selan Bits in each byte are ordered low-bit-first.
16858186Selan A character is in the set if its bit is 1.
16958186Selan A character too large to have a bit in the map
17058186Selan is automatically not in the set. */
17158186Selan charset_not, /* Same parameters as charset, but match any character
17258186Selan that is not one of those specified. */
17358186Selan start_memory, /* Start remembering the text that is matched, for
17458186Selan storing in a memory register. Followed by one
17558186Selan byte containing the register number. Register numbers
17658186Selan must be in the range 0 through RE_NREGS. */
17758186Selan stop_memory, /* Stop remembering the text that is matched
17858186Selan and store it in a memory register. Followed by
17958186Selan one byte containing the register number. Register
18058186Selan numbers must be in the range 0 through RE_NREGS. */
18158186Selan duplicate, /* Match a duplicate of something remembered.
18258186Selan Followed by one byte containing the index of the memory
18358186Selan register. */
18458186Selan #ifdef emacs
18558186Selan before_dot, /* Succeeds if before point. */
18658186Selan at_dot, /* Succeeds if at point. */
18758186Selan after_dot, /* Succeeds if after point. */
18858186Selan #endif
18958186Selan begbuf, /* Succeeds if at beginning of buffer. */
19058186Selan endbuf, /* Succeeds if at end of buffer. */
19158186Selan wordchar, /* Matches any word-constituent character. */
19258186Selan notwordchar, /* Matches any char that is not a word-constituent. */
19358186Selan wordbeg, /* Succeeds if at word beginning. */
19458186Selan wordend, /* Succeeds if at word end. */
19558186Selan wordbound, /* Succeeds if at a word boundary. */
19658186Selan notwordbound,/* Succeeds if not at a word boundary. */
19758186Selan #ifdef emacs
19858186Selan syntaxspec, /* Matches any character whose syntax is specified.
19958186Selan followed by a byte which contains a syntax code,
20058186Selan e.g., Sword. */
20158186Selan notsyntaxspec /* Matches any character whose syntax differs from
20258186Selan that specified. */
20358186Selan #endif
20458186Selan };
20558186Selan
20658186Selan
20758186Selan /* Number of failure points to allocate space for initially,
20858186Selan when matching. If this number is exceeded, more space is allocated,
20958186Selan so it is not a hard limit. */
21058186Selan
21158186Selan #ifndef NFAILURES
21258186Selan #define NFAILURES 80
21358186Selan #endif
21458186Selan
21558186Selan
21658186Selan #ifndef SIGN_EXTEND_CHAR
21758186Selan #ifdef __STDC__
21858186Selan #define SIGN_EXTEND_CHAR(c) ((signed char)(c))
21958186Selan #else
22058186Selan #define SIGN_EXTEND_CHAR(c) (((c)^128) - 128) /* As in Harbison and Steele. */
22158186Selan #endif
22258186Selan #endif /* not SIGN_EXTEND_CHAR */
22358186Selan
22458186Selan /* Store NUMBER in two contiguous bytes starting at DESTINATION. */
22558186Selan #define STORE_NUMBER(destination, number) \
22658186Selan { (destination)[0] = (number) & 0377; \
22758186Selan (destination)[1] = (number) >> 8; }
22858186Selan
22958186Selan /* Same as STORE_NUMBER, except increment the destination pointer to
23058186Selan the byte after where the number is stored. Watch out that values for
23158186Selan DESTINATION such as p + 1 won't work, whereas p will. */
23258186Selan #define STORE_NUMBER_AND_INCR(destination, number) \
23358186Selan { STORE_NUMBER(destination, number); \
23458186Selan (destination) += 2; }
23558186Selan
23658186Selan
23758186Selan /* Put into DESTINATION a number stored in two contingous bytes starting
23858186Selan at SOURCE. */
23958186Selan #define EXTRACT_NUMBER(destination, source) \
24058186Selan { (destination) = *(source) & 0377; \
24158186Selan (destination) += SIGN_EXTEND_CHAR (*(char *)((source) + 1)) << 8; }
24258186Selan
24358186Selan /* Same as EXTRACT_NUMBER, except increment the pointer for source to
24458186Selan point to second byte of SOURCE. Note that SOURCE has to be a value
24558186Selan such as p, not, e.g., p + 1. */
24658186Selan #define EXTRACT_NUMBER_AND_INCR(destination, source) \
24758186Selan { EXTRACT_NUMBER (destination, source); \
24858186Selan (source) += 2; }
24958186Selan
25058186Selan
25158186Selan /* Specify the precise syntax of regexps for compilation. This provides
25258186Selan for compatibility for various utilities which historically have
25358186Selan different, incompatible syntaxes.
25458186Selan
25558186Selan The argument SYNTAX is a bit-mask comprised of the various bits
25658186Selan defined in regex.h. */
25758186Selan
25858186Selan int
re_set_syntax(int syntax)25958186Selan re_set_syntax (int syntax)
26058186Selan {
26158186Selan int ret;
26258186Selan
26358186Selan ret = obscure_syntax;
26458186Selan obscure_syntax = syntax;
26558186Selan return ret;
26658186Selan }
26758186Selan
26858186Selan /* Set by re_set_syntax to the current regexp syntax to recognize. */
26958186Selan int obscure_syntax = 0;
27058186Selan
27158186Selan
27258186Selan
27358186Selan /* Macros for re_compile_pattern, which is found below these definitions. */
27458186Selan
27558186Selan #define CHAR_CLASS_MAX_LENGTH 6
27658186Selan
27758186Selan /* Fetch the next character in the uncompiled pattern, translating it if
27858186Selan necessary. */
27958186Selan #define PATFETCH(c) \
28058186Selan {if (p == pend) goto end_of_pattern; \
28158186Selan c = * (const unsigned char *) p++; \
28258186Selan if (translate) c = translate[c]; }
28358186Selan
28458186Selan /* Fetch the next character in the uncompiled pattern, with no
28558186Selan translation. */
28658186Selan #define PATFETCH_RAW(c) \
28758186Selan {if (p == pend) goto end_of_pattern; \
28858186Selan c = * (const unsigned char *) p++; }
28958186Selan
29058186Selan #define PATUNFETCH p--
29158186Selan
29258186Selan
29358186Selan /* If the buffer isn't allocated when it comes in, use this. */
29458186Selan #define INIT_BUF_SIZE 28
29558186Selan
29658186Selan /* Make sure we have at least N more bytes of space in buffer. */
29758186Selan #define GET_BUFFER_SPACE(n) \
29858186Selan { \
29958186Selan while (b - bufp->buffer + (n) >= bufp->allocated) \
30058186Selan EXTEND_BUFFER; \
30158186Selan }
30258186Selan
30358186Selan /* Make sure we have one more byte of buffer space and then add CH to it. */
30458186Selan #define BUFPUSH(ch) \
30558186Selan { \
30658186Selan GET_BUFFER_SPACE (1); \
30758186Selan *b++ = (char) (ch); \
30858186Selan }
30958186Selan
31058186Selan /* Extend the buffer by twice its current size via reallociation and
31158186Selan reset the pointers that pointed into the old allocation to point to
31258186Selan the correct places in the new allocation. If extending the buffer
31358186Selan results in it being larger than 1 << 16, then flag memory exhausted. */
31458186Selan #define EXTEND_BUFFER \
31558186Selan { char *old_buffer = bufp->buffer; \
31658186Selan if (bufp->allocated == (1L<<16)) goto too_big; \
31758186Selan bufp->allocated *= 2; \
31858186Selan if (bufp->allocated > (1L<<16)) bufp->allocated = (1L<<16); \
31958186Selan bufp->buffer = (char *) realloc (bufp->buffer, bufp->allocated); \
32058186Selan if (bufp->buffer == 0) \
32158186Selan goto memory_exhausted; \
32258186Selan b = (b - old_buffer) + bufp->buffer; \
32358186Selan if (fixup_jump) \
32458186Selan fixup_jump = (fixup_jump - old_buffer) + bufp->buffer; \
32558186Selan if (laststart) \
32658186Selan laststart = (laststart - old_buffer) + bufp->buffer; \
32758186Selan begalt = (begalt - old_buffer) + bufp->buffer; \
32858186Selan if (pending_exact) \
32958186Selan pending_exact = (pending_exact - old_buffer) + bufp->buffer; \
33058186Selan }
33158186Selan
33258186Selan /* Set the bit for character C in a character set list. */
33358186Selan #define SET_LIST_BIT(c) (b[(c) / BYTEWIDTH] |= 1 << ((c) % BYTEWIDTH))
33458186Selan
33558186Selan /* Get the next unsigned number in the uncompiled pattern. */
33658186Selan #define GET_UNSIGNED_NUMBER(num) \
33758186Selan { if (p != pend) \
33858186Selan { \
33958186Selan PATFETCH (c); \
34058186Selan while (isdigit (c)) \
34158186Selan { \
34258186Selan if (num < 0) \
34358186Selan num = 0; \
34458186Selan num = num * 10 + c - '0'; \
34558186Selan if (p == pend) \
34658186Selan break; \
34758186Selan PATFETCH (c); \
34858186Selan } \
34958186Selan } \
35058186Selan }
35158186Selan
35258186Selan /* Subroutines for re_compile_pattern. */
35358186Selan static void store_jump (char *from, char opcode, char *to);
35458186Selan static void insert_jump (char op, char *from, char *to, char *current_end);
35558186Selan static void store_jump_n (char *from, char opcode, char *to, unsigned n);
35658186Selan static void insert_jump_n (char, char *, char *, char *, unsigned);
35758186Selan static void insert_op_2 (char, char *, char *_end, int, int);
35858186Selan
35958186Selan
36058186Selan /* re_compile_pattern takes a regular-expression string
36158186Selan and converts it into a buffer full of byte commands for matching.
36258186Selan
36358186Selan PATTERN is the address of the pattern string
36458186Selan SIZE is the length of it.
36558186Selan BUFP is a struct re_pattern_buffer * which points to the info
36658186Selan on where to store the byte commands.
36758186Selan This structure contains a char * which points to the
36858186Selan actual space, which should have been obtained with malloc.
36958186Selan re_compile_pattern may use realloc to grow the buffer space.
37058186Selan
37158186Selan The number of bytes of commands can be found out by looking in
37258186Selan the `struct re_pattern_buffer' that bufp pointed to, after
37358186Selan re_compile_pattern returns. */
37458186Selan
37558186Selan char *
re_compile_pattern(const char * pattern,int size,struct re_pattern_buffer * bufp)37658186Selan re_compile_pattern (const char *pattern, int size, struct re_pattern_buffer *bufp)
37758186Selan {
37858186Selan register char *b = bufp->buffer;
37958186Selan register const char *p = pattern;
38058186Selan const char *pend = pattern + size;
38158186Selan register unsigned c, c1;
38258186Selan const char *p1;
38358186Selan unsigned char *translate = (unsigned char *) bufp->translate;
38458186Selan
38558186Selan /* Address of the count-byte of the most recently inserted `exactn'
38658186Selan command. This makes it possible to tell whether a new exact-match
38758186Selan character can be added to that command or requires a new `exactn'
38858186Selan command. */
38958186Selan
39058186Selan char *pending_exact = 0;
39158186Selan
39258186Selan /* Address of the place where a forward-jump should go to the end of
39358186Selan the containing expression. Each alternative of an `or', except the
39458186Selan last, ends with a forward-jump of this sort. */
39558186Selan
39658186Selan char *fixup_jump = 0;
39758186Selan
39858186Selan /* Address of start of the most recently finished expression.
39958186Selan This tells postfix * where to find the start of its operand. */
40058186Selan
40158186Selan char *laststart = 0;
40258186Selan
40358186Selan /* In processing a repeat, 1 means zero matches is allowed. */
40458186Selan
40558186Selan char zero_times_ok;
40658186Selan
40758186Selan /* In processing a repeat, 1 means many matches is allowed. */
40858186Selan
40958186Selan char many_times_ok;
41058186Selan
41158186Selan /* Address of beginning of regexp, or inside of last \(. */
41258186Selan
41358186Selan char *begalt = b;
41458186Selan
41558186Selan /* In processing an interval, at least this many matches must be made. */
41658186Selan int lower_bound;
41758186Selan
41858186Selan /* In processing an interval, at most this many matches can be made. */
41958186Selan int upper_bound;
42058186Selan
42158186Selan /* Place in pattern (i.e., the {) to which to go back if the interval
42258186Selan is invalid. */
42358186Selan const char *beg_interval = 0;
42458186Selan
42558186Selan /* Stack of information saved by \( and restored by \).
42658186Selan Four stack elements are pushed by each \(:
42758186Selan First, the value of b.
42858186Selan Second, the value of fixup_jump.
42958186Selan Third, the value of regnum.
43058186Selan Fourth, the value of begalt. */
43158186Selan
43258186Selan int stackb[40];
43358186Selan int *stackp = stackb;
43458186Selan int *stacke = stackb + 40;
43558186Selan int *stackt;
43658186Selan
43758186Selan /* Counts \('s as they are encountered. Remembered for the matching \),
43858186Selan where it becomes the register number to put in the stop_memory
43958186Selan command. */
44058186Selan
44158186Selan unsigned regnum = 1;
44258186Selan
44358186Selan bufp->fastmap_accurate = 0;
44458186Selan
44558186Selan #ifndef emacs
44658186Selan #ifndef SYNTAX_TABLE
44758186Selan /* Initialize the syntax table. */
44858186Selan init_syntax_once();
44958186Selan #endif
45058186Selan #endif
45158186Selan
45258186Selan if (bufp->allocated == 0)
45358186Selan {
45458186Selan bufp->allocated = INIT_BUF_SIZE;
45558186Selan if (bufp->buffer)
45658186Selan /* EXTEND_BUFFER loses when bufp->allocated is 0. */
45758186Selan bufp->buffer = (char *) realloc (bufp->buffer, INIT_BUF_SIZE);
45858186Selan else
45958186Selan /* Caller did not allocate a buffer. Do it for them. */
46058186Selan bufp->buffer = (char *) malloc (INIT_BUF_SIZE);
46158186Selan if (!bufp->buffer) goto memory_exhausted;
46258186Selan begalt = b = bufp->buffer;
46358186Selan }
46458186Selan
46558186Selan while (p != pend)
46658186Selan {
46758186Selan PATFETCH (c);
46858186Selan
46958186Selan switch (c)
47058186Selan {
47158186Selan case '$':
47258186Selan {
47358186Selan const char *p1 = p;
47458186Selan /* When testing what follows the $,
47558186Selan look past the \-constructs that don't consume anything. */
47658186Selan if (! (obscure_syntax & RE_CONTEXT_INDEP_OPS))
47758186Selan while (p1 != pend)
47858186Selan {
47958186Selan if (*p1 == '\\' && p1 + 1 != pend
48058186Selan && (p1[1] == '<' || p1[1] == '>'
48158186Selan || p1[1] == '`' || p1[1] == '\''
48258186Selan #ifdef emacs
48358186Selan || p1[1] == '='
48458186Selan #endif
48558186Selan || p1[1] == 'b' || p1[1] == 'B'))
48658186Selan p1 += 2;
48758186Selan else
48858186Selan break;
48958186Selan }
49058186Selan if (obscure_syntax & RE_TIGHT_VBAR)
49158186Selan {
49258186Selan if (! (obscure_syntax & RE_CONTEXT_INDEP_OPS) && p1 != pend)
49358186Selan goto normal_char;
49458186Selan /* Make operand of last vbar end before this `$'. */
49558186Selan if (fixup_jump)
49658186Selan store_jump (fixup_jump, jump, b);
49758186Selan fixup_jump = 0;
49858186Selan BUFPUSH (endline);
49958186Selan break;
50058186Selan }
50158186Selan /* $ means succeed if at end of line, but only in special contexts.
50258186Selan If validly in the middle of a pattern, it is a normal character. */
50358186Selan
50458186Selan if ((obscure_syntax & RE_CONTEXTUAL_INVALID_OPS) && p1 != pend)
50558186Selan goto invalid_pattern;
50658186Selan if (p1 == pend || *p1 == '\n'
50758186Selan || (obscure_syntax & RE_CONTEXT_INDEP_OPS)
50858186Selan || (obscure_syntax & RE_NO_BK_PARENS
50958186Selan ? *p1 == ')'
51058186Selan : *p1 == '\\' && p1[1] == ')')
51158186Selan || (obscure_syntax & RE_NO_BK_VBAR
51258186Selan ? *p1 == '|'
51358186Selan : *p1 == '\\' && p1[1] == '|'))
51458186Selan {
51558186Selan BUFPUSH (endline);
51658186Selan break;
51758186Selan }
51858186Selan goto normal_char;
51958186Selan }
52058186Selan case '^':
52158186Selan /* ^ means succeed if at beg of line, but only if no preceding
52258186Selan pattern. */
52358186Selan
52458186Selan if ((obscure_syntax & RE_CONTEXTUAL_INVALID_OPS) && laststart)
52558186Selan goto invalid_pattern;
52658186Selan if (laststart && p - 2 >= pattern && p[-2] != '\n'
52758186Selan && !(obscure_syntax & RE_CONTEXT_INDEP_OPS))
52858186Selan goto normal_char;
52958186Selan if (obscure_syntax & RE_TIGHT_VBAR)
53058186Selan {
53158186Selan if (p != pattern + 1
53258186Selan && ! (obscure_syntax & RE_CONTEXT_INDEP_OPS))
53358186Selan goto normal_char;
53458186Selan BUFPUSH (begline);
53558186Selan begalt = b;
53658186Selan }
53758186Selan else
53858186Selan BUFPUSH (begline);
53958186Selan break;
54058186Selan
54158186Selan case '+':
54258186Selan case '?':
54358186Selan if ((obscure_syntax & RE_BK_PLUS_QM)
54458186Selan || (obscure_syntax & RE_LIMITED_OPS))
54558186Selan goto normal_char;
54658186Selan handle_plus:
54758186Selan case '*':
54858186Selan /* If there is no previous pattern, char not special. */
54958186Selan if (!laststart)
55058186Selan {
55158186Selan if (obscure_syntax & RE_CONTEXTUAL_INVALID_OPS)
55258186Selan goto invalid_pattern;
55358186Selan else if (! (obscure_syntax & RE_CONTEXT_INDEP_OPS))
55458186Selan goto normal_char;
55558186Selan }
55658186Selan /* If there is a sequence of repetition chars,
55758186Selan collapse it down to just one. */
55858186Selan zero_times_ok = 0;
55958186Selan many_times_ok = 0;
56058186Selan while (1)
56158186Selan {
56258186Selan zero_times_ok |= c != '+';
56358186Selan many_times_ok |= c != '?';
56458186Selan if (p == pend)
56558186Selan break;
56658186Selan PATFETCH (c);
56758186Selan if (c == '*')
56858186Selan ;
56958186Selan else if (!(obscure_syntax & RE_BK_PLUS_QM)
57058186Selan && (c == '+' || c == '?'))
57158186Selan ;
57258186Selan else if ((obscure_syntax & RE_BK_PLUS_QM)
57358186Selan && c == '\\')
57458186Selan {
57558186Selan int c1;
57658186Selan PATFETCH (c1);
57758186Selan if (!(c1 == '+' || c1 == '?'))
57858186Selan {
57958186Selan PATUNFETCH;
58058186Selan PATUNFETCH;
58158186Selan break;
58258186Selan }
58358186Selan c = c1;
58458186Selan }
58558186Selan else
58658186Selan {
58758186Selan PATUNFETCH;
58858186Selan break;
58958186Selan }
59058186Selan }
59158186Selan
59258186Selan /* Star, etc. applied to an empty pattern is equivalent
59358186Selan to an empty pattern. */
59458186Selan if (!laststart)
59558186Selan break;
59658186Selan
59758186Selan /* Now we know whether or not zero matches is allowed
59858186Selan and also whether or not two or more matches is allowed. */
59958186Selan if (many_times_ok)
60058186Selan {
60158186Selan /* If more than one repetition is allowed, put in at the
60258186Selan end a backward relative jump from b to before the next
60358186Selan jump we're going to put in below (which jumps from
60458186Selan laststart to after this jump). */
60558186Selan GET_BUFFER_SPACE (3);
60658186Selan store_jump (b, maybe_finalize_jump, laststart - 3);
60758186Selan b += 3; /* Because store_jump put stuff here. */
60858186Selan }
60958186Selan /* On failure, jump from laststart to b + 3, which will be the
61058186Selan end of the buffer after this jump is inserted. */
61158186Selan GET_BUFFER_SPACE (3);
61258186Selan insert_jump (on_failure_jump, laststart, b + 3, b);
61358186Selan pending_exact = 0;
61458186Selan b += 3;
61558186Selan if (!zero_times_ok)
61658186Selan {
61758186Selan /* At least one repetition is required, so insert a
61858186Selan dummy-failure before the initial on-failure-jump
61958186Selan instruction of the loop. This effects a skip over that
62058186Selan instruction the first time we hit that loop. */
62158186Selan GET_BUFFER_SPACE (6);
62258186Selan insert_jump (dummy_failure_jump, laststart, laststart + 6, b);
62358186Selan b += 3;
62458186Selan }
62558186Selan break;
62658186Selan
62758186Selan case '.':
62858186Selan laststart = b;
62958186Selan BUFPUSH (anychar);
63058186Selan break;
63158186Selan
63258186Selan case '[':
63358186Selan if (p == pend)
63458186Selan goto invalid_pattern;
63558186Selan while (b - bufp->buffer
63658186Selan > bufp->allocated - 3 - (1 << BYTEWIDTH) / BYTEWIDTH)
63758186Selan EXTEND_BUFFER;
63858186Selan
63958186Selan laststart = b;
64058186Selan if (*p == '^')
64158186Selan {
64258186Selan BUFPUSH (charset_not);
64358186Selan p++;
64458186Selan }
64558186Selan else
64658186Selan BUFPUSH (charset);
64758186Selan p1 = p;
64858186Selan
64958186Selan BUFPUSH ((1 << BYTEWIDTH) / BYTEWIDTH);
65058186Selan /* Clear the whole map */
65158186Selan memset (b, 0, (1 << BYTEWIDTH) / BYTEWIDTH);
65258186Selan
65358186Selan if ((obscure_syntax & RE_HAT_NOT_NEWLINE) && b[-2] == charset_not)
65458186Selan SET_LIST_BIT ('\n');
65558186Selan
65658186Selan
65758186Selan /* Read in characters and ranges, setting map bits. */
65858186Selan while (1)
65958186Selan {
66058186Selan /* Don't translate while fetching, in case it's a range bound.
66158186Selan When we set the bit for the character, we translate it. */
66258186Selan PATFETCH_RAW (c);
66358186Selan
66458186Selan /* If set, \ escapes characters when inside [...]. */
66558186Selan if ((obscure_syntax & RE_AWK_CLASS_HACK) && c == '\\')
66658186Selan {
66758186Selan PATFETCH(c1);
66858186Selan SET_LIST_BIT (c1);
66958186Selan continue;
67058186Selan }
67158186Selan if (c == ']')
67258186Selan {
67358186Selan if (p == p1 + 1)
67458186Selan {
67558186Selan /* If this is an empty bracket expression. */
67658186Selan if ((obscure_syntax & RE_NO_EMPTY_BRACKETS)
67758186Selan && p == pend)
67858186Selan goto invalid_pattern;
67958186Selan }
68058186Selan else
68158186Selan /* Stop if this isn't merely a ] inside a bracket
68258186Selan expression, but rather the end of a bracket
68358186Selan expression. */
68458186Selan break;
68558186Selan }
68658186Selan /* Get a range. */
68758186Selan if (p[0] == '-' && p[1] != ']')
68858186Selan {
68958186Selan PATFETCH (c1);
69058186Selan /* Don't translate the range bounds while fetching them. */
69158186Selan PATFETCH_RAW (c1);
69258186Selan
69358186Selan if ((obscure_syntax & RE_NO_EMPTY_RANGES) && c > c1)
69458186Selan goto invalid_pattern;
69558186Selan
69658186Selan if ((obscure_syntax & RE_NO_HYPHEN_RANGE_END)
69758186Selan && c1 == '-' && *p != ']')
69858186Selan goto invalid_pattern;
69958186Selan
70058186Selan while (c <= c1)
70158186Selan {
70258186Selan /* Translate each char that's in the range. */
70358186Selan if (translate)
70458186Selan SET_LIST_BIT (translate[c]);
70558186Selan else
70658186Selan SET_LIST_BIT (c);
70758186Selan c++;
70858186Selan }
70958186Selan }
71058186Selan else if ((obscure_syntax & RE_CHAR_CLASSES)
71158186Selan && c == '[' && p[0] == ':')
71258186Selan {
71358186Selan /* Longest valid character class word has six characters. */
71458186Selan char str[CHAR_CLASS_MAX_LENGTH];
71558186Selan PATFETCH (c);
71658186Selan c1 = 0;
71758186Selan /* If no ] at end. */
71858186Selan if (p == pend)
71958186Selan goto invalid_pattern;
72058186Selan while (1)
72158186Selan {
72258186Selan /* Don't translate the ``character class'' characters. */
72358186Selan PATFETCH_RAW (c);
72458186Selan if (c == ':' || c == ']' || p == pend
72558186Selan || c1 == CHAR_CLASS_MAX_LENGTH)
72658186Selan break;
72758186Selan str[c1++] = c;
72858186Selan }
72958186Selan str[c1] = '\0';
73058186Selan if (p == pend
73158186Selan || c == ']' /* End of the bracket expression. */
73258186Selan || p[0] != ']'
73358186Selan || p + 1 == pend
73458186Selan || (strcmp (str, "alpha") != 0
73558186Selan && strcmp (str, "upper") != 0
73658186Selan && strcmp (str, "lower") != 0
73758186Selan && strcmp (str, "digit") != 0
73858186Selan && strcmp (str, "alnum") != 0
73958186Selan && strcmp (str, "xdigit") != 0
74058186Selan && strcmp (str, "space") != 0
74158186Selan && strcmp (str, "print") != 0
74258186Selan && strcmp (str, "punct") != 0
74358186Selan && strcmp (str, "graph") != 0
74458186Selan && strcmp (str, "cntrl") != 0))
74558186Selan {
74658186Selan /* Undo the ending character, the letters, and leave
74758186Selan the leading : and [ (but set bits for them). */
74858186Selan c1++;
74958186Selan while (c1--)
75058186Selan PATUNFETCH;
75158186Selan SET_LIST_BIT ('[');
75258186Selan SET_LIST_BIT (':');
75358186Selan }
75458186Selan else
75558186Selan {
75658186Selan /* The ] at the end of the character class. */
75758186Selan PATFETCH (c);
75858186Selan if (c != ']')
75958186Selan goto invalid_pattern;
76058186Selan for (c = 0; c < (1 << BYTEWIDTH); c++)
76158186Selan {
76258186Selan if ((strcmp (str, "alpha") == 0 && isalpha (c))
76358186Selan || (strcmp (str, "upper") == 0 && isupper (c))
76458186Selan || (strcmp (str, "lower") == 0 && islower (c))
76558186Selan || (strcmp (str, "digit") == 0 && isdigit (c))
76658186Selan || (strcmp (str, "alnum") == 0 && isalnum (c))
76758186Selan || (strcmp (str, "xdigit") == 0 && isxdigit (c))
76858186Selan || (strcmp (str, "space") == 0 && isspace (c))
76958186Selan || (strcmp (str, "print") == 0 && isprint (c))
77058186Selan || (strcmp (str, "punct") == 0 && ispunct (c))
77158186Selan || (strcmp (str, "graph") == 0 && isgraph (c))
77258186Selan || (strcmp (str, "cntrl") == 0 && iscntrl (c)))
77358186Selan SET_LIST_BIT (c);
77458186Selan }
77558186Selan }
77658186Selan }
77758186Selan else if (translate)
77858186Selan SET_LIST_BIT (translate[c]);
77958186Selan else
78058186Selan SET_LIST_BIT (c);
78158186Selan }
78258186Selan
78358186Selan /* Discard any character set/class bitmap bytes that are all
78458186Selan 0 at the end of the map. Decrement the map-length byte too. */
78558186Selan while ((int) b[-1] > 0 && b[b[-1] - 1] == 0)
78658186Selan b[-1]--;
78758186Selan b += b[-1];
78858186Selan break;
78958186Selan
79058186Selan case '(':
79158186Selan if (! (obscure_syntax & RE_NO_BK_PARENS))
79258186Selan goto normal_char;
79358186Selan else
79458186Selan goto handle_open;
79558186Selan
79658186Selan case ')':
79758186Selan if (! (obscure_syntax & RE_NO_BK_PARENS))
79858186Selan goto normal_char;
79958186Selan else
80058186Selan goto handle_close;
80158186Selan
80258186Selan case '\n':
80358186Selan if (! (obscure_syntax & RE_NEWLINE_OR))
80458186Selan goto normal_char;
80558186Selan else
80658186Selan goto handle_bar;
80758186Selan
80858186Selan case '|':
80958186Selan if ((obscure_syntax & RE_CONTEXTUAL_INVALID_OPS)
81058186Selan && (! laststart || p == pend))
81158186Selan goto invalid_pattern;
81258186Selan else if (! (obscure_syntax & RE_NO_BK_VBAR))
81358186Selan goto normal_char;
81458186Selan else
81558186Selan goto handle_bar;
81658186Selan
81758186Selan case '{':
81858186Selan if (! ((obscure_syntax & RE_NO_BK_CURLY_BRACES)
81958186Selan && (obscure_syntax & RE_INTERVALS)))
82058186Selan goto normal_char;
82158186Selan else
82258186Selan goto handle_interval;
82358186Selan
82458186Selan case '\\':
82558186Selan if (p == pend) goto invalid_pattern;
82658186Selan PATFETCH_RAW (c);
82758186Selan switch (c)
82858186Selan {
82958186Selan case '(':
83058186Selan if (obscure_syntax & RE_NO_BK_PARENS)
83158186Selan goto normal_backsl;
83258186Selan handle_open:
83358186Selan if (stackp == stacke) goto nesting_too_deep;
83458186Selan
83558186Selan /* Laststart should point to the start_memory that we are about
83658186Selan to push (unless the pattern has RE_NREGS or more ('s). */
83758186Selan *stackp++ = b - bufp->buffer;
83858186Selan if (regnum < RE_NREGS)
83958186Selan {
84058186Selan BUFPUSH (start_memory);
84158186Selan BUFPUSH (regnum);
84258186Selan }
84358186Selan *stackp++ = fixup_jump ? fixup_jump - bufp->buffer + 1 : 0;
84458186Selan *stackp++ = regnum++;
84558186Selan *stackp++ = begalt - bufp->buffer;
84658186Selan fixup_jump = 0;
84758186Selan laststart = 0;
84858186Selan begalt = b;
84958186Selan break;
85058186Selan
85158186Selan case ')':
85258186Selan if (obscure_syntax & RE_NO_BK_PARENS)
85358186Selan goto normal_backsl;
85458186Selan handle_close:
85558186Selan if (stackp == stackb) goto unmatched_close;
85658186Selan begalt = *--stackp + bufp->buffer;
85758186Selan if (fixup_jump)
85858186Selan store_jump (fixup_jump, jump, b);
85958186Selan if (stackp[-1] < RE_NREGS)
86058186Selan {
86158186Selan BUFPUSH (stop_memory);
86258186Selan BUFPUSH (stackp[-1]);
86358186Selan }
86458186Selan stackp -= 2;
86558186Selan fixup_jump = *stackp ? *stackp + bufp->buffer - 1 : 0;
86658186Selan laststart = *--stackp + bufp->buffer;
86758186Selan break;
86858186Selan
86958186Selan case '|':
87058186Selan if ((obscure_syntax & RE_LIMITED_OPS)
87158186Selan || (obscure_syntax & RE_NO_BK_VBAR))
87258186Selan goto normal_backsl;
87358186Selan handle_bar:
87458186Selan if (obscure_syntax & RE_LIMITED_OPS)
87558186Selan goto normal_char;
87658186Selan /* Insert before the previous alternative a jump which
87758186Selan jumps to this alternative if the former fails. */
87858186Selan GET_BUFFER_SPACE (6);
87958186Selan insert_jump (on_failure_jump, begalt, b + 6, b);
88058186Selan pending_exact = 0;
88158186Selan b += 3;
88258186Selan /* The alternative before the previous alternative has a
88358186Selan jump after it which gets executed if it gets matched.
88458186Selan Adjust that jump so it will jump to the previous
88558186Selan alternative's analogous jump (put in below, which in
88658186Selan turn will jump to the next (if any) alternative's such
88758186Selan jump, etc.). The last such jump jumps to the correct
88858186Selan final destination. */
88958186Selan if (fixup_jump)
89058186Selan store_jump (fixup_jump, jump, b);
89158186Selan
89258186Selan /* Leave space for a jump after previous alternative---to be
89358186Selan filled in later. */
89458186Selan fixup_jump = b;
89558186Selan b += 3;
89658186Selan
89758186Selan laststart = 0;
89858186Selan begalt = b;
89958186Selan break;
90058186Selan
90158186Selan case '{':
90258186Selan if (! (obscure_syntax & RE_INTERVALS)
90358186Selan /* Let \{ be a literal. */
90458186Selan || ((obscure_syntax & RE_INTERVALS)
90558186Selan && (obscure_syntax & RE_NO_BK_CURLY_BRACES))
90658186Selan /* If it's the string "\{". */
90758186Selan || (p - 2 == pattern && p == pend))
90858186Selan goto normal_backsl;
90958186Selan handle_interval:
91058186Selan beg_interval = p - 1; /* The {. */
91158186Selan /* If there is no previous pattern, this isn't an interval. */
91258186Selan if (!laststart)
91358186Selan {
91458186Selan if (obscure_syntax & RE_CONTEXTUAL_INVALID_OPS)
91558186Selan goto invalid_pattern;
91658186Selan else
91758186Selan goto normal_backsl;
91858186Selan }
91958186Selan /* It also isn't an interval if not preceded by an re
92058186Selan matching a single character or subexpression, or if
92158186Selan the current type of intervals can't handle back
92258186Selan references and the previous thing is a back reference. */
92358186Selan if (! (*laststart == anychar
92458186Selan || *laststart == charset
92558186Selan || *laststart == charset_not
92658186Selan || *laststart == start_memory
92758186Selan || (*laststart == exactn && laststart[1] == 1)
92858186Selan || (! (obscure_syntax & RE_NO_BK_REFS)
92958186Selan && *laststart == duplicate)))
93058186Selan {
93158186Selan if (obscure_syntax & RE_NO_BK_CURLY_BRACES)
93258186Selan goto normal_char;
93358186Selan
93458186Selan /* Posix extended syntax is handled in previous
93558186Selan statement; this is for Posix basic syntax. */
93658186Selan if (obscure_syntax & RE_INTERVALS)
93758186Selan goto invalid_pattern;
93858186Selan
93958186Selan goto normal_backsl;
94058186Selan }
94158186Selan lower_bound = -1; /* So can see if are set. */
94258186Selan upper_bound = -1;
94358186Selan GET_UNSIGNED_NUMBER (lower_bound);
94458186Selan if (c == ',')
94558186Selan {
94658186Selan GET_UNSIGNED_NUMBER (upper_bound);
94758186Selan if (upper_bound < 0)
94858186Selan upper_bound = RE_DUP_MAX;
94958186Selan }
95058186Selan if (upper_bound < 0)
95158186Selan upper_bound = lower_bound;
95258186Selan if (! (obscure_syntax & RE_NO_BK_CURLY_BRACES))
95358186Selan {
95458186Selan if (c != '\\')
95558186Selan goto invalid_pattern;
95658186Selan PATFETCH (c);
95758186Selan }
95858186Selan if (c != '}' || lower_bound < 0 || upper_bound > RE_DUP_MAX
95958186Selan || lower_bound > upper_bound
96058186Selan || ((obscure_syntax & RE_NO_BK_CURLY_BRACES)
96158186Selan && p != pend && *p == '{'))
96258186Selan {
96358186Selan if (obscure_syntax & RE_NO_BK_CURLY_BRACES)
96458186Selan goto unfetch_interval;
96558186Selan else
96658186Selan goto invalid_pattern;
96758186Selan }
96858186Selan
96958186Selan /* If upper_bound is zero, don't want to succeed at all;
97058186Selan jump from laststart to b + 3, which will be the end of
97158186Selan the buffer after this jump is inserted. */
97258186Selan
97358186Selan if (upper_bound == 0)
97458186Selan {
97558186Selan GET_BUFFER_SPACE (3);
97658186Selan insert_jump (jump, laststart, b + 3, b);
97758186Selan b += 3;
97858186Selan }
97958186Selan
98058186Selan /* Otherwise, after lower_bound number of succeeds, jump
98158186Selan to after the jump_n which will be inserted at the end
98258186Selan of the buffer, and insert that jump_n. */
98358186Selan else
98458186Selan { /* Set to 5 if only one repetition is allowed and
98558186Selan hence no jump_n is inserted at the current end of
98658186Selan the buffer; then only space for the succeed_n is
98758186Selan needed. Otherwise, need space for both the
98858186Selan succeed_n and the jump_n. */
98958186Selan
99058186Selan unsigned slots_needed = upper_bound == 1 ? 5 : 10;
99158186Selan
99258186Selan GET_BUFFER_SPACE ((int) slots_needed);
99358186Selan /* Initialize the succeed_n to n, even though it will
99458186Selan be set by its attendant set_number_at, because
99558186Selan re_compile_fastmap will need to know it. Jump to
99658186Selan what the end of buffer will be after inserting
99758186Selan this succeed_n and possibly appending a jump_n. */
99858186Selan insert_jump_n (succeed_n, laststart, b + slots_needed,
99958186Selan b, lower_bound);
100058186Selan b += 5; /* Just increment for the succeed_n here. */
100158186Selan
100258186Selan /* More than one repetition is allowed, so put in at
100358186Selan the end of the buffer a backward jump from b to the
100458186Selan succeed_n we put in above. By the time we've gotten
100558186Selan to this jump when matching, we'll have matched once
100658186Selan already, so jump back only upper_bound - 1 times. */
100758186Selan
100858186Selan if (upper_bound > 1)
100958186Selan {
101058186Selan store_jump_n (b, jump_n, laststart, upper_bound - 1);
101158186Selan b += 5;
101258186Selan /* When hit this when matching, reset the
101358186Selan preceding jump_n's n to upper_bound - 1. */
101458186Selan BUFPUSH (set_number_at);
101558186Selan GET_BUFFER_SPACE (2);
101658186Selan STORE_NUMBER_AND_INCR (b, -5);
101758186Selan STORE_NUMBER_AND_INCR (b, upper_bound - 1);
101858186Selan }
101958186Selan /* When hit this when matching, set the succeed_n's n. */
102058186Selan GET_BUFFER_SPACE (5);
102158186Selan insert_op_2 (set_number_at, laststart, b, 5, lower_bound);
102258186Selan b += 5;
102358186Selan }
102458186Selan pending_exact = 0;
102558186Selan beg_interval = 0;
102658186Selan break;
102758186Selan
102858186Selan
102958186Selan unfetch_interval:
103058186Selan /* If an invalid interval, match the characters as literals. */
103158186Selan if (beg_interval)
103258186Selan p = beg_interval;
103358186Selan else
103458186Selan {
103558186Selan fprintf (stderr,
103658186Selan "regex: no interval beginning to which to backtrack.\n");
103758186Selan exit (1);
103858186Selan }
103958186Selan
104058186Selan beg_interval = 0;
104158186Selan PATFETCH (c); /* normal_char expects char in `c'. */
104258186Selan goto normal_char;
104358186Selan break;
104458186Selan
104558186Selan #ifdef emacs
104658186Selan case '=':
104758186Selan BUFPUSH (at_dot);
104858186Selan break;
104958186Selan
105058186Selan case 's':
105158186Selan laststart = b;
105258186Selan BUFPUSH (syntaxspec);
105358186Selan PATFETCH (c);
105458186Selan BUFPUSH (syntax_spec_code[c]);
105558186Selan break;
105658186Selan
105758186Selan case 'S':
105858186Selan laststart = b;
105958186Selan BUFPUSH (notsyntaxspec);
106058186Selan PATFETCH (c);
106158186Selan BUFPUSH (syntax_spec_code[c]);
106258186Selan break;
106358186Selan #endif /* emacs */
106458186Selan
106558186Selan case 'w':
106658186Selan laststart = b;
106758186Selan BUFPUSH (wordchar);
106858186Selan break;
106958186Selan
107058186Selan case 'W':
107158186Selan laststart = b;
107258186Selan BUFPUSH (notwordchar);
107358186Selan break;
107458186Selan
107558186Selan case '<':
107658186Selan BUFPUSH (wordbeg);
107758186Selan break;
107858186Selan
107958186Selan case '>':
108058186Selan BUFPUSH (wordend);
108158186Selan break;
108258186Selan
108358186Selan case 'b':
108458186Selan BUFPUSH (wordbound);
108558186Selan break;
108658186Selan
108758186Selan case 'B':
108858186Selan BUFPUSH (notwordbound);
108958186Selan break;
109058186Selan
109158186Selan case '`':
109258186Selan BUFPUSH (begbuf);
109358186Selan break;
109458186Selan
109558186Selan case '\'':
109658186Selan BUFPUSH (endbuf);
109758186Selan break;
109858186Selan
109958186Selan case '1':
110058186Selan case '2':
110158186Selan case '3':
110258186Selan case '4':
110358186Selan case '5':
110458186Selan case '6':
110558186Selan case '7':
110658186Selan case '8':
110758186Selan case '9':
110858186Selan if (obscure_syntax & RE_NO_BK_REFS)
110958186Selan goto normal_char;
111058186Selan c1 = c - '0';
111158186Selan if (c1 >= regnum)
111258186Selan {
111358186Selan if (obscure_syntax & RE_NO_EMPTY_BK_REF)
111458186Selan goto invalid_pattern;
111558186Selan else
111658186Selan goto normal_char;
111758186Selan }
111858186Selan /* Can't back reference to a subexpression if inside of it. */
111958186Selan for (stackt = stackp - 2; stackt > stackb; stackt -= 4)
112058186Selan if (*stackt == c1)
112158186Selan goto normal_char;
112258186Selan laststart = b;
112358186Selan BUFPUSH (duplicate);
112458186Selan BUFPUSH (c1);
112558186Selan break;
112658186Selan
112758186Selan case '+':
112858186Selan case '?':
112958186Selan if (obscure_syntax & RE_BK_PLUS_QM)
113058186Selan goto handle_plus;
113158186Selan else
113258186Selan goto normal_backsl;
113358186Selan break;
113458186Selan
113558186Selan default:
113658186Selan normal_backsl:
113758186Selan /* You might think it would be useful for \ to mean
113858186Selan not to translate; but if we don't translate it
113958186Selan it will never match anything. */
114058186Selan if (translate) c = translate[c];
114158186Selan goto normal_char;
114258186Selan }
114358186Selan break;
114458186Selan
114558186Selan default:
114658186Selan normal_char: /* Expects the character in `c'. */
114758186Selan if (!pending_exact || pending_exact + *pending_exact + 1 != b
114858186Selan || *pending_exact == 0177 || *p == '*' || *p == '^'
114958186Selan || ((obscure_syntax & RE_BK_PLUS_QM)
115058186Selan ? *p == '\\' && (p[1] == '+' || p[1] == '?')
115158186Selan : (*p == '+' || *p == '?'))
115258186Selan || ((obscure_syntax & RE_INTERVALS)
115358186Selan && ((obscure_syntax & RE_NO_BK_CURLY_BRACES)
115458186Selan ? *p == '{'
115558186Selan : (p[0] == '\\' && p[1] == '{'))))
115658186Selan {
115758186Selan laststart = b;
115858186Selan BUFPUSH (exactn);
115958186Selan pending_exact = b;
116058186Selan BUFPUSH (0);
116158186Selan }
116258186Selan BUFPUSH (c);
116358186Selan (*pending_exact)++;
116458186Selan }
116558186Selan }
116658186Selan
116758186Selan if (fixup_jump)
116858186Selan store_jump (fixup_jump, jump, b);
116958186Selan
117058186Selan if (stackp != stackb) goto unmatched_open;
117158186Selan
117258186Selan bufp->used = b - bufp->buffer;
117358186Selan return 0;
117458186Selan
117558186Selan invalid_pattern:
117658186Selan return "Invalid regular expression";
117758186Selan
117858186Selan unmatched_open:
117958186Selan return "Unmatched \\(";
118058186Selan
118158186Selan unmatched_close:
118258186Selan return "Unmatched \\)";
118358186Selan
118458186Selan end_of_pattern:
118558186Selan return "Premature end of regular expression";
118658186Selan
118758186Selan nesting_too_deep:
118858186Selan return "Nesting too deep";
118958186Selan
119058186Selan too_big:
119158186Selan return "Regular expression too big";
119258186Selan
119358186Selan memory_exhausted:
119458186Selan return "Memory exhausted";
119558186Selan }
119658186Selan
119758186Selan
119858186Selan /* Store a jump of the form <OPCODE> <relative address>.
119958186Selan Store in the location FROM a jump operation to jump to relative
120058186Selan address FROM - TO. OPCODE is the opcode to store. */
120158186Selan
120258186Selan static void
store_jump(char * from,char opcode,char * to)120358186Selan store_jump (char *from, char opcode, char *to)
120458186Selan {
120558186Selan from[0] = opcode;
120658186Selan STORE_NUMBER(from + 1, to - (from + 3));
120758186Selan }
120858186Selan
120958186Selan
121058186Selan /* Open up space before char FROM, and insert there a jump to TO.
121158186Selan CURRENT_END gives the end of the storage not in use, so we know
121258186Selan how much data to copy up. OP is the opcode of the jump to insert.
121358186Selan
121458186Selan If you call this function, you must zero out pending_exact. */
121558186Selan
121658186Selan static void
insert_jump(char op,char * from,char * to,char * current_end)121758186Selan insert_jump (char op, char *from, char *to, char *current_end)
121858186Selan {
121958186Selan register char *pfrom = current_end; /* Copy from here... */
122058186Selan register char *pto = current_end + 3; /* ...to here. */
122158186Selan
122258186Selan while (pfrom != from)
122358186Selan *--pto = *--pfrom;
122458186Selan store_jump (from, op, to);
122558186Selan }
122658186Selan
122758186Selan
122858186Selan /* Store a jump of the form <opcode> <relative address> <n> .
122958186Selan
123058186Selan Store in the location FROM a jump operation to jump to relative
123158186Selan address FROM - TO. OPCODE is the opcode to store, N is a number the
123258186Selan jump uses, say, to decide how many times to jump.
123358186Selan
123458186Selan If you call this function, you must zero out pending_exact. */
123558186Selan
123658186Selan static void
store_jump_n(char * from,char opcode,char * to,unsigned n)123758186Selan store_jump_n (char *from, char opcode, char *to, unsigned n)
123858186Selan {
123958186Selan from[0] = opcode;
124058186Selan STORE_NUMBER (from + 1, to - (from + 3));
124158186Selan STORE_NUMBER (from + 3, n);
124258186Selan }
124358186Selan
124458186Selan
124558186Selan /* Similar to insert_jump, but handles a jump which needs an extra
124658186Selan number to handle minimum and maximum cases. Open up space at
124758186Selan location FROM, and insert there a jump to TO. CURRENT_END gives the
124858186Selan end of the storage in use, so we know how much data to copy up. OP is
124958186Selan the opcode of the jump to insert.
125058186Selan
125158186Selan If you call this function, you must zero out pending_exact. */
125258186Selan
125358186Selan static void
insert_jump_n(char op,char * from,char * to,char * current_end,unsigned n)125458186Selan insert_jump_n (char op, char *from, char *to, char *current_end, unsigned n)
125558186Selan {
125658186Selan register char *pfrom = current_end; /* Copy from here... */
125758186Selan register char *pto = current_end + 5; /* ...to here. */
125858186Selan
125958186Selan while (pfrom != from)
126058186Selan *--pto = *--pfrom;
126158186Selan store_jump_n (from, op, to, n);
126258186Selan }
126358186Selan
126458186Selan
126558186Selan /* Open up space at location THERE, and insert operation OP followed by
126658186Selan NUM_1 and NUM_2. CURRENT_END gives the end of the storage in use, so
126758186Selan we know how much data to copy up.
126858186Selan
126958186Selan If you call this function, you must zero out pending_exact. */
127058186Selan
127158186Selan static void
insert_op_2(char op,char * there,char * current_end,int num_1,int num_2)127258186Selan insert_op_2 (char op, char *there, char *current_end, int num_1, int num_2)
127358186Selan {
127458186Selan register char *pfrom = current_end; /* Copy from here... */
127558186Selan register char *pto = current_end + 5; /* ...to here. */
127658186Selan
127758186Selan while (pfrom != there)
127858186Selan *--pto = *--pfrom;
127958186Selan
128058186Selan there[0] = op;
128158186Selan STORE_NUMBER (there + 1, num_1);
128258186Selan STORE_NUMBER (there + 3, num_2);
128358186Selan }
128458186Selan
128558186Selan
128658186Selan
128758186Selan /* Given a pattern, compute a fastmap from it. The fastmap records
128858186Selan which of the (1 << BYTEWIDTH) possible characters can start a string
128958186Selan that matches the pattern. This fastmap is used by re_search to skip
129058186Selan quickly over totally implausible text.
129158186Selan
129258186Selan The caller must supply the address of a (1 << BYTEWIDTH)-byte data
129358186Selan area as bufp->fastmap.
129458186Selan The other components of bufp describe the pattern to be used. */
129558186Selan
129658186Selan void
re_compile_fastmap(struct re_pattern_buffer * bufp)129758186Selan re_compile_fastmap (struct re_pattern_buffer *bufp)
129858186Selan {
129958186Selan unsigned char *pattern = (unsigned char *) bufp->buffer;
130058186Selan int size = bufp->used;
130158186Selan register char *fastmap = bufp->fastmap;
130258186Selan register unsigned char *p = pattern;
130358186Selan register unsigned char *pend = pattern + size;
130458186Selan register int j, k;
130558186Selan unsigned char *translate = (unsigned char *) bufp->translate;
130658186Selan
130758186Selan unsigned char *stackb[NFAILURES];
130858186Selan unsigned char **stackp = stackb;
130958186Selan
131058186Selan unsigned is_a_succeed_n;
131158186Selan
131258186Selan memset (fastmap, 0, (1 << BYTEWIDTH));
131358186Selan bufp->fastmap_accurate = 1;
131458186Selan bufp->can_be_null = 0;
131558186Selan
131658186Selan while (p)
131758186Selan {
131858186Selan is_a_succeed_n = 0;
131958186Selan if (p == pend)
132058186Selan {
132158186Selan bufp->can_be_null = 1;
132258186Selan break;
132358186Selan }
132458186Selan #ifdef SWITCH_ENUM_BUG
132558186Selan switch ((int) ((enum regexpcode) *p++))
132658186Selan #else
132758186Selan switch ((enum regexpcode) *p++)
132858186Selan #endif
132958186Selan {
133058186Selan case exactn:
133158186Selan if (translate)
133258186Selan fastmap[translate[p[1]]] = 1;
133358186Selan else
133458186Selan fastmap[p[1]] = 1;
133558186Selan break;
133658186Selan
133758186Selan case unused:
133858186Selan case begline:
133958186Selan #ifdef emacs
134058186Selan case before_dot:
134158186Selan case at_dot:
134258186Selan case after_dot:
134358186Selan #endif
134458186Selan case begbuf:
134558186Selan case endbuf:
134658186Selan case wordbound:
134758186Selan case notwordbound:
134858186Selan case wordbeg:
134958186Selan case wordend:
135058186Selan continue;
135158186Selan
135258186Selan case endline:
135358186Selan if (translate)
135458186Selan fastmap[translate['\n']] = 1;
135558186Selan else
135658186Selan fastmap['\n'] = 1;
135758186Selan
135858186Selan if (bufp->can_be_null != 1)
135958186Selan bufp->can_be_null = 2;
136058186Selan break;
136158186Selan
136258186Selan case jump_n:
136358186Selan case finalize_jump:
136458186Selan case maybe_finalize_jump:
136558186Selan case jump:
136658186Selan case dummy_failure_jump:
136758186Selan EXTRACT_NUMBER_AND_INCR (j, p);
136858186Selan p += j;
136958186Selan if (j > 0)
137058186Selan continue;
137158186Selan /* Jump backward reached implies we just went through
137258186Selan the body of a loop and matched nothing.
137358186Selan Opcode jumped to should be an on_failure_jump.
137458186Selan Just treat it like an ordinary jump.
137558186Selan For a * loop, it has pushed its failure point already;
137658186Selan If so, discard that as redundant. */
137758186Selan
137858186Selan if ((enum regexpcode) *p != on_failure_jump
137958186Selan && (enum regexpcode) *p != succeed_n)
138058186Selan continue;
138158186Selan p++;
138258186Selan EXTRACT_NUMBER_AND_INCR (j, p);
138358186Selan p += j;
138458186Selan if (stackp != stackb && *stackp == p)
138558186Selan stackp--;
138658186Selan continue;
138758186Selan
138858186Selan case on_failure_jump:
138958186Selan handle_on_failure_jump:
139058186Selan EXTRACT_NUMBER_AND_INCR (j, p);
139158186Selan *++stackp = p + j;
139258186Selan if (is_a_succeed_n)
139358186Selan EXTRACT_NUMBER_AND_INCR (k, p); /* Skip the n. */
139458186Selan continue;
139558186Selan
139658186Selan case succeed_n:
139758186Selan is_a_succeed_n = 1;
139858186Selan /* Get to the number of times to succeed. */
139958186Selan p += 2;
140058186Selan /* Increment p past the n for when k != 0. */
140158186Selan EXTRACT_NUMBER_AND_INCR (k, p);
140258186Selan if (k == 0)
140358186Selan {
140458186Selan p -= 4;
140558186Selan goto handle_on_failure_jump;
140658186Selan }
140758186Selan continue;
140858186Selan
140958186Selan case set_number_at:
141058186Selan p += 4;
141158186Selan continue;
141258186Selan
141358186Selan case start_memory:
141458186Selan case stop_memory:
141558186Selan p++;
141658186Selan continue;
141758186Selan
141858186Selan case duplicate:
141958186Selan bufp->can_be_null = 1;
142058186Selan fastmap['\n'] = 1;
142158186Selan case anychar:
142258186Selan for (j = 0; j < (1 << BYTEWIDTH); j++)
142358186Selan if (j != '\n')
142458186Selan fastmap[j] = 1;
142558186Selan if (bufp->can_be_null)
142658186Selan return;
142758186Selan /* Don't return; check the alternative paths
142858186Selan so we can set can_be_null if appropriate. */
142958186Selan break;
143058186Selan
143158186Selan case wordchar:
143258186Selan for (j = 0; j < (1 << BYTEWIDTH); j++)
143358186Selan if (SYNTAX (j) == Sword)
143458186Selan fastmap[j] = 1;
143558186Selan break;
143658186Selan
143758186Selan case notwordchar:
143858186Selan for (j = 0; j < (1 << BYTEWIDTH); j++)
143958186Selan if (SYNTAX (j) != Sword)
144058186Selan fastmap[j] = 1;
144158186Selan break;
144258186Selan
144358186Selan #ifdef emacs
144458186Selan case syntaxspec:
144558186Selan k = *p++;
144658186Selan for (j = 0; j < (1 << BYTEWIDTH); j++)
144758186Selan if (SYNTAX (j) == (enum syntaxcode) k)
144858186Selan fastmap[j] = 1;
144958186Selan break;
145058186Selan
145158186Selan case notsyntaxspec:
145258186Selan k = *p++;
145358186Selan for (j = 0; j < (1 << BYTEWIDTH); j++)
145458186Selan if (SYNTAX (j) != (enum syntaxcode) k)
145558186Selan fastmap[j] = 1;
145658186Selan break;
145758186Selan #endif /* not emacs */
145858186Selan
145958186Selan case charset:
146058186Selan for (j = *p++ * BYTEWIDTH - 1; j >= 0; j--)
146158186Selan if (p[j / BYTEWIDTH] & (1 << (j % BYTEWIDTH)))
146258186Selan {
146358186Selan if (translate)
146458186Selan fastmap[translate[j]] = 1;
146558186Selan else
146658186Selan fastmap[j] = 1;
146758186Selan }
146858186Selan break;
146958186Selan
147058186Selan case charset_not:
147158186Selan /* Chars beyond end of map must be allowed */
147258186Selan for (j = *p * BYTEWIDTH; j < (1 << BYTEWIDTH); j++)
147358186Selan if (translate)
147458186Selan fastmap[translate[j]] = 1;
147558186Selan else
147658186Selan fastmap[j] = 1;
147758186Selan
147858186Selan for (j = *p++ * BYTEWIDTH - 1; j >= 0; j--)
147958186Selan if (!(p[j / BYTEWIDTH] & (1 << (j % BYTEWIDTH))))
148058186Selan {
148158186Selan if (translate)
148258186Selan fastmap[translate[j]] = 1;
148358186Selan else
148458186Selan fastmap[j] = 1;
148558186Selan }
148658186Selan break;
148758186Selan }
148858186Selan
148958186Selan /* Get here means we have successfully found the possible starting
149058186Selan characters of one path of the pattern. We need not follow this
149158186Selan path any farther. Instead, look at the next alternative
149258186Selan remembered in the stack. */
149358186Selan if (stackp != stackb)
149458186Selan p = *stackp--;
149558186Selan else
149658186Selan break;
149758186Selan }
149858186Selan }
149958186Selan
150058186Selan
150158186Selan
150258186Selan /* Like re_search_2, below, but only one string is specified, and
150358186Selan doesn't let you say where to stop matching. */
150458186Selan
150558186Selan int
re_search(struct re_pattern_buffer * pbufp,char * string,int size,int startpos,int range,struct re_registers * regs)150658186Selan re_search (struct re_pattern_buffer *pbufp,
150758186Selan char *string,
150858186Selan int size,
150958186Selan int startpos,
151058186Selan int range,
151158186Selan struct re_registers *regs)
151258186Selan {
151358186Selan return re_search_2 (pbufp, (char *) 0, 0, string, size, startpos, range,
151458186Selan regs, size);
151558186Selan }
151658186Selan
151758186Selan
151858186Selan /* Using the compiled pattern in PBUFP->buffer, first tries to match the
151958186Selan virtual concatenation of STRING1 and STRING2, starting first at index
152058186Selan STARTPOS, then at STARTPOS + 1, and so on. RANGE is the number of
152158186Selan places to try before giving up. If RANGE is negative, it searches
152258186Selan backwards, i.e., the starting positions tried are STARTPOS, STARTPOS
152358186Selan - 1, etc. STRING1 and STRING2 are of SIZE1 and SIZE2, respectively.
152458186Selan In REGS, return the indices of the virtual concatenation of STRING1
152558186Selan and STRING2 that matched the entire PBUFP->buffer and its contained
152658186Selan subexpressions. Do not consider matching one past the index MSTOP in
152758186Selan the virtual concatenation of STRING1 and STRING2.
152858186Selan
152958186Selan The value returned is the position in the strings at which the match
153058186Selan was found, or -1 if no match was found, or -2 if error (such as
153158186Selan failure stack overflow). */
153258186Selan
153358186Selan int
re_search_2(struct re_pattern_buffer * pbufp,char * string1,int size1,char * string2,int size2,int startpos,register int range,struct re_registers * regs,int mstop)153458186Selan re_search_2 (struct re_pattern_buffer *pbufp,
153558186Selan char *string1, int size1,
153658186Selan char *string2, int size2,
153758186Selan int startpos,
153858186Selan register int range,
153958186Selan struct re_registers *regs,
154058186Selan int mstop)
154158186Selan {
154258186Selan register char *fastmap = pbufp->fastmap;
154358186Selan register unsigned char *translate = (unsigned char *) pbufp->translate;
154458186Selan int total_size = size1 + size2;
154558186Selan int endpos = startpos + range;
154658186Selan int val;
154758186Selan
154858186Selan /* Check for out-of-range starting position. */
154958186Selan if (startpos < 0 || startpos > total_size)
155058186Selan return -1;
155158186Selan
155258186Selan /* Fix up range if it would eventually take startpos outside of the
155358186Selan virtual concatenation of string1 and string2. */
155458186Selan if (endpos < -1)
155558186Selan range = -1 - startpos;
155658186Selan else if (endpos > total_size)
155758186Selan range = total_size - startpos;
155858186Selan
155958186Selan /* Update the fastmap now if not correct already. */
156058186Selan if (fastmap && !pbufp->fastmap_accurate)
156158186Selan re_compile_fastmap (pbufp);
156258186Selan
156358186Selan /* If the search isn't to be a backwards one, don't waste time in a
156458186Selan long search for a pattern that says it is anchored. */
156558186Selan if (pbufp->used > 0 && (enum regexpcode) pbufp->buffer[0] == begbuf
156658186Selan && range > 0)
156758186Selan {
156858186Selan if (startpos > 0)
156958186Selan return -1;
157058186Selan else
157158186Selan range = 1;
157258186Selan }
157358186Selan
157458186Selan while (1)
157558186Selan {
157658186Selan /* If a fastmap is supplied, skip quickly over characters that
157758186Selan cannot possibly be the start of a match. Note, however, that
157858186Selan if the pattern can possibly match the null string, we must
157958186Selan test it at each starting point so that we take the first null
158058186Selan string we get. */
158158186Selan
158258186Selan if (fastmap && startpos < total_size && pbufp->can_be_null != 1)
158358186Selan {
158458186Selan if (range > 0) /* Searching forwards. */
158558186Selan {
158658186Selan register int lim = 0;
158758186Selan register unsigned char *p;
158858186Selan int irange = range;
158958186Selan if (startpos < size1 && startpos + range >= size1)
159058186Selan lim = range - (size1 - startpos);
159158186Selan
159258186Selan p = ((unsigned char *)
159358186Selan &(startpos >= size1 ? string2 - size1 : string1)[startpos]);
159458186Selan
159558186Selan while (range > lim && !fastmap[translate
159658186Selan ? translate[*p++]
159758186Selan : *p++])
159858186Selan range--;
159958186Selan startpos += irange - range;
160058186Selan }
160158186Selan else /* Searching backwards. */
160258186Selan {
160358186Selan register unsigned char c;
160458186Selan
160558186Selan if (string1 == 0 || startpos >= size1)
160658186Selan c = string2[startpos - size1];
160758186Selan else
160858186Selan c = string1[startpos];
160958186Selan
161058186Selan c &= 0xff;
161158186Selan if (translate ? !fastmap[translate[c]] : !fastmap[c])
161258186Selan goto advance;
161358186Selan }
161458186Selan }
161558186Selan
161658186Selan if (range >= 0 && startpos == total_size
161758186Selan && fastmap && pbufp->can_be_null == 0)
161858186Selan return -1;
161958186Selan
162058186Selan val = re_match_2 (pbufp, string1, size1, string2, size2, startpos,
162158186Selan regs, mstop);
162258186Selan if (val >= 0)
162358186Selan return startpos;
162458186Selan if (val == -2)
162558186Selan return -2;
162658186Selan
162758186Selan #ifdef C_ALLOCA
162858186Selan alloca (0);
162958186Selan #endif /* C_ALLOCA */
163058186Selan
163158186Selan advance:
163258186Selan if (!range)
163358186Selan break;
163458186Selan else if (range > 0)
163558186Selan {
163658186Selan range--;
163758186Selan startpos++;
163858186Selan }
163958186Selan else
164058186Selan {
164158186Selan range++;
164258186Selan startpos--;
164358186Selan }
164458186Selan }
164558186Selan return -1;
164658186Selan }
164758186Selan
164858186Selan
164958186Selan
165058186Selan #ifndef emacs /* emacs never uses this. */
165158186Selan int
re_match(struct re_pattern_buffer * pbufp,char * string,int size,int pos,struct re_registers * regs)165258186Selan re_match (struct re_pattern_buffer *pbufp,
165358186Selan char *string,
165458186Selan int size,
165558186Selan int pos,
165658186Selan struct re_registers *regs)
165758186Selan {
165858186Selan return re_match_2 (pbufp, (char *) 0, 0, string, size, pos, regs, size);
165958186Selan }
166058186Selan #endif /* not emacs */
166158186Selan
166258186Selan
166358186Selan /* The following are used for re_match_2, defined below: */
166458186Selan
166558186Selan /* Roughly the maximum number of failure points on the stack. Would be
166658186Selan exactly that if always pushed MAX_NUM_FAILURE_ITEMS each time we failed. */
166758186Selan
166858186Selan int re_max_failures = 2000;
166958186Selan
167058186Selan /* Routine used by re_match_2. */
167158186Selan static int bcmp_translate (char *, char *, int, unsigned char *);
167258186Selan
167358186Selan
167458186Selan /* Structure and accessing macros used in re_match_2: */
167558186Selan
167658186Selan struct register_info
167758186Selan {
167858186Selan unsigned is_active : 1;
167958186Selan unsigned matched_something : 1;
168058186Selan };
168158186Selan
168258186Selan #define IS_ACTIVE(R) ((R).is_active)
168358186Selan #define MATCHED_SOMETHING(R) ((R).matched_something)
168458186Selan
168558186Selan
168658186Selan /* Macros used by re_match_2: */
168758186Selan
168858186Selan
168958186Selan /* I.e., regstart, regend, and reg_info. */
169058186Selan
169158186Selan #define NUM_REG_ITEMS 3
169258186Selan
169358186Selan /* We push at most this many things on the stack whenever we
169458186Selan fail. The `+ 2' refers to PATTERN_PLACE and STRING_PLACE, which are
169558186Selan arguments to the PUSH_FAILURE_POINT macro. */
169658186Selan
169758186Selan #define MAX_NUM_FAILURE_ITEMS (RE_NREGS * NUM_REG_ITEMS + 2)
169858186Selan
169958186Selan
170058186Selan /* We push this many things on the stack whenever we fail. */
170158186Selan
170258186Selan #define NUM_FAILURE_ITEMS (last_used_reg * NUM_REG_ITEMS + 2)
170358186Selan
170458186Selan
170558186Selan /* This pushes most of the information about the current state we will want
170658186Selan if we ever fail back to it. */
170758186Selan
170858186Selan #define PUSH_FAILURE_POINT(pattern_place, string_place) \
170958186Selan { \
171058186Selan short last_used_reg, this_reg; \
171158186Selan \
171258186Selan /* Find out how many registers are active or have been matched. \
171358186Selan (Aside from register zero, which is only set at the end.) */ \
171458186Selan for (last_used_reg = RE_NREGS - 1; last_used_reg > 0; last_used_reg--)\
171558186Selan if (regstart[last_used_reg] != (unsigned char *) -1) \
171658186Selan break; \
171758186Selan \
171858186Selan if (stacke - stackp < NUM_FAILURE_ITEMS) \
171958186Selan { \
172058186Selan unsigned char **stackx; \
172158186Selan int len = stacke - stackb; \
172258186Selan if (len > re_max_failures * MAX_NUM_FAILURE_ITEMS) \
172358186Selan return -2; \
172458186Selan \
172558186Selan /* Roughly double the size of the stack. */ \
172658186Selan stackx = (unsigned char **) alloca (2 * len \
172758186Selan * sizeof (unsigned char *));\
172858186Selan /* Only copy what is in use. */ \
172958186Selan memcpy (stackx, stackb, len * sizeof (char *)); \
173058186Selan stackp = stackx + (stackp - stackb); \
173158186Selan stackb = stackx; \
173258186Selan stacke = stackb + 2 * len; \
173358186Selan } \
173458186Selan \
173558186Selan /* Now push the info for each of those registers. */ \
173658186Selan for (this_reg = 1; this_reg <= last_used_reg; this_reg++) \
173758186Selan { \
173858186Selan *stackp++ = regstart[this_reg]; \
173958186Selan *stackp++ = regend[this_reg]; \
174058186Selan *stackp++ = (unsigned char *) ®_info[this_reg]; \
174158186Selan } \
174258186Selan \
174358186Selan /* Push how many registers we saved. */ \
174458186Selan *stackp++ = (unsigned char *) last_used_reg; \
174558186Selan \
174658186Selan *stackp++ = pattern_place; \
174758186Selan *stackp++ = string_place; \
174858186Selan }
174958186Selan
175058186Selan
175158186Selan /* This pops what PUSH_FAILURE_POINT pushes. */
175258186Selan
175358186Selan #define POP_FAILURE_POINT() \
175458186Selan { \
175558186Selan int temp; \
175658186Selan stackp -= 2; /* Remove failure points. */ \
175758186Selan temp = (int) *--stackp; /* How many regs pushed. */ \
175858186Selan temp *= NUM_REG_ITEMS; /* How much to take off the stack. */ \
175958186Selan stackp -= temp; /* Remove the register info. */ \
176058186Selan }
176158186Selan
176258186Selan
176358186Selan #define MATCHING_IN_FIRST_STRING (dend == end_match_1)
176458186Selan
176558186Selan /* Is true if there is a first string and if PTR is pointing anywhere
176658186Selan inside it or just past the end. */
176758186Selan
176858186Selan #define IS_IN_FIRST_STRING(ptr) \
176958186Selan (size1 && string1 <= (ptr) && (ptr) <= string1 + size1)
177058186Selan
177158186Selan /* Call before fetching a character with *d. This switches over to
177258186Selan string2 if necessary. */
177358186Selan
177458186Selan #define PREFETCH \
177558186Selan while (d == dend) \
177658186Selan { \
177758186Selan /* end of string2 => fail. */ \
177858186Selan if (dend == end_match_2) \
177958186Selan goto fail; \
178058186Selan /* end of string1 => advance to string2. */ \
178158186Selan d = string2; \
178258186Selan dend = end_match_2; \
178358186Selan }
178458186Selan
178558186Selan
178658186Selan /* Call this when have matched something; it sets `matched' flags for the
178758186Selan registers corresponding to the subexpressions of which we currently
178858186Selan are inside. */
178958186Selan #define SET_REGS_MATCHED \
179058186Selan { unsigned this_reg; \
179158186Selan for (this_reg = 0; this_reg < RE_NREGS; this_reg++) \
179258186Selan { \
179358186Selan if (IS_ACTIVE(reg_info[this_reg])) \
179458186Selan MATCHED_SOMETHING(reg_info[this_reg]) = 1; \
179558186Selan else \
179658186Selan MATCHED_SOMETHING(reg_info[this_reg]) = 0; \
179758186Selan } \
179858186Selan }
179958186Selan
180058186Selan /* Test if at very beginning or at very end of the virtual concatenation
180158186Selan of string1 and string2. If there is only one string, we've put it in
180258186Selan string2. */
180358186Selan
180458186Selan #define AT_STRINGS_BEG (d == (size1 ? string1 : string2) || !size2)
180558186Selan #define AT_STRINGS_END (d == end2)
180658186Selan
180758186Selan #define AT_WORD_BOUNDARY \
180858186Selan (AT_STRINGS_BEG || AT_STRINGS_END || IS_A_LETTER (d - 1) != IS_A_LETTER (d))
180958186Selan
181058186Selan /* We have two special cases to check for:
181158186Selan 1) if we're past the end of string1, we have to look at the first
181258186Selan character in string2;
181358186Selan 2) if we're before the beginning of string2, we have to look at the
181458186Selan last character in string1; we assume there is a string1, so use
181558186Selan this in conjunction with AT_STRINGS_BEG. */
181658186Selan #define IS_A_LETTER(d) \
181758186Selan (SYNTAX ((d) == end1 ? *string2 : (d) == string2 - 1 ? *(end1 - 1) : *(d))\
181858186Selan == Sword)
181958186Selan
182058186Selan
182158186Selan /* Match the pattern described by PBUFP against the virtual
182258186Selan concatenation of STRING1 and STRING2, which are of SIZE1 and SIZE2,
182358186Selan respectively. Start the match at index POS in the virtual
182458186Selan concatenation of STRING1 and STRING2. In REGS, return the indices of
182558186Selan the virtual concatenation of STRING1 and STRING2 that matched the
182658186Selan entire PBUFP->buffer and its contained subexpressions. Do not
182758186Selan consider matching one past the index MSTOP in the virtual
182858186Selan concatenation of STRING1 and STRING2.
182958186Selan
183058186Selan If pbufp->fastmap is nonzero, then it had better be up to date.
183158186Selan
183258186Selan The reason that the data to match are specified as two components
183358186Selan which are to be regarded as concatenated is so this function can be
183458186Selan used directly on the contents of an Emacs buffer.
183558186Selan
183658186Selan -1 is returned if there is no match. -2 is returned if there is an
183758186Selan error (such as match stack overflow). Otherwise the value is the
183858186Selan length of the substring which was matched. */
183958186Selan
184058186Selan int
re_match_2(struct re_pattern_buffer * pbufp,char * string1_arg,int size1,char * string2_arg,int size2,int pos,struct re_registers * regs,int mstop)184158186Selan re_match_2 (struct re_pattern_buffer *pbufp,
184258186Selan char *string1_arg, int size1,
184358186Selan char *string2_arg, int size2,
184458186Selan int pos,
184558186Selan struct re_registers *regs,
184658186Selan int mstop)
184758186Selan {
184858186Selan register unsigned char *p = (unsigned char *) pbufp->buffer;
184958186Selan
185058186Selan /* Pointer to beyond end of buffer. */
185158186Selan register unsigned char *pend = p + pbufp->used;
185258186Selan
185358186Selan unsigned char *string1 = (unsigned char *) string1_arg;
185458186Selan unsigned char *string2 = (unsigned char *) string2_arg;
185558186Selan unsigned char *end1; /* Just past end of first string. */
185658186Selan unsigned char *end2; /* Just past end of second string. */
185758186Selan
185858186Selan /* Pointers into string1 and string2, just past the last characters in
185958186Selan each to consider matching. */
186058186Selan unsigned char *end_match_1, *end_match_2;
186158186Selan
186258186Selan register unsigned char *d, *dend;
186358186Selan register int mcnt; /* Multipurpose. */
186458186Selan unsigned char *translate = (unsigned char *) pbufp->translate;
186558186Selan unsigned is_a_jump_n = 0;
186658186Selan
186758186Selan /* Failure point stack. Each place that can handle a failure further
186858186Selan down the line pushes a failure point on this stack. It consists of
186958186Selan restart, regend, and reg_info for all registers corresponding to the
187058186Selan subexpressions we're currently inside, plus the number of such
187158186Selan registers, and, finally, two char *'s. The first char * is where to
187258186Selan resume scanning the pattern; the second one is where to resume
187358186Selan scanning the strings. If the latter is zero, the failure point is a
187458186Selan ``dummy''; if a failure happens and the failure point is a dummy, it
187558186Selan gets discarded and the next next one is tried. */
187658186Selan
187758186Selan unsigned char *initial_stack[MAX_NUM_FAILURE_ITEMS * NFAILURES];
187858186Selan unsigned char **stackb = initial_stack;
187958186Selan unsigned char **stackp = stackb;
188058186Selan unsigned char **stacke = &stackb[MAX_NUM_FAILURE_ITEMS * NFAILURES];
188158186Selan
188258186Selan
188358186Selan /* Information on the contents of registers. These are pointers into
188458186Selan the input strings; they record just what was matched (on this
188558186Selan attempt) by a subexpression part of the pattern, that is, the
188658186Selan regnum-th regstart pointer points to where in the pattern we began
188758186Selan matching and the regnum-th regend points to right after where we
188858186Selan stopped matching the regnum-th subexpression. (The zeroth register
188958186Selan keeps track of what the whole pattern matches.) */
189058186Selan
189158186Selan unsigned char *regstart[RE_NREGS];
189258186Selan unsigned char *regend[RE_NREGS];
189358186Selan
189458186Selan /* The is_active field of reg_info helps us keep track of which (possibly
189558186Selan nested) subexpressions we are currently in. The matched_something
189658186Selan field of reg_info[reg_num] helps us tell whether or not we have
189758186Selan matched any of the pattern so far this time through the reg_num-th
189858186Selan subexpression. These two fields get reset each time through any
189958186Selan loop their register is in. */
190058186Selan
190158186Selan struct register_info reg_info[RE_NREGS];
190258186Selan
190358186Selan
190458186Selan /* The following record the register info as found in the above
190558186Selan variables when we find a match better than any we've seen before.
190658186Selan This happens as we backtrack through the failure points, which in
190758186Selan turn happens only if we have not yet matched the entire string. */
190858186Selan
190958186Selan unsigned best_regs_set = 0;
191058186Selan unsigned char *best_regstart[RE_NREGS];
191158186Selan unsigned char *best_regend[RE_NREGS];
191258186Selan
191358186Selan /* Initialize subexpression text positions to -1 to mark ones that no
191458186Selan \( or ( and \) or ) has been seen for. Also set all registers to
191558186Selan inactive and mark them as not having matched anything or ever
191658186Selan failed. */
191758186Selan for (mcnt = 0; mcnt < RE_NREGS; mcnt++)
191858186Selan {
191958186Selan regstart[mcnt] = regend[mcnt] = (unsigned char *) -1;
192058186Selan IS_ACTIVE (reg_info[mcnt]) = 0;
192158186Selan MATCHED_SOMETHING (reg_info[mcnt]) = 0;
192258186Selan }
192358186Selan
192458186Selan if (regs)
192558186Selan for (mcnt = 0; mcnt < RE_NREGS; mcnt++)
192658186Selan regs->start[mcnt] = regs->end[mcnt] = -1;
192758186Selan
192858186Selan /* Set up pointers to ends of strings.
192958186Selan Don't allow the second string to be empty unless both are empty. */
193058186Selan if (size2 == 0)
193158186Selan {
193258186Selan string2 = string1;
193358186Selan size2 = size1;
193458186Selan string1 = 0;
193558186Selan size1 = 0;
193658186Selan }
193758186Selan end1 = string1 + size1;
193858186Selan end2 = string2 + size2;
193958186Selan
194058186Selan /* Compute where to stop matching, within the two strings. */
194158186Selan if (mstop <= size1)
194258186Selan {
194358186Selan end_match_1 = string1 + mstop;
194458186Selan end_match_2 = string2;
194558186Selan }
194658186Selan else
194758186Selan {
194858186Selan end_match_1 = end1;
194958186Selan end_match_2 = string2 + mstop - size1;
195058186Selan }
195158186Selan
195258186Selan /* `p' scans through the pattern as `d' scans through the data. `dend'
195358186Selan is the end of the input string that `d' points within. `d' is
195458186Selan advanced into the following input string whenever necessary, but
195558186Selan this happens before fetching; therefore, at the beginning of the
195658186Selan loop, `d' can be pointing at the end of a string, but it cannot
195758186Selan equal string2. */
195858186Selan
195958186Selan if (size1 != 0 && pos <= size1)
196058186Selan d = string1 + pos, dend = end_match_1;
196158186Selan else
196258186Selan d = string2 + pos - size1, dend = end_match_2;
196358186Selan
196458186Selan
196558186Selan /* This loops over pattern commands. It exits by returning from the
196658186Selan function if match is complete, or it drops through if match fails
196758186Selan at this starting point in the input data. */
196858186Selan
196958186Selan while (1)
197058186Selan {
197158186Selan is_a_jump_n = 0;
197258186Selan /* End of pattern means we might have succeeded. */
197358186Selan if (p == pend)
197458186Selan {
197558186Selan /* If not end of string, try backtracking. Otherwise done. */
197658186Selan if (d != end_match_2)
197758186Selan {
197858186Selan if (stackp != stackb)
197958186Selan {
198058186Selan /* More failure points to try. */
198158186Selan
198258186Selan unsigned in_same_string =
198358186Selan IS_IN_FIRST_STRING (best_regend[0])
198458186Selan == MATCHING_IN_FIRST_STRING;
198558186Selan
198658186Selan /* If exceeds best match so far, save it. */
198758186Selan if (! best_regs_set
198858186Selan || (in_same_string && d > best_regend[0])
198958186Selan || (! in_same_string && ! MATCHING_IN_FIRST_STRING))
199058186Selan {
199158186Selan best_regs_set = 1;
199258186Selan best_regend[0] = d; /* Never use regstart[0]. */
199358186Selan
199458186Selan for (mcnt = 1; mcnt < RE_NREGS; mcnt++)
199558186Selan {
199658186Selan best_regstart[mcnt] = regstart[mcnt];
199758186Selan best_regend[mcnt] = regend[mcnt];
199858186Selan }
199958186Selan }
200058186Selan goto fail;
200158186Selan }
200258186Selan /* If no failure points, don't restore garbage. */
200358186Selan else if (best_regs_set)
200458186Selan {
200558186Selan restore_best_regs:
200658186Selan /* Restore best match. */
200758186Selan d = best_regend[0];
200858186Selan
200958186Selan for (mcnt = 0; mcnt < RE_NREGS; mcnt++)
201058186Selan {
201158186Selan regstart[mcnt] = best_regstart[mcnt];
201258186Selan regend[mcnt] = best_regend[mcnt];
201358186Selan }
201458186Selan }
201558186Selan }
201658186Selan
201758186Selan /* If caller wants register contents data back, convert it
201858186Selan to indices. */
201958186Selan if (regs)
202058186Selan {
202158186Selan regs->start[0] = pos;
202258186Selan if (MATCHING_IN_FIRST_STRING)
202358186Selan regs->end[0] = d - string1;
202458186Selan else
202558186Selan regs->end[0] = d - string2 + size1;
202658186Selan for (mcnt = 1; mcnt < RE_NREGS; mcnt++)
202758186Selan {
202858186Selan if (regend[mcnt] == (unsigned char *) -1)
202958186Selan {
203058186Selan regs->start[mcnt] = -1;
203158186Selan regs->end[mcnt] = -1;
203258186Selan continue;
203358186Selan }
203458186Selan if (IS_IN_FIRST_STRING (regstart[mcnt]))
203558186Selan regs->start[mcnt] = regstart[mcnt] - string1;
203658186Selan else
203758186Selan regs->start[mcnt] = regstart[mcnt] - string2 + size1;
203858186Selan
203958186Selan if (IS_IN_FIRST_STRING (regend[mcnt]))
204058186Selan regs->end[mcnt] = regend[mcnt] - string1;
204158186Selan else
204258186Selan regs->end[mcnt] = regend[mcnt] - string2 + size1;
204358186Selan }
204458186Selan }
204558186Selan return d - pos - (MATCHING_IN_FIRST_STRING
204658186Selan ? string1
204758186Selan : string2 - size1);
204858186Selan }
204958186Selan
205058186Selan /* Otherwise match next pattern command. */
205158186Selan #ifdef SWITCH_ENUM_BUG
205258186Selan switch ((int) ((enum regexpcode) *p++))
205358186Selan #else
205458186Selan switch ((enum regexpcode) *p++)
205558186Selan #endif
205658186Selan {
205758186Selan
205858186Selan /* \( [or `(', as appropriate] is represented by start_memory,
205958186Selan \) by stop_memory. Both of those commands are followed by
206058186Selan a register number in the next byte. The text matched
206158186Selan within the \( and \) is recorded under that number. */
206258186Selan case start_memory:
206358186Selan regstart[*p] = d;
206458186Selan IS_ACTIVE (reg_info[*p]) = 1;
206558186Selan MATCHED_SOMETHING (reg_info[*p]) = 0;
206658186Selan p++;
206758186Selan break;
206858186Selan
206958186Selan case stop_memory:
207058186Selan regend[*p] = d;
207158186Selan IS_ACTIVE (reg_info[*p]) = 0;
207258186Selan
207358186Selan /* If just failed to match something this time around with a sub-
207458186Selan expression that's in a loop, try to force exit from the loop. */
207558186Selan if ((! MATCHED_SOMETHING (reg_info[*p])
207658186Selan || (enum regexpcode) p[-3] == start_memory)
207758186Selan && (p + 1) != pend)
207858186Selan {
207958186Selan register unsigned char *p2 = p + 1;
208058186Selan mcnt = 0;
208158186Selan switch (*p2++)
208258186Selan {
208358186Selan case jump_n:
208458186Selan is_a_jump_n = 1;
208558186Selan case finalize_jump:
208658186Selan case maybe_finalize_jump:
208758186Selan case jump:
208858186Selan case dummy_failure_jump:
208958186Selan EXTRACT_NUMBER_AND_INCR (mcnt, p2);
209058186Selan if (is_a_jump_n)
209158186Selan p2 += 2;
209258186Selan break;
209358186Selan }
209458186Selan p2 += mcnt;
209558186Selan
209658186Selan /* If the next operation is a jump backwards in the pattern
209758186Selan to an on_failure_jump, exit from the loop by forcing a
209858186Selan failure after pushing on the stack the on_failure_jump's
209958186Selan jump in the pattern, and d. */
210058186Selan if (mcnt < 0 && (enum regexpcode) *p2++ == on_failure_jump)
210158186Selan {
210258186Selan EXTRACT_NUMBER_AND_INCR (mcnt, p2);
210358186Selan PUSH_FAILURE_POINT (p2 + mcnt, d);
210458186Selan goto fail;
210558186Selan }
210658186Selan }
210758186Selan p++;
210858186Selan break;
210958186Selan
211058186Selan /* \<digit> has been turned into a `duplicate' command which is
211158186Selan followed by the numeric value of <digit> as the register number. */
211258186Selan case duplicate:
211358186Selan {
211458186Selan int regno = *p++; /* Get which register to match against */
211558186Selan register unsigned char *d2, *dend2;
211658186Selan
211758186Selan /* Where in input to try to start matching. */
211858186Selan d2 = regstart[regno];
211958186Selan
212058186Selan /* Where to stop matching; if both the place to start and
212158186Selan the place to stop matching are in the same string, then
212258186Selan set to the place to stop, otherwise, for now have to use
212358186Selan the end of the first string. */
212458186Selan
212558186Selan dend2 = ((IS_IN_FIRST_STRING (regstart[regno])
212658186Selan == IS_IN_FIRST_STRING (regend[regno]))
212758186Selan ? regend[regno] : end_match_1);
212858186Selan while (1)
212958186Selan {
213058186Selan /* If necessary, advance to next segment in register
213158186Selan contents. */
213258186Selan while (d2 == dend2)
213358186Selan {
213458186Selan if (dend2 == end_match_2) break;
213558186Selan if (dend2 == regend[regno]) break;
213658186Selan d2 = string2, dend2 = regend[regno]; /* end of string1 => advance to string2. */
213758186Selan }
213858186Selan /* At end of register contents => success */
213958186Selan if (d2 == dend2) break;
214058186Selan
214158186Selan /* If necessary, advance to next segment in data. */
214258186Selan PREFETCH;
214358186Selan
214458186Selan /* How many characters left in this segment to match. */
214558186Selan mcnt = dend - d;
214658186Selan
214758186Selan /* Want how many consecutive characters we can match in
214858186Selan one shot, so, if necessary, adjust the count. */
214958186Selan if (mcnt > dend2 - d2)
215058186Selan mcnt = dend2 - d2;
215158186Selan
215258186Selan /* Compare that many; failure if mismatch, else move
215358186Selan past them. */
215458186Selan if (translate
215558186Selan ? bcmp_translate ((char*)d, (char*)d2, mcnt, translate)
215658186Selan : memcmp (d, d2, mcnt))
215758186Selan goto fail;
215858186Selan d += mcnt, d2 += mcnt;
215958186Selan }
216058186Selan }
216158186Selan break;
216258186Selan
216358186Selan case anychar:
216458186Selan PREFETCH; /* Fetch a data character. */
216558186Selan /* Match anything but a newline, maybe even a null. */
216658186Selan if ((translate ? translate[*d] : *d) == '\n'
216758186Selan || ((obscure_syntax & RE_DOT_NOT_NULL)
216858186Selan && (translate ? translate[*d] : *d) == '\000'))
216958186Selan goto fail;
217058186Selan SET_REGS_MATCHED;
217158186Selan d++;
217258186Selan break;
217358186Selan
217458186Selan case charset:
217558186Selan case charset_not:
217658186Selan {
217758186Selan int not = 0; /* Nonzero for charset_not. */
217858186Selan register int c;
217958186Selan if (*(p - 1) == (unsigned char) charset_not)
218058186Selan not = 1;
218158186Selan
218258186Selan PREFETCH; /* Fetch a data character. */
218358186Selan
218458186Selan if (translate)
218558186Selan c = translate[*d];
218658186Selan else
218758186Selan c = *d;
218858186Selan
218958186Selan if (c < *p * BYTEWIDTH
219058186Selan && p[1 + c / BYTEWIDTH] & (1 << (c % BYTEWIDTH)))
219158186Selan not = !not;
219258186Selan
219358186Selan p += 1 + *p;
219458186Selan
219558186Selan if (!not) goto fail;
219658186Selan SET_REGS_MATCHED;
219758186Selan d++;
219858186Selan break;
219958186Selan }
220058186Selan
220158186Selan case begline:
220258186Selan if ((size1 != 0 && d == string1)
220358186Selan || (size1 == 0 && size2 != 0 && d == string2)
220458186Selan || (d && d[-1] == '\n')
220558186Selan || (size1 == 0 && size2 == 0))
220658186Selan break;
220758186Selan else
220858186Selan goto fail;
220958186Selan
221058186Selan case endline:
221158186Selan if (d == end2
221258186Selan || (d == end1 ? (size2 == 0 || *string2 == '\n') : *d == '\n'))
221358186Selan break;
221458186Selan goto fail;
221558186Selan
221658186Selan /* `or' constructs are handled by starting each alternative with
221758186Selan an on_failure_jump that points to the start of the next
221858186Selan alternative. Each alternative except the last ends with a
221958186Selan jump to the joining point. (Actually, each jump except for
222058186Selan the last one really jumps to the following jump, because
222158186Selan tensioning the jumps is a hassle.) */
222258186Selan
222358186Selan /* The start of a stupid repeat has an on_failure_jump that points
222458186Selan past the end of the repeat text. This makes a failure point so
222558186Selan that on failure to match a repetition, matching restarts past
222658186Selan as many repetitions have been found with no way to fail and
222758186Selan look for another one. */
222858186Selan
222958186Selan /* A smart repeat is similar but loops back to the on_failure_jump
223058186Selan so that each repetition makes another failure point. */
223158186Selan
223258186Selan case on_failure_jump:
223358186Selan on_failure:
223458186Selan EXTRACT_NUMBER_AND_INCR (mcnt, p);
223558186Selan PUSH_FAILURE_POINT (p + mcnt, d);
223658186Selan break;
223758186Selan
223858186Selan /* The end of a smart repeat has a maybe_finalize_jump back.
223958186Selan Change it either to a finalize_jump or an ordinary jump. */
224058186Selan case maybe_finalize_jump:
224158186Selan EXTRACT_NUMBER_AND_INCR (mcnt, p);
224258186Selan {
224358186Selan register unsigned char *p2 = p;
224458186Selan /* Compare what follows with the beginning of the repeat.
224558186Selan If we can establish that there is nothing that they would
224658186Selan both match, we can change to finalize_jump. */
224758186Selan while (p2 + 1 != pend
224858186Selan && (*p2 == (unsigned char) stop_memory
224958186Selan || *p2 == (unsigned char) start_memory))
225058186Selan p2 += 2; /* Skip over reg number. */
225158186Selan if (p2 == pend)
225258186Selan p[-3] = (unsigned char) finalize_jump;
225358186Selan else if (*p2 == (unsigned char) exactn
225458186Selan || *p2 == (unsigned char) endline)
225558186Selan {
225658186Selan register int c = *p2 == (unsigned char) endline ? '\n' : p2[2];
225758186Selan register unsigned char *p1 = p + mcnt;
225858186Selan /* p1[0] ... p1[2] are an on_failure_jump.
225958186Selan Examine what follows that. */
226058186Selan if (p1[3] == (unsigned char) exactn && p1[5] != c)
226158186Selan p[-3] = (unsigned char) finalize_jump;
226258186Selan else if (p1[3] == (unsigned char) charset
226358186Selan || p1[3] == (unsigned char) charset_not)
226458186Selan {
226558186Selan int not = p1[3] == (unsigned char) charset_not;
226658186Selan if (c < p1[4] * BYTEWIDTH
226758186Selan && p1[5 + c / BYTEWIDTH] & (1 << (c % BYTEWIDTH)))
226858186Selan not = !not;
226958186Selan /* `not' is 1 if c would match. */
227058186Selan /* That means it is not safe to finalize. */
227158186Selan if (!not)
227258186Selan p[-3] = (unsigned char) finalize_jump;
227358186Selan }
227458186Selan }
227558186Selan }
227658186Selan p -= 2; /* Point at relative address again. */
227758186Selan if (p[-1] != (unsigned char) finalize_jump)
227858186Selan {
227958186Selan p[-1] = (unsigned char) jump;
228058186Selan goto nofinalize;
228158186Selan }
228258186Selan /* Note fall through. */
228358186Selan
228458186Selan /* The end of a stupid repeat has a finalize_jump back to the
228558186Selan start, where another failure point will be made which will
228658186Selan point to after all the repetitions found so far. */
228758186Selan
228858186Selan /* Take off failure points put on by matching on_failure_jump
228958186Selan because didn't fail. Also remove the register information
229058186Selan put on by the on_failure_jump. */
229158186Selan case finalize_jump:
229258186Selan POP_FAILURE_POINT ();
229358186Selan /* Note fall through. */
229458186Selan
229558186Selan /* Jump without taking off any failure points. */
229658186Selan case jump:
229758186Selan nofinalize:
229858186Selan EXTRACT_NUMBER_AND_INCR (mcnt, p);
229958186Selan p += mcnt;
230058186Selan break;
230158186Selan
230258186Selan case dummy_failure_jump:
230358186Selan /* Normally, the on_failure_jump pushes a failure point, which
230458186Selan then gets popped at finalize_jump. We will end up at
230558186Selan finalize_jump, also, and with a pattern of, say, `a+', we
230658186Selan are skipping over the on_failure_jump, so we have to push
230758186Selan something meaningless for finalize_jump to pop. */
230858186Selan PUSH_FAILURE_POINT (0, 0);
230958186Selan goto nofinalize;
231058186Selan
231158186Selan
231258186Selan /* Have to succeed matching what follows at least n times. Then
231358186Selan just handle like an on_failure_jump. */
231458186Selan case succeed_n:
231558186Selan EXTRACT_NUMBER (mcnt, p + 2);
231658186Selan /* Originally, this is how many times we HAVE to succeed. */
231758186Selan if (mcnt)
231858186Selan {
231958186Selan mcnt--;
232058186Selan p += 2;
232158186Selan STORE_NUMBER_AND_INCR (p, mcnt);
232258186Selan }
232358186Selan else if (mcnt == 0)
232458186Selan {
232558186Selan p[2] = unused;
232658186Selan p[3] = unused;
232758186Selan goto on_failure;
232858186Selan }
232958186Selan else
233058186Selan {
233158186Selan fprintf (stderr, "regex: the succeed_n's n is not set.\n");
233258186Selan exit (1);
233358186Selan }
233458186Selan break;
233558186Selan
233658186Selan case jump_n:
233758186Selan EXTRACT_NUMBER (mcnt, p + 2);
233858186Selan /* Originally, this is how many times we CAN jump. */
233958186Selan if (mcnt)
234058186Selan {
234158186Selan mcnt--;
234258186Selan STORE_NUMBER(p + 2, mcnt);
234358186Selan goto nofinalize; /* Do the jump without taking off
234458186Selan any failure points. */
234558186Selan }
234658186Selan /* If don't have to jump any more, skip over the rest of command. */
234758186Selan else
234858186Selan p += 4;
234958186Selan break;
235058186Selan
235158186Selan case set_number_at:
235258186Selan {
235358186Selan register unsigned char *p1;
235458186Selan
235558186Selan EXTRACT_NUMBER_AND_INCR (mcnt, p);
235658186Selan p1 = p + mcnt;
235758186Selan EXTRACT_NUMBER_AND_INCR (mcnt, p);
235858186Selan STORE_NUMBER (p1, mcnt);
235958186Selan break;
236058186Selan }
236158186Selan
236258186Selan /* Ignore these. Used to ignore the n of succeed_n's which
236358186Selan currently have n == 0. */
236458186Selan case unused:
236558186Selan break;
236658186Selan
236758186Selan case wordbound:
236858186Selan if (AT_WORD_BOUNDARY)
236958186Selan break;
237058186Selan goto fail;
237158186Selan
237258186Selan case notwordbound:
237358186Selan if (AT_WORD_BOUNDARY)
237458186Selan goto fail;
237558186Selan break;
237658186Selan
237758186Selan case wordbeg:
237858186Selan /* Have to check if AT_STRINGS_BEG before looking at d - 1. */
237958186Selan if (IS_A_LETTER (d) && (AT_STRINGS_BEG || !IS_A_LETTER (d - 1)))
238058186Selan break;
238158186Selan goto fail;
238258186Selan
238358186Selan case wordend:
238458186Selan /* Have to check if AT_STRINGS_BEG before looking at d - 1. */
238558186Selan if (!AT_STRINGS_BEG && IS_A_LETTER (d - 1)
238658186Selan && (!IS_A_LETTER (d) || AT_STRINGS_END))
238758186Selan break;
238858186Selan goto fail;
238958186Selan
239058186Selan #ifdef emacs
239158186Selan case before_dot:
239258186Selan if (PTR_CHAR_POS (d) >= point)
239358186Selan goto fail;
239458186Selan break;
239558186Selan
239658186Selan case at_dot:
239758186Selan if (PTR_CHAR_POS (d) != point)
239858186Selan goto fail;
239958186Selan break;
240058186Selan
240158186Selan case after_dot:
240258186Selan if (PTR_CHAR_POS (d) <= point)
240358186Selan goto fail;
240458186Selan break;
240558186Selan
240658186Selan case wordchar:
240758186Selan mcnt = (int) Sword;
240858186Selan goto matchsyntax;
240958186Selan
241058186Selan case syntaxspec:
241158186Selan mcnt = *p++;
241258186Selan matchsyntax:
241358186Selan PREFETCH;
241458186Selan if (SYNTAX (*d++) != (enum syntaxcode) mcnt) goto fail;
241558186Selan SET_REGS_MATCHED;
241658186Selan break;
241758186Selan
241858186Selan case notwordchar:
241958186Selan mcnt = (int) Sword;
242058186Selan goto matchnotsyntax;
242158186Selan
242258186Selan case notsyntaxspec:
242358186Selan mcnt = *p++;
242458186Selan matchnotsyntax:
242558186Selan PREFETCH;
242658186Selan if (SYNTAX (*d++) == (enum syntaxcode) mcnt) goto fail;
242758186Selan SET_REGS_MATCHED;
242858186Selan break;
242958186Selan
243058186Selan #else /* not emacs */
243158186Selan
243258186Selan case wordchar:
243358186Selan PREFETCH;
243458186Selan if (!IS_A_LETTER (d))
243558186Selan goto fail;
243658186Selan SET_REGS_MATCHED;
243758186Selan break;
243858186Selan
243958186Selan case notwordchar:
244058186Selan PREFETCH;
244158186Selan if (IS_A_LETTER (d))
244258186Selan goto fail;
244358186Selan SET_REGS_MATCHED;
244458186Selan break;
244558186Selan
244658186Selan #endif /* not emacs */
244758186Selan
244858186Selan case begbuf:
244958186Selan if (AT_STRINGS_BEG)
245058186Selan break;
245158186Selan goto fail;
245258186Selan
245358186Selan case endbuf:
245458186Selan if (AT_STRINGS_END)
245558186Selan break;
245658186Selan goto fail;
245758186Selan
245858186Selan case exactn:
245958186Selan /* Match the next few pattern characters exactly.
246058186Selan mcnt is how many characters to match. */
246158186Selan mcnt = *p++;
246258186Selan /* This is written out as an if-else so we don't waste time
246358186Selan testing `translate' inside the loop. */
246458186Selan if (translate)
246558186Selan {
246658186Selan do
246758186Selan {
246858186Selan PREFETCH;
246958186Selan if (translate[*d++] != *p++) goto fail;
247058186Selan }
247158186Selan while (--mcnt);
247258186Selan }
247358186Selan else
247458186Selan {
247558186Selan do
247658186Selan {
247758186Selan PREFETCH;
247858186Selan if (*d++ != *p++) goto fail;
247958186Selan }
248058186Selan while (--mcnt);
248158186Selan }
248258186Selan SET_REGS_MATCHED;
248358186Selan break;
248458186Selan }
248558186Selan continue; /* Successfully executed one pattern command; keep going. */
248658186Selan
248758186Selan /* Jump here if any matching operation fails. */
248858186Selan fail:
248958186Selan if (stackp != stackb)
249058186Selan /* A restart point is known. Restart there and pop it. */
249158186Selan {
249258186Selan short last_used_reg, this_reg;
249358186Selan
249458186Selan /* If this failure point is from a dummy_failure_point, just
249558186Selan skip it. */
249658186Selan if (!stackp[-2])
249758186Selan {
249858186Selan POP_FAILURE_POINT ();
249958186Selan goto fail;
250058186Selan }
250158186Selan
250258186Selan d = *--stackp;
250358186Selan p = *--stackp;
250458186Selan if (d >= string1 && d <= end1)
250558186Selan dend = end_match_1;
250658186Selan /* Restore register info. */
250758186Selan last_used_reg = (short) *--stackp;
250858186Selan
250958186Selan /* Make the ones that weren't saved -1 or 0 again. */
251058186Selan for (this_reg = RE_NREGS - 1; this_reg > last_used_reg; this_reg--)
251158186Selan {
251258186Selan regend[this_reg] = (unsigned char *) -1;
251358186Selan regstart[this_reg] = (unsigned char *) -1;
251458186Selan IS_ACTIVE (reg_info[this_reg]) = 0;
251558186Selan MATCHED_SOMETHING (reg_info[this_reg]) = 0;
251658186Selan }
251758186Selan
251858186Selan /* And restore the rest from the stack. */
251958186Selan for ( ; this_reg > 0; this_reg--)
252058186Selan {
252158186Selan reg_info[this_reg] = *(struct register_info *) *--stackp;
252258186Selan regend[this_reg] = *--stackp;
252358186Selan regstart[this_reg] = *--stackp;
252458186Selan }
252558186Selan }
252658186Selan else
252758186Selan break; /* Matching at this starting point really fails. */
252858186Selan }
252958186Selan
253058186Selan if (best_regs_set)
253158186Selan goto restore_best_regs;
253258186Selan return -1; /* Failure to match. */
253358186Selan }
253458186Selan
253558186Selan
253658186Selan static int
bcmp_translate(char * s1,char * s2,int len,unsigned char * translate)253758186Selan bcmp_translate (char *s1, char *s2, int len, unsigned char *translate)
253858186Selan {
253958186Selan register unsigned char *p1 = (unsigned char*)s1;
254058186Selan register unsigned char *p2 = (unsigned char*)s2;
254158186Selan while (len)
254258186Selan {
254358186Selan if (translate [*p1++] != translate [*p2++]) return 1;
254458186Selan len--;
254558186Selan }
254658186Selan return 0;
254758186Selan }
254858186Selan
254958186Selan
255058186Selan
255158186Selan /* Entry points compatible with 4.2 BSD regex library. */
255258186Selan
255358186Selan #ifndef emacs
255458186Selan
255558186Selan static struct re_pattern_buffer re_comp_buf;
255658186Selan
255758186Selan char *
re_comp(const char * s)2558*60385Selan re_comp (const char *s)
255958186Selan {
256058186Selan if (!s)
256158186Selan {
256258186Selan if (!re_comp_buf.buffer)
256358186Selan return "No previous regular expression";
256458186Selan return 0;
256558186Selan }
256658186Selan
256758186Selan if (!re_comp_buf.buffer)
256858186Selan {
256958186Selan if (!(re_comp_buf.buffer = (char *) malloc (200)))
257058186Selan return "Memory exhausted";
257158186Selan re_comp_buf.allocated = 200;
257258186Selan if (!(re_comp_buf.fastmap = (char *) malloc (1 << BYTEWIDTH)))
257358186Selan return "Memory exhausted";
257458186Selan }
257558186Selan return re_compile_pattern (s, strlen (s), &re_comp_buf);
257658186Selan }
257758186Selan
257858186Selan int
re_exec(const char * s)2579*60385Selan re_exec (const char *s)
258058186Selan {
258158186Selan int len = strlen (s);
258258186Selan return 0 <= re_search (&re_comp_buf, s, len, 0, len,
258358186Selan (struct re_registers *) 0);
258458186Selan }
258558186Selan #endif /* not emacs */
258658186Selan
258758186Selan
258858186Selan
258958186Selan #ifdef test
259058186Selan
259158186Selan #include <stdio.h>
259258186Selan
259358186Selan /* Indexed by a character, gives the upper case equivalent of the
259458186Selan character. */
259558186Selan
259658186Selan char upcase[0400] =
259758186Selan { 000, 001, 002, 003, 004, 005, 006, 007,
259858186Selan 010, 011, 012, 013, 014, 015, 016, 017,
259958186Selan 020, 021, 022, 023, 024, 025, 026, 027,
260058186Selan 030, 031, 032, 033, 034, 035, 036, 037,
260158186Selan 040, 041, 042, 043, 044, 045, 046, 047,
260258186Selan 050, 051, 052, 053, 054, 055, 056, 057,
260358186Selan 060, 061, 062, 063, 064, 065, 066, 067,
260458186Selan 070, 071, 072, 073, 074, 075, 076, 077,
260558186Selan 0100, 0101, 0102, 0103, 0104, 0105, 0106, 0107,
260658186Selan 0110, 0111, 0112, 0113, 0114, 0115, 0116, 0117,
260758186Selan 0120, 0121, 0122, 0123, 0124, 0125, 0126, 0127,
260858186Selan 0130, 0131, 0132, 0133, 0134, 0135, 0136, 0137,
260958186Selan 0140, 0101, 0102, 0103, 0104, 0105, 0106, 0107,
261058186Selan 0110, 0111, 0112, 0113, 0114, 0115, 0116, 0117,
261158186Selan 0120, 0121, 0122, 0123, 0124, 0125, 0126, 0127,
261258186Selan 0130, 0131, 0132, 0173, 0174, 0175, 0176, 0177,
261358186Selan 0200, 0201, 0202, 0203, 0204, 0205, 0206, 0207,
261458186Selan 0210, 0211, 0212, 0213, 0214, 0215, 0216, 0217,
261558186Selan 0220, 0221, 0222, 0223, 0224, 0225, 0226, 0227,
261658186Selan 0230, 0231, 0232, 0233, 0234, 0235, 0236, 0237,
261758186Selan 0240, 0241, 0242, 0243, 0244, 0245, 0246, 0247,
261858186Selan 0250, 0251, 0252, 0253, 0254, 0255, 0256, 0257,
261958186Selan 0260, 0261, 0262, 0263, 0264, 0265, 0266, 0267,
262058186Selan 0270, 0271, 0272, 0273, 0274, 0275, 0276, 0277,
262158186Selan 0300, 0301, 0302, 0303, 0304, 0305, 0306, 0307,
262258186Selan 0310, 0311, 0312, 0313, 0314, 0315, 0316, 0317,
262358186Selan 0320, 0321, 0322, 0323, 0324, 0325, 0326, 0327,
262458186Selan 0330, 0331, 0332, 0333, 0334, 0335, 0336, 0337,
262558186Selan 0340, 0341, 0342, 0343, 0344, 0345, 0346, 0347,
262658186Selan 0350, 0351, 0352, 0353, 0354, 0355, 0356, 0357,
262758186Selan 0360, 0361, 0362, 0363, 0364, 0365, 0366, 0367,
262858186Selan 0370, 0371, 0372, 0373, 0374, 0375, 0376, 0377
262958186Selan };
263058186Selan
263158186Selan #ifdef canned
263258186Selan
263358186Selan #include "tests.h"
263458186Selan
263558186Selan typedef enum { extended_test, basic_test } test_type;
263658186Selan
263758186Selan /* Use this to run the tests we've thought of. */
263858186Selan
263958186Selan void
main()264058186Selan main ()
264158186Selan {
264258186Selan test_type t = extended_test;
264358186Selan
264458186Selan if (t == basic_test)
264558186Selan {
264658186Selan printf ("Running basic tests:\n\n");
264758186Selan test_posix_basic ();
264858186Selan }
264958186Selan else if (t == extended_test)
265058186Selan {
265158186Selan printf ("Running extended tests:\n\n");
265258186Selan test_posix_extended ();
265358186Selan }
265458186Selan }
265558186Selan
265658186Selan #else /* not canned */
265758186Selan
265858186Selan /* Use this to run interactive tests. */
265958186Selan
266058186Selan void
main(int argc,char ** argv)266158186Selan main (int argc, char **argv)
266258186Selan {
266358186Selan char pat[80];
266458186Selan struct re_pattern_buffer buf;
266558186Selan int i;
266658186Selan char c;
266758186Selan char fastmap[(1 << BYTEWIDTH)];
266858186Selan
266958186Selan /* Allow a command argument to specify the style of syntax. */
267058186Selan if (argc > 1)
267158186Selan obscure_syntax = atoi (argv[1]);
267258186Selan
267358186Selan buf.allocated = 40;
267458186Selan buf.buffer = (char *) malloc (buf.allocated);
267558186Selan buf.fastmap = fastmap;
267658186Selan buf.translate = upcase;
267758186Selan
267858186Selan while (1)
267958186Selan {
268058186Selan gets (pat);
268158186Selan
268258186Selan if (*pat)
268358186Selan {
268458186Selan re_compile_pattern (pat, strlen(pat), &buf);
268558186Selan
268658186Selan for (i = 0; i < buf.used; i++)
268758186Selan printchar (buf.buffer[i]);
268858186Selan
268958186Selan putchar ('\n');
269058186Selan
269158186Selan printf ("%d allocated, %d used.\n", buf.allocated, buf.used);
269258186Selan
269358186Selan re_compile_fastmap (&buf);
269458186Selan printf ("Allowed by fastmap: ");
269558186Selan for (i = 0; i < (1 << BYTEWIDTH); i++)
269658186Selan if (fastmap[i]) printchar (i);
269758186Selan putchar ('\n');
269858186Selan }
269958186Selan
270058186Selan gets (pat); /* Now read the string to match against */
270158186Selan
270258186Selan i = re_match (&buf, pat, strlen (pat), 0, 0);
270358186Selan printf ("Match value %d.\n", i);
270458186Selan }
270558186Selan }
270658186Selan
270758186Selan #endif
270858186Selan
270958186Selan
271058186Selan #ifdef NOTDEF
271158186Selan void
print_buf(struct re_pattern_buffer * bufpbufp)271258186Selan print_buf (struct re_pattern_buffer *bufpbufp)
271358186Selan {
271458186Selan int i;
271558186Selan
271658186Selan printf ("buf is :\n----------------\n");
271758186Selan for (i = 0; i < bufp->used; i++)
271858186Selan printchar (bufp->buffer[i]);
271958186Selan
272058186Selan printf ("\n%d allocated, %d used.\n", bufp->allocated, bufp->used);
272158186Selan
272258186Selan printf ("Allowed by fastmap: ");
272358186Selan for (i = 0; i < (1 << BYTEWIDTH); i++)
272458186Selan if (bufp->fastmap[i])
272558186Selan printchar (i);
272658186Selan printf ("\nAllowed by translate: ");
272758186Selan if (bufp->translate)
272858186Selan for (i = 0; i < (1 << BYTEWIDTH); i++)
272958186Selan if (bufp->translate[i])
273058186Selan printchar (i);
273158186Selan printf ("\nfastmap is%s accurate\n", bufp->fastmap_accurate ? "" : "n't");
273258186Selan printf ("can %s be null\n----------", bufp->can_be_null ? "" : "not");
273358186Selan }
273458186Selan #endif /* NOTDEF */
273558186Selan
273658186Selan void
printchar(char c)273758186Selan printchar (char c)
273858186Selan {
273958186Selan if (c < 040 || c >= 0177)
274058186Selan {
274158186Selan putchar ('\\');
274258186Selan putchar (((c >> 6) & 3) + '0');
274358186Selan putchar (((c >> 3) & 7) + '0');
274458186Selan putchar ((c & 7) + '0');
274558186Selan }
274658186Selan else
274758186Selan putchar (c);
274858186Selan }
274958186Selan
275058186Selan void
error(char * string)275158186Selan error (char *string)
275258186Selan {
275358186Selan puts (string);
275458186Selan exit (1);
275558186Selan }
275658186Selan #endif /* test */
2757