1*0b459c2cSDavid du Colombier /* Extended regular expression matching and search library, version
2*0b459c2cSDavid du Colombier 0.12. (Implements POSIX draft P10003.2/D11.2, except for
3*0b459c2cSDavid du Colombier internationalization features.)
4*0b459c2cSDavid du Colombier
5*0b459c2cSDavid du Colombier Copyright (C) 1993, 1994, 1995, 1996, 1997, 1998 Free Software Foundation, Inc.
6*0b459c2cSDavid du Colombier
7*0b459c2cSDavid du Colombier This program is free software; you can redistribute it and/or modify
8*0b459c2cSDavid du Colombier it under the terms of the GNU General Public License as published by
9*0b459c2cSDavid du Colombier the Free Software Foundation; either version 2, or (at your option)
10*0b459c2cSDavid du Colombier any later version.
11*0b459c2cSDavid du Colombier
12*0b459c2cSDavid du Colombier This program is distributed in the hope that it will be useful,
13*0b459c2cSDavid du Colombier but WITHOUT ANY WARRANTY; without even the implied warranty of
14*0b459c2cSDavid du Colombier MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15*0b459c2cSDavid du Colombier GNU General Public License for more details.
16*0b459c2cSDavid du Colombier
17*0b459c2cSDavid du Colombier You should have received a copy of the GNU General Public License
18*0b459c2cSDavid du Colombier along with this program; if not, write to the Free Software
19*0b459c2cSDavid du Colombier Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
20*0b459c2cSDavid du Colombier USA. */
21*0b459c2cSDavid du Colombier
22*0b459c2cSDavid du Colombier /* AIX requires this to be the first thing in the file. */
23*0b459c2cSDavid du Colombier #if defined (_AIX) && !defined (REGEX_MALLOC)
24*0b459c2cSDavid du Colombier #pragma alloca
25*0b459c2cSDavid du Colombier #endif
26*0b459c2cSDavid du Colombier
27*0b459c2cSDavid du Colombier #undef _GNU_SOURCE
28*0b459c2cSDavid du Colombier #define _GNU_SOURCE
29*0b459c2cSDavid du Colombier
/* PTR_TO_OFFSET (d) maps a pointer into the text being matched to a
   position number.

   In Emacs builds the result is the distance of D from the start of the
   (possibly two-part) subject, i.e. D - string1 when matching in the
   first string and D - (string2 - size1) otherwise, adjusted by
   POS_AS_IN_BUFFER.  The expansion relies on `string1', `string2',
   `size1' and MATCHING_IN_FIRST_STRING being in scope at the point of
   use.

   Outside Emacs the macro is the constant 0 and D is not evaluated.  */
#ifdef emacs
/* Adds 1 to P when re_match_object is nil or a buffer, 0 otherwise.  */
#define POS_AS_IN_BUFFER(p)						\
  ((p) + (NILP (re_match_object) || BUFFERP (re_match_object)))
/* Converts the pointer to the char to BEG-based offset from the start.  */
#define PTR_TO_OFFSET(d)						\
  POS_AS_IN_BUFFER (MATCHING_IN_FIRST_STRING				\
		    ? (d) - string1 : (d) - (string2 - size1))
#else
#define PTR_TO_OFFSET(d) 0
#endif
39*0b459c2cSDavid du Colombier
40*0b459c2cSDavid du Colombier #include "config.h"
41*0b459c2cSDavid du Colombier
42*0b459c2cSDavid du Colombier /* We need this for `regex.h', and perhaps for the Emacs include files. */
43*0b459c2cSDavid du Colombier #include <sys/types.h>
44*0b459c2cSDavid du Colombier
45*0b459c2cSDavid du Colombier /* This is for other GNU distributions with internationalized messages. */
46*0b459c2cSDavid du Colombier #if HAVE_LIBINTL_H || defined (_LIBC)
47*0b459c2cSDavid du Colombier # include <libintl.h>
48*0b459c2cSDavid du Colombier #else
49*0b459c2cSDavid du Colombier # define gettext(msgid) (msgid)
50*0b459c2cSDavid du Colombier #endif
51*0b459c2cSDavid du Colombier
52*0b459c2cSDavid du Colombier #ifndef gettext_noop
53*0b459c2cSDavid du Colombier /* This define is so xgettext can find the internationalizable
54*0b459c2cSDavid du Colombier strings. */
55*0b459c2cSDavid du Colombier #define gettext_noop(String) String
56*0b459c2cSDavid du Colombier #endif
57*0b459c2cSDavid du Colombier
58*0b459c2cSDavid du Colombier /* The `emacs' switch turns on certain matching commands
59*0b459c2cSDavid du Colombier that make sense only in Emacs. */
60*0b459c2cSDavid du Colombier #ifdef emacs
61*0b459c2cSDavid du Colombier
62*0b459c2cSDavid du Colombier #include "lisp.h"
63*0b459c2cSDavid du Colombier #include "buffer.h"
64*0b459c2cSDavid du Colombier
65*0b459c2cSDavid du Colombier /* Make syntax table lookup grant data in gl_state. */
66*0b459c2cSDavid du Colombier #define SYNTAX_ENTRY_VIA_PROPERTY
67*0b459c2cSDavid du Colombier
68*0b459c2cSDavid du Colombier #include "syntax.h"
69*0b459c2cSDavid du Colombier #include "charset.h"
70*0b459c2cSDavid du Colombier #include "category.h"
71*0b459c2cSDavid du Colombier
72*0b459c2cSDavid du Colombier #define malloc xmalloc
73*0b459c2cSDavid du Colombier #define realloc xrealloc
74*0b459c2cSDavid du Colombier #define free xfree
75*0b459c2cSDavid du Colombier
76*0b459c2cSDavid du Colombier #else /* not emacs */
77*0b459c2cSDavid du Colombier
78*0b459c2cSDavid du Colombier /* If we are not linking with Emacs proper,
79*0b459c2cSDavid du Colombier we can't use the relocating allocator
80*0b459c2cSDavid du Colombier even if config.h says that we can. */
81*0b459c2cSDavid du Colombier #undef REL_ALLOC
82*0b459c2cSDavid du Colombier
83*0b459c2cSDavid du Colombier #if defined (STDC_HEADERS) || defined (_LIBC)
84*0b459c2cSDavid du Colombier #include <stdlib.h>
85*0b459c2cSDavid du Colombier #else
86*0b459c2cSDavid du Colombier char *malloc ();
87*0b459c2cSDavid du Colombier char *realloc ();
88*0b459c2cSDavid du Colombier #endif
89*0b459c2cSDavid du Colombier
90*0b459c2cSDavid du Colombier /* When used in Emacs's lib-src, we need to get bzero and bcopy somehow.
91*0b459c2cSDavid du Colombier If nothing else has been done, use the method below. */
92*0b459c2cSDavid du Colombier #ifdef INHIBIT_STRING_HEADER
93*0b459c2cSDavid du Colombier #if !(defined (HAVE_BZERO) && defined (HAVE_BCOPY))
94*0b459c2cSDavid du Colombier #if !defined (bzero) && !defined (bcopy)
95*0b459c2cSDavid du Colombier #undef INHIBIT_STRING_HEADER
96*0b459c2cSDavid du Colombier #endif
97*0b459c2cSDavid du Colombier #endif
98*0b459c2cSDavid du Colombier #endif
99*0b459c2cSDavid du Colombier
100*0b459c2cSDavid du Colombier /* This is the normal way of making sure we have a bcopy and a bzero.
101*0b459c2cSDavid du Colombier This is used in most programs--a few other programs avoid this
102*0b459c2cSDavid du Colombier by defining INHIBIT_STRING_HEADER. */
103*0b459c2cSDavid du Colombier #ifndef INHIBIT_STRING_HEADER
104*0b459c2cSDavid du Colombier #if defined (HAVE_STRING_H) || defined (STDC_HEADERS) || defined (_LIBC)
105*0b459c2cSDavid du Colombier #include <string.h>
106*0b459c2cSDavid du Colombier #ifndef bcmp
107*0b459c2cSDavid du Colombier #define bcmp(s1, s2, n) memcmp ((s1), (s2), (n))
108*0b459c2cSDavid du Colombier #endif
109*0b459c2cSDavid du Colombier #ifndef bcopy
110*0b459c2cSDavid du Colombier #define bcopy(s, d, n) memcpy ((d), (s), (n))
111*0b459c2cSDavid du Colombier #endif
112*0b459c2cSDavid du Colombier #ifndef bzero
113*0b459c2cSDavid du Colombier #define bzero(s, n) memset ((s), 0, (n))
114*0b459c2cSDavid du Colombier #endif
115*0b459c2cSDavid du Colombier #else
116*0b459c2cSDavid du Colombier #include <strings.h>
117*0b459c2cSDavid du Colombier #endif
118*0b459c2cSDavid du Colombier #endif
119*0b459c2cSDavid du Colombier
120*0b459c2cSDavid du Colombier /* Define the syntax stuff for \<, \>, etc. */
121*0b459c2cSDavid du Colombier
/* Syntax code stored in re_syntax_table for word-constituent
   characters.  This must be nonzero for the wordchar and notwordchar
   pattern commands in re_match_2.  A definition supplied earlier (for
   instance by an including program) takes precedence.  */
#ifndef Sword
#define Sword 1
#endif

/* Wrapper for the controlling expression of a `switch' over an enum.
   When SWITCH_ENUM_BUG is defined the value is cast to int; otherwise
   it is passed through unchanged.  */
#ifdef SWITCH_ENUM_BUG
#define SWITCH_ENUM_CAST(x) ((int)(x))
#else
#define SWITCH_ENUM_CAST(x) (x)
#endif
133*0b459c2cSDavid du Colombier
134*0b459c2cSDavid du Colombier #ifdef SYNTAX_TABLE
135*0b459c2cSDavid du Colombier
136*0b459c2cSDavid du Colombier extern char *re_syntax_table;
137*0b459c2cSDavid du Colombier
138*0b459c2cSDavid du Colombier #else /* not SYNTAX_TABLE */
139*0b459c2cSDavid du Colombier
140*0b459c2cSDavid du Colombier /* How many characters in the character set. */
141*0b459c2cSDavid du Colombier #define CHAR_SET_SIZE 256
142*0b459c2cSDavid du Colombier
143*0b459c2cSDavid du Colombier static char re_syntax_table[CHAR_SET_SIZE];
144*0b459c2cSDavid du Colombier
145*0b459c2cSDavid du Colombier static void
init_syntax_once()146*0b459c2cSDavid du Colombier init_syntax_once ()
147*0b459c2cSDavid du Colombier {
148*0b459c2cSDavid du Colombier register int c;
149*0b459c2cSDavid du Colombier static int done = 0;
150*0b459c2cSDavid du Colombier
151*0b459c2cSDavid du Colombier if (done)
152*0b459c2cSDavid du Colombier return;
153*0b459c2cSDavid du Colombier
154*0b459c2cSDavid du Colombier bzero (re_syntax_table, sizeof re_syntax_table);
155*0b459c2cSDavid du Colombier
156*0b459c2cSDavid du Colombier for (c = 'a'; c <= 'z'; c++)
157*0b459c2cSDavid du Colombier re_syntax_table[c] = Sword;
158*0b459c2cSDavid du Colombier
159*0b459c2cSDavid du Colombier for (c = 'A'; c <= 'Z'; c++)
160*0b459c2cSDavid du Colombier re_syntax_table[c] = Sword;
161*0b459c2cSDavid du Colombier
162*0b459c2cSDavid du Colombier for (c = '0'; c <= '9'; c++)
163*0b459c2cSDavid du Colombier re_syntax_table[c] = Sword;
164*0b459c2cSDavid du Colombier
165*0b459c2cSDavid du Colombier re_syntax_table['_'] = Sword;
166*0b459c2cSDavid du Colombier
167*0b459c2cSDavid du Colombier done = 1;
168*0b459c2cSDavid du Colombier }
169*0b459c2cSDavid du Colombier
170*0b459c2cSDavid du Colombier #endif /* not SYNTAX_TABLE */
171*0b459c2cSDavid du Colombier
/* Syntax code of character C, looked up in re_syntax_table.  The
   argument is converted to unsigned char first so that a plain `char'
   with the high bit set cannot produce a negative subscript.  */
#define SYNTAX(c) re_syntax_table[(unsigned char) (c)]

/* Dummy macros for non-Emacs environments.  Every character is treated
   as a single byte, so the multibyte predicates collapse to constants
   and the character accessors to a plain dereference.  The unused
   arguments are not evaluated.  */
#define BASE_LEADING_CODE_P(c) (0)
#define WORD_BOUNDARY_P(c1, c2) (0)
#define CHAR_HEAD_P(p) (1)
#define SINGLE_BYTE_CHAR_P(c) (1)
#define SAME_CHARSET_P(c1, c2) (1)
#define MULTIBYTE_FORM_LENGTH(p, s) (1)
#define STRING_CHAR(p, s) (*(p))
#define STRING_CHAR_AND_LENGTH(p, s, actual_len) ((actual_len) = 1, *(p))

/* Store in C the character at P; when P sits exactly at END1 the
   character is taken from the start of STR2 instead.  */
#define GET_CHAR_AFTER_2(c, p, str1, end1, str2, end2)			\
  (c = ((p) == (end1) ? *(str2) : *(p)))

/* Store in C the character just before P; when P sits exactly at STR2
   the character is the last one before END1 instead.  */
#define GET_CHAR_BEFORE_2(c, p, str1, end1, str2, end2)			\
  (c = ((p) == (str2) ? *((end1) - 1) : *((p) - 1)))
187*0b459c2cSDavid du Colombier #endif /* not emacs */
188*0b459c2cSDavid du Colombier
189*0b459c2cSDavid du Colombier /* Get the interface, including the syntax bits. */
190*0b459c2cSDavid du Colombier #include "regex.h"
191*0b459c2cSDavid du Colombier
192*0b459c2cSDavid du Colombier /* isalpha etc. are used for the character classes. */
193*0b459c2cSDavid du Colombier #include <ctype.h>
194*0b459c2cSDavid du Colombier
/* Jim Meyering writes:

   "... Some ctype macros are valid only for character codes that
   isascii says are ASCII (SGI's IRIX-4.0.5 is one such system --when
   using /bin/cc or gcc but without giving an ansi option).  So, all
   ctype uses should be through macros like ISPRINT...  If
   STDC_HEADERS is defined, then autoconf has verified that the ctype
   macros don't need to be guarded with references to isascii. ...
   Defining isascii to 1 should let any compiler worth its salt
   eliminate the && through constant folding."  */

/* ISASCII (c) is the guard placed in front of every ctype call below.
   It is the constant 1 when STDC_HEADERS is defined or when no isascii
   is known to exist; otherwise it calls isascii.  */
#if defined (STDC_HEADERS) || (!defined (isascii) && !defined (HAVE_ISASCII))
#define ISASCII(c) 1
#else
#define ISASCII(c) isascii(c)
#endif

/* isblank and isgraph are used only when <ctype.h> provides them as
   macros; otherwise equivalent tests are spelled out here.  */
#ifdef isblank
#define ISBLANK(c) (ISASCII (c) && isblank (c))
#else
#define ISBLANK(c) ((c) == ' ' || (c) == '\t')
#endif
#ifdef isgraph
#define ISGRAPH(c) (ISASCII (c) && isgraph (c))
#else
#define ISGRAPH(c) (ISASCII (c) && isprint (c) && !isspace (c))
#endif

/* Guarded forms of the remaining <ctype.h> classifiers.  Each yields a
   nonzero value when C belongs to the class and 0 otherwise.  The
   argument may be evaluated more than once.  */
#define ISPRINT(c) (ISASCII (c) && isprint (c))
#define ISDIGIT(c) (ISASCII (c) && isdigit (c))
#define ISALNUM(c) (ISASCII (c) && isalnum (c))
#define ISALPHA(c) (ISASCII (c) && isalpha (c))
#define ISCNTRL(c) (ISASCII (c) && iscntrl (c))
#define ISLOWER(c) (ISASCII (c) && islower (c))
#define ISPUNCT(c) (ISASCII (c) && ispunct (c))
#define ISSPACE(c) (ISASCII (c) && isspace (c))
#define ISUPPER(c) (ISASCII (c) && isupper (c))
#define ISXDIGIT(c) (ISASCII (c) && isxdigit (c))
233*0b459c2cSDavid du Colombier
/* Fallback for environments whose headers did not define NULL.  The
   expansion is fully parenthesized so it behaves as a single operand
   next to any operator.  */
#ifndef NULL
#define NULL ((void *) 0)
#endif

/* SIGN_EXTEND_CHAR (c) reinterprets the low eight bits of C as a
   signed quantity in the range -128..127.

   We remove any previous definition of `SIGN_EXTEND_CHAR',
   since ours (we hope) works properly with all combinations of
   machines, compilers, `char' and `unsigned char' argument types.
   (Per Bothner suggested the basic approach.)  */
#undef SIGN_EXTEND_CHAR
#if __STDC__
#define SIGN_EXTEND_CHAR(c) ((signed char) (c))
#else  /* not __STDC__ */
/* As in Harbison and Steele.  */
#define SIGN_EXTEND_CHAR(c) ((((unsigned char) (c)) ^ 128) - 128)
#endif
249*0b459c2cSDavid du Colombier
/* Should we use malloc or alloca?  If REGEX_MALLOC is not defined, we
   use `alloca' instead of `malloc'.  This is because using malloc in
   re_search* or re_match* could cause memory leaks when C-g is used in
   Emacs; also, malloc is slower and causes storage fragmentation.  On
   the other hand, malloc is more portable, and easier to debug.

   Because we sometimes use alloca, some routines have to be macros,
   not functions -- `alloca'-allocated space disappears at the end of the
   function it is called in.

   The three macros defined here are:
     REGEX_ALLOCATE (size)                    obtain SIZE bytes;
     REGEX_REALLOCATE (source, osize, nsize)  obtain NSIZE bytes holding
                                              the first OSIZE bytes of
                                              SOURCE;
     REGEX_FREE (ptr)                         release PTR (a no-op for
                                              alloca).  */

#ifdef REGEX_MALLOC

#define REGEX_ALLOCATE malloc
#define REGEX_REALLOCATE(source, osize, nsize) realloc (source, nsize)
#define REGEX_FREE free

#else /* not REGEX_MALLOC */

/* Emacs already defines alloca, sometimes.  */
#ifndef alloca

/* Make alloca work the best possible way.  No declaration of alloca is
   supplied as a last resort: declaring it is a bad idea, and the result
   is always cast at the point of use.  */
#ifdef __GNUC__
#define alloca __builtin_alloca
#else /* not __GNUC__ */
#if HAVE_ALLOCA_H
#include <alloca.h>
#endif /* HAVE_ALLOCA_H */
#endif /* not __GNUC__ */

#endif /* not alloca */

#define REGEX_ALLOCATE alloca

/* Assumes a `char *destination' variable in the calling scope.  The old
   block is copied, not released; NSIZE and SOURCE are each evaluated
   once.  */
#define REGEX_REALLOCATE(source, osize, nsize)				\
  (destination = (char *) alloca (nsize),				\
   bcopy (source, destination, osize),					\
   destination)

/* No need to do anything to free, after alloca.  */
#define REGEX_FREE(arg) ((void)0) /* Do nothing!  But inhibit gcc warning.  */

#endif /* not REGEX_MALLOC */
300*0b459c2cSDavid du Colombier
/* Define how to allocate the failure stack.

   Three flavours exist, chosen at compile time:
     - REL_ALLOC together with REGEX_MALLOC: the r_alloc family is
       called with the address of `failure_stack_ptr';
     - REGEX_MALLOC alone: plain malloc / realloc / free;
     - neither: alloca, where growing goes through REGEX_REALLOCATE and
       freeing does nothing.  */

#if defined (REL_ALLOC) && defined (REGEX_MALLOC)

#define REGEX_ALLOCATE_STACK(size)				\
  r_alloc (&failure_stack_ptr, (size))
#define REGEX_REALLOCATE_STACK(source, osize, nsize)		\
  r_re_alloc (&failure_stack_ptr, (nsize))
#define REGEX_FREE_STACK(ptr)					\
  r_alloc_free (&failure_stack_ptr)

#else /* not using relocating allocator */

#ifdef REGEX_MALLOC

#define REGEX_ALLOCATE_STACK malloc
#define REGEX_REALLOCATE_STACK(source, osize, nsize) realloc (source, nsize)
#define REGEX_FREE_STACK free

#else /* not REGEX_MALLOC */

#define REGEX_ALLOCATE_STACK alloca

#define REGEX_REALLOCATE_STACK(source, osize, nsize)			\
   REGEX_REALLOCATE (source, osize, nsize)
/* No need to explicitly free anything.  Expands to a void expression,
   like REGEX_FREE, so that a use followed by `;' is an ordinary
   expression statement rather than an empty one.  */
#define REGEX_FREE_STACK(arg) ((void) 0)

#endif /* not REGEX_MALLOC */
#endif /* not using relocating allocator */
331*0b459c2cSDavid du Colombier
332*0b459c2cSDavid du Colombier
/* True if `size1' is nonzero and PTR is pointing anywhere inside
   `string1' or just past its end.  This works if PTR is NULL, which is
   a good thing.  Relies on `string1' and `size1' being in scope.  */
#define FIRST_STRING_P(ptr) 					\
  (size1 && string1 <= (ptr) && (ptr) <= string1 + size1)

/* (Re)Allocate N items of type T using malloc, or fail.

   TALLOC yields a fresh block (or a null pointer on failure).
   RETALLOC resizes ADDR in place and assigns the result back to ADDR,
   so ADDR becomes null when realloc fails; callers are expected to test
   it afterwards.
   RETALLOC_IF resizes ADDR when it is already set and allocates it
   otherwise.  It is a statement, wrapped in `do ... while (0)' so it
   stays a single statement wherever it is used.
   REGEX_TALLOC is TALLOC on top of REGEX_ALLOCATE.  */
#define TALLOC(n, t) ((t *) malloc ((n) * sizeof (t)))
#define RETALLOC(addr, n, t) ((addr) = (t *) realloc (addr, (n) * sizeof (t)))
#define RETALLOC_IF(addr, n, t)						\
  do									\
    {									\
      if (addr)								\
	RETALLOC ((addr), (n), t);					\
      else								\
	(addr) = TALLOC ((n), t);					\
    }									\
  while (0)
#define REGEX_TALLOC(n, t) ((t *) REGEX_ALLOCATE ((n) * sizeof (t)))

#define BYTEWIDTH 8 /* In bits.  */

/* Nonzero when the strings S1 and S2 compare equal with strcmp.  */
#define STREQ(s1, s2) ((strcmp (s1, s2) == 0))

/* Larger / smaller of A and B.  The selected argument is evaluated
   twice, so avoid arguments with side effects.  */
#undef MAX
#undef MIN
#define MAX(a, b) ((a) > (b) ? (a) : (b))
#define MIN(a, b) ((a) < (b) ? (a) : (b))

/* Truth values used throughout this file.  */
typedef char boolean;
#define false 0
#define true 1
359*0b459c2cSDavid du Colombier static int re_match_2_internal ();
360*0b459c2cSDavid du Colombier
/* These are the command codes that appear in compiled regular
   expressions.  Some opcodes are followed by argument bytes.  A
   command code can specify any interpretation whatsoever for its
   arguments.  Zero bytes may appear in the compiled regular expression.

   The numeric values are assigned by position starting from 0, so the
   order of the enumerators must not be changed.  */

typedef enum
{
  no_op = 0,

  /* Succeed right away--no more backtracking.  */
  succeed,

  /* Followed by one byte giving n, then by n literal bytes.  */
  exactn,

  /* Matches any (more or less) character.  */
  anychar,

  /* Matches any one char belonging to specified set.  First
     following byte is number of bitmap bytes.  Then come bytes
     for a bitmap saying which chars are in.  Bits in each byte
     are ordered low-bit-first.  A character is in the set if its
     bit is 1.  A character too large to have a bit in the map is
     automatically not in the set.  */
  charset,

  /* Same parameters as charset, but match any character that is
     not one of those specified.  */
  charset_not,

  /* Start remembering the text that is matched, for storing in a
     register.  Followed by one byte with the register number, in
     the range 0 to one less than the pattern buffer's re_nsub
     field.  Then followed by one byte with the number of groups
     inner to this one.  (This last has to be part of the
     start_memory only because we need it in the on_failure_jump
     of re_match_2.)  */
  start_memory,

  /* Stop remembering the text that is matched and store it in a
     memory register.  Followed by one byte with the register
     number, in the range 0 to one less than `re_nsub' in the
     pattern buffer, and one byte with the number of inner groups,
     just like `start_memory'.  (We need the number of inner
     groups here because we don't have any easy way of finding the
     corresponding start_memory when we're at a stop_memory.)  */
  stop_memory,

  /* Match a duplicate of something remembered.  Followed by one
     byte containing the register number.  */
  duplicate,

  /* Fail unless at beginning of line.  */
  begline,

  /* Fail unless at end of line.  */
  endline,

  /* Succeeds if at beginning of buffer (if emacs) or at beginning
     of string to be matched (if not).  */
  begbuf,

  /* Analogously, for end of buffer/string.  */
  endbuf,

  /* Followed by two-byte relative address to which to jump.  */
  jump,

  /* Same as jump, but marks the end of an alternative.  */
  jump_past_alt,

  /* Followed by two-byte relative address of place to resume at
     in case of failure.  */
  on_failure_jump,

  /* Like on_failure_jump, but pushes a placeholder instead of the
     current string position when executed.  */
  on_failure_keep_string_jump,

  /* Throw away latest failure point and then jump to following
     two-byte relative address.  */
  pop_failure_jump,

  /* Change to pop_failure_jump if we know we won't have to backtrack
     to match; otherwise change to jump.  This is used to jump
     back to the beginning of a repeat.  If what follows this jump
     clearly won't match what the repeat does, such that we can be
     sure that there is no use backtracking out of repetitions
     already matched, then we change it to a pop_failure_jump.
     Followed by two-byte address.  */
  maybe_pop_jump,

  /* Jump to following two-byte address, and push a dummy failure
     point.  This failure point will be thrown away if an attempt
     is made to use it for a failure.  A `+' construct makes this
     before the first repeat.  Also used as an intermediary kind
     of jump when compiling an alternative.  */
  dummy_failure_jump,

  /* Push a dummy failure point and continue.  Used at the end of
     alternatives.  */
  push_dummy_failure,

  /* Followed by two-byte relative address and two-byte number n.
     After matching N times, jump to the address upon failure.  */
  succeed_n,

  /* Followed by two-byte relative address, and two-byte number n.
     Jump to the address N times, then fail.  */
  jump_n,

  /* Set the following two-byte relative address to the
     subsequent two-byte number.  The address *includes* the two
     bytes of number.  */
  set_number_at,

  wordchar,	/* Matches any word-constituent character.  */
  notwordchar,	/* Matches any char that is not a word-constituent.  */

  wordbeg,	/* Succeeds if at word beginning.  */
  wordend,	/* Succeeds if at word end.  */

  wordbound,	/* Succeeds if at a word boundary.  */
  notwordbound	/* Succeeds if not at a word boundary.  */

  /* The Emacs-only opcodes start with a leading comma so that the
     enumerator list never ends in a trailing comma in either
     configuration.  */
#ifdef emacs
  ,before_dot,	/* Succeeds if before point.  */
  at_dot,	/* Succeeds if at point.  */
  after_dot,	/* Succeeds if after point.  */

  /* Matches any character whose syntax is specified.  Followed by
     a byte which contains a syntax code, e.g., Sword.  */
  syntaxspec,

  /* Matches any character whose syntax is not that specified.  */
  notsyntaxspec,

  /* Matches any character whose category-set contains the specified
     category.  The operator is followed by a byte which contains a
     category code (mnemonic ASCII character).  */
  categoryspec,

  /* Matches any character whose category-set does not contain the
     specified category.  The operator is followed by a byte which
     contains the category code (mnemonic ASCII character).  */
  notcategoryspec
#endif /* emacs */
} re_opcode_t;
509*0b459c2cSDavid du Colombier
510*0b459c2cSDavid du Colombier /* Common operations on the compiled pattern. */
511*0b459c2cSDavid du Colombier
512*0b459c2cSDavid du Colombier /* Store NUMBER in two contiguous bytes starting at DESTINATION. */
513*0b459c2cSDavid du Colombier
514*0b459c2cSDavid du Colombier #define STORE_NUMBER(destination, number) \
515*0b459c2cSDavid du Colombier do { \
516*0b459c2cSDavid du Colombier (destination)[0] = (number) & 0377; \
517*0b459c2cSDavid du Colombier (destination)[1] = (number) >> 8; \
518*0b459c2cSDavid du Colombier } while (0)
519*0b459c2cSDavid du Colombier
/* Same as STORE_NUMBER, except that DESTINATION is then advanced to the
   byte after where the number is stored.  Therefore, DESTINATION must
   be an lvalue.  */

#define STORE_NUMBER_AND_INCR(destination, number)                      \
  do {                                                                  \
    STORE_NUMBER (destination, number);                                 \
    (destination) += 2;                                                 \
  } while (0)
529*0b459c2cSDavid du Colombier
/* Put into DESTINATION a number stored in two contiguous bytes starting
   at SOURCE.  The first byte is the low-order byte; the second byte is
   sign-extended, so the result is a signed 16-bit quantity.

   The high byte is scaled by multiplication rather than by `<< 8':
   left-shifting a negative value is undefined behavior in ISO C.  */

#define EXTRACT_NUMBER(destination, source)                             \
  do {                                                                  \
    (destination) = *(source) & 0377;                                   \
    (destination) += SIGN_EXTEND_CHAR (*((source) + 1)) * (1 << 8);     \
  } while (0)
538*0b459c2cSDavid du Colombier
539*0b459c2cSDavid du Colombier #ifdef DEBUG
/* Function form of EXTRACT_NUMBER, used when debugging the macros.
   Stores in *DEST the signed number held in the two bytes at SOURCE
   (low-order byte first, high byte sign-extended).  */
static void
extract_number (dest, source)
    int *dest;
    unsigned char *source;
{
  int temp = SIGN_EXTEND_CHAR (*(source + 1));
  *dest = *source & 0377;
  /* Multiply instead of shifting: TEMP may be negative, and shifting a
     negative value left is undefined behavior.  */
  *dest += temp * (1 << 8);
}
549*0b459c2cSDavid du Colombier
#ifndef EXTRACT_MACROS /* To debug the macros.  */
/* Route EXTRACT_NUMBER through the function so it can be stepped into.
   The arguments are parenthesized so that any lvalue expression works
   as DEST.  */
#undef EXTRACT_NUMBER
#define EXTRACT_NUMBER(dest, src) extract_number (&(dest), (src))
#endif /* not EXTRACT_MACROS */
554*0b459c2cSDavid du Colombier
555*0b459c2cSDavid du Colombier #endif /* DEBUG */
556*0b459c2cSDavid du Colombier
/* Same as EXTRACT_NUMBER, except that SOURCE is then advanced past the
   two bytes that were read.  SOURCE must be an lvalue.  */

#define EXTRACT_NUMBER_AND_INCR(destination, source)                    \
  do {                                                                  \
    EXTRACT_NUMBER (destination, source);                               \
    (source) += 2;                                                      \
  } while (0)
565*0b459c2cSDavid du Colombier
566*0b459c2cSDavid du Colombier #ifdef DEBUG
/* Function form of EXTRACT_NUMBER_AND_INCR, used when debugging the
   macros.  Stores in *DESTINATION the two-byte number at *SOURCE, then
   advances *SOURCE past those two bytes.  */
static void
extract_number_and_incr (destination, source)
    int *destination;
    unsigned char **source;
{
  extract_number (destination, *source);
  *source += 2;
}
575*0b459c2cSDavid du Colombier
#ifndef EXTRACT_MACROS
/* Route EXTRACT_NUMBER_AND_INCR through the function so it can be
   stepped into.  Both arguments must be lvalues; they are parenthesized
   before their address is taken.  */
#undef EXTRACT_NUMBER_AND_INCR
#define EXTRACT_NUMBER_AND_INCR(dest, src) \
  extract_number_and_incr (&(dest), &(src))
#endif /* not EXTRACT_MACROS */
581*0b459c2cSDavid du Colombier
582*0b459c2cSDavid du Colombier #endif /* DEBUG */
583*0b459c2cSDavid du Colombier
/* Store a multibyte character in three contiguous bytes starting at
   DESTINATION (low-order byte first), and advance DESTINATION to the
   byte after where the character is stored.  Therefore, DESTINATION
   must be an lvalue.  Only the low 24 bits of CHARACTER are kept.  */

#define STORE_CHARACTER_AND_INCR(destination, character)                \
  do {                                                                  \
    (destination)[0] = (character) & 0377;                              \
    (destination)[1] = ((character) >> 8) & 0377;                       \
    (destination)[2] = ((character) >> 16) & 0377;                      \
    (destination) += 3;                                                 \
  } while (0)
595*0b459c2cSDavid du Colombier
/* Put into DESTINATION a character stored in three contiguous bytes
   starting at SOURCE, low-order byte first (the layout written by
   STORE_CHARACTER_AND_INCR).  SOURCE is not advanced.  */

#define EXTRACT_CHARACTER(destination, source)                          \
  do {                                                                  \
    (destination) = ((source)[0]                                        \
                     | ((source)[1] << 8)                               \
                     | ((source)[2] << 16));                            \
  } while (0)
605*0b459c2cSDavid du Colombier
606*0b459c2cSDavid du Colombier
607*0b459c2cSDavid du Colombier /* Macros for charset. */
608*0b459c2cSDavid du Colombier
/* Size of bitmap of charset P in bytes.  P is the start of a charset,
   i.e. *P is (re_opcode_t) charset or (re_opcode_t) charset_not.  The
   size is held in the low seven bits of the byte after the opcode.  */
#define CHARSET_BITMAP_SIZE(p) ((p)[1] & 0x7F)

/* Nonzero if charset P has a range table (top bit of the size byte).  */
#define CHARSET_RANGE_TABLE_EXISTS_P(p) ((p)[1] & 0x80)

/* Return the address of the range table of charset P.  This is not the
   first range itself but the two-byte field just before it, where the
   number of ranges is stored.  `2 +' skips the re_opcode_t and the
   size-of-bitmap byte.  */
#define CHARSET_RANGE_TABLE(p) (&(p)[2 + CHARSET_BITMAP_SIZE (p)])

/* Test if C is listed in the bitmap of charset P.  C is evaluated more
   than once.  */
#define CHARSET_LOOKUP_BITMAP(p, c)                                     \
  ((c) < CHARSET_BITMAP_SIZE (p) * BYTEWIDTH                            \
   && ((p)[2 + (c) / BYTEWIDTH] & (1 << ((c) % BYTEWIDTH))))
625*0b459c2cSDavid du Colombier
/* Return the address of the end of RANGE_TABLE.  COUNT is the number of
   ranges in RANGE_TABLE; each range is a (start, end) pair.  `* 2'
   accounts for the start and the end of a range, `* 3' for the three
   bytes each of them occupies.  */
#define CHARSET_RANGE_TABLE_END(range_table, count)     \
  ((range_table) + (count) * 2 * 3)
632*0b459c2cSDavid du Colombier
/* Test if C is in RANGE_TABLE.  The flag NEGATE (an lvalue) is inverted
   if C lies inside one of the ranges; the scan stops at the first range
   that contains C.  COUNT is the number of ranges in RANGE_TABLE, which
   must point at the first range (past the two-byte count field).

   The block-local names carry an `rt_' prefix so that they cannot
   capture identifiers used in the caller's argument expressions.  */
#define CHARSET_LOOKUP_RANGE_TABLE_RAW(negate, c, range_table, count)   \
  do                                                                    \
    {                                                                   \
      int rt_first, rt_last;                                            \
      unsigned char *rt_cursor;                                         \
      unsigned char *rt_limit                                           \
        = CHARSET_RANGE_TABLE_END ((range_table), (count));             \
                                                                        \
      /* Each entry is two 3-byte characters: range start, range end.  */ \
      for (rt_cursor = (range_table); rt_cursor < rt_limit;             \
           rt_cursor += 2 * 3)                                          \
        {                                                               \
          EXTRACT_CHARACTER (rt_first, rt_cursor);                      \
          EXTRACT_CHARACTER (rt_last, rt_cursor + 3);                   \
                                                                        \
          if (rt_first <= (c) && (c) <= rt_last)                        \
            {                                                           \
              (negate) = !(negate);                                     \
              break;                                                    \
            }                                                           \
        }                                                               \
    }                                                                   \
  while (0)
656*0b459c2cSDavid du Colombier
/* Test if C is in the range table of CHARSET.  The flag NEGATE (an
   lvalue) is inverted if C is listed in it.  CHARSET points at the
   charset opcode; the range count is read from the two bytes that
   follow the bitmap, and the ranges follow the count.

   The block-local names carry an `rt_' prefix so that they cannot
   capture identifiers used in the caller's argument expressions.  */
#define CHARSET_LOOKUP_RANGE_TABLE(negate, c, charset)                  \
  do                                                                    \
    {                                                                   \
      /* Number of ranges in range table.  */                           \
      int rt_count;                                                     \
      unsigned char *rt_table = CHARSET_RANGE_TABLE (charset);          \
                                                                        \
      EXTRACT_NUMBER_AND_INCR (rt_count, rt_table);                     \
      CHARSET_LOOKUP_RANGE_TABLE_RAW ((negate), (c), rt_table, rt_count); \
    }                                                                   \
  while (0)
670*0b459c2cSDavid du Colombier
671*0b459c2cSDavid du Colombier /* If DEBUG is defined, Regex prints many voluminous messages about what
672*0b459c2cSDavid du Colombier it is doing (if the variable `debug' is nonzero). If linked with the
673*0b459c2cSDavid du Colombier main program in `iregex.c', you can enter patterns and strings
674*0b459c2cSDavid du Colombier interactively. And if linked with the main program in `main.c' and
675*0b459c2cSDavid du Colombier the other test files, you can run the already-written tests. */
676*0b459c2cSDavid du Colombier
677*0b459c2cSDavid du Colombier #ifdef DEBUG
678*0b459c2cSDavid du Colombier
/* We use standard I/O for debugging.  */
#include <stdio.h>

/* It is useful to test things that ``must'' be true when debugging.  */
#include <assert.h>

/* Debug output is produced only while this is nonzero.  */
static int debug = 0;

/* Each printing macro is wrapped in `do ... while (0)' so that it is a
   single statement: a bare `if (debug) ...' would steal the `else' of
   an enclosing if/else at the point of use.  */
#define DEBUG_STATEMENT(e) e
#define DEBUG_PRINT1(x) do { if (debug) printf (x); } while (0)
#define DEBUG_PRINT2(x1, x2) do { if (debug) printf (x1, x2); } while (0)
#define DEBUG_PRINT3(x1, x2, x3)                                        \
  do { if (debug) printf (x1, x2, x3); } while (0)
#define DEBUG_PRINT4(x1, x2, x3, x4)                                    \
  do { if (debug) printf (x1, x2, x3, x4); } while (0)
/* The first argument is accepted but not used.  */
#define DEBUG_PRINT_COMPILED_PATTERN(p, s, e)                           \
  do { if (debug) print_partial_compiled_pattern (s, e); } while (0)
#define DEBUG_PRINT_DOUBLE_STRING(w, s1, sz1, s2, sz2)                  \
  do { if (debug) print_double_string (w, s1, sz1, s2, sz2); } while (0)
697*0b459c2cSDavid du Colombier
698*0b459c2cSDavid du Colombier /* Print the fastmap in human-readable form. */
699*0b459c2cSDavid du Colombier
700*0b459c2cSDavid du Colombier void
print_fastmap(fastmap)701*0b459c2cSDavid du Colombier print_fastmap (fastmap)
702*0b459c2cSDavid du Colombier char *fastmap;
703*0b459c2cSDavid du Colombier {
704*0b459c2cSDavid du Colombier unsigned was_a_range = 0;
705*0b459c2cSDavid du Colombier unsigned i = 0;
706*0b459c2cSDavid du Colombier
707*0b459c2cSDavid du Colombier while (i < (1 << BYTEWIDTH))
708*0b459c2cSDavid du Colombier {
709*0b459c2cSDavid du Colombier if (fastmap[i++])
710*0b459c2cSDavid du Colombier {
711*0b459c2cSDavid du Colombier was_a_range = 0;
712*0b459c2cSDavid du Colombier putchar (i - 1);
713*0b459c2cSDavid du Colombier while (i < (1 << BYTEWIDTH) && fastmap[i])
714*0b459c2cSDavid du Colombier {
715*0b459c2cSDavid du Colombier was_a_range = 1;
716*0b459c2cSDavid du Colombier i++;
717*0b459c2cSDavid du Colombier }
718*0b459c2cSDavid du Colombier if (was_a_range)
719*0b459c2cSDavid du Colombier {
720*0b459c2cSDavid du Colombier printf ("-");
721*0b459c2cSDavid du Colombier putchar (i - 1);
722*0b459c2cSDavid du Colombier }
723*0b459c2cSDavid du Colombier }
724*0b459c2cSDavid du Colombier }
725*0b459c2cSDavid du Colombier putchar ('\n');
726*0b459c2cSDavid du Colombier }
727*0b459c2cSDavid du Colombier
728*0b459c2cSDavid du Colombier
729*0b459c2cSDavid du Colombier /* Print a compiled pattern string in human-readable form, starting at
730*0b459c2cSDavid du Colombier the START pointer into it and ending just before the pointer END. */
731*0b459c2cSDavid du Colombier
732*0b459c2cSDavid du Colombier void
print_partial_compiled_pattern(start,end)733*0b459c2cSDavid du Colombier print_partial_compiled_pattern (start, end)
734*0b459c2cSDavid du Colombier unsigned char *start;
735*0b459c2cSDavid du Colombier unsigned char *end;
736*0b459c2cSDavid du Colombier {
737*0b459c2cSDavid du Colombier int mcnt, mcnt2;
738*0b459c2cSDavid du Colombier unsigned char *p = start;
739*0b459c2cSDavid du Colombier unsigned char *pend = end;
740*0b459c2cSDavid du Colombier
741*0b459c2cSDavid du Colombier if (start == NULL)
742*0b459c2cSDavid du Colombier {
743*0b459c2cSDavid du Colombier printf ("(null)\n");
744*0b459c2cSDavid du Colombier return;
745*0b459c2cSDavid du Colombier }
746*0b459c2cSDavid du Colombier
747*0b459c2cSDavid du Colombier /* Loop over pattern commands. */
748*0b459c2cSDavid du Colombier while (p < pend)
749*0b459c2cSDavid du Colombier {
750*0b459c2cSDavid du Colombier printf ("%d:\t", p - start);
751*0b459c2cSDavid du Colombier
752*0b459c2cSDavid du Colombier switch ((re_opcode_t) *p++)
753*0b459c2cSDavid du Colombier {
754*0b459c2cSDavid du Colombier case no_op:
755*0b459c2cSDavid du Colombier printf ("/no_op");
756*0b459c2cSDavid du Colombier break;
757*0b459c2cSDavid du Colombier
758*0b459c2cSDavid du Colombier case exactn:
759*0b459c2cSDavid du Colombier mcnt = *p++;
760*0b459c2cSDavid du Colombier printf ("/exactn/%d", mcnt);
761*0b459c2cSDavid du Colombier do
762*0b459c2cSDavid du Colombier {
763*0b459c2cSDavid du Colombier putchar ('/');
764*0b459c2cSDavid du Colombier putchar (*p++);
765*0b459c2cSDavid du Colombier }
766*0b459c2cSDavid du Colombier while (--mcnt);
767*0b459c2cSDavid du Colombier break;
768*0b459c2cSDavid du Colombier
769*0b459c2cSDavid du Colombier case start_memory:
770*0b459c2cSDavid du Colombier mcnt = *p++;
771*0b459c2cSDavid du Colombier printf ("/start_memory/%d/%d", mcnt, *p++);
772*0b459c2cSDavid du Colombier break;
773*0b459c2cSDavid du Colombier
774*0b459c2cSDavid du Colombier case stop_memory:
775*0b459c2cSDavid du Colombier mcnt = *p++;
776*0b459c2cSDavid du Colombier printf ("/stop_memory/%d/%d", mcnt, *p++);
777*0b459c2cSDavid du Colombier break;
778*0b459c2cSDavid du Colombier
779*0b459c2cSDavid du Colombier case duplicate:
780*0b459c2cSDavid du Colombier printf ("/duplicate/%d", *p++);
781*0b459c2cSDavid du Colombier break;
782*0b459c2cSDavid du Colombier
783*0b459c2cSDavid du Colombier case anychar:
784*0b459c2cSDavid du Colombier printf ("/anychar");
785*0b459c2cSDavid du Colombier break;
786*0b459c2cSDavid du Colombier
787*0b459c2cSDavid du Colombier case charset:
788*0b459c2cSDavid du Colombier case charset_not:
789*0b459c2cSDavid du Colombier {
790*0b459c2cSDavid du Colombier register int c, last = -100;
791*0b459c2cSDavid du Colombier register int in_range = 0;
792*0b459c2cSDavid du Colombier
793*0b459c2cSDavid du Colombier printf ("/charset [%s",
794*0b459c2cSDavid du Colombier (re_opcode_t) *(p - 1) == charset_not ? "^" : "");
795*0b459c2cSDavid du Colombier
796*0b459c2cSDavid du Colombier assert (p + *p < pend);
797*0b459c2cSDavid du Colombier
798*0b459c2cSDavid du Colombier for (c = 0; c < 256; c++)
799*0b459c2cSDavid du Colombier if (c / 8 < *p
800*0b459c2cSDavid du Colombier && (p[1 + (c/8)] & (1 << (c % 8))))
801*0b459c2cSDavid du Colombier {
802*0b459c2cSDavid du Colombier /* Are we starting a range? */
803*0b459c2cSDavid du Colombier if (last + 1 == c && ! in_range)
804*0b459c2cSDavid du Colombier {
805*0b459c2cSDavid du Colombier putchar ('-');
806*0b459c2cSDavid du Colombier in_range = 1;
807*0b459c2cSDavid du Colombier }
808*0b459c2cSDavid du Colombier /* Have we broken a range? */
809*0b459c2cSDavid du Colombier else if (last + 1 != c && in_range)
810*0b459c2cSDavid du Colombier {
811*0b459c2cSDavid du Colombier putchar (last);
812*0b459c2cSDavid du Colombier in_range = 0;
813*0b459c2cSDavid du Colombier }
814*0b459c2cSDavid du Colombier
815*0b459c2cSDavid du Colombier if (! in_range)
816*0b459c2cSDavid du Colombier putchar (c);
817*0b459c2cSDavid du Colombier
818*0b459c2cSDavid du Colombier last = c;
819*0b459c2cSDavid du Colombier }
820*0b459c2cSDavid du Colombier
821*0b459c2cSDavid du Colombier if (in_range)
822*0b459c2cSDavid du Colombier putchar (last);
823*0b459c2cSDavid du Colombier
824*0b459c2cSDavid du Colombier putchar (']');
825*0b459c2cSDavid du Colombier
826*0b459c2cSDavid du Colombier p += 1 + *p;
827*0b459c2cSDavid du Colombier }
828*0b459c2cSDavid du Colombier break;
829*0b459c2cSDavid du Colombier
830*0b459c2cSDavid du Colombier case begline:
831*0b459c2cSDavid du Colombier printf ("/begline");
832*0b459c2cSDavid du Colombier break;
833*0b459c2cSDavid du Colombier
834*0b459c2cSDavid du Colombier case endline:
835*0b459c2cSDavid du Colombier printf ("/endline");
836*0b459c2cSDavid du Colombier break;
837*0b459c2cSDavid du Colombier
838*0b459c2cSDavid du Colombier case on_failure_jump:
839*0b459c2cSDavid du Colombier extract_number_and_incr (&mcnt, &p);
840*0b459c2cSDavid du Colombier printf ("/on_failure_jump to %d", p + mcnt - start);
841*0b459c2cSDavid du Colombier break;
842*0b459c2cSDavid du Colombier
843*0b459c2cSDavid du Colombier case on_failure_keep_string_jump:
844*0b459c2cSDavid du Colombier extract_number_and_incr (&mcnt, &p);
845*0b459c2cSDavid du Colombier printf ("/on_failure_keep_string_jump to %d", p + mcnt - start);
846*0b459c2cSDavid du Colombier break;
847*0b459c2cSDavid du Colombier
848*0b459c2cSDavid du Colombier case dummy_failure_jump:
849*0b459c2cSDavid du Colombier extract_number_and_incr (&mcnt, &p);
850*0b459c2cSDavid du Colombier printf ("/dummy_failure_jump to %d", p + mcnt - start);
851*0b459c2cSDavid du Colombier break;
852*0b459c2cSDavid du Colombier
853*0b459c2cSDavid du Colombier case push_dummy_failure:
854*0b459c2cSDavid du Colombier printf ("/push_dummy_failure");
855*0b459c2cSDavid du Colombier break;
856*0b459c2cSDavid du Colombier
857*0b459c2cSDavid du Colombier case maybe_pop_jump:
858*0b459c2cSDavid du Colombier extract_number_and_incr (&mcnt, &p);
859*0b459c2cSDavid du Colombier printf ("/maybe_pop_jump to %d", p + mcnt - start);
860*0b459c2cSDavid du Colombier break;
861*0b459c2cSDavid du Colombier
862*0b459c2cSDavid du Colombier case pop_failure_jump:
863*0b459c2cSDavid du Colombier extract_number_and_incr (&mcnt, &p);
864*0b459c2cSDavid du Colombier printf ("/pop_failure_jump to %d", p + mcnt - start);
865*0b459c2cSDavid du Colombier break;
866*0b459c2cSDavid du Colombier
867*0b459c2cSDavid du Colombier case jump_past_alt:
868*0b459c2cSDavid du Colombier extract_number_and_incr (&mcnt, &p);
869*0b459c2cSDavid du Colombier printf ("/jump_past_alt to %d", p + mcnt - start);
870*0b459c2cSDavid du Colombier break;
871*0b459c2cSDavid du Colombier
872*0b459c2cSDavid du Colombier case jump:
873*0b459c2cSDavid du Colombier extract_number_and_incr (&mcnt, &p);
874*0b459c2cSDavid du Colombier printf ("/jump to %d", p + mcnt - start);
875*0b459c2cSDavid du Colombier break;
876*0b459c2cSDavid du Colombier
877*0b459c2cSDavid du Colombier case succeed_n:
878*0b459c2cSDavid du Colombier extract_number_and_incr (&mcnt, &p);
879*0b459c2cSDavid du Colombier extract_number_and_incr (&mcnt2, &p);
880*0b459c2cSDavid du Colombier printf ("/succeed_n to %d, %d times", p + mcnt - start, mcnt2);
881*0b459c2cSDavid du Colombier break;
882*0b459c2cSDavid du Colombier
883*0b459c2cSDavid du Colombier case jump_n:
884*0b459c2cSDavid du Colombier extract_number_and_incr (&mcnt, &p);
885*0b459c2cSDavid du Colombier extract_number_and_incr (&mcnt2, &p);
886*0b459c2cSDavid du Colombier printf ("/jump_n to %d, %d times", p + mcnt - start, mcnt2);
887*0b459c2cSDavid du Colombier break;
888*0b459c2cSDavid du Colombier
889*0b459c2cSDavid du Colombier case set_number_at:
890*0b459c2cSDavid du Colombier extract_number_and_incr (&mcnt, &p);
891*0b459c2cSDavid du Colombier extract_number_and_incr (&mcnt2, &p);
892*0b459c2cSDavid du Colombier printf ("/set_number_at location %d to %d", p + mcnt - start, mcnt2);
893*0b459c2cSDavid du Colombier break;
894*0b459c2cSDavid du Colombier
895*0b459c2cSDavid du Colombier case wordbound:
896*0b459c2cSDavid du Colombier printf ("/wordbound");
897*0b459c2cSDavid du Colombier break;
898*0b459c2cSDavid du Colombier
899*0b459c2cSDavid du Colombier case notwordbound:
900*0b459c2cSDavid du Colombier printf ("/notwordbound");
901*0b459c2cSDavid du Colombier break;
902*0b459c2cSDavid du Colombier
903*0b459c2cSDavid du Colombier case wordbeg:
904*0b459c2cSDavid du Colombier printf ("/wordbeg");
905*0b459c2cSDavid du Colombier break;
906*0b459c2cSDavid du Colombier
907*0b459c2cSDavid du Colombier case wordend:
908*0b459c2cSDavid du Colombier printf ("/wordend");
909*0b459c2cSDavid du Colombier
910*0b459c2cSDavid du Colombier #ifdef emacs
911*0b459c2cSDavid du Colombier case before_dot:
912*0b459c2cSDavid du Colombier printf ("/before_dot");
913*0b459c2cSDavid du Colombier break;
914*0b459c2cSDavid du Colombier
915*0b459c2cSDavid du Colombier case at_dot:
916*0b459c2cSDavid du Colombier printf ("/at_dot");
917*0b459c2cSDavid du Colombier break;
918*0b459c2cSDavid du Colombier
919*0b459c2cSDavid du Colombier case after_dot:
920*0b459c2cSDavid du Colombier printf ("/after_dot");
921*0b459c2cSDavid du Colombier break;
922*0b459c2cSDavid du Colombier
923*0b459c2cSDavid du Colombier case syntaxspec:
924*0b459c2cSDavid du Colombier printf ("/syntaxspec");
925*0b459c2cSDavid du Colombier mcnt = *p++;
926*0b459c2cSDavid du Colombier printf ("/%d", mcnt);
927*0b459c2cSDavid du Colombier break;
928*0b459c2cSDavid du Colombier
929*0b459c2cSDavid du Colombier case notsyntaxspec:
930*0b459c2cSDavid du Colombier printf ("/notsyntaxspec");
931*0b459c2cSDavid du Colombier mcnt = *p++;
932*0b459c2cSDavid du Colombier printf ("/%d", mcnt);
933*0b459c2cSDavid du Colombier break;
934*0b459c2cSDavid du Colombier #endif /* emacs */
935*0b459c2cSDavid du Colombier
936*0b459c2cSDavid du Colombier case wordchar:
937*0b459c2cSDavid du Colombier printf ("/wordchar");
938*0b459c2cSDavid du Colombier break;
939*0b459c2cSDavid du Colombier
940*0b459c2cSDavid du Colombier case notwordchar:
941*0b459c2cSDavid du Colombier printf ("/notwordchar");
942*0b459c2cSDavid du Colombier break;
943*0b459c2cSDavid du Colombier
944*0b459c2cSDavid du Colombier case begbuf:
945*0b459c2cSDavid du Colombier printf ("/begbuf");
946*0b459c2cSDavid du Colombier break;
947*0b459c2cSDavid du Colombier
948*0b459c2cSDavid du Colombier case endbuf:
949*0b459c2cSDavid du Colombier printf ("/endbuf");
950*0b459c2cSDavid du Colombier break;
951*0b459c2cSDavid du Colombier
952*0b459c2cSDavid du Colombier default:
953*0b459c2cSDavid du Colombier printf ("?%d", *(p-1));
954*0b459c2cSDavid du Colombier }
955*0b459c2cSDavid du Colombier
956*0b459c2cSDavid du Colombier putchar ('\n');
957*0b459c2cSDavid du Colombier }
958*0b459c2cSDavid du Colombier
959*0b459c2cSDavid du Colombier printf ("%d:\tend of pattern.\n", p - start);
960*0b459c2cSDavid du Colombier }
961*0b459c2cSDavid du Colombier
962*0b459c2cSDavid du Colombier
963*0b459c2cSDavid du Colombier void
print_compiled_pattern(bufp)964*0b459c2cSDavid du Colombier print_compiled_pattern (bufp)
965*0b459c2cSDavid du Colombier struct re_pattern_buffer *bufp;
966*0b459c2cSDavid du Colombier {
967*0b459c2cSDavid du Colombier unsigned char *buffer = bufp->buffer;
968*0b459c2cSDavid du Colombier
969*0b459c2cSDavid du Colombier print_partial_compiled_pattern (buffer, buffer + bufp->used);
970*0b459c2cSDavid du Colombier printf ("%d bytes used/%d bytes allocated.\n", bufp->used, bufp->allocated);
971*0b459c2cSDavid du Colombier
972*0b459c2cSDavid du Colombier if (bufp->fastmap_accurate && bufp->fastmap)
973*0b459c2cSDavid du Colombier {
974*0b459c2cSDavid du Colombier printf ("fastmap: ");
975*0b459c2cSDavid du Colombier print_fastmap (bufp->fastmap);
976*0b459c2cSDavid du Colombier }
977*0b459c2cSDavid du Colombier
978*0b459c2cSDavid du Colombier printf ("re_nsub: %d\t", bufp->re_nsub);
979*0b459c2cSDavid du Colombier printf ("regs_alloc: %d\t", bufp->regs_allocated);
980*0b459c2cSDavid du Colombier printf ("can_be_null: %d\t", bufp->can_be_null);
981*0b459c2cSDavid du Colombier printf ("newline_anchor: %d\n", bufp->newline_anchor);
982*0b459c2cSDavid du Colombier printf ("no_sub: %d\t", bufp->no_sub);
983*0b459c2cSDavid du Colombier printf ("not_bol: %d\t", bufp->not_bol);
984*0b459c2cSDavid du Colombier printf ("not_eol: %d\t", bufp->not_eol);
985*0b459c2cSDavid du Colombier printf ("syntax: %d\n", bufp->syntax);
986*0b459c2cSDavid du Colombier /* Perhaps we should print the translate table? */
987*0b459c2cSDavid du Colombier }
988*0b459c2cSDavid du Colombier
989*0b459c2cSDavid du Colombier
/* Print the text of the virtual concatenation of STRING1 (SIZE1 bytes)
   and STRING2 (SIZE2 bytes), starting at the position WHERE and running
   to the end of STRING2.  WHERE may point into either string; if it is
   NULL, `(null)' is printed instead.

   The index is a signed int, like the sizes it is compared with, so a
   non-positive size prints nothing rather than being converted to a
   huge unsigned bound.  */

void
print_double_string (where, string1, size1, string2, size2)
    const char *where;
    const char *string1;
    const char *string2;
    int size1;
    int size2;
{
  int this_char;

  if (where == NULL)
    printf ("(null)");
  else
    {
      if (FIRST_STRING_P (where))
        {
          for (this_char = where - string1; this_char < size1; this_char++)
            putchar (string1[this_char]);

          /* Continue from the beginning of the second string.  */
          where = string2;
        }

      for (this_char = where - string2; this_char < size2; this_char++)
        putchar (string2[this_char]);
    }
}
1016*0b459c2cSDavid du Colombier
1017*0b459c2cSDavid du Colombier #else /* not DEBUG */
1018*0b459c2cSDavid du Colombier
1019*0b459c2cSDavid du Colombier #undef assert
1020*0b459c2cSDavid du Colombier #define assert(e)
1021*0b459c2cSDavid du Colombier
1022*0b459c2cSDavid du Colombier #define DEBUG_STATEMENT(e)
1023*0b459c2cSDavid du Colombier #define DEBUG_PRINT1(x)
1024*0b459c2cSDavid du Colombier #define DEBUG_PRINT2(x1, x2)
1025*0b459c2cSDavid du Colombier #define DEBUG_PRINT3(x1, x2, x3)
1026*0b459c2cSDavid du Colombier #define DEBUG_PRINT4(x1, x2, x3, x4)
1027*0b459c2cSDavid du Colombier #define DEBUG_PRINT_COMPILED_PATTERN(p, s, e)
1028*0b459c2cSDavid du Colombier #define DEBUG_PRINT_DOUBLE_STRING(w, s1, sz1, s2, sz2)
1029*0b459c2cSDavid du Colombier
1030*0b459c2cSDavid du Colombier #endif /* not DEBUG */
1031*0b459c2cSDavid du Colombier
1032*0b459c2cSDavid du Colombier /* Set by `re_set_syntax' to the current regexp syntax to recognize. Can
1033*0b459c2cSDavid du Colombier also be assigned to arbitrarily: each pattern buffer stores its own
1034*0b459c2cSDavid du Colombier syntax, so it can be changed between regex compilations. */
1035*0b459c2cSDavid du Colombier /* This has no initializer because initialized variables in Emacs
1036*0b459c2cSDavid du Colombier become read-only after dumping. */
1037*0b459c2cSDavid du Colombier reg_syntax_t re_syntax_options;
1038*0b459c2cSDavid du Colombier
1039*0b459c2cSDavid du Colombier
1040*0b459c2cSDavid du Colombier /* Specify the precise syntax of regexps for compilation. This provides
1041*0b459c2cSDavid du Colombier for compatibility for various utilities which historically have
1042*0b459c2cSDavid du Colombier different, incompatible syntaxes.
1043*0b459c2cSDavid du Colombier
1044*0b459c2cSDavid du Colombier The argument SYNTAX is a bit mask comprised of the various bits
1045*0b459c2cSDavid du Colombier defined in regex.h. We return the old syntax. */
1046*0b459c2cSDavid du Colombier
1047*0b459c2cSDavid du Colombier reg_syntax_t
re_set_syntax(syntax)1048*0b459c2cSDavid du Colombier re_set_syntax (syntax)
1049*0b459c2cSDavid du Colombier reg_syntax_t syntax;
1050*0b459c2cSDavid du Colombier {
1051*0b459c2cSDavid du Colombier reg_syntax_t ret = re_syntax_options;
1052*0b459c2cSDavid du Colombier
1053*0b459c2cSDavid du Colombier re_syntax_options = syntax;
1054*0b459c2cSDavid du Colombier return ret;
1055*0b459c2cSDavid du Colombier }
1056*0b459c2cSDavid du Colombier
1057*0b459c2cSDavid du Colombier /* This table gives an error message for each of the error codes listed
1058*0b459c2cSDavid du Colombier in regex.h. Obviously the order here has to be same as there.
1059*0b459c2cSDavid du Colombier POSIX doesn't require that we do anything for REG_NOERROR,
1060*0b459c2cSDavid du Colombier but why not be nice? */
1061*0b459c2cSDavid du Colombier
1062*0b459c2cSDavid du Colombier static const char *re_error_msgid[] =
1063*0b459c2cSDavid du Colombier {
1064*0b459c2cSDavid du Colombier gettext_noop ("Success"), /* REG_NOERROR */
1065*0b459c2cSDavid du Colombier gettext_noop ("No match"), /* REG_NOMATCH */
1066*0b459c2cSDavid du Colombier gettext_noop ("Invalid regular expression"), /* REG_BADPAT */
1067*0b459c2cSDavid du Colombier gettext_noop ("Invalid collation character"), /* REG_ECOLLATE */
1068*0b459c2cSDavid du Colombier gettext_noop ("Invalid character class name"), /* REG_ECTYPE */
1069*0b459c2cSDavid du Colombier gettext_noop ("Trailing backslash"), /* REG_EESCAPE */
1070*0b459c2cSDavid du Colombier gettext_noop ("Invalid back reference"), /* REG_ESUBREG */
1071*0b459c2cSDavid du Colombier gettext_noop ("Unmatched [ or [^"), /* REG_EBRACK */
1072*0b459c2cSDavid du Colombier gettext_noop ("Unmatched ( or \\("), /* REG_EPAREN */
1073*0b459c2cSDavid du Colombier gettext_noop ("Unmatched \\{"), /* REG_EBRACE */
1074*0b459c2cSDavid du Colombier gettext_noop ("Invalid content of \\{\\}"), /* REG_BADBR */
1075*0b459c2cSDavid du Colombier gettext_noop ("Invalid range end"), /* REG_ERANGE */
1076*0b459c2cSDavid du Colombier gettext_noop ("Memory exhausted"), /* REG_ESPACE */
1077*0b459c2cSDavid du Colombier gettext_noop ("Invalid preceding regular expression"), /* REG_BADRPT */
1078*0b459c2cSDavid du Colombier gettext_noop ("Premature end of regular expression"), /* REG_EEND */
1079*0b459c2cSDavid du Colombier gettext_noop ("Regular expression too big"), /* REG_ESIZE */
1080*0b459c2cSDavid du Colombier gettext_noop ("Unmatched ) or \\)"), /* REG_ERPAREN */
1081*0b459c2cSDavid du Colombier };
1082*0b459c2cSDavid du Colombier
1083*0b459c2cSDavid du Colombier /* Avoiding alloca during matching, to placate r_alloc. */
1084*0b459c2cSDavid du Colombier
1085*0b459c2cSDavid du Colombier /* Define MATCH_MAY_ALLOCATE unless we need to make sure that the
1086*0b459c2cSDavid du Colombier searching and matching functions should not call alloca. On some
1087*0b459c2cSDavid du Colombier systems, alloca is implemented in terms of malloc, and if we're
1088*0b459c2cSDavid du Colombier using the relocating allocator routines, then malloc could cause a
1089*0b459c2cSDavid du Colombier relocation, which might (if the strings being searched are in the
1090*0b459c2cSDavid du Colombier ralloc heap) shift the data out from underneath the regexp
1091*0b459c2cSDavid du Colombier routines.
1092*0b459c2cSDavid du Colombier
1093*0b459c2cSDavid du Colombier Here's another reason to avoid allocation: Emacs
1094*0b459c2cSDavid du Colombier processes input from X in a signal handler; processing X input may
1095*0b459c2cSDavid du Colombier call malloc; if input arrives while a matching routine is calling
1096*0b459c2cSDavid du Colombier malloc, then we're scrod. But Emacs can't just block input while
1097*0b459c2cSDavid du Colombier calling matching routines; then we don't notice interrupts when
1098*0b459c2cSDavid du Colombier they come in. So, Emacs blocks input around all regexp calls
1099*0b459c2cSDavid du Colombier except the matching calls, which it leaves unprotected, in the
1100*0b459c2cSDavid du Colombier faith that they will not malloc. */
1101*0b459c2cSDavid du Colombier
/* By default the matching routines are allowed to allocate.  */
#define MATCH_MAY_ALLOCATE

/* Under GNU C the C-coded alloca is not really what gets used,
   whatever config.h may say, so discard C_ALLOCA and with it the
   precautions taken below.  */
#if defined (__GNUC__)
# undef C_ALLOCA
#endif

/* Inside Emacs, withdraw the permission whenever allocation would be
   done with malloc and malloc is not safe to call: that is, when
   either C_ALLOCA or REGEX_MALLOC is defined.  REL_ALLOC is
   deliberately not consulted: with it the failure stack would not use
   malloc, but the register vectors still would.  */
#if defined (emacs)
# if defined (C_ALLOCA) || defined (REGEX_MALLOC)
#  undef MATCH_MAY_ALLOCATE
# endif
#endif
1119*0b459c2cSDavid du Colombier
1120*0b459c2cSDavid du Colombier
1121*0b459c2cSDavid du Colombier /* Failure stack declarations and macros; both re_compile_fastmap and
1122*0b459c2cSDavid du Colombier re_match_2 use a failure stack. These have to be macros because of
1123*0b459c2cSDavid du Colombier REGEX_ALLOCATE_STACK. */
1124*0b459c2cSDavid du Colombier
1125*0b459c2cSDavid du Colombier
/* Approximate number of failure points the first failure-stack
   allocation made while matching is sized for.  More space is
   allocated when this is exceeded, so it is not a hard limit.  A
   definition made before this point takes precedence.  */
#if !defined (INIT_FAILURE_ALLOC)
# define INIT_FAILURE_ALLOC 20
#endif

/* Roughly the maximum number of failure points on the stack; it would
   be exactly that if every failure used TYPICAL_FAILURE_SIZE items.
   This is a variable only so that users of regex can assign to it;
   nothing here ever changes it.  */
#ifndef MATCH_MAY_ALLOCATE
int re_max_failures = 4000;
#else
/* Note that 4400 is enough to cause a crash on Alpha OSF/1, whose
   default stack limit is 2mb.  For a larger value to work reliably
   it has to be made to accord with the process stack limit.  */
int re_max_failures = 40000;
#endif
1146*0b459c2cSDavid du Colombier
/* One slot of the failure stack.  A slot holds either a pointer or an
   integer, depending on which PUSH_FAILURE_... macro filled it.  */
typedef union fail_stack_elt
{
  unsigned char *pointer;
  int integer;
} fail_stack_elt_t;

/* The failure stack: a vector of slots plus its bookkeeping.  */
typedef struct
{
  fail_stack_elt_t *stack;      /* The slots themselves.  */
  unsigned int size;            /* Number of slots the stack is taken to hold.  */
  unsigned int avail;           /* Offset of next open position.  */
} fail_stack_type;
1161*0b459c2cSDavid du Colombier
/* Predicates on the failure stack.  FAIL_STACK_EMPTY and
   FAIL_STACK_FULL look at the variable `fail_stack';
   FAIL_STACK_PTR_EMPTY looks through the pointer `fail_stack_ptr'.  */
#define FAIL_STACK_EMPTY()     (!fail_stack.avail)
#define FAIL_STACK_PTR_EMPTY() (!fail_stack_ptr->avail)
#define FAIL_STACK_FULL()      (fail_stack.size == fail_stack.avail)
1165*0b459c2cSDavid du Colombier
1166*0b459c2cSDavid du Colombier
/* INIT_FAIL_STACK readies the variable `fail_stack' for use;
   RESET_FAIL_STACK releases what INIT_FAIL_STACK obtained.  The
   allocating form of INIT_FAIL_STACK does `return -2' from the
   enclosing function if the allocation fails.  */

#ifndef MATCH_MAY_ALLOCATE

/* Nothing is allocated or freed in this configuration; only the fill
   level is reset.  */
# define INIT_FAIL_STACK()                                              \
  do {                                                                  \
    fail_stack.avail = 0;                                               \
  } while (0)

# define RESET_FAIL_STACK()

#else /* MATCH_MAY_ALLOCATE */

/* Space for INIT_FAILURE_ALLOC * TYPICAL_FAILURE_SIZE elements is
   requested, while the `size' recorded is INIT_FAILURE_ALLOC.  */
# define INIT_FAIL_STACK()                                              \
  do {                                                                  \
    fail_stack.stack                                                    \
      = (fail_stack_elt_t *)                                            \
        REGEX_ALLOCATE_STACK (INIT_FAILURE_ALLOC * TYPICAL_FAILURE_SIZE \
                              * sizeof (fail_stack_elt_t));             \
    if (!fail_stack.stack)                                              \
      return -2;                                                        \
                                                                        \
    fail_stack.avail = 0;                                               \
    fail_stack.size = INIT_FAILURE_ALLOC;                               \
  } while (0)

# define RESET_FAIL_STACK() REGEX_FREE_STACK (fail_stack.stack)

#endif /* MATCH_MAY_ALLOCATE */
1193*0b459c2cSDavid du Colombier
1194*0b459c2cSDavid du Colombier
/* Enlarge FAIL_STACK by FAIL_STACK_GROWTH_FACTOR, up to a limit which
   allows approximately `re_max_failures' items.

   Yields 1 if it succeeds, and 0 if either the stack was already too
   large or the reallocation produced NULL.

   REGEX_REALLOCATE_STACK requires `destination' be declared.  */

/* Factor to increase the failure stack size by when we increase it.
   This used to be 2, but 2 was too wasteful because the old discarded
   stacks added up to as much space as the ultimate, maximum-size
   stack.  */
#define FAIL_STACK_GROWTH_FACTOR 4

/* Present size of FAIL_STACK, in bytes.  */
#define FAIL_STACK_CUR_BYTES(fail_stack)                                \
  ((fail_stack).size * sizeof (fail_stack_elt_t))

/* Size in bytes FAIL_STACK is to have once grown: the present size
   times the growth factor, capped at
   re_max_failures * TYPICAL_FAILURE_SIZE.  */
#define FAIL_STACK_NEXT_BYTES(fail_stack)                               \
  (MIN (re_max_failures * TYPICAL_FAILURE_SIZE,                         \
        (FAIL_STACK_CUR_BYTES (fail_stack) * FAIL_STACK_GROWTH_FACTOR)))

/* The `size' member is updated only after the reallocation, because
   both byte counts passed to REGEX_REALLOCATE_STACK are derived from
   the old value.  */
#define GROW_FAIL_STACK(fail_stack)                                     \
  ((FAIL_STACK_CUR_BYTES (fail_stack)                                   \
      < re_max_failures * TYPICAL_FAILURE_SIZE                          \
    && ((fail_stack).stack                                              \
          = (fail_stack_elt_t *)                                        \
            REGEX_REALLOCATE_STACK ((fail_stack).stack,                 \
                                    FAIL_STACK_CUR_BYTES (fail_stack),  \
                                    FAIL_STACK_NEXT_BYTES (fail_stack))) \
       != NULL)                                                         \
   ? ((fail_stack).size                                                 \
        = (FAIL_STACK_NEXT_BYTES (fail_stack)                           \
           / sizeof (fail_stack_elt_t)),                                \
      1)                                                                \
   : 0)
1230*0b459c2cSDavid du Colombier
1231*0b459c2cSDavid du Colombier
/* Push pointer POINTER on FAIL_STACK.
   Yields 1 if it was able to do so and 0 if FAIL_STACK was full and
   GROW_FAIL_STACK could not enlarge it.

   The fullness test is made on FAIL_STACK itself rather than through
   FAIL_STACK_FULL, which always inspects the variable `fail_stack';
   the macro therefore works for whichever stack it is handed.
   FAIL_STACK is evaluated several times, so it must be free of side
   effects.  */
#define PUSH_PATTERN_OP(POINTER, FAIL_STACK)                            \
  ((((FAIL_STACK).avail == (FAIL_STACK).size)                           \
    && !GROW_FAIL_STACK (FAIL_STACK))                                   \
   ? 0                                                                  \
   : ((FAIL_STACK).stack[(FAIL_STACK).avail++].pointer = (POINTER),     \
      1))
1241*0b459c2cSDavid du Colombier
/* The next open slot of the variable `fail_stack'; taking it advances
   `avail'.  No check is made that a slot is actually free.  */
#define FAIL_STACK_PUSH_SLOT() (fail_stack.stack[fail_stack.avail++])

/* The three macros below each store one item in the next open slot.
   All assume the variable `fail_stack', and probably should only be
   called from within `PUSH_FAILURE_POINT'.  */

/* Push a pointer value; ITEM is converted to `unsigned char *'.  */
#define PUSH_FAILURE_POINTER(item)                                      \
  (FAIL_STACK_PUSH_SLOT ().pointer = (unsigned char *) (item))

/* Push an integer-valued item.  */
#define PUSH_FAILURE_INT(item)                                          \
  (FAIL_STACK_PUSH_SLOT ().integer = (item))

/* Push a whole fail_stack_elt_t value.  */
#define PUSH_FAILURE_ELT(item)                                          \
  (FAIL_STACK_PUSH_SLOT () = (item))
1259*0b459c2cSDavid du Colombier
/* The most recently filled slot of the variable `fail_stack'; taking
   it moves `avail' back by one.  */
#define FAIL_STACK_POP_SLOT() (fail_stack.stack[--fail_stack.avail])

/* These three POP... operations complement the three PUSH...
   operations.  All assume that `fail_stack' is nonempty.  */
#define POP_FAILURE_POINTER() (FAIL_STACK_POP_SLOT ().pointer)
#define POP_FAILURE_INT()     (FAIL_STACK_POP_SLOT ().integer)
#define POP_FAILURE_ELT()     FAIL_STACK_POP_SLOT ()
1265*0b459c2cSDavid du Colombier
/* Failure point ids are pushed and popped only when debugging;
   otherwise both macros expand to nothing.  */
#ifndef DEBUG
# define DEBUG_PUSH(item)
# define DEBUG_POP(item_addr)
#else
# define DEBUG_PUSH PUSH_FAILURE_INT
# define DEBUG_POP(item_addr) *(item_addr) = POP_FAILURE_INT ()
#endif
1274*0b459c2cSDavid du Colombier
1275*0b459c2cSDavid du Colombier
/* Save on the failure stack the state needed to resume from this
   point if a later part of the match fails back to it.

   PATTERN_PLACE and STRING_PLACE are the pattern and string positions
   to record.  If the stack has to grow and cannot, the macro does
   `return FAILURE_CODE' from the enclosing function.

   Pushed, in this order: for each register from lowest_active_reg
   through highest_active_reg its start, its end and its info word;
   then lowest_active_reg, highest_active_reg, PATTERN_PLACE,
   STRING_PLACE and, through DEBUG_PUSH, the failure id.

   Requires the variables fail_stack, regstart, regend, reg_info,
   lowest_active_reg and highest_active_reg.  GROW_FAIL_STACK requires
   `destination', which is declared here.  */

#define PUSH_FAILURE_POINT(pattern_place, string_place, failure_code)   \
  do {                                                                  \
    char *destination;                                                  \
    /* Must be int, so when we don't save any registers, the arithmetic \
       of 0 + -1 isn't done as unsigned.  */                            \
    int this_reg;                                                       \
                                                                        \
    DEBUG_STATEMENT (failure_id++);                                     \
    DEBUG_STATEMENT (nfailure_points_pushed++);                         \
    DEBUG_PRINT2 ("\nPUSH_FAILURE_POINT #%u:\n", failure_id);           \
    DEBUG_PRINT2 (" Before push, next avail: %d\n", (fail_stack).avail);\
    DEBUG_PRINT2 (" size: %d\n", (fail_stack).size);                    \
                                                                        \
    DEBUG_PRINT2 (" slots needed: %d\n", NUM_FAILURE_ITEMS);            \
    DEBUG_PRINT2 (" available: %d\n", REMAINING_AVAIL_SLOTS);           \
                                                                        \
    /* Grow until everything pushed below is certain to fit.  */        \
    while (REMAINING_AVAIL_SLOTS < NUM_FAILURE_ITEMS)                   \
      {                                                                 \
        if (!GROW_FAIL_STACK (fail_stack))                              \
          return failure_code;                                          \
                                                                        \
        DEBUG_PRINT2 ("\n Doubled stack; size now: %d\n",               \
                      (fail_stack).size);                               \
        DEBUG_PRINT2 (" slots available: %d\n", REMAINING_AVAIL_SLOTS); \
      }                                                                 \
                                                                        \
    /* Registers first, lowest to highest.  */                          \
    DEBUG_PRINT1 ("\n");                                                \
                                                                        \
    this_reg = lowest_active_reg;                                       \
    while (this_reg <= highest_active_reg)                              \
      {                                                                 \
        DEBUG_PRINT2 (" Pushing reg: %d\n", this_reg);                  \
        DEBUG_STATEMENT (num_regs_pushed++);                            \
                                                                        \
        DEBUG_PRINT2 (" start: 0x%x\n", regstart[this_reg]);            \
        PUSH_FAILURE_POINTER (regstart[this_reg]);                      \
                                                                        \
        DEBUG_PRINT2 (" end: 0x%x\n", regend[this_reg]);                \
        PUSH_FAILURE_POINTER (regend[this_reg]);                        \
                                                                        \
        DEBUG_PRINT2 (" info: 0x%x\n ", reg_info[this_reg]);            \
        DEBUG_PRINT2 (" match_null=%d",                                 \
                      REG_MATCH_NULL_STRING_P (reg_info[this_reg]));    \
        DEBUG_PRINT2 (" active=%d", IS_ACTIVE (reg_info[this_reg]));    \
        DEBUG_PRINT2 (" matched_something=%d",                          \
                      MATCHED_SOMETHING (reg_info[this_reg]));          \
        DEBUG_PRINT2 (" ever_matched=%d",                               \
                      EVER_MATCHED_SOMETHING (reg_info[this_reg]));     \
        DEBUG_PRINT1 ("\n");                                            \
        PUSH_FAILURE_ELT (reg_info[this_reg].word);                     \
                                                                        \
        this_reg++;                                                     \
      }                                                                 \
                                                                        \
    /* Then the register bounds, so a pop knows how many to restore.  */\
    DEBUG_PRINT2 (" Pushing low active reg: %d\n", lowest_active_reg);  \
    PUSH_FAILURE_INT (lowest_active_reg);                               \
                                                                        \
    DEBUG_PRINT2 (" Pushing high active reg: %d\n", highest_active_reg);\
    PUSH_FAILURE_INT (highest_active_reg);                              \
                                                                        \
    /* Finally the two positions and, when debugging, the id.  */       \
    DEBUG_PRINT2 (" Pushing pattern 0x%x: ", pattern_place);            \
    DEBUG_PRINT_COMPILED_PATTERN (bufp, pattern_place, pend);           \
    PUSH_FAILURE_POINTER (pattern_place);                               \
                                                                        \
    DEBUG_PRINT2 (" Pushing string 0x%x: `", string_place);             \
    DEBUG_PRINT_DOUBLE_STRING (string_place, string1, size1, string2,   \
                               size2);                                  \
    DEBUG_PRINT1 ("'\n");                                               \
    PUSH_FAILURE_POINTER (string_place);                                \
                                                                        \
    DEBUG_PRINT2 (" Pushing failure id: %u\n", failure_id);             \
    DEBUG_PUSH (failure_id);                                            \
  } while (0)
1359*0b459c2cSDavid du Colombier
/* Each register saved in a failure point takes this many stack items
   (they are pushed and popped together).  */
#define NUM_REG_ITEMS 3

/* Items in a failure point that do not belong to any register.  When
   debugging there is one more, the failure point id.  */
#ifndef DEBUG
# define NUM_NONREG_ITEMS 4
#else
# define NUM_NONREG_ITEMS 5
#endif

/* Estimated size of the data pushed by a typical failure stack entry.
   An estimate is enough: it is only used to choose a limit for how
   big the failure stack may become.  */
#define TYPICAL_FAILURE_SIZE 20

/* Number of items one failure point really occupies right now.  It
   depends on the regexp, through the span of active registers.  */
#define NUM_FAILURE_ITEMS                                               \
  (NUM_NONREG_ITEMS                                                     \
   + NUM_REG_ITEMS * (highest_active_reg - lowest_active_reg + 1))

/* Number of items that still fit on the stack without overflowing
   it.  */
#define REMAINING_AVAIL_SLOTS ((fail_stack).size - (fail_stack).avail)
1387*0b459c2cSDavid du Colombier
1388*0b459c2cSDavid du Colombier
/* Pops what PUSH_FAILURE_POINT pushes.

   We restore into the parameters, all of which should be lvalues:
     STR -- the saved data position.
     PAT -- the saved pattern position.
     LOW_REG, HIGH_REG -- the lowest and highest active registers.
     REGSTART, REGEND -- arrays of string positions.
     REG_INFO -- array of information about each subexpression.

   Items come off in the reverse of the order they were pushed: the
   failure id (debugging only), the string position, the pattern
   position, HIGH_REG, LOW_REG, and then for each register from
   HIGH_REG down to LOW_REG its info word, end and start.

   Also assumes the variables `fail_stack' and `set_regs_matched_done'
   (which is cleared) and, if debugging, `bufp', `pend', `string1',
   `size1', `string2', `size2' and `nfailure_points_popped'.

   The expansion is a bare brace block, not a do-while.  */

#define POP_FAILURE_POINT(str, pat, low_reg, high_reg, regstart, regend, reg_info)\
{                                                                       \
  /* Plain unsigned: DEBUG_POP stores an int here and it is printed     \
     with %u.  */                                                       \
  DEBUG_STATEMENT (unsigned failure_id;)                                \
  int this_reg;                                                         \
  const unsigned char *string_temp;                                     \
                                                                        \
  assert (!FAIL_STACK_EMPTY ());                                        \
                                                                        \
  /* Remove failure points and point to how many regs pushed.  */       \
  DEBUG_PRINT1 ("POP_FAILURE_POINT:\n");                                \
  DEBUG_PRINT2 (" Before pop, next avail: %d\n", fail_stack.avail);     \
  DEBUG_PRINT2 (" size: %d\n", fail_stack.size);                        \
                                                                        \
  assert (fail_stack.avail >= NUM_NONREG_ITEMS);                        \
                                                                        \
  DEBUG_POP (&failure_id);                                              \
  DEBUG_PRINT2 (" Popping failure id: %u\n", failure_id);               \
                                                                        \
  /* If the saved string location is NULL, it came from an              \
     on_failure_keep_string_jump opcode, and we want to throw away the  \
     saved NULL, thus retaining our current position in the string.  */ \
  string_temp = POP_FAILURE_POINTER ();                                 \
  if (string_temp != NULL)                                              \
    str = (const char *) string_temp;                                   \
                                                                        \
  DEBUG_PRINT2 (" Popping string 0x%x: `", str);                        \
  DEBUG_PRINT_DOUBLE_STRING (str, string1, size1, string2, size2);      \
  DEBUG_PRINT1 ("'\n");                                                 \
                                                                        \
  pat = (unsigned char *) POP_FAILURE_POINTER ();                       \
  DEBUG_PRINT2 (" Popping pattern 0x%x: ", pat);                        \
  DEBUG_PRINT_COMPILED_PATTERN (bufp, pat, pend);                       \
                                                                        \
  /* Restore register info.  */                                         \
  high_reg = (unsigned) POP_FAILURE_INT ();                             \
  DEBUG_PRINT2 (" Popping high active reg: %d\n", high_reg);            \
                                                                        \
  low_reg = (unsigned) POP_FAILURE_INT ();                              \
  DEBUG_PRINT2 (" Popping low active reg: %d\n", low_reg);              \
                                                                        \
  for (this_reg = high_reg; this_reg >= low_reg; this_reg--)            \
    {                                                                   \
      DEBUG_PRINT2 (" Popping reg: %d\n", this_reg);                    \
                                                                        \
      reg_info[this_reg].word = POP_FAILURE_ELT ();                     \
      DEBUG_PRINT2 (" info: 0x%x\n", reg_info[this_reg]);               \
                                                                        \
      regend[this_reg] = (const char *) POP_FAILURE_POINTER ();         \
      DEBUG_PRINT2 (" end: 0x%x\n", regend[this_reg]);                  \
                                                                        \
      regstart[this_reg] = (const char *) POP_FAILURE_POINTER ();       \
      DEBUG_PRINT2 (" start: 0x%x\n", regstart[this_reg]);              \
    }                                                                   \
                                                                        \
  set_regs_matched_done = 0;                                            \
  DEBUG_STATEMENT (nfailure_points_popped++);                           \
} /* POP_FAILURE_POINT */
1469*0b459c2cSDavid du Colombier
1470*0b459c2cSDavid du Colombier
1471*0b459c2cSDavid du Colombier
1472*0b459c2cSDavid du Colombier /* Structure for per-register (a.k.a. per-group) information.
1473*0b459c2cSDavid du Colombier Other register information, such as the
1474*0b459c2cSDavid du Colombier starting and ending positions (which are addresses), and the list of
1475*0b459c2cSDavid du Colombier inner groups (which is a bits list) are maintained in separate
1476*0b459c2cSDavid du Colombier variables.
1477*0b459c2cSDavid du Colombier
1478*0b459c2cSDavid du Colombier We are making a (strictly speaking) nonportable assumption here: that
1479*0b459c2cSDavid du Colombier the compiler will pack our bit fields into something that fits into
1480*0b459c2cSDavid du Colombier the type of `word', i.e., is something that fits into one item on the
1481*0b459c2cSDavid du Colombier failure stack. */
1482*0b459c2cSDavid du Colombier
1483*0b459c2cSDavid du Colombier typedef union
1484*0b459c2cSDavid du Colombier {
1485*0b459c2cSDavid du Colombier fail_stack_elt_t word;
1486*0b459c2cSDavid du Colombier struct
1487*0b459c2cSDavid du Colombier {
1488*0b459c2cSDavid du Colombier /* This field is one if this group can match the empty string,
1489*0b459c2cSDavid du Colombier zero if not. If not yet determined, `MATCH_NULL_UNSET_VALUE'. */
1490*0b459c2cSDavid du Colombier #define MATCH_NULL_UNSET_VALUE 3
1491*0b459c2cSDavid du Colombier unsigned match_null_string_p : 2;
1492*0b459c2cSDavid du Colombier unsigned is_active : 1;
1493*0b459c2cSDavid du Colombier unsigned matched_something : 1;
1494*0b459c2cSDavid du Colombier unsigned ever_matched_something : 1;
1495*0b459c2cSDavid du Colombier } bits;
1496*0b459c2cSDavid du Colombier } register_info_type;
1497*0b459c2cSDavid du Colombier
1498*0b459c2cSDavid du Colombier #define REG_MATCH_NULL_STRING_P(R) ((R).bits.match_null_string_p)
1499*0b459c2cSDavid du Colombier #define IS_ACTIVE(R) ((R).bits.is_active)
1500*0b459c2cSDavid du Colombier #define MATCHED_SOMETHING(R) ((R).bits.matched_something)
1501*0b459c2cSDavid du Colombier #define EVER_MATCHED_SOMETHING(R) ((R).bits.ever_matched_something)
1502*0b459c2cSDavid du Colombier
1503*0b459c2cSDavid du Colombier
/* Call this when a real character has been matched.  It sets the
   `matched_something' and `ever_matched_something' flags of every
   register from lowest_active_reg through highest_active_reg, i.e.
   of the subexpressions we are currently inside.

   The work is done only while `set_regs_matched_done' is zero, and
   the flag is set as part of doing it; further calls do nothing
   until something clears the flag again.  */
#define SET_REGS_MATCHED()                                              \
  do                                                                    \
    {                                                                   \
      unsigned reg_no;                                                  \
                                                                        \
      if (set_regs_matched_done)                                        \
        break;                                                          \
                                                                        \
      set_regs_matched_done = 1;                                        \
      for (reg_no = lowest_active_reg; reg_no <= highest_active_reg;    \
           reg_no++)                                                    \
        {                                                               \
          EVER_MATCHED_SOMETHING (reg_info[reg_no]) = 1;                \
          MATCHED_SOMETHING (reg_info[reg_no]) = 1;                     \
        }                                                               \
    }                                                                   \
  while (0)
1523*0b459c2cSDavid du Colombier
/* Registers are set to a sentinel when they haven't yet matched.  The
   sentinel is the address of a dummy object private to this file;
   REG_UNSET (E) tests whether E is that sentinel.  */
static char reg_unset_dummy;
#define REG_UNSET_VALUE (&reg_unset_dummy)
#define REG_UNSET(e) ((e) == REG_UNSET_VALUE)
1528*0b459c2cSDavid du Colombier
1529*0b459c2cSDavid du Colombier /* Subroutine declarations and macros for regex_compile. */
1530*0b459c2cSDavid du Colombier
1531*0b459c2cSDavid du Colombier static void store_op1 (), store_op2 ();
1532*0b459c2cSDavid du Colombier static void insert_op1 (), insert_op2 ();
1533*0b459c2cSDavid du Colombier static boolean at_begline_loc_p (), at_endline_loc_p ();
1534*0b459c2cSDavid du Colombier static boolean group_in_compile_stack ();
1535*0b459c2cSDavid du Colombier static reg_errcode_t compile_range ();
1536*0b459c2cSDavid du Colombier
/* Fetch the next character in the uncompiled pattern, with no
   translation, into C.  The character is cast from the (possibly
   signed) char of the pattern string to an unsigned char, so that it
   can be used as an array index.  Does `return REG_EEND' if the
   pattern is exhausted (p == pend).  */
#define PATFETCH_RAW(c)                                                 \
  do                                                                    \
    {                                                                   \
      if (p == pend)                                                    \
        return REG_EEND;                                                \
      c = (unsigned char) *p++;                                         \
    }                                                                   \
  while (0)

/* Like PATFETCH_RAW, but afterwards translate the character through
   `translate' when RE_TRANSLATE_P says a translation is in effect.
   A definition of PATFETCH made before this point is left alone.  */
#ifndef PATFETCH
# define PATFETCH(c)                                                    \
  do                                                                    \
    {                                                                   \
      PATFETCH_RAW (c);                                                 \
      if (RE_TRANSLATE_P (translate))                                   \
        c = RE_TRANSLATE (translate, c);                                \
    }                                                                   \
  while (0)
#endif

/* Go backwards one character in the pattern.  */
#define PATUNFETCH p--
1558*0b459c2cSDavid du Colombier
1559*0b459c2cSDavid du Colombier
/* When a translation is in effect (RE_TRANSLATE_P (translate)), yield
   the translation of D, else just D.  The subscript handed to
   RE_TRANSLATE is cast because some data is declared as `char *', to
   avoid warnings when a string constant is passed; a character used
   as a subscript must be made unsigned.
   NOTE(review): the cast is to `unsigned', not `unsigned char', so a
   negative char value would turn into a very large subscript --
   confirm that callers only pass non-negative values.
   A definition of TRANSLATE made before this point is left alone.  */
#ifndef TRANSLATE
# define TRANSLATE(d)                                                   \
  (!(RE_TRANSLATE_P (translate))                                        \
   ? (d)                                                                \
   : (unsigned) RE_TRANSLATE (translate, (unsigned) (d)))
#endif
1569*0b459c2cSDavid du Colombier
1570*0b459c2cSDavid du Colombier
/* Macros for outputting the compiled pattern into `buffer'.  */

/* Size to use for the buffer if it isn't allocated when it comes
   in.  */
#define INIT_BUF_SIZE 32

/* Make sure there are at least N more bytes of space in the buffer,
   invoking EXTEND_BUFFER for as long as the bytes already written
   (b - bufp->buffer) plus N exceed bufp->allocated.  */
#define GET_BUFFER_SPACE(n)                                             \
    while (bufp->allocated < b - bufp->buffer + (n))                    \
      EXTEND_BUFFER ()
1580*0b459c2cSDavid du Colombier
/* Store C, converted to unsigned char, at `b' and step `b' past
   it.  No space check is made here.  */
#define BUF_APPEND_BYTE(c) (*b++ = (unsigned char) (c))

/* Make sure we have one more byte of buffer space and then add C to
   it.  */
#define BUF_PUSH(c)                                                     \
  do {                                                                  \
    GET_BUFFER_SPACE (1);                                               \
    BUF_APPEND_BYTE (c);                                                \
  } while (0)


/* Ensure we have two more bytes of buffer space and then append C1
   and C2, in that order.  */
#define BUF_PUSH_2(c1, c2)                                              \
  do {                                                                  \
    GET_BUFFER_SPACE (2);                                               \
    BUF_APPEND_BYTE (c1);                                               \
    BUF_APPEND_BYTE (c2);                                               \
  } while (0)


/* As with BUF_PUSH_2, except for three bytes.  */
#define BUF_PUSH_3(c1, c2, c3)                                          \
  do {                                                                  \
    GET_BUFFER_SPACE (3);                                               \
    BUF_APPEND_BYTE (c1);                                               \
    BUF_APPEND_BYTE (c2);                                               \
    BUF_APPEND_BYTE (c3);                                               \
  } while (0)
1606*0b459c2cSDavid du Colombier
1607*0b459c2cSDavid du Colombier
/* Write a jump with opcode OP at LOC whose target is TO.  The stored
   value is the distance from LOC to TO less the three bytes taken up
   by the jump instruction itself.  */
#define STORE_JUMP(op, loc, to)                                         \
  store_op1 ((op), (loc), ((to) - (loc)) - 3)

/* Same as STORE_JUMP, for a jump that carries a second argument ARG.  */
#define STORE_JUMP2(op, loc, to, arg)                                   \
  store_op2 ((op), (loc), ((to) - (loc)) - 3, (arg))

/* Inserting counterpart of STORE_JUMP.  `b' is passed as the current
   end of the buffer.  */
#define INSERT_JUMP(op, loc, to)                                        \
  insert_op1 ((op), (loc), ((to) - (loc)) - 3, b)

/* Inserting counterpart of STORE_JUMP2.  `b' is passed as the current
   end of the buffer.  */
#define INSERT_JUMP2(op, loc, to, arg)                                  \
  insert_op2 ((op), (loc), ((to) - (loc)) - 3, (arg), b)
1624*0b459c2cSDavid du Colombier
1625*0b459c2cSDavid du Colombier
/* Upper bound on the compiled pattern size.  The bound is structural,
   not arbitrary: arguments that encode offsets into the pattern are
   two bytes wide, so raising it past 2^16 bytes would require many
   other changes.  */
#define MAX_BUF_SIZE (1L << 16)
1630*0b459c2cSDavid du Colombier
1631*0b459c2cSDavid du Colombier
/* Double the size of the compiled-pattern buffer (clamped to
   MAX_BUF_SIZE) via realloc, then re-point `b', `begalt',
   `fixup_alt_jump', `laststart' and `pending_exact' at the same
   offsets inside the new block.

   Leaves the enclosing function with REG_ESIZE when the buffer is
   already MAX_BUF_SIZE bytes, or with REG_ESPACE when realloc fails.
   Both exits go through FREE_STACK_RETURN so that the compile stack
   and the range-table work area are released as well.

   The offsets are captured before realloc is called, because pointers
   into the old block must not be used once it may have been freed.
   When realloc fails, BUFP is left untouched: `buffer' still points to
   the old block and `allocated' still describes it.  */
#define EXTEND_BUFFER()                                                 \
  do {                                                                  \
    unsigned char *old_buffer = bufp->buffer;                           \
    unsigned char *new_buffer;                                          \
    unsigned long new_allocated;                                        \
    long b_off, begalt_off;                                             \
    long fixup_off, laststart_off, pending_off;                         \
    if (bufp->allocated == MAX_BUF_SIZE)                                \
      FREE_STACK_RETURN (REG_ESIZE);                                    \
    new_allocated = (unsigned long) bufp->allocated << 1;               \
    if (new_allocated > MAX_BUF_SIZE)                                   \
      new_allocated = MAX_BUF_SIZE;                                     \
    /* Remember where everything is; -1 marks a null pointer.  */       \
    b_off = b - old_buffer;                                             \
    begalt_off = begalt - old_buffer;                                   \
    fixup_off = fixup_alt_jump ? fixup_alt_jump - old_buffer : -1;      \
    laststart_off = laststart ? laststart - old_buffer : -1;            \
    pending_off = pending_exact ? pending_exact - old_buffer : -1;      \
    new_buffer = (unsigned char *) realloc (old_buffer, new_allocated); \
    if (new_buffer == NULL)                                             \
      FREE_STACK_RETURN (REG_ESPACE);                                   \
    bufp->buffer = new_buffer;                                          \
    bufp->allocated = new_allocated;                                    \
    /* Rebuild every pointer into the buffer from its saved offset.  */ \
    b = new_buffer + b_off;                                             \
    begalt = new_buffer + begalt_off;                                   \
    if (fixup_off >= 0)                                                 \
      fixup_alt_jump = new_buffer + fixup_off;                          \
    if (laststart_off >= 0)                                             \
      laststart = new_buffer + laststart_off;                           \
    if (pending_off >= 0)                                               \
      pending_exact = new_buffer + pending_off;                         \
  } while (0)
1660*0b459c2cSDavid du Colombier
1661*0b459c2cSDavid du Colombier
/* The register number argument of {start,stop}_memory occupies a
   single byte, so this is the largest group number that can be
   reported on.  */
#define MAX_REGNUM 255

/* A pattern may nevertheless contain more than MAX_REGNUM groups; the
   excess ones are simply ignored.  Hence the counter type is wider
   than one byte.  */
typedef unsigned int regnum_t;
1670*0b459c2cSDavid du Colombier
1671*0b459c2cSDavid du Colombier
1672*0b459c2cSDavid du Colombier /* Macros for the compile stack. */
1673*0b459c2cSDavid du Colombier
1674*0b459c2cSDavid du Colombier /* Since offsets can go either forwards or backwards, this type needs to
1675*0b459c2cSDavid du Colombier be able to hold values from -(MAX_BUF_SIZE - 1) to MAX_BUF_SIZE - 1. */
1676*0b459c2cSDavid du Colombier typedef int pattern_offset_t;
1677*0b459c2cSDavid du Colombier
1678*0b459c2cSDavid du Colombier typedef struct
1679*0b459c2cSDavid du Colombier {
1680*0b459c2cSDavid du Colombier pattern_offset_t begalt_offset;
1681*0b459c2cSDavid du Colombier pattern_offset_t fixup_alt_jump;
1682*0b459c2cSDavid du Colombier pattern_offset_t inner_group_offset;
1683*0b459c2cSDavid du Colombier pattern_offset_t laststart_offset;
1684*0b459c2cSDavid du Colombier regnum_t regnum;
1685*0b459c2cSDavid du Colombier } compile_stack_elt_t;
1686*0b459c2cSDavid du Colombier
1687*0b459c2cSDavid du Colombier
1688*0b459c2cSDavid du Colombier typedef struct
1689*0b459c2cSDavid du Colombier {
1690*0b459c2cSDavid du Colombier compile_stack_elt_t *stack;
1691*0b459c2cSDavid du Colombier unsigned size;
1692*0b459c2cSDavid du Colombier unsigned avail; /* Offset of next open position. */
1693*0b459c2cSDavid du Colombier } compile_stack_type;
1694*0b459c2cSDavid du Colombier
1695*0b459c2cSDavid du Colombier
/* Number of entries the compile stack starts out with.  */
#define INIT_COMPILE_STACK_SIZE 32

/* Nonzero when no entry is in use / when every entry is in use.  Both
   refer to the variable `compile_stack' in the expanding scope.  */
#define COMPILE_STACK_EMPTY  (!compile_stack.avail)
#define COMPILE_STACK_FULL   (compile_stack.size == compile_stack.avail)

/* The next available element (an lvalue).  */
#define COMPILE_STACK_TOP (compile_stack.stack[compile_stack.avail])
1703*0b459c2cSDavid du Colombier
1704*0b459c2cSDavid du Colombier
/* Bookkeeping for the scratch array used while building a range
   table.  Note the differing units: `allocated' counts bytes, `used'
   counts ints.  */
struct range_table_work_area
{
  int *table;     /* The work area itself.  */
  int allocated;  /* Size of the work area, in bytes.  */
  int used;       /* Portion actually in use, in words (ints).  */
};
1712*0b459c2cSDavid du Colombier
/* Make sure WORK_AREA has room for N more entries beyond those already
   used.  The area grows in steps of 16 ints, as many steps as the
   request needs.  N is evaluated exactly once.

   If the allocation fails, the enclosing function returns REG_ESPACE
   through FREE_STACK_RETURN.  WORK_AREA still owns its old table at
   that point, so the old table is released rather than leaked.  */
#define EXTEND_RANGE_TABLE_WORK_AREA(work_area, n)                      \
  do {                                                                  \
    unsigned long needed_bytes                                          \
      = ((work_area).used + (n)) * sizeof (int);                        \
    if (needed_bytes > (work_area).allocated)                           \
      {                                                                 \
        int *grown_table;                                               \
        int grown_size = (work_area).allocated;                         \
        while (needed_bytes > (unsigned long) grown_size)               \
          grown_size += 16 * sizeof (int);                              \
        if ((work_area).table)                                          \
          grown_table                                                   \
            = (int *) realloc ((work_area).table, grown_size);          \
        else                                                            \
          grown_table = (int *) malloc (grown_size);                    \
        if (grown_table == 0)                                           \
          FREE_STACK_RETURN (REG_ESPACE);                               \
        /* Commit only after the allocation has succeeded.  */          \
        (work_area).table = grown_table;                                \
        (work_area).allocated = grown_size;                             \
      }                                                                 \
  } while (0)
1729*0b459c2cSDavid du Colombier
/* Append the pair RANGE_START, RANGE_END to WORK_AREA, growing the
   area first so that both entries fit.  */
#define SET_RANGE_TABLE_WORK_AREA(work_area, range_start, range_end)    \
  do {                                                                  \
    EXTEND_RANGE_TABLE_WORK_AREA ((work_area), 2);                      \
    (work_area).table[(work_area).used] = (range_start);                \
    (work_area).used++;                                                 \
    (work_area).table[(work_area).used] = (range_end);                  \
    (work_area).used++;                                                 \
  } while (0)
1737*0b459c2cSDavid du Colombier
/* Release the table owned by WORK_AREA, if it has one.  The `table'
   member itself is not reset.  */
#define FREE_RANGE_TABLE_WORK_AREA(work_area)                           \
  do {                                                                  \
    if ((work_area).table != 0)                                         \
      free ((work_area).table);                                         \
  } while (0)
1744*0b459c2cSDavid du Colombier
/* Accessors for a range-table work area: forget all stored entries,
   read the number of stored entries, and reach entry I.  */
#define CLEAR_RANGE_TABLE_WORK_USED(work_area) ((work_area).used = 0)
#define RANGE_TABLE_WORK_USED(work_area)       ((work_area).used)
#define RANGE_TABLE_WORK_ELT(work_area, i)     ((work_area).table[i])
1748*0b459c2cSDavid du Colombier
1749*0b459c2cSDavid du Colombier
/* Set the bit for character C in the bit list that starts at `b'.
   C is reduced to an unsigned char first; it is parenthesized in both
   uses so that the cast applies to the whole argument expression, and
   it is evaluated twice.  */
#define SET_LIST_BIT(c)                                                 \
  (b[((unsigned char) (c)) / BYTEWIDTH]                                 \
   |= 1 << (((unsigned char) (c)) % BYTEWIDTH))
1754*0b459c2cSDavid du Colombier
1755*0b459c2cSDavid du Colombier
/* Read a run of decimal digits from the uncompiled pattern into NUM,
   using `p', `pend' and `c' from the expanding scope.  A negative NUM
   is reset to zero before the first digit is accumulated; NUM is left
   alone when no digit is found.  On exit `c' holds the last character
   fetched, which is the first non-digit unless the pattern ended.  */
#define GET_UNSIGNED_NUMBER(num)                                        \
  { if (p != pend)                                                      \
      {                                                                 \
        PATFETCH (c);                                                   \
        for (;;)                                                        \
          {                                                             \
            if (!ISDIGIT (c))                                           \
              break;                                                    \
            if (num < 0)                                                \
              num = 0;                                                  \
            num = num * 10 + c - '0';                                   \
            if (p == pend)                                              \
              break;                                                    \
            PATFETCH (c);                                               \
          }                                                             \
      }                                                                 \
  }
1772*0b459c2cSDavid du Colombier
/* Length of the longest class name recognized below, namely
   `xdigit'.  */
#define CHAR_CLASS_MAX_LENGTH 6

/* Nonzero if STRING is one of the twelve character class names.  */
#define IS_CHAR_CLASS(string)                                           \
  (STREQ (string, "alpha")                                              \
   || STREQ (string, "upper")                                           \
   || STREQ (string, "lower")                                           \
   || STREQ (string, "digit")                                           \
   || STREQ (string, "alnum")                                           \
   || STREQ (string, "xdigit")                                          \
   || STREQ (string, "space")                                           \
   || STREQ (string, "print")                                           \
   || STREQ (string, "punct")                                           \
   || STREQ (string, "graph")                                           \
   || STREQ (string, "cntrl")                                           \
   || STREQ (string, "blank"))
1782*0b459c2cSDavid du Colombier
1783*0b459c2cSDavid du Colombier #ifndef MATCH_MAY_ALLOCATE
1784*0b459c2cSDavid du Colombier
1785*0b459c2cSDavid du Colombier /* If we cannot allocate large objects within re_match_2_internal,
1786*0b459c2cSDavid du Colombier we make the fail stack and register vectors global.
1787*0b459c2cSDavid du Colombier The fail stack, we grow to the maximum size when a regexp
1788*0b459c2cSDavid du Colombier is compiled.
1789*0b459c2cSDavid du Colombier The register vectors, we adjust in size each time we
1790*0b459c2cSDavid du Colombier compile a regexp, according to the number of registers it needs. */
1791*0b459c2cSDavid du Colombier
1792*0b459c2cSDavid du Colombier static fail_stack_type fail_stack;
1793*0b459c2cSDavid du Colombier
1794*0b459c2cSDavid du Colombier /* Size with which the following vectors are currently allocated.
1795*0b459c2cSDavid du Colombier That is so we can make them bigger as needed,
1796*0b459c2cSDavid du Colombier but never make them smaller. */
1797*0b459c2cSDavid du Colombier static int regs_allocated_size;
1798*0b459c2cSDavid du Colombier
1799*0b459c2cSDavid du Colombier static const char ** regstart, ** regend;
1800*0b459c2cSDavid du Colombier static const char ** old_regstart, ** old_regend;
1801*0b459c2cSDavid du Colombier static const char **best_regstart, **best_regend;
1802*0b459c2cSDavid du Colombier static register_info_type *reg_info;
1803*0b459c2cSDavid du Colombier static const char **reg_dummy;
1804*0b459c2cSDavid du Colombier static register_info_type *reg_info_dummy;
1805*0b459c2cSDavid du Colombier
1806*0b459c2cSDavid du Colombier /* Make the register vectors big enough for NUM_REGS registers,
1807*0b459c2cSDavid du Colombier but don't make them smaller. */
1808*0b459c2cSDavid du Colombier
1809*0b459c2cSDavid du Colombier static
regex_grow_registers(num_regs)1810*0b459c2cSDavid du Colombier regex_grow_registers (num_regs)
1811*0b459c2cSDavid du Colombier int num_regs;
1812*0b459c2cSDavid du Colombier {
1813*0b459c2cSDavid du Colombier if (num_regs > regs_allocated_size)
1814*0b459c2cSDavid du Colombier {
1815*0b459c2cSDavid du Colombier RETALLOC_IF (regstart, num_regs, const char *);
1816*0b459c2cSDavid du Colombier RETALLOC_IF (regend, num_regs, const char *);
1817*0b459c2cSDavid du Colombier RETALLOC_IF (old_regstart, num_regs, const char *);
1818*0b459c2cSDavid du Colombier RETALLOC_IF (old_regend, num_regs, const char *);
1819*0b459c2cSDavid du Colombier RETALLOC_IF (best_regstart, num_regs, const char *);
1820*0b459c2cSDavid du Colombier RETALLOC_IF (best_regend, num_regs, const char *);
1821*0b459c2cSDavid du Colombier RETALLOC_IF (reg_info, num_regs, register_info_type);
1822*0b459c2cSDavid du Colombier RETALLOC_IF (reg_dummy, num_regs, const char *);
1823*0b459c2cSDavid du Colombier RETALLOC_IF (reg_info_dummy, num_regs, register_info_type);
1824*0b459c2cSDavid du Colombier
1825*0b459c2cSDavid du Colombier regs_allocated_size = num_regs;
1826*0b459c2cSDavid du Colombier }
1827*0b459c2cSDavid du Colombier }
1828*0b459c2cSDavid du Colombier
1829*0b459c2cSDavid du Colombier #endif /* not MATCH_MAY_ALLOCATE */
1830*0b459c2cSDavid du Colombier
1831*0b459c2cSDavid du Colombier /* `regex_compile' compiles PATTERN (of length SIZE) according to SYNTAX.
1832*0b459c2cSDavid du Colombier Returns one of error codes defined in `regex.h', or zero for success.
1833*0b459c2cSDavid du Colombier
1834*0b459c2cSDavid du Colombier Assumes the `allocated' (and perhaps `buffer') and `translate'
1835*0b459c2cSDavid du Colombier fields are set in BUFP on entry.
1836*0b459c2cSDavid du Colombier
1837*0b459c2cSDavid du Colombier If it succeeds, results are put in BUFP (if it returns an error, the
1838*0b459c2cSDavid du Colombier contents of BUFP are undefined):
1839*0b459c2cSDavid du Colombier `buffer' is the compiled pattern;
1840*0b459c2cSDavid du Colombier `syntax' is set to SYNTAX;
1841*0b459c2cSDavid du Colombier `used' is set to the length of the compiled pattern;
1842*0b459c2cSDavid du Colombier `fastmap_accurate' is zero;
1843*0b459c2cSDavid du Colombier `re_nsub' is the number of subexpressions in PATTERN;
1844*0b459c2cSDavid du Colombier `not_bol' and `not_eol' are zero;
1845*0b459c2cSDavid du Colombier
1846*0b459c2cSDavid du Colombier The `fastmap' and `newline_anchor' fields are neither
1847*0b459c2cSDavid du Colombier examined nor set. */
1848*0b459c2cSDavid du Colombier
/* Leave the enclosing function with VALUE after releasing the storage
   it allocated: the compile stack array and the range-table work
   area.  Relies on `compile_stack' and `range_table_work' being in
   scope where the macro is expanded.  */
#define FREE_STACK_RETURN(value)                                        \
  do {                                                                  \
    free (compile_stack.stack);                                         \
    FREE_RANGE_TABLE_WORK_AREA (range_table_work);                      \
    return value;                                                       \
  } while (0)
1856*0b459c2cSDavid du Colombier
1857*0b459c2cSDavid du Colombier static reg_errcode_t
regex_compile(pattern,size,syntax,bufp)1858*0b459c2cSDavid du Colombier regex_compile (pattern, size, syntax, bufp)
1859*0b459c2cSDavid du Colombier const char *pattern;
1860*0b459c2cSDavid du Colombier int size;
1861*0b459c2cSDavid du Colombier reg_syntax_t syntax;
1862*0b459c2cSDavid du Colombier struct re_pattern_buffer *bufp;
1863*0b459c2cSDavid du Colombier {
1864*0b459c2cSDavid du Colombier /* We fetch characters from PATTERN here. Even though PATTERN is
1865*0b459c2cSDavid du Colombier `char *' (i.e., signed), we declare these variables as unsigned, so
1866*0b459c2cSDavid du Colombier they can be reliably used as array indices. */
1867*0b459c2cSDavid du Colombier register unsigned int c, c1;
1868*0b459c2cSDavid du Colombier
1869*0b459c2cSDavid du Colombier /* A random temporary spot in PATTERN. */
1870*0b459c2cSDavid du Colombier const char *p1;
1871*0b459c2cSDavid du Colombier
1872*0b459c2cSDavid du Colombier /* Points to the end of the buffer, where we should append. */
1873*0b459c2cSDavid du Colombier register unsigned char *b;
1874*0b459c2cSDavid du Colombier
1875*0b459c2cSDavid du Colombier /* Keeps track of unclosed groups. */
1876*0b459c2cSDavid du Colombier compile_stack_type compile_stack;
1877*0b459c2cSDavid du Colombier
1878*0b459c2cSDavid du Colombier /* Points to the current (ending) position in the pattern. */
1879*0b459c2cSDavid du Colombier #ifdef AIX
1880*0b459c2cSDavid du Colombier /* `const' makes AIX compiler fail. */
1881*0b459c2cSDavid du Colombier char *p = pattern;
1882*0b459c2cSDavid du Colombier #else
1883*0b459c2cSDavid du Colombier const char *p = pattern;
1884*0b459c2cSDavid du Colombier #endif
1885*0b459c2cSDavid du Colombier const char *pend = pattern + size;
1886*0b459c2cSDavid du Colombier
1887*0b459c2cSDavid du Colombier /* How to translate the characters in the pattern. */
1888*0b459c2cSDavid du Colombier RE_TRANSLATE_TYPE translate = bufp->translate;
1889*0b459c2cSDavid du Colombier
1890*0b459c2cSDavid du Colombier /* Address of the count-byte of the most recently inserted `exactn'
1891*0b459c2cSDavid du Colombier command. This makes it possible to tell if a new exact-match
1892*0b459c2cSDavid du Colombier character can be added to that command or if the character requires
1893*0b459c2cSDavid du Colombier a new `exactn' command. */
1894*0b459c2cSDavid du Colombier unsigned char *pending_exact = 0;
1895*0b459c2cSDavid du Colombier
1896*0b459c2cSDavid du Colombier /* Address of start of the most recently finished expression.
1897*0b459c2cSDavid du Colombier This tells, e.g., postfix * where to find the start of its
1898*0b459c2cSDavid du Colombier operand. Reset at the beginning of groups and alternatives. */
1899*0b459c2cSDavid du Colombier unsigned char *laststart = 0;
1900*0b459c2cSDavid du Colombier
1901*0b459c2cSDavid du Colombier /* Address of beginning of regexp, or inside of last group. */
1902*0b459c2cSDavid du Colombier unsigned char *begalt;
1903*0b459c2cSDavid du Colombier
1904*0b459c2cSDavid du Colombier /* Place in the uncompiled pattern (i.e., the {) to
1905*0b459c2cSDavid du Colombier which to go back if the interval is invalid. */
1906*0b459c2cSDavid du Colombier const char *beg_interval;
1907*0b459c2cSDavid du Colombier
1908*0b459c2cSDavid du Colombier /* Address of the place where a forward jump should go to the end of
1909*0b459c2cSDavid du Colombier the containing expression. Each alternative of an `or' -- except the
1910*0b459c2cSDavid du Colombier last -- ends with a forward jump of this sort. */
1911*0b459c2cSDavid du Colombier unsigned char *fixup_alt_jump = 0;
1912*0b459c2cSDavid du Colombier
1913*0b459c2cSDavid du Colombier /* Counts open-groups as they are encountered. Remembered for the
1914*0b459c2cSDavid du Colombier matching close-group on the compile stack, so the same register
1915*0b459c2cSDavid du Colombier number is put in the stop_memory as the start_memory. */
1916*0b459c2cSDavid du Colombier regnum_t regnum = 0;
1917*0b459c2cSDavid du Colombier
1918*0b459c2cSDavid du Colombier /* Work area for range table of charset. */
1919*0b459c2cSDavid du Colombier struct range_table_work_area range_table_work;
1920*0b459c2cSDavid du Colombier
1921*0b459c2cSDavid du Colombier #ifdef DEBUG
1922*0b459c2cSDavid du Colombier DEBUG_PRINT1 ("\nCompiling pattern: ");
1923*0b459c2cSDavid du Colombier if (debug)
1924*0b459c2cSDavid du Colombier {
1925*0b459c2cSDavid du Colombier unsigned debug_count;
1926*0b459c2cSDavid du Colombier
1927*0b459c2cSDavid du Colombier for (debug_count = 0; debug_count < size; debug_count++)
1928*0b459c2cSDavid du Colombier putchar (pattern[debug_count]);
1929*0b459c2cSDavid du Colombier putchar ('\n');
1930*0b459c2cSDavid du Colombier }
1931*0b459c2cSDavid du Colombier #endif /* DEBUG */
1932*0b459c2cSDavid du Colombier
1933*0b459c2cSDavid du Colombier /* Initialize the compile stack. */
1934*0b459c2cSDavid du Colombier compile_stack.stack = TALLOC (INIT_COMPILE_STACK_SIZE, compile_stack_elt_t);
1935*0b459c2cSDavid du Colombier if (compile_stack.stack == NULL)
1936*0b459c2cSDavid du Colombier return REG_ESPACE;
1937*0b459c2cSDavid du Colombier
1938*0b459c2cSDavid du Colombier compile_stack.size = INIT_COMPILE_STACK_SIZE;
1939*0b459c2cSDavid du Colombier compile_stack.avail = 0;
1940*0b459c2cSDavid du Colombier
1941*0b459c2cSDavid du Colombier range_table_work.table = 0;
1942*0b459c2cSDavid du Colombier range_table_work.allocated = 0;
1943*0b459c2cSDavid du Colombier
1944*0b459c2cSDavid du Colombier /* Initialize the pattern buffer. */
1945*0b459c2cSDavid du Colombier bufp->syntax = syntax;
1946*0b459c2cSDavid du Colombier bufp->fastmap_accurate = 0;
1947*0b459c2cSDavid du Colombier bufp->not_bol = bufp->not_eol = 0;
1948*0b459c2cSDavid du Colombier
1949*0b459c2cSDavid du Colombier /* Set `used' to zero, so that if we return an error, the pattern
1950*0b459c2cSDavid du Colombier printer (for debugging) will think there's no pattern. We reset it
1951*0b459c2cSDavid du Colombier at the end. */
1952*0b459c2cSDavid du Colombier bufp->used = 0;
1953*0b459c2cSDavid du Colombier
1954*0b459c2cSDavid du Colombier /* Always count groups, whether or not bufp->no_sub is set. */
1955*0b459c2cSDavid du Colombier bufp->re_nsub = 0;
1956*0b459c2cSDavid du Colombier
1957*0b459c2cSDavid du Colombier #ifdef emacs
1958*0b459c2cSDavid du Colombier /* bufp->multibyte is set before regex_compile is called, so don't alter
1959*0b459c2cSDavid du Colombier it. */
1960*0b459c2cSDavid du Colombier #else /* not emacs */
1961*0b459c2cSDavid du Colombier /* Nothing is recognized as a multibyte character. */
1962*0b459c2cSDavid du Colombier bufp->multibyte = 0;
1963*0b459c2cSDavid du Colombier #endif
1964*0b459c2cSDavid du Colombier
1965*0b459c2cSDavid du Colombier #if !defined (emacs) && !defined (SYNTAX_TABLE)
1966*0b459c2cSDavid du Colombier /* Initialize the syntax table. */
1967*0b459c2cSDavid du Colombier init_syntax_once ();
1968*0b459c2cSDavid du Colombier #endif
1969*0b459c2cSDavid du Colombier
1970*0b459c2cSDavid du Colombier if (bufp->allocated == 0)
1971*0b459c2cSDavid du Colombier {
1972*0b459c2cSDavid du Colombier if (bufp->buffer)
1973*0b459c2cSDavid du Colombier { /* If zero allocated, but buffer is non-null, try to realloc
1974*0b459c2cSDavid du Colombier enough space. This loses if buffer's address is bogus, but
1975*0b459c2cSDavid du Colombier that is the user's responsibility. */
1976*0b459c2cSDavid du Colombier RETALLOC (bufp->buffer, INIT_BUF_SIZE, unsigned char);
1977*0b459c2cSDavid du Colombier }
1978*0b459c2cSDavid du Colombier else
1979*0b459c2cSDavid du Colombier { /* Caller did not allocate a buffer. Do it for them. */
1980*0b459c2cSDavid du Colombier bufp->buffer = TALLOC (INIT_BUF_SIZE, unsigned char);
1981*0b459c2cSDavid du Colombier }
1982*0b459c2cSDavid du Colombier if (!bufp->buffer) FREE_STACK_RETURN (REG_ESPACE);
1983*0b459c2cSDavid du Colombier
1984*0b459c2cSDavid du Colombier bufp->allocated = INIT_BUF_SIZE;
1985*0b459c2cSDavid du Colombier }
1986*0b459c2cSDavid du Colombier
1987*0b459c2cSDavid du Colombier begalt = b = bufp->buffer;
1988*0b459c2cSDavid du Colombier
1989*0b459c2cSDavid du Colombier /* Loop through the uncompiled pattern until we're at the end. */
1990*0b459c2cSDavid du Colombier while (p != pend)
1991*0b459c2cSDavid du Colombier {
1992*0b459c2cSDavid du Colombier PATFETCH (c);
1993*0b459c2cSDavid du Colombier
1994*0b459c2cSDavid du Colombier switch (c)
1995*0b459c2cSDavid du Colombier {
1996*0b459c2cSDavid du Colombier case '^':
1997*0b459c2cSDavid du Colombier {
1998*0b459c2cSDavid du Colombier if ( /* If at start of pattern, it's an operator. */
1999*0b459c2cSDavid du Colombier p == pattern + 1
2000*0b459c2cSDavid du Colombier /* If context independent, it's an operator. */
2001*0b459c2cSDavid du Colombier || syntax & RE_CONTEXT_INDEP_ANCHORS
2002*0b459c2cSDavid du Colombier /* Otherwise, depends on what's come before. */
2003*0b459c2cSDavid du Colombier || at_begline_loc_p (pattern, p, syntax))
2004*0b459c2cSDavid du Colombier BUF_PUSH (begline);
2005*0b459c2cSDavid du Colombier else
2006*0b459c2cSDavid du Colombier goto normal_char;
2007*0b459c2cSDavid du Colombier }
2008*0b459c2cSDavid du Colombier break;
2009*0b459c2cSDavid du Colombier
2010*0b459c2cSDavid du Colombier
2011*0b459c2cSDavid du Colombier case '$':
2012*0b459c2cSDavid du Colombier {
2013*0b459c2cSDavid du Colombier if ( /* If at end of pattern, it's an operator. */
2014*0b459c2cSDavid du Colombier p == pend
2015*0b459c2cSDavid du Colombier /* If context independent, it's an operator. */
2016*0b459c2cSDavid du Colombier || syntax & RE_CONTEXT_INDEP_ANCHORS
2017*0b459c2cSDavid du Colombier /* Otherwise, depends on what's next. */
2018*0b459c2cSDavid du Colombier || at_endline_loc_p (p, pend, syntax))
2019*0b459c2cSDavid du Colombier BUF_PUSH (endline);
2020*0b459c2cSDavid du Colombier else
2021*0b459c2cSDavid du Colombier goto normal_char;
2022*0b459c2cSDavid du Colombier }
2023*0b459c2cSDavid du Colombier break;
2024*0b459c2cSDavid du Colombier
2025*0b459c2cSDavid du Colombier
2026*0b459c2cSDavid du Colombier case '+':
2027*0b459c2cSDavid du Colombier case '?':
2028*0b459c2cSDavid du Colombier if ((syntax & RE_BK_PLUS_QM)
2029*0b459c2cSDavid du Colombier || (syntax & RE_LIMITED_OPS))
2030*0b459c2cSDavid du Colombier goto normal_char;
2031*0b459c2cSDavid du Colombier handle_plus:
2032*0b459c2cSDavid du Colombier case '*':
2033*0b459c2cSDavid du Colombier /* If there is no previous pattern... */
2034*0b459c2cSDavid du Colombier if (!laststart)
2035*0b459c2cSDavid du Colombier {
2036*0b459c2cSDavid du Colombier if (syntax & RE_CONTEXT_INVALID_OPS)
2037*0b459c2cSDavid du Colombier FREE_STACK_RETURN (REG_BADRPT);
2038*0b459c2cSDavid du Colombier else if (!(syntax & RE_CONTEXT_INDEP_OPS))
2039*0b459c2cSDavid du Colombier goto normal_char;
2040*0b459c2cSDavid du Colombier }
2041*0b459c2cSDavid du Colombier
2042*0b459c2cSDavid du Colombier {
2043*0b459c2cSDavid du Colombier /* Are we optimizing this jump? */
2044*0b459c2cSDavid du Colombier boolean keep_string_p = false;
2045*0b459c2cSDavid du Colombier
2046*0b459c2cSDavid du Colombier /* 1 means zero (many) matches is allowed. */
2047*0b459c2cSDavid du Colombier char zero_times_ok = 0, many_times_ok = 0;
2048*0b459c2cSDavid du Colombier
2049*0b459c2cSDavid du Colombier /* If there is a sequence of repetition chars, collapse it
2050*0b459c2cSDavid du Colombier down to just one (the right one). We can't combine
2051*0b459c2cSDavid du Colombier interval operators with these because of, e.g., `a{2}*',
2052*0b459c2cSDavid du Colombier which should only match an even number of `a's. */
2053*0b459c2cSDavid du Colombier
2054*0b459c2cSDavid du Colombier for (;;)
2055*0b459c2cSDavid du Colombier {
2056*0b459c2cSDavid du Colombier zero_times_ok |= c != '+';
2057*0b459c2cSDavid du Colombier many_times_ok |= c != '?';
2058*0b459c2cSDavid du Colombier
2059*0b459c2cSDavid du Colombier if (p == pend)
2060*0b459c2cSDavid du Colombier break;
2061*0b459c2cSDavid du Colombier
2062*0b459c2cSDavid du Colombier PATFETCH (c);
2063*0b459c2cSDavid du Colombier
2064*0b459c2cSDavid du Colombier if (c == '*'
2065*0b459c2cSDavid du Colombier || (!(syntax & RE_BK_PLUS_QM) && (c == '+' || c == '?')))
2066*0b459c2cSDavid du Colombier ;
2067*0b459c2cSDavid du Colombier
2068*0b459c2cSDavid du Colombier else if (syntax & RE_BK_PLUS_QM && c == '\\')
2069*0b459c2cSDavid du Colombier {
2070*0b459c2cSDavid du Colombier if (p == pend) FREE_STACK_RETURN (REG_EESCAPE);
2071*0b459c2cSDavid du Colombier
2072*0b459c2cSDavid du Colombier PATFETCH (c1);
2073*0b459c2cSDavid du Colombier if (!(c1 == '+' || c1 == '?'))
2074*0b459c2cSDavid du Colombier {
2075*0b459c2cSDavid du Colombier PATUNFETCH;
2076*0b459c2cSDavid du Colombier PATUNFETCH;
2077*0b459c2cSDavid du Colombier break;
2078*0b459c2cSDavid du Colombier }
2079*0b459c2cSDavid du Colombier
2080*0b459c2cSDavid du Colombier c = c1;
2081*0b459c2cSDavid du Colombier }
2082*0b459c2cSDavid du Colombier else
2083*0b459c2cSDavid du Colombier {
2084*0b459c2cSDavid du Colombier PATUNFETCH;
2085*0b459c2cSDavid du Colombier break;
2086*0b459c2cSDavid du Colombier }
2087*0b459c2cSDavid du Colombier
2088*0b459c2cSDavid du Colombier /* If we get here, we found another repeat character. */
2089*0b459c2cSDavid du Colombier }
2090*0b459c2cSDavid du Colombier
2091*0b459c2cSDavid du Colombier /* Star, etc. applied to an empty pattern is equivalent
2092*0b459c2cSDavid du Colombier to an empty pattern. */
2093*0b459c2cSDavid du Colombier if (!laststart)
2094*0b459c2cSDavid du Colombier break;
2095*0b459c2cSDavid du Colombier
2096*0b459c2cSDavid du Colombier /* Now we know whether or not zero matches is allowed
2097*0b459c2cSDavid du Colombier and also whether or not two or more matches is allowed. */
2098*0b459c2cSDavid du Colombier if (many_times_ok)
2099*0b459c2cSDavid du Colombier { /* More than one repetition is allowed, so put in at the
2100*0b459c2cSDavid du Colombier end a backward relative jump from `b' to before the next
2101*0b459c2cSDavid du Colombier jump we're going to put in below (which jumps from
2102*0b459c2cSDavid du Colombier laststart to after this jump).
2103*0b459c2cSDavid du Colombier
2104*0b459c2cSDavid du Colombier But if we are at the `*' in the exact sequence `.*\n',
2105*0b459c2cSDavid du Colombier insert an unconditional jump backwards to the .,
2106*0b459c2cSDavid du Colombier instead of the beginning of the loop. This way we only
2107*0b459c2cSDavid du Colombier push a failure point once, instead of every time
2108*0b459c2cSDavid du Colombier through the loop. */
2109*0b459c2cSDavid du Colombier assert (p - 1 > pattern);
2110*0b459c2cSDavid du Colombier
2111*0b459c2cSDavid du Colombier /* Allocate the space for the jump. */
2112*0b459c2cSDavid du Colombier GET_BUFFER_SPACE (3);
2113*0b459c2cSDavid du Colombier
2114*0b459c2cSDavid du Colombier /* We know we are not at the first character of the pattern,
2115*0b459c2cSDavid du Colombier because laststart was nonzero. And we've already
2116*0b459c2cSDavid du Colombier incremented `p', by the way, to be the character after
2117*0b459c2cSDavid du Colombier the `*'. Do we have to do something analogous here
2118*0b459c2cSDavid du Colombier for null bytes, because of RE_DOT_NOT_NULL? */
2119*0b459c2cSDavid du Colombier if (TRANSLATE ((unsigned char)*(p - 2)) == TRANSLATE ('.')
2120*0b459c2cSDavid du Colombier && zero_times_ok
2121*0b459c2cSDavid du Colombier && p < pend
2122*0b459c2cSDavid du Colombier && TRANSLATE ((unsigned char)*p) == TRANSLATE ('\n')
2123*0b459c2cSDavid du Colombier && !(syntax & RE_DOT_NEWLINE))
2124*0b459c2cSDavid du Colombier { /* We have .*\n. */
2125*0b459c2cSDavid du Colombier STORE_JUMP (jump, b, laststart);
2126*0b459c2cSDavid du Colombier keep_string_p = true;
2127*0b459c2cSDavid du Colombier }
2128*0b459c2cSDavid du Colombier else
2129*0b459c2cSDavid du Colombier /* Anything else. */
2130*0b459c2cSDavid du Colombier STORE_JUMP (maybe_pop_jump, b, laststart - 3);
2131*0b459c2cSDavid du Colombier
2132*0b459c2cSDavid du Colombier /* We've added more stuff to the buffer. */
2133*0b459c2cSDavid du Colombier b += 3;
2134*0b459c2cSDavid du Colombier }
2135*0b459c2cSDavid du Colombier
2136*0b459c2cSDavid du Colombier /* On failure, jump from laststart to b + 3, which will be the
2137*0b459c2cSDavid du Colombier end of the buffer after this jump is inserted. */
2138*0b459c2cSDavid du Colombier GET_BUFFER_SPACE (3);
2139*0b459c2cSDavid du Colombier INSERT_JUMP (keep_string_p ? on_failure_keep_string_jump
2140*0b459c2cSDavid du Colombier : on_failure_jump,
2141*0b459c2cSDavid du Colombier laststart, b + 3);
2142*0b459c2cSDavid du Colombier pending_exact = 0;
2143*0b459c2cSDavid du Colombier b += 3;
2144*0b459c2cSDavid du Colombier
2145*0b459c2cSDavid du Colombier if (!zero_times_ok)
2146*0b459c2cSDavid du Colombier {
2147*0b459c2cSDavid du Colombier /* At least one repetition is required, so insert a
2148*0b459c2cSDavid du Colombier `dummy_failure_jump' before the initial
2149*0b459c2cSDavid du Colombier `on_failure_jump' instruction of the loop. This
2150*0b459c2cSDavid du Colombier effects a skip over that instruction the first time
2151*0b459c2cSDavid du Colombier we hit that loop. */
2152*0b459c2cSDavid du Colombier GET_BUFFER_SPACE (3);
2153*0b459c2cSDavid du Colombier INSERT_JUMP (dummy_failure_jump, laststart, laststart + 6);
2154*0b459c2cSDavid du Colombier b += 3;
2155*0b459c2cSDavid du Colombier }
2156*0b459c2cSDavid du Colombier }
2157*0b459c2cSDavid du Colombier break;
2158*0b459c2cSDavid du Colombier
2159*0b459c2cSDavid du Colombier
2160*0b459c2cSDavid du Colombier case '.':
2161*0b459c2cSDavid du Colombier laststart = b;
2162*0b459c2cSDavid du Colombier BUF_PUSH (anychar);
2163*0b459c2cSDavid du Colombier break;
2164*0b459c2cSDavid du Colombier
2165*0b459c2cSDavid du Colombier
2166*0b459c2cSDavid du Colombier case '[':
2167*0b459c2cSDavid du Colombier {
2168*0b459c2cSDavid du Colombier CLEAR_RANGE_TABLE_WORK_USED (range_table_work);
2169*0b459c2cSDavid du Colombier
2170*0b459c2cSDavid du Colombier if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
2171*0b459c2cSDavid du Colombier
2172*0b459c2cSDavid du Colombier /* Ensure that we have enough space to push a charset: the
2173*0b459c2cSDavid du Colombier opcode, the length count, and the bitset; 34 bytes in all. */
2174*0b459c2cSDavid du Colombier GET_BUFFER_SPACE (34);
2175*0b459c2cSDavid du Colombier
2176*0b459c2cSDavid du Colombier laststart = b;
2177*0b459c2cSDavid du Colombier
2178*0b459c2cSDavid du Colombier /* We test `*p == '^' twice, instead of using an if
2179*0b459c2cSDavid du Colombier statement, so we only need one BUF_PUSH. */
2180*0b459c2cSDavid du Colombier BUF_PUSH (*p == '^' ? charset_not : charset);
2181*0b459c2cSDavid du Colombier if (*p == '^')
2182*0b459c2cSDavid du Colombier p++;
2183*0b459c2cSDavid du Colombier
2184*0b459c2cSDavid du Colombier /* Remember the first position in the bracket expression. */
2185*0b459c2cSDavid du Colombier p1 = p;
2186*0b459c2cSDavid du Colombier
2187*0b459c2cSDavid du Colombier /* Push the number of bytes in the bitmap. */
2188*0b459c2cSDavid du Colombier BUF_PUSH ((1 << BYTEWIDTH) / BYTEWIDTH);
2189*0b459c2cSDavid du Colombier
2190*0b459c2cSDavid du Colombier /* Clear the whole map. */
2191*0b459c2cSDavid du Colombier bzero (b, (1 << BYTEWIDTH) / BYTEWIDTH);
2192*0b459c2cSDavid du Colombier
2193*0b459c2cSDavid du Colombier /* charset_not matches newline according to a syntax bit. */
2194*0b459c2cSDavid du Colombier if ((re_opcode_t) b[-2] == charset_not
2195*0b459c2cSDavid du Colombier && (syntax & RE_HAT_LISTS_NOT_NEWLINE))
2196*0b459c2cSDavid du Colombier SET_LIST_BIT ('\n');
2197*0b459c2cSDavid du Colombier
2198*0b459c2cSDavid du Colombier /* Read in characters and ranges, setting map bits. */
2199*0b459c2cSDavid du Colombier for (;;)
2200*0b459c2cSDavid du Colombier {
2201*0b459c2cSDavid du Colombier int len;
2202*0b459c2cSDavid du Colombier boolean escaped_char = false;
2203*0b459c2cSDavid du Colombier
2204*0b459c2cSDavid du Colombier if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
2205*0b459c2cSDavid du Colombier
2206*0b459c2cSDavid du Colombier PATFETCH (c);
2207*0b459c2cSDavid du Colombier
2208*0b459c2cSDavid du Colombier /* \ might escape characters inside [...] and [^...]. */
2209*0b459c2cSDavid du Colombier if ((syntax & RE_BACKSLASH_ESCAPE_IN_LISTS) && c == '\\')
2210*0b459c2cSDavid du Colombier {
2211*0b459c2cSDavid du Colombier if (p == pend) FREE_STACK_RETURN (REG_EESCAPE);
2212*0b459c2cSDavid du Colombier
2213*0b459c2cSDavid du Colombier PATFETCH (c);
2214*0b459c2cSDavid du Colombier escaped_char = true;
2215*0b459c2cSDavid du Colombier }
2216*0b459c2cSDavid du Colombier else
2217*0b459c2cSDavid du Colombier {
2218*0b459c2cSDavid du Colombier /* Could be the end of the bracket expression. If it's
2219*0b459c2cSDavid du Colombier not (i.e., when the bracket expression is `[]' so
2220*0b459c2cSDavid du Colombier far), the ']' character bit gets set way below. */
2221*0b459c2cSDavid du Colombier if (c == ']' && p != p1 + 1)
2222*0b459c2cSDavid du Colombier break;
2223*0b459c2cSDavid du Colombier }
2224*0b459c2cSDavid du Colombier
2225*0b459c2cSDavid du Colombier /* If C indicates start of multibyte char, get the
2226*0b459c2cSDavid du Colombier actual character code in C, and set the pattern
2227*0b459c2cSDavid du Colombier pointer P to the next character boundary. */
2228*0b459c2cSDavid du Colombier if (bufp->multibyte && BASE_LEADING_CODE_P (c))
2229*0b459c2cSDavid du Colombier {
2230*0b459c2cSDavid du Colombier PATUNFETCH;
2231*0b459c2cSDavid du Colombier c = STRING_CHAR_AND_LENGTH (p, pend - p, len);
2232*0b459c2cSDavid du Colombier p += len;
2233*0b459c2cSDavid du Colombier }
2234*0b459c2cSDavid du Colombier /* What should we do for the character which is
2235*0b459c2cSDavid du Colombier greater than 0x7F, but not BASE_LEADING_CODE_P?
2236*0b459c2cSDavid du Colombier XXX */
2237*0b459c2cSDavid du Colombier
2238*0b459c2cSDavid du Colombier /* See if we're at the beginning of a possible character
2239*0b459c2cSDavid du Colombier class. */
2240*0b459c2cSDavid du Colombier
2241*0b459c2cSDavid du Colombier else if (!escaped_char &&
2242*0b459c2cSDavid du Colombier syntax & RE_CHAR_CLASSES && c == '[' && *p == ':')
2243*0b459c2cSDavid du Colombier {
2244*0b459c2cSDavid du Colombier /* Leave room for the null. */
2245*0b459c2cSDavid du Colombier char str[CHAR_CLASS_MAX_LENGTH + 1];
2246*0b459c2cSDavid du Colombier
2247*0b459c2cSDavid du Colombier PATFETCH (c);
2248*0b459c2cSDavid du Colombier c1 = 0;
2249*0b459c2cSDavid du Colombier
2250*0b459c2cSDavid du Colombier /* If pattern is `[[:'. */
2251*0b459c2cSDavid du Colombier if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
2252*0b459c2cSDavid du Colombier
2253*0b459c2cSDavid du Colombier for (;;)
2254*0b459c2cSDavid du Colombier {
2255*0b459c2cSDavid du Colombier PATFETCH (c);
2256*0b459c2cSDavid du Colombier if (c == ':' || c == ']' || p == pend
2257*0b459c2cSDavid du Colombier || c1 == CHAR_CLASS_MAX_LENGTH)
2258*0b459c2cSDavid du Colombier break;
2259*0b459c2cSDavid du Colombier str[c1++] = c;
2260*0b459c2cSDavid du Colombier }
2261*0b459c2cSDavid du Colombier str[c1] = '\0';
2262*0b459c2cSDavid du Colombier
2263*0b459c2cSDavid du Colombier /* If it isn't a word bracketed by `[:' and `:]':
2264*0b459c2cSDavid du Colombier undo the ending character, the letters, and
2265*0b459c2cSDavid du Colombier leave the leading `:' and `[' (but set bits for
2266*0b459c2cSDavid du Colombier them). */
2267*0b459c2cSDavid du Colombier if (c == ':' && *p == ']')
2268*0b459c2cSDavid du Colombier {
2269*0b459c2cSDavid du Colombier int ch;
2270*0b459c2cSDavid du Colombier boolean is_alnum = STREQ (str, "alnum");
2271*0b459c2cSDavid du Colombier boolean is_alpha = STREQ (str, "alpha");
2272*0b459c2cSDavid du Colombier boolean is_blank = STREQ (str, "blank");
2273*0b459c2cSDavid du Colombier boolean is_cntrl = STREQ (str, "cntrl");
2274*0b459c2cSDavid du Colombier boolean is_digit = STREQ (str, "digit");
2275*0b459c2cSDavid du Colombier boolean is_graph = STREQ (str, "graph");
2276*0b459c2cSDavid du Colombier boolean is_lower = STREQ (str, "lower");
2277*0b459c2cSDavid du Colombier boolean is_print = STREQ (str, "print");
2278*0b459c2cSDavid du Colombier boolean is_punct = STREQ (str, "punct");
2279*0b459c2cSDavid du Colombier boolean is_space = STREQ (str, "space");
2280*0b459c2cSDavid du Colombier boolean is_upper = STREQ (str, "upper");
2281*0b459c2cSDavid du Colombier boolean is_xdigit = STREQ (str, "xdigit");
2282*0b459c2cSDavid du Colombier
2283*0b459c2cSDavid du Colombier if (!IS_CHAR_CLASS (str))
2284*0b459c2cSDavid du Colombier FREE_STACK_RETURN (REG_ECTYPE);
2285*0b459c2cSDavid du Colombier
2286*0b459c2cSDavid du Colombier /* Throw away the ] at the end of the character
2287*0b459c2cSDavid du Colombier class. */
2288*0b459c2cSDavid du Colombier PATFETCH (c);
2289*0b459c2cSDavid du Colombier
2290*0b459c2cSDavid du Colombier if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
2291*0b459c2cSDavid du Colombier
2292*0b459c2cSDavid du Colombier for (ch = 0; ch < 1 << BYTEWIDTH; ch++)
2293*0b459c2cSDavid du Colombier {
2294*0b459c2cSDavid du Colombier int translated = TRANSLATE (ch);
2295*0b459c2cSDavid du Colombier /* This was split into 3 if's to
2296*0b459c2cSDavid du Colombier avoid an arbitrary limit in some compiler. */
2297*0b459c2cSDavid du Colombier if ( (is_alnum && ISALNUM (ch))
2298*0b459c2cSDavid du Colombier || (is_alpha && ISALPHA (ch))
2299*0b459c2cSDavid du Colombier || (is_blank && ISBLANK (ch))
2300*0b459c2cSDavid du Colombier || (is_cntrl && ISCNTRL (ch)))
2301*0b459c2cSDavid du Colombier SET_LIST_BIT (translated);
2302*0b459c2cSDavid du Colombier if ( (is_digit && ISDIGIT (ch))
2303*0b459c2cSDavid du Colombier || (is_graph && ISGRAPH (ch))
2304*0b459c2cSDavid du Colombier || (is_lower && ISLOWER (ch))
2305*0b459c2cSDavid du Colombier || (is_print && ISPRINT (ch)))
2306*0b459c2cSDavid du Colombier SET_LIST_BIT (translated);
2307*0b459c2cSDavid du Colombier if ( (is_punct && ISPUNCT (ch))
2308*0b459c2cSDavid du Colombier || (is_space && ISSPACE (ch))
2309*0b459c2cSDavid du Colombier || (is_upper && ISUPPER (ch))
2310*0b459c2cSDavid du Colombier || (is_xdigit && ISXDIGIT (ch)))
2311*0b459c2cSDavid du Colombier SET_LIST_BIT (translated);
2312*0b459c2cSDavid du Colombier }
2313*0b459c2cSDavid du Colombier
2314*0b459c2cSDavid du Colombier /* Repeat the loop. */
2315*0b459c2cSDavid du Colombier continue;
2316*0b459c2cSDavid du Colombier }
2317*0b459c2cSDavid du Colombier else
2318*0b459c2cSDavid du Colombier {
2319*0b459c2cSDavid du Colombier c1++;
2320*0b459c2cSDavid du Colombier while (c1--)
2321*0b459c2cSDavid du Colombier PATUNFETCH;
2322*0b459c2cSDavid du Colombier SET_LIST_BIT ('[');
2323*0b459c2cSDavid du Colombier
2324*0b459c2cSDavid du Colombier /* Because the `:' may start the range, we
2325*0b459c2cSDavid du Colombier can't simply set bit and repeat the loop.
2326*0b459c2cSDavid du Colombier Instead, just set it to C and handle below. */
2327*0b459c2cSDavid du Colombier c = ':';
2328*0b459c2cSDavid du Colombier }
2329*0b459c2cSDavid du Colombier }
2330*0b459c2cSDavid du Colombier
2331*0b459c2cSDavid du Colombier if (p < pend && p[0] == '-' && p[1] != ']')
2332*0b459c2cSDavid du Colombier {
2333*0b459c2cSDavid du Colombier
2334*0b459c2cSDavid du Colombier /* Discard the `-'. */
2335*0b459c2cSDavid du Colombier PATFETCH (c1);
2336*0b459c2cSDavid du Colombier
2337*0b459c2cSDavid du Colombier /* Fetch the character which ends the range. */
2338*0b459c2cSDavid du Colombier PATFETCH (c1);
2339*0b459c2cSDavid du Colombier if (bufp->multibyte && BASE_LEADING_CODE_P (c1))
2340*0b459c2cSDavid du Colombier {
2341*0b459c2cSDavid du Colombier PATUNFETCH;
2342*0b459c2cSDavid du Colombier c1 = STRING_CHAR_AND_LENGTH (p, pend - p, len);
2343*0b459c2cSDavid du Colombier p += len;
2344*0b459c2cSDavid du Colombier }
2345*0b459c2cSDavid du Colombier
2346*0b459c2cSDavid du Colombier if (SINGLE_BYTE_CHAR_P (c)
2347*0b459c2cSDavid du Colombier && ! SINGLE_BYTE_CHAR_P (c1))
2348*0b459c2cSDavid du Colombier {
2349*0b459c2cSDavid du Colombier /* Handle a range such as \177-\377 in multibyte mode.
2350*0b459c2cSDavid du Colombier Split that into two ranges: the low one ending at 0237, and
2351*0b459c2cSDavid du Colombier the high one starting at ...040. NOTE(review): c1_base below
2352*0b459c2cSDavid du Colombier is computed but never used in this block -- confirm intent. */
2353*0b459c2cSDavid du Colombier int c1_base = (c1 & ~0177) | 040;
2354*0b459c2cSDavid du Colombier SET_RANGE_TABLE_WORK_AREA (range_table_work, c, c1);
2355*0b459c2cSDavid du Colombier c1 = 0237;
2356*0b459c2cSDavid du Colombier }
2357*0b459c2cSDavid du Colombier else if (!SAME_CHARSET_P (c, c1))
2358*0b459c2cSDavid du Colombier FREE_STACK_RETURN (REG_ERANGE);
2359*0b459c2cSDavid du Colombier }
2360*0b459c2cSDavid du Colombier else
2361*0b459c2cSDavid du Colombier /* Range from C to C. */
2362*0b459c2cSDavid du Colombier c1 = c;
2363*0b459c2cSDavid du Colombier
2364*0b459c2cSDavid du Colombier /* Set the range ... */
2365*0b459c2cSDavid du Colombier if (SINGLE_BYTE_CHAR_P (c))
2366*0b459c2cSDavid du Colombier /* ... into bitmap. */
2367*0b459c2cSDavid du Colombier {
2368*0b459c2cSDavid du Colombier unsigned this_char;
2369*0b459c2cSDavid du Colombier int range_start = c, range_end = c1;
2370*0b459c2cSDavid du Colombier
2371*0b459c2cSDavid du Colombier /* If the start is after the end, the range is empty. */
2372*0b459c2cSDavid du Colombier if (range_start > range_end)
2373*0b459c2cSDavid du Colombier {
2374*0b459c2cSDavid du Colombier if (syntax & RE_NO_EMPTY_RANGES)
2375*0b459c2cSDavid du Colombier FREE_STACK_RETURN (REG_ERANGE);
2376*0b459c2cSDavid du Colombier /* Else, repeat the loop. */
2377*0b459c2cSDavid du Colombier }
2378*0b459c2cSDavid du Colombier else
2379*0b459c2cSDavid du Colombier {
2380*0b459c2cSDavid du Colombier for (this_char = range_start; this_char <= range_end;
2381*0b459c2cSDavid du Colombier this_char++)
2382*0b459c2cSDavid du Colombier SET_LIST_BIT (TRANSLATE (this_char));
2383*0b459c2cSDavid du Colombier }
2384*0b459c2cSDavid du Colombier }
2385*0b459c2cSDavid du Colombier else
2386*0b459c2cSDavid du Colombier /* ... into range table. */
2387*0b459c2cSDavid du Colombier SET_RANGE_TABLE_WORK_AREA (range_table_work, c, c1);
2388*0b459c2cSDavid du Colombier }
2389*0b459c2cSDavid du Colombier
2390*0b459c2cSDavid du Colombier /* Discard any (non)matching list bytes that are all 0 at the
2391*0b459c2cSDavid du Colombier end of the map. Decrease the map-length byte too. */
2392*0b459c2cSDavid du Colombier while ((int) b[-1] > 0 && b[b[-1] - 1] == 0)
2393*0b459c2cSDavid du Colombier b[-1]--;
2394*0b459c2cSDavid du Colombier b += b[-1];
2395*0b459c2cSDavid du Colombier
2396*0b459c2cSDavid du Colombier /* Build real range table from work area. */
2397*0b459c2cSDavid du Colombier if (RANGE_TABLE_WORK_USED (range_table_work))
2398*0b459c2cSDavid du Colombier {
2399*0b459c2cSDavid du Colombier int i;
2400*0b459c2cSDavid du Colombier int used = RANGE_TABLE_WORK_USED (range_table_work);
2401*0b459c2cSDavid du Colombier
2402*0b459c2cSDavid du Colombier /* Allocate space for COUNT + RANGE_TABLE. Needs two
2403*0b459c2cSDavid du Colombier bytes for COUNT and three bytes for each character. */
2404*0b459c2cSDavid du Colombier GET_BUFFER_SPACE (2 + used * 3);
2405*0b459c2cSDavid du Colombier
2406*0b459c2cSDavid du Colombier /* Indicate the existence of range table. */
2407*0b459c2cSDavid du Colombier laststart[1] |= 0x80;
2408*0b459c2cSDavid du Colombier
2409*0b459c2cSDavid du Colombier STORE_NUMBER_AND_INCR (b, used / 2);
2410*0b459c2cSDavid du Colombier for (i = 0; i < used; i++)
2411*0b459c2cSDavid du Colombier STORE_CHARACTER_AND_INCR
2412*0b459c2cSDavid du Colombier (b, RANGE_TABLE_WORK_ELT (range_table_work, i));
2413*0b459c2cSDavid du Colombier }
2414*0b459c2cSDavid du Colombier }
2415*0b459c2cSDavid du Colombier break;
2416*0b459c2cSDavid du Colombier
2417*0b459c2cSDavid du Colombier
2418*0b459c2cSDavid du Colombier case '(':
2419*0b459c2cSDavid du Colombier if (syntax & RE_NO_BK_PARENS)
2420*0b459c2cSDavid du Colombier goto handle_open;
2421*0b459c2cSDavid du Colombier else
2422*0b459c2cSDavid du Colombier goto normal_char;
2423*0b459c2cSDavid du Colombier
2424*0b459c2cSDavid du Colombier
2425*0b459c2cSDavid du Colombier case ')':
2426*0b459c2cSDavid du Colombier if (syntax & RE_NO_BK_PARENS)
2427*0b459c2cSDavid du Colombier goto handle_close;
2428*0b459c2cSDavid du Colombier else
2429*0b459c2cSDavid du Colombier goto normal_char;
2430*0b459c2cSDavid du Colombier
2431*0b459c2cSDavid du Colombier
2432*0b459c2cSDavid du Colombier case '\n':
2433*0b459c2cSDavid du Colombier if (syntax & RE_NEWLINE_ALT)
2434*0b459c2cSDavid du Colombier goto handle_alt;
2435*0b459c2cSDavid du Colombier else
2436*0b459c2cSDavid du Colombier goto normal_char;
2437*0b459c2cSDavid du Colombier
2438*0b459c2cSDavid du Colombier
2439*0b459c2cSDavid du Colombier case '|':
2440*0b459c2cSDavid du Colombier if (syntax & RE_NO_BK_VBAR)
2441*0b459c2cSDavid du Colombier goto handle_alt;
2442*0b459c2cSDavid du Colombier else
2443*0b459c2cSDavid du Colombier goto normal_char;
2444*0b459c2cSDavid du Colombier
2445*0b459c2cSDavid du Colombier
2446*0b459c2cSDavid du Colombier case '{':
2447*0b459c2cSDavid du Colombier if (syntax & RE_INTERVALS && syntax & RE_NO_BK_BRACES)
2448*0b459c2cSDavid du Colombier goto handle_interval;
2449*0b459c2cSDavid du Colombier else
2450*0b459c2cSDavid du Colombier goto normal_char;
2451*0b459c2cSDavid du Colombier
2452*0b459c2cSDavid du Colombier
2453*0b459c2cSDavid du Colombier case '\\':
2454*0b459c2cSDavid du Colombier if (p == pend) FREE_STACK_RETURN (REG_EESCAPE);
2455*0b459c2cSDavid du Colombier
2456*0b459c2cSDavid du Colombier /* Do not translate the character after the \, so that we can
2457*0b459c2cSDavid du Colombier distinguish, e.g., \B from \b, even if we normally would
2458*0b459c2cSDavid du Colombier translate, e.g., B to b. */
2459*0b459c2cSDavid du Colombier PATFETCH_RAW (c);
2460*0b459c2cSDavid du Colombier
2461*0b459c2cSDavid du Colombier switch (c)
2462*0b459c2cSDavid du Colombier {
2463*0b459c2cSDavid du Colombier case '(':
2464*0b459c2cSDavid du Colombier if (syntax & RE_NO_BK_PARENS)
2465*0b459c2cSDavid du Colombier goto normal_backslash;
2466*0b459c2cSDavid du Colombier
2467*0b459c2cSDavid du Colombier handle_open:
2468*0b459c2cSDavid du Colombier bufp->re_nsub++;
2469*0b459c2cSDavid du Colombier regnum++;
2470*0b459c2cSDavid du Colombier
2471*0b459c2cSDavid du Colombier if (COMPILE_STACK_FULL)
2472*0b459c2cSDavid du Colombier {
2473*0b459c2cSDavid du Colombier RETALLOC (compile_stack.stack, compile_stack.size << 1,
2474*0b459c2cSDavid du Colombier compile_stack_elt_t);
2475*0b459c2cSDavid du Colombier if (compile_stack.stack == NULL) return REG_ESPACE;
2476*0b459c2cSDavid du Colombier
2477*0b459c2cSDavid du Colombier compile_stack.size <<= 1;
2478*0b459c2cSDavid du Colombier }
2479*0b459c2cSDavid du Colombier
2480*0b459c2cSDavid du Colombier /* These are the values to restore when we hit end of this
2481*0b459c2cSDavid du Colombier group. They are all relative offsets, so that if the
2482*0b459c2cSDavid du Colombier whole pattern moves because of realloc, they will still
2483*0b459c2cSDavid du Colombier be valid. */
2484*0b459c2cSDavid du Colombier COMPILE_STACK_TOP.begalt_offset = begalt - bufp->buffer;
2485*0b459c2cSDavid du Colombier COMPILE_STACK_TOP.fixup_alt_jump
2486*0b459c2cSDavid du Colombier = fixup_alt_jump ? fixup_alt_jump - bufp->buffer + 1 : 0;
2487*0b459c2cSDavid du Colombier COMPILE_STACK_TOP.laststart_offset = b - bufp->buffer;
2488*0b459c2cSDavid du Colombier COMPILE_STACK_TOP.regnum = regnum;
2489*0b459c2cSDavid du Colombier
2490*0b459c2cSDavid du Colombier /* We will eventually replace the 0 with the number of
2491*0b459c2cSDavid du Colombier groups inner to this one. But do not push a
2492*0b459c2cSDavid du Colombier start_memory for groups beyond the last one we can
2493*0b459c2cSDavid du Colombier represent in the compiled pattern. */
2494*0b459c2cSDavid du Colombier if (regnum <= MAX_REGNUM)
2495*0b459c2cSDavid du Colombier {
2496*0b459c2cSDavid du Colombier COMPILE_STACK_TOP.inner_group_offset = b - bufp->buffer + 2;
2497*0b459c2cSDavid du Colombier BUF_PUSH_3 (start_memory, regnum, 0);
2498*0b459c2cSDavid du Colombier }
2499*0b459c2cSDavid du Colombier
2500*0b459c2cSDavid du Colombier compile_stack.avail++;
2501*0b459c2cSDavid du Colombier
2502*0b459c2cSDavid du Colombier fixup_alt_jump = 0;
2503*0b459c2cSDavid du Colombier laststart = 0;
2504*0b459c2cSDavid du Colombier begalt = b;
2505*0b459c2cSDavid du Colombier /* If we've reached MAX_REGNUM groups, then this open
2506*0b459c2cSDavid du Colombier won't actually generate any code, so we'll have to
2507*0b459c2cSDavid du Colombier clear pending_exact explicitly. */
2508*0b459c2cSDavid du Colombier pending_exact = 0;
2509*0b459c2cSDavid du Colombier break;
2510*0b459c2cSDavid du Colombier
2511*0b459c2cSDavid du Colombier
2512*0b459c2cSDavid du Colombier case ')':
2513*0b459c2cSDavid du Colombier if (syntax & RE_NO_BK_PARENS) goto normal_backslash;
2514*0b459c2cSDavid du Colombier
2515*0b459c2cSDavid du Colombier if (COMPILE_STACK_EMPTY)
2516*0b459c2cSDavid du Colombier if (syntax & RE_UNMATCHED_RIGHT_PAREN_ORD)
2517*0b459c2cSDavid du Colombier goto normal_backslash;
2518*0b459c2cSDavid du Colombier else
2519*0b459c2cSDavid du Colombier FREE_STACK_RETURN (REG_ERPAREN);
2520*0b459c2cSDavid du Colombier
2521*0b459c2cSDavid du Colombier handle_close:
2522*0b459c2cSDavid du Colombier if (fixup_alt_jump)
2523*0b459c2cSDavid du Colombier { /* Push a dummy failure point at the end of the
2524*0b459c2cSDavid du Colombier alternative for a possible future
2525*0b459c2cSDavid du Colombier `pop_failure_jump' to pop. See comments at
2526*0b459c2cSDavid du Colombier `push_dummy_failure' in `re_match_2'. */
2527*0b459c2cSDavid du Colombier BUF_PUSH (push_dummy_failure);
2528*0b459c2cSDavid du Colombier
2529*0b459c2cSDavid du Colombier /* We allocated space for this jump when we assigned
2530*0b459c2cSDavid du Colombier to `fixup_alt_jump', in the `handle_alt' case below. */
2531*0b459c2cSDavid du Colombier STORE_JUMP (jump_past_alt, fixup_alt_jump, b - 1);
2532*0b459c2cSDavid du Colombier }
2533*0b459c2cSDavid du Colombier
2534*0b459c2cSDavid du Colombier /* See similar code for backslashed right paren above. */
2535*0b459c2cSDavid du Colombier if (COMPILE_STACK_EMPTY)
2536*0b459c2cSDavid du Colombier if (syntax & RE_UNMATCHED_RIGHT_PAREN_ORD)
2537*0b459c2cSDavid du Colombier goto normal_char;
2538*0b459c2cSDavid du Colombier else
2539*0b459c2cSDavid du Colombier FREE_STACK_RETURN (REG_ERPAREN);
2540*0b459c2cSDavid du Colombier
2541*0b459c2cSDavid du Colombier /* Since we just checked for an empty stack above, this
2542*0b459c2cSDavid du Colombier ``can't happen''. */
2543*0b459c2cSDavid du Colombier assert (compile_stack.avail != 0);
2544*0b459c2cSDavid du Colombier {
2545*0b459c2cSDavid du Colombier /* We don't just want to restore into `regnum', because
2546*0b459c2cSDavid du Colombier later groups should continue to be numbered higher,
2547*0b459c2cSDavid du Colombier as in `(ab)c(de)' -- the second group is #2. */
2548*0b459c2cSDavid du Colombier regnum_t this_group_regnum;
2549*0b459c2cSDavid du Colombier
2550*0b459c2cSDavid du Colombier compile_stack.avail--;
2551*0b459c2cSDavid du Colombier begalt = bufp->buffer + COMPILE_STACK_TOP.begalt_offset;
2552*0b459c2cSDavid du Colombier fixup_alt_jump
2553*0b459c2cSDavid du Colombier = COMPILE_STACK_TOP.fixup_alt_jump
2554*0b459c2cSDavid du Colombier ? bufp->buffer + COMPILE_STACK_TOP.fixup_alt_jump - 1
2555*0b459c2cSDavid du Colombier : 0;
2556*0b459c2cSDavid du Colombier laststart = bufp->buffer + COMPILE_STACK_TOP.laststart_offset;
2557*0b459c2cSDavid du Colombier this_group_regnum = COMPILE_STACK_TOP.regnum;
2558*0b459c2cSDavid du Colombier /* If we've reached MAX_REGNUM groups, then this close
2559*0b459c2cSDavid du Colombier won't actually generate any code, so we'll have to
2560*0b459c2cSDavid du Colombier clear pending_exact explicitly. */
2561*0b459c2cSDavid du Colombier pending_exact = 0;
2562*0b459c2cSDavid du Colombier
2563*0b459c2cSDavid du Colombier /* We're at the end of the group, so now we know how many
2564*0b459c2cSDavid du Colombier groups were inside this one. */
2565*0b459c2cSDavid du Colombier if (this_group_regnum <= MAX_REGNUM)
2566*0b459c2cSDavid du Colombier {
2567*0b459c2cSDavid du Colombier unsigned char *inner_group_loc
2568*0b459c2cSDavid du Colombier = bufp->buffer + COMPILE_STACK_TOP.inner_group_offset;
2569*0b459c2cSDavid du Colombier
2570*0b459c2cSDavid du Colombier *inner_group_loc = regnum - this_group_regnum;
2571*0b459c2cSDavid du Colombier BUF_PUSH_3 (stop_memory, this_group_regnum,
2572*0b459c2cSDavid du Colombier regnum - this_group_regnum);
2573*0b459c2cSDavid du Colombier }
2574*0b459c2cSDavid du Colombier }
2575*0b459c2cSDavid du Colombier break;
2576*0b459c2cSDavid du Colombier
2577*0b459c2cSDavid du Colombier
2578*0b459c2cSDavid du Colombier case '|': /* `\|'. */
2579*0b459c2cSDavid du Colombier if (syntax & RE_LIMITED_OPS || syntax & RE_NO_BK_VBAR)
2580*0b459c2cSDavid du Colombier goto normal_backslash;
2581*0b459c2cSDavid du Colombier handle_alt:
2582*0b459c2cSDavid du Colombier if (syntax & RE_LIMITED_OPS)
2583*0b459c2cSDavid du Colombier goto normal_char;
2584*0b459c2cSDavid du Colombier
2585*0b459c2cSDavid du Colombier /* Insert before the previous alternative a jump which
2586*0b459c2cSDavid du Colombier jumps to this alternative if the former fails. */
2587*0b459c2cSDavid du Colombier GET_BUFFER_SPACE (3);
2588*0b459c2cSDavid du Colombier INSERT_JUMP (on_failure_jump, begalt, b + 6);
2589*0b459c2cSDavid du Colombier pending_exact = 0;
2590*0b459c2cSDavid du Colombier b += 3;
2591*0b459c2cSDavid du Colombier
2592*0b459c2cSDavid du Colombier /* The alternative before this one has a jump after it
2593*0b459c2cSDavid du Colombier which gets executed if it gets matched. Adjust that
2594*0b459c2cSDavid du Colombier jump so it will jump to this alternative's analogous
2595*0b459c2cSDavid du Colombier jump (put in below, which in turn will jump to the next
2596*0b459c2cSDavid du Colombier (if any) alternative's such jump, etc.). The last such
2597*0b459c2cSDavid du Colombier jump jumps to the correct final destination. A picture:
2598*0b459c2cSDavid du Colombier _____ _____
2599*0b459c2cSDavid du Colombier | | | |
2600*0b459c2cSDavid du Colombier | v | v
2601*0b459c2cSDavid du Colombier a | b | c
2602*0b459c2cSDavid du Colombier
2603*0b459c2cSDavid du Colombier If we are at `b', then fixup_alt_jump right now points to a
2604*0b459c2cSDavid du Colombier three-byte space after `a'. We'll put in the jump, set
2605*0b459c2cSDavid du Colombier fixup_alt_jump to right after `b', and leave behind three
2606*0b459c2cSDavid du Colombier bytes which we'll fill in when we get to after `c'. */
2607*0b459c2cSDavid du Colombier
2608*0b459c2cSDavid du Colombier if (fixup_alt_jump)
2609*0b459c2cSDavid du Colombier STORE_JUMP (jump_past_alt, fixup_alt_jump, b);
2610*0b459c2cSDavid du Colombier
2611*0b459c2cSDavid du Colombier /* Mark and leave space for a jump after this alternative,
2612*0b459c2cSDavid du Colombier to be filled in later either by next alternative or
2613*0b459c2cSDavid du Colombier when we know we're at the end of a series of alternatives. */
2614*0b459c2cSDavid du Colombier fixup_alt_jump = b;
2615*0b459c2cSDavid du Colombier GET_BUFFER_SPACE (3);
2616*0b459c2cSDavid du Colombier b += 3;
2617*0b459c2cSDavid du Colombier
2618*0b459c2cSDavid du Colombier laststart = 0;
2619*0b459c2cSDavid du Colombier begalt = b;
2620*0b459c2cSDavid du Colombier break;
2621*0b459c2cSDavid du Colombier
2622*0b459c2cSDavid du Colombier
2623*0b459c2cSDavid du Colombier case '{':
2624*0b459c2cSDavid du Colombier /* If \{ is a literal. */
2625*0b459c2cSDavid du Colombier if (!(syntax & RE_INTERVALS)
2626*0b459c2cSDavid du Colombier /* If we're at `\{' and it's not the open-interval
2627*0b459c2cSDavid du Colombier operator. */
2628*0b459c2cSDavid du Colombier || ((syntax & RE_INTERVALS) && (syntax & RE_NO_BK_BRACES))
2629*0b459c2cSDavid du Colombier || (p - 2 == pattern && p == pend))
2630*0b459c2cSDavid du Colombier goto normal_backslash;
2631*0b459c2cSDavid du Colombier
2632*0b459c2cSDavid du Colombier handle_interval:
2633*0b459c2cSDavid du Colombier {
2634*0b459c2cSDavid du Colombier /* If got here, then the syntax allows intervals. */
2635*0b459c2cSDavid du Colombier
2636*0b459c2cSDavid du Colombier /* At least (most) this many matches must be made. */
2637*0b459c2cSDavid du Colombier int lower_bound = -1, upper_bound = -1;
2638*0b459c2cSDavid du Colombier
2639*0b459c2cSDavid du Colombier beg_interval = p - 1;
2640*0b459c2cSDavid du Colombier
2641*0b459c2cSDavid du Colombier if (p == pend)
2642*0b459c2cSDavid du Colombier {
2643*0b459c2cSDavid du Colombier if (syntax & RE_NO_BK_BRACES)
2644*0b459c2cSDavid du Colombier goto unfetch_interval;
2645*0b459c2cSDavid du Colombier else
2646*0b459c2cSDavid du Colombier FREE_STACK_RETURN (REG_EBRACE);
2647*0b459c2cSDavid du Colombier }
2648*0b459c2cSDavid du Colombier
2649*0b459c2cSDavid du Colombier GET_UNSIGNED_NUMBER (lower_bound);
2650*0b459c2cSDavid du Colombier
2651*0b459c2cSDavid du Colombier if (c == ',')
2652*0b459c2cSDavid du Colombier {
2653*0b459c2cSDavid du Colombier GET_UNSIGNED_NUMBER (upper_bound);
2654*0b459c2cSDavid du Colombier if (upper_bound < 0) upper_bound = RE_DUP_MAX;
2655*0b459c2cSDavid du Colombier }
2656*0b459c2cSDavid du Colombier else
2657*0b459c2cSDavid du Colombier /* Interval such as `{1}' => match exactly once. */
2658*0b459c2cSDavid du Colombier upper_bound = lower_bound;
2659*0b459c2cSDavid du Colombier
2660*0b459c2cSDavid du Colombier if (lower_bound < 0 || upper_bound > RE_DUP_MAX
2661*0b459c2cSDavid du Colombier || lower_bound > upper_bound)
2662*0b459c2cSDavid du Colombier {
2663*0b459c2cSDavid du Colombier if (syntax & RE_NO_BK_BRACES)
2664*0b459c2cSDavid du Colombier goto unfetch_interval;
2665*0b459c2cSDavid du Colombier else
2666*0b459c2cSDavid du Colombier FREE_STACK_RETURN (REG_BADBR);
2667*0b459c2cSDavid du Colombier }
2668*0b459c2cSDavid du Colombier
2669*0b459c2cSDavid du Colombier if (!(syntax & RE_NO_BK_BRACES))
2670*0b459c2cSDavid du Colombier {
2671*0b459c2cSDavid du Colombier if (c != '\\') FREE_STACK_RETURN (REG_EBRACE);
2672*0b459c2cSDavid du Colombier
2673*0b459c2cSDavid du Colombier PATFETCH (c);
2674*0b459c2cSDavid du Colombier }
2675*0b459c2cSDavid du Colombier
2676*0b459c2cSDavid du Colombier if (c != '}')
2677*0b459c2cSDavid du Colombier {
2678*0b459c2cSDavid du Colombier if (syntax & RE_NO_BK_BRACES)
2679*0b459c2cSDavid du Colombier goto unfetch_interval;
2680*0b459c2cSDavid du Colombier else
2681*0b459c2cSDavid du Colombier FREE_STACK_RETURN (REG_BADBR);
2682*0b459c2cSDavid du Colombier }
2683*0b459c2cSDavid du Colombier
2684*0b459c2cSDavid du Colombier /* We just parsed a valid interval. */
2685*0b459c2cSDavid du Colombier
2686*0b459c2cSDavid du Colombier /* If it's invalid to have no preceding re. */
2687*0b459c2cSDavid du Colombier if (!laststart)
2688*0b459c2cSDavid du Colombier {
2689*0b459c2cSDavid du Colombier if (syntax & RE_CONTEXT_INVALID_OPS)
2690*0b459c2cSDavid du Colombier FREE_STACK_RETURN (REG_BADRPT);
2691*0b459c2cSDavid du Colombier else if (syntax & RE_CONTEXT_INDEP_OPS)
2692*0b459c2cSDavid du Colombier laststart = b;
2693*0b459c2cSDavid du Colombier else
2694*0b459c2cSDavid du Colombier goto unfetch_interval;
2695*0b459c2cSDavid du Colombier }
2696*0b459c2cSDavid du Colombier
2697*0b459c2cSDavid du Colombier /* If the upper bound is zero, don't want to succeed at
2698*0b459c2cSDavid du Colombier all; jump from `laststart' to `b + 3', which will be
2699*0b459c2cSDavid du Colombier the end of the buffer after we insert the jump. */
2700*0b459c2cSDavid du Colombier if (upper_bound == 0)
2701*0b459c2cSDavid du Colombier {
2702*0b459c2cSDavid du Colombier GET_BUFFER_SPACE (3);
2703*0b459c2cSDavid du Colombier INSERT_JUMP (jump, laststart, b + 3);
2704*0b459c2cSDavid du Colombier b += 3;
2705*0b459c2cSDavid du Colombier }
2706*0b459c2cSDavid du Colombier
2707*0b459c2cSDavid du Colombier /* Otherwise, we have a nontrivial interval. When
2708*0b459c2cSDavid du Colombier we're all done, the pattern will look like:
2709*0b459c2cSDavid du Colombier set_number_at <jump count> <upper bound>
2710*0b459c2cSDavid du Colombier set_number_at <succeed_n count> <lower bound>
2711*0b459c2cSDavid du Colombier succeed_n <after jump addr> <succeed_n count>
2712*0b459c2cSDavid du Colombier <body of loop>
2713*0b459c2cSDavid du Colombier jump_n <succeed_n addr> <jump count>
2714*0b459c2cSDavid du Colombier (The upper bound and `jump_n' are omitted if
2715*0b459c2cSDavid du Colombier `upper_bound' is 1, though.) */
2716*0b459c2cSDavid du Colombier else
2717*0b459c2cSDavid du Colombier { /* If the upper bound is > 1, we need to insert
2718*0b459c2cSDavid du Colombier more at the end of the loop. */
2719*0b459c2cSDavid du Colombier unsigned nbytes = 10 + (upper_bound > 1) * 10;
2720*0b459c2cSDavid du Colombier
2721*0b459c2cSDavid du Colombier GET_BUFFER_SPACE (nbytes);
2722*0b459c2cSDavid du Colombier
2723*0b459c2cSDavid du Colombier /* Initialize lower bound of the `succeed_n', even
2724*0b459c2cSDavid du Colombier though it will be set during matching by its
2725*0b459c2cSDavid du Colombier attendant `set_number_at' (inserted next),
2726*0b459c2cSDavid du Colombier because `re_compile_fastmap' needs to know.
2727*0b459c2cSDavid du Colombier Jump to the `jump_n' we might insert below. */
2728*0b459c2cSDavid du Colombier INSERT_JUMP2 (succeed_n, laststart,
2729*0b459c2cSDavid du Colombier b + 5 + (upper_bound > 1) * 5,
2730*0b459c2cSDavid du Colombier lower_bound);
2731*0b459c2cSDavid du Colombier b += 5;
2732*0b459c2cSDavid du Colombier
2733*0b459c2cSDavid du Colombier /* Code to initialize the lower bound. Insert
2734*0b459c2cSDavid du Colombier before the `succeed_n'. The `5' is the last two
2735*0b459c2cSDavid du Colombier bytes of this `set_number_at', plus 3 bytes of
2736*0b459c2cSDavid du Colombier the following `succeed_n'. */
2737*0b459c2cSDavid du Colombier insert_op2 (set_number_at, laststart, 5, lower_bound, b);
2738*0b459c2cSDavid du Colombier b += 5;
2739*0b459c2cSDavid du Colombier
2740*0b459c2cSDavid du Colombier if (upper_bound > 1)
2741*0b459c2cSDavid du Colombier { /* More than one repetition is allowed, so
2742*0b459c2cSDavid du Colombier append a backward jump to the `succeed_n'
2743*0b459c2cSDavid du Colombier that starts this interval.
2744*0b459c2cSDavid du Colombier
2745*0b459c2cSDavid du Colombier When we've reached this during matching,
2746*0b459c2cSDavid du Colombier we'll have matched the interval once, so
2747*0b459c2cSDavid du Colombier jump back only `upper_bound - 1' times. */
2748*0b459c2cSDavid du Colombier STORE_JUMP2 (jump_n, b, laststart + 5,
2749*0b459c2cSDavid du Colombier upper_bound - 1);
2750*0b459c2cSDavid du Colombier b += 5;
2751*0b459c2cSDavid du Colombier
2752*0b459c2cSDavid du Colombier /* The location we want to set is the second
2753*0b459c2cSDavid du Colombier parameter of the `jump_n'; that is `b-2' as
2754*0b459c2cSDavid du Colombier an absolute address. `laststart' will be
2755*0b459c2cSDavid du Colombier the `set_number_at' we're about to insert;
2756*0b459c2cSDavid du Colombier `laststart+3' the number to set, the source
2757*0b459c2cSDavid du Colombier for the relative address. But we are
2758*0b459c2cSDavid du Colombier inserting into the middle of the pattern --
2759*0b459c2cSDavid du Colombier so everything is getting moved up by 5.
2760*0b459c2cSDavid du Colombier Conclusion: (b - 2) - (laststart + 3) + 5,
2761*0b459c2cSDavid du Colombier i.e., b - laststart.
2762*0b459c2cSDavid du Colombier
2763*0b459c2cSDavid du Colombier We insert this at the beginning of the loop
2764*0b459c2cSDavid du Colombier so that if we fail during matching, we'll
2765*0b459c2cSDavid du Colombier reinitialize the bounds. */
2766*0b459c2cSDavid du Colombier insert_op2 (set_number_at, laststart, b - laststart,
2767*0b459c2cSDavid du Colombier upper_bound - 1, b);
2768*0b459c2cSDavid du Colombier b += 5;
2769*0b459c2cSDavid du Colombier }
2770*0b459c2cSDavid du Colombier }
2771*0b459c2cSDavid du Colombier pending_exact = 0;
2772*0b459c2cSDavid du Colombier beg_interval = NULL;
2773*0b459c2cSDavid du Colombier }
2774*0b459c2cSDavid du Colombier break;
2775*0b459c2cSDavid du Colombier
2776*0b459c2cSDavid du Colombier unfetch_interval:
2777*0b459c2cSDavid du Colombier /* If an invalid interval, match the characters as literals. */
2778*0b459c2cSDavid du Colombier assert (beg_interval);
2779*0b459c2cSDavid du Colombier p = beg_interval;
2780*0b459c2cSDavid du Colombier beg_interval = NULL;
2781*0b459c2cSDavid du Colombier
2782*0b459c2cSDavid du Colombier /* normal_char and normal_backslash need `c'. */
2783*0b459c2cSDavid du Colombier PATFETCH (c);
2784*0b459c2cSDavid du Colombier
2785*0b459c2cSDavid du Colombier if (!(syntax & RE_NO_BK_BRACES))
2786*0b459c2cSDavid du Colombier {
2787*0b459c2cSDavid du Colombier if (p > pattern && p[-1] == '\\')
2788*0b459c2cSDavid du Colombier goto normal_backslash;
2789*0b459c2cSDavid du Colombier }
2790*0b459c2cSDavid du Colombier goto normal_char;
2791*0b459c2cSDavid du Colombier
2792*0b459c2cSDavid du Colombier #ifdef emacs
2793*0b459c2cSDavid du Colombier /* There is no way to specify the before_dot and after_dot
2794*0b459c2cSDavid du Colombier operators. rms says this is ok. --karl */
2795*0b459c2cSDavid du Colombier case '=':
2796*0b459c2cSDavid du Colombier BUF_PUSH (at_dot);
2797*0b459c2cSDavid du Colombier break;
2798*0b459c2cSDavid du Colombier
2799*0b459c2cSDavid du Colombier case 's':
2800*0b459c2cSDavid du Colombier laststart = b;
2801*0b459c2cSDavid du Colombier PATFETCH (c);
2802*0b459c2cSDavid du Colombier BUF_PUSH_2 (syntaxspec, syntax_spec_code[c]);
2803*0b459c2cSDavid du Colombier break;
2804*0b459c2cSDavid du Colombier
2805*0b459c2cSDavid du Colombier case 'S':
2806*0b459c2cSDavid du Colombier laststart = b;
2807*0b459c2cSDavid du Colombier PATFETCH (c);
2808*0b459c2cSDavid du Colombier BUF_PUSH_2 (notsyntaxspec, syntax_spec_code[c]);
2809*0b459c2cSDavid du Colombier break;
2810*0b459c2cSDavid du Colombier
2811*0b459c2cSDavid du Colombier case 'c':
2812*0b459c2cSDavid du Colombier laststart = b;
2813*0b459c2cSDavid du Colombier PATFETCH_RAW (c);
2814*0b459c2cSDavid du Colombier BUF_PUSH_2 (categoryspec, c);
2815*0b459c2cSDavid du Colombier break;
2816*0b459c2cSDavid du Colombier
2817*0b459c2cSDavid du Colombier case 'C':
2818*0b459c2cSDavid du Colombier laststart = b;
2819*0b459c2cSDavid du Colombier PATFETCH_RAW (c);
2820*0b459c2cSDavid du Colombier BUF_PUSH_2 (notcategoryspec, c);
2821*0b459c2cSDavid du Colombier break;
2822*0b459c2cSDavid du Colombier #endif /* emacs */
2823*0b459c2cSDavid du Colombier
2824*0b459c2cSDavid du Colombier
2825*0b459c2cSDavid du Colombier case 'w':
2826*0b459c2cSDavid du Colombier laststart = b;
2827*0b459c2cSDavid du Colombier BUF_PUSH (wordchar);
2828*0b459c2cSDavid du Colombier break;
2829*0b459c2cSDavid du Colombier
2830*0b459c2cSDavid du Colombier
2831*0b459c2cSDavid du Colombier case 'W':
2832*0b459c2cSDavid du Colombier laststart = b;
2833*0b459c2cSDavid du Colombier BUF_PUSH (notwordchar);
2834*0b459c2cSDavid du Colombier break;
2835*0b459c2cSDavid du Colombier
2836*0b459c2cSDavid du Colombier
2837*0b459c2cSDavid du Colombier case '<':
2838*0b459c2cSDavid du Colombier BUF_PUSH (wordbeg);
2839*0b459c2cSDavid du Colombier break;
2840*0b459c2cSDavid du Colombier
2841*0b459c2cSDavid du Colombier case '>':
2842*0b459c2cSDavid du Colombier BUF_PUSH (wordend);
2843*0b459c2cSDavid du Colombier break;
2844*0b459c2cSDavid du Colombier
2845*0b459c2cSDavid du Colombier case 'b':
2846*0b459c2cSDavid du Colombier BUF_PUSH (wordbound);
2847*0b459c2cSDavid du Colombier break;
2848*0b459c2cSDavid du Colombier
2849*0b459c2cSDavid du Colombier case 'B':
2850*0b459c2cSDavid du Colombier BUF_PUSH (notwordbound);
2851*0b459c2cSDavid du Colombier break;
2852*0b459c2cSDavid du Colombier
2853*0b459c2cSDavid du Colombier case '`':
2854*0b459c2cSDavid du Colombier BUF_PUSH (begbuf);
2855*0b459c2cSDavid du Colombier break;
2856*0b459c2cSDavid du Colombier
2857*0b459c2cSDavid du Colombier case '\'':
2858*0b459c2cSDavid du Colombier BUF_PUSH (endbuf);
2859*0b459c2cSDavid du Colombier break;
2860*0b459c2cSDavid du Colombier
2861*0b459c2cSDavid du Colombier case '1': case '2': case '3': case '4': case '5':
2862*0b459c2cSDavid du Colombier case '6': case '7': case '8': case '9':
2863*0b459c2cSDavid du Colombier if (syntax & RE_NO_BK_REFS)
2864*0b459c2cSDavid du Colombier goto normal_char;
2865*0b459c2cSDavid du Colombier
2866*0b459c2cSDavid du Colombier c1 = c - '0';
2867*0b459c2cSDavid du Colombier
2868*0b459c2cSDavid du Colombier if (c1 > regnum)
2869*0b459c2cSDavid du Colombier FREE_STACK_RETURN (REG_ESUBREG);
2870*0b459c2cSDavid du Colombier
2871*0b459c2cSDavid du Colombier /* Can't back reference to a subexpression if inside of it. */
2872*0b459c2cSDavid du Colombier if (group_in_compile_stack (compile_stack, c1))
2873*0b459c2cSDavid du Colombier goto normal_char;
2874*0b459c2cSDavid du Colombier
2875*0b459c2cSDavid du Colombier laststart = b;
2876*0b459c2cSDavid du Colombier BUF_PUSH_2 (duplicate, c1);
2877*0b459c2cSDavid du Colombier break;
2878*0b459c2cSDavid du Colombier
2879*0b459c2cSDavid du Colombier
2880*0b459c2cSDavid du Colombier case '+':
2881*0b459c2cSDavid du Colombier case '?':
2882*0b459c2cSDavid du Colombier if (syntax & RE_BK_PLUS_QM)
2883*0b459c2cSDavid du Colombier goto handle_plus;
2884*0b459c2cSDavid du Colombier else
2885*0b459c2cSDavid du Colombier goto normal_backslash;
2886*0b459c2cSDavid du Colombier
2887*0b459c2cSDavid du Colombier default:
2888*0b459c2cSDavid du Colombier normal_backslash:
2889*0b459c2cSDavid du Colombier /* You might think it would be useful for \ to mean
2890*0b459c2cSDavid du Colombier not to translate; but if we don't translate it
2891*0b459c2cSDavid du Colombier it will never match anything. */
2892*0b459c2cSDavid du Colombier c = TRANSLATE (c);
2893*0b459c2cSDavid du Colombier goto normal_char;
2894*0b459c2cSDavid du Colombier }
2895*0b459c2cSDavid du Colombier break;
2896*0b459c2cSDavid du Colombier
2897*0b459c2cSDavid du Colombier
2898*0b459c2cSDavid du Colombier default:
2899*0b459c2cSDavid du Colombier /* Expects the character in `c'. */
2900*0b459c2cSDavid du Colombier normal_char:
2901*0b459c2cSDavid du Colombier p1 = p - 1; /* P1 points the head of C. */
2902*0b459c2cSDavid du Colombier #ifdef emacs
2903*0b459c2cSDavid du Colombier if (bufp->multibyte)
2904*0b459c2cSDavid du Colombier {
2905*0b459c2cSDavid du Colombier c = STRING_CHAR (p1, pend - p1);
2906*0b459c2cSDavid du Colombier c = TRANSLATE (c);
2907*0b459c2cSDavid du Colombier /* Set P to the next character boundary. */
2908*0b459c2cSDavid du Colombier p += MULTIBYTE_FORM_LENGTH (p1, pend - p1) - 1;
2909*0b459c2cSDavid du Colombier }
2910*0b459c2cSDavid du Colombier #endif
2911*0b459c2cSDavid du Colombier /* If no exactn currently being built. */
2912*0b459c2cSDavid du Colombier if (!pending_exact
2913*0b459c2cSDavid du Colombier
2914*0b459c2cSDavid du Colombier /* If last exactn not at current position. */
2915*0b459c2cSDavid du Colombier || pending_exact + *pending_exact + 1 != b
2916*0b459c2cSDavid du Colombier
2917*0b459c2cSDavid du Colombier /* We have only one byte following the exactn for the count. */
2918*0b459c2cSDavid du Colombier || *pending_exact >= (1 << BYTEWIDTH) - (p - p1)
2919*0b459c2cSDavid du Colombier
2920*0b459c2cSDavid du Colombier /* If followed by a repetition operator. */
2921*0b459c2cSDavid du Colombier || (p != pend && (*p == '*' || *p == '^'))
2922*0b459c2cSDavid du Colombier || ((syntax & RE_BK_PLUS_QM)
2923*0b459c2cSDavid du Colombier ? p + 1 < pend && *p == '\\' && (p[1] == '+' || p[1] == '?')
2924*0b459c2cSDavid du Colombier : p != pend && (*p == '+' || *p == '?'))
2925*0b459c2cSDavid du Colombier || ((syntax & RE_INTERVALS)
2926*0b459c2cSDavid du Colombier && ((syntax & RE_NO_BK_BRACES)
2927*0b459c2cSDavid du Colombier ? p != pend && *p == '{'
2928*0b459c2cSDavid du Colombier : p + 1 < pend && p[0] == '\\' && p[1] == '{')))
2929*0b459c2cSDavid du Colombier {
2930*0b459c2cSDavid du Colombier /* Start building a new exactn. */
2931*0b459c2cSDavid du Colombier
2932*0b459c2cSDavid du Colombier laststart = b;
2933*0b459c2cSDavid du Colombier
2934*0b459c2cSDavid du Colombier BUF_PUSH_2 (exactn, 0);
2935*0b459c2cSDavid du Colombier pending_exact = b - 1;
2936*0b459c2cSDavid du Colombier }
2937*0b459c2cSDavid du Colombier
2938*0b459c2cSDavid du Colombier #ifdef emacs
2939*0b459c2cSDavid du Colombier if (! SINGLE_BYTE_CHAR_P (c))
2940*0b459c2cSDavid du Colombier {
2941*0b459c2cSDavid du Colombier unsigned char work[4], *str;
2942*0b459c2cSDavid du Colombier int i = CHAR_STRING (c, work, str);
2943*0b459c2cSDavid du Colombier int j;
2944*0b459c2cSDavid du Colombier for (j = 0; j < i; j++)
2945*0b459c2cSDavid du Colombier {
2946*0b459c2cSDavid du Colombier BUF_PUSH (str[j]);
2947*0b459c2cSDavid du Colombier (*pending_exact)++;
2948*0b459c2cSDavid du Colombier }
2949*0b459c2cSDavid du Colombier }
2950*0b459c2cSDavid du Colombier else
2951*0b459c2cSDavid du Colombier #endif
2952*0b459c2cSDavid du Colombier {
2953*0b459c2cSDavid du Colombier BUF_PUSH (c);
2954*0b459c2cSDavid du Colombier (*pending_exact)++;
2955*0b459c2cSDavid du Colombier }
2956*0b459c2cSDavid du Colombier break;
2957*0b459c2cSDavid du Colombier } /* switch (c) */
2958*0b459c2cSDavid du Colombier } /* while p != pend */
2959*0b459c2cSDavid du Colombier
2960*0b459c2cSDavid du Colombier
2961*0b459c2cSDavid du Colombier /* Through the pattern now. */
2962*0b459c2cSDavid du Colombier
2963*0b459c2cSDavid du Colombier if (fixup_alt_jump)
2964*0b459c2cSDavid du Colombier STORE_JUMP (jump_past_alt, fixup_alt_jump, b);
2965*0b459c2cSDavid du Colombier
2966*0b459c2cSDavid du Colombier if (!COMPILE_STACK_EMPTY)
2967*0b459c2cSDavid du Colombier FREE_STACK_RETURN (REG_EPAREN);
2968*0b459c2cSDavid du Colombier
2969*0b459c2cSDavid du Colombier /* If we don't want backtracking, force success
2970*0b459c2cSDavid du Colombier the first time we reach the end of the compiled pattern. */
2971*0b459c2cSDavid du Colombier if (syntax & RE_NO_POSIX_BACKTRACKING)
2972*0b459c2cSDavid du Colombier BUF_PUSH (succeed);
2973*0b459c2cSDavid du Colombier
2974*0b459c2cSDavid du Colombier free (compile_stack.stack);
2975*0b459c2cSDavid du Colombier
2976*0b459c2cSDavid du Colombier /* We have succeeded; set the length of the buffer. */
2977*0b459c2cSDavid du Colombier bufp->used = b - bufp->buffer;
2978*0b459c2cSDavid du Colombier
2979*0b459c2cSDavid du Colombier #ifdef DEBUG
2980*0b459c2cSDavid du Colombier if (debug)
2981*0b459c2cSDavid du Colombier {
2982*0b459c2cSDavid du Colombier DEBUG_PRINT1 ("\nCompiled pattern: \n");
2983*0b459c2cSDavid du Colombier print_compiled_pattern (bufp);
2984*0b459c2cSDavid du Colombier }
2985*0b459c2cSDavid du Colombier #endif /* DEBUG */
2986*0b459c2cSDavid du Colombier
2987*0b459c2cSDavid du Colombier #ifndef MATCH_MAY_ALLOCATE
2988*0b459c2cSDavid du Colombier /* Initialize the failure stack to the largest possible stack. This
2989*0b459c2cSDavid du Colombier isn't necessary unless we're trying to avoid calling alloca in
2990*0b459c2cSDavid du Colombier the search and match routines. */
2991*0b459c2cSDavid du Colombier {
2992*0b459c2cSDavid du Colombier int num_regs = bufp->re_nsub + 1;
2993*0b459c2cSDavid du Colombier
2994*0b459c2cSDavid du Colombier if (fail_stack.size < re_max_failures * TYPICAL_FAILURE_SIZE)
2995*0b459c2cSDavid du Colombier {
2996*0b459c2cSDavid du Colombier fail_stack.size = re_max_failures * TYPICAL_FAILURE_SIZE;
2997*0b459c2cSDavid du Colombier
2998*0b459c2cSDavid du Colombier #ifdef emacs
2999*0b459c2cSDavid du Colombier if (! fail_stack.stack)
3000*0b459c2cSDavid du Colombier fail_stack.stack
3001*0b459c2cSDavid du Colombier = (fail_stack_elt_t *) xmalloc (fail_stack.size
3002*0b459c2cSDavid du Colombier * sizeof (fail_stack_elt_t));
3003*0b459c2cSDavid du Colombier else
3004*0b459c2cSDavid du Colombier fail_stack.stack
3005*0b459c2cSDavid du Colombier = (fail_stack_elt_t *) xrealloc (fail_stack.stack,
3006*0b459c2cSDavid du Colombier (fail_stack.size
3007*0b459c2cSDavid du Colombier * sizeof (fail_stack_elt_t)));
3008*0b459c2cSDavid du Colombier #else /* not emacs */
3009*0b459c2cSDavid du Colombier if (! fail_stack.stack)
3010*0b459c2cSDavid du Colombier fail_stack.stack
3011*0b459c2cSDavid du Colombier = (fail_stack_elt_t *) malloc (fail_stack.size
3012*0b459c2cSDavid du Colombier * sizeof (fail_stack_elt_t));
3013*0b459c2cSDavid du Colombier else
3014*0b459c2cSDavid du Colombier fail_stack.stack
3015*0b459c2cSDavid du Colombier = (fail_stack_elt_t *) realloc (fail_stack.stack,
3016*0b459c2cSDavid du Colombier (fail_stack.size
3017*0b459c2cSDavid du Colombier * sizeof (fail_stack_elt_t)));
3018*0b459c2cSDavid du Colombier #endif /* not emacs */
3019*0b459c2cSDavid du Colombier }
3020*0b459c2cSDavid du Colombier
3021*0b459c2cSDavid du Colombier regex_grow_registers (num_regs);
3022*0b459c2cSDavid du Colombier }
3023*0b459c2cSDavid du Colombier #endif /* not MATCH_MAY_ALLOCATE */
3024*0b459c2cSDavid du Colombier
3025*0b459c2cSDavid du Colombier return REG_NOERROR;
3026*0b459c2cSDavid du Colombier } /* regex_compile */
3027*0b459c2cSDavid du Colombier
3028*0b459c2cSDavid du Colombier /* Subroutines for `regex_compile'. */
3029*0b459c2cSDavid du Colombier
3030*0b459c2cSDavid du Colombier /* Store OP at LOC followed by two-byte integer parameter ARG. */
3031*0b459c2cSDavid du Colombier
3032*0b459c2cSDavid du Colombier static void
store_op1(op,loc,arg)3033*0b459c2cSDavid du Colombier store_op1 (op, loc, arg)
3034*0b459c2cSDavid du Colombier re_opcode_t op;
3035*0b459c2cSDavid du Colombier unsigned char *loc;
3036*0b459c2cSDavid du Colombier int arg;
3037*0b459c2cSDavid du Colombier {
3038*0b459c2cSDavid du Colombier *loc = (unsigned char) op;
3039*0b459c2cSDavid du Colombier STORE_NUMBER (loc + 1, arg);
3040*0b459c2cSDavid du Colombier }
3041*0b459c2cSDavid du Colombier
3042*0b459c2cSDavid du Colombier
3043*0b459c2cSDavid du Colombier /* Like `store_op1', but for two two-byte parameters ARG1 and ARG2. */
3044*0b459c2cSDavid du Colombier
3045*0b459c2cSDavid du Colombier static void
store_op2(op,loc,arg1,arg2)3046*0b459c2cSDavid du Colombier store_op2 (op, loc, arg1, arg2)
3047*0b459c2cSDavid du Colombier re_opcode_t op;
3048*0b459c2cSDavid du Colombier unsigned char *loc;
3049*0b459c2cSDavid du Colombier int arg1, arg2;
3050*0b459c2cSDavid du Colombier {
3051*0b459c2cSDavid du Colombier *loc = (unsigned char) op;
3052*0b459c2cSDavid du Colombier STORE_NUMBER (loc + 1, arg1);
3053*0b459c2cSDavid du Colombier STORE_NUMBER (loc + 3, arg2);
3054*0b459c2cSDavid du Colombier }
3055*0b459c2cSDavid du Colombier
3056*0b459c2cSDavid du Colombier
3057*0b459c2cSDavid du Colombier /* Copy the bytes from LOC to END to open up three bytes of space at LOC
3058*0b459c2cSDavid du Colombier for OP followed by two-byte integer parameter ARG. */
3059*0b459c2cSDavid du Colombier
3060*0b459c2cSDavid du Colombier static void
insert_op1(op,loc,arg,end)3061*0b459c2cSDavid du Colombier insert_op1 (op, loc, arg, end)
3062*0b459c2cSDavid du Colombier re_opcode_t op;
3063*0b459c2cSDavid du Colombier unsigned char *loc;
3064*0b459c2cSDavid du Colombier int arg;
3065*0b459c2cSDavid du Colombier unsigned char *end;
3066*0b459c2cSDavid du Colombier {
3067*0b459c2cSDavid du Colombier register unsigned char *pfrom = end;
3068*0b459c2cSDavid du Colombier register unsigned char *pto = end + 3;
3069*0b459c2cSDavid du Colombier
3070*0b459c2cSDavid du Colombier while (pfrom != loc)
3071*0b459c2cSDavid du Colombier *--pto = *--pfrom;
3072*0b459c2cSDavid du Colombier
3073*0b459c2cSDavid du Colombier store_op1 (op, loc, arg);
3074*0b459c2cSDavid du Colombier }
3075*0b459c2cSDavid du Colombier
3076*0b459c2cSDavid du Colombier
3077*0b459c2cSDavid du Colombier /* Like `insert_op1', but for two two-byte parameters ARG1 and ARG2. */
3078*0b459c2cSDavid du Colombier
3079*0b459c2cSDavid du Colombier static void
insert_op2(op,loc,arg1,arg2,end)3080*0b459c2cSDavid du Colombier insert_op2 (op, loc, arg1, arg2, end)
3081*0b459c2cSDavid du Colombier re_opcode_t op;
3082*0b459c2cSDavid du Colombier unsigned char *loc;
3083*0b459c2cSDavid du Colombier int arg1, arg2;
3084*0b459c2cSDavid du Colombier unsigned char *end;
3085*0b459c2cSDavid du Colombier {
3086*0b459c2cSDavid du Colombier register unsigned char *pfrom = end;
3087*0b459c2cSDavid du Colombier register unsigned char *pto = end + 5;
3088*0b459c2cSDavid du Colombier
3089*0b459c2cSDavid du Colombier while (pfrom != loc)
3090*0b459c2cSDavid du Colombier *--pto = *--pfrom;
3091*0b459c2cSDavid du Colombier
3092*0b459c2cSDavid du Colombier store_op2 (op, loc, arg1, arg2);
3093*0b459c2cSDavid du Colombier }
3094*0b459c2cSDavid du Colombier
3095*0b459c2cSDavid du Colombier
3096*0b459c2cSDavid du Colombier /* P points to just after a ^ in PATTERN. Return true if that ^ comes
3097*0b459c2cSDavid du Colombier after an alternative or a begin-subexpression. We assume there is at
3098*0b459c2cSDavid du Colombier least one character before the ^. */
3099*0b459c2cSDavid du Colombier
3100*0b459c2cSDavid du Colombier static boolean
at_begline_loc_p(pattern,p,syntax)3101*0b459c2cSDavid du Colombier at_begline_loc_p (pattern, p, syntax)
3102*0b459c2cSDavid du Colombier const char *pattern, *p;
3103*0b459c2cSDavid du Colombier reg_syntax_t syntax;
3104*0b459c2cSDavid du Colombier {
3105*0b459c2cSDavid du Colombier const char *prev = p - 2;
3106*0b459c2cSDavid du Colombier boolean prev_prev_backslash = prev > pattern && prev[-1] == '\\';
3107*0b459c2cSDavid du Colombier
3108*0b459c2cSDavid du Colombier return
3109*0b459c2cSDavid du Colombier /* After a subexpression? */
3110*0b459c2cSDavid du Colombier (*prev == '(' && (syntax & RE_NO_BK_PARENS || prev_prev_backslash))
3111*0b459c2cSDavid du Colombier /* After an alternative? */
3112*0b459c2cSDavid du Colombier || (*prev == '|' && (syntax & RE_NO_BK_VBAR || prev_prev_backslash));
3113*0b459c2cSDavid du Colombier }
3114*0b459c2cSDavid du Colombier
3115*0b459c2cSDavid du Colombier
3116*0b459c2cSDavid du Colombier /* The dual of at_begline_loc_p. This one is for $. We assume there is
3117*0b459c2cSDavid du Colombier at least one character after the $, i.e., `P < PEND'. */
3118*0b459c2cSDavid du Colombier
3119*0b459c2cSDavid du Colombier static boolean
at_endline_loc_p(p,pend,syntax)3120*0b459c2cSDavid du Colombier at_endline_loc_p (p, pend, syntax)
3121*0b459c2cSDavid du Colombier const char *p, *pend;
3122*0b459c2cSDavid du Colombier int syntax;
3123*0b459c2cSDavid du Colombier {
3124*0b459c2cSDavid du Colombier const char *next = p;
3125*0b459c2cSDavid du Colombier boolean next_backslash = *next == '\\';
3126*0b459c2cSDavid du Colombier const char *next_next = p + 1 < pend ? p + 1 : 0;
3127*0b459c2cSDavid du Colombier
3128*0b459c2cSDavid du Colombier return
3129*0b459c2cSDavid du Colombier /* Before a subexpression? */
3130*0b459c2cSDavid du Colombier (syntax & RE_NO_BK_PARENS ? *next == ')'
3131*0b459c2cSDavid du Colombier : next_backslash && next_next && *next_next == ')')
3132*0b459c2cSDavid du Colombier /* Before an alternative? */
3133*0b459c2cSDavid du Colombier || (syntax & RE_NO_BK_VBAR ? *next == '|'
3134*0b459c2cSDavid du Colombier : next_backslash && next_next && *next_next == '|');
3135*0b459c2cSDavid du Colombier }
3136*0b459c2cSDavid du Colombier
3137*0b459c2cSDavid du Colombier
3138*0b459c2cSDavid du Colombier /* Returns true if REGNUM is in one of COMPILE_STACK's elements and
3139*0b459c2cSDavid du Colombier false if it's not. */
3140*0b459c2cSDavid du Colombier
3141*0b459c2cSDavid du Colombier static boolean
group_in_compile_stack(compile_stack,regnum)3142*0b459c2cSDavid du Colombier group_in_compile_stack (compile_stack, regnum)
3143*0b459c2cSDavid du Colombier compile_stack_type compile_stack;
3144*0b459c2cSDavid du Colombier regnum_t regnum;
3145*0b459c2cSDavid du Colombier {
3146*0b459c2cSDavid du Colombier int this_element;
3147*0b459c2cSDavid du Colombier
3148*0b459c2cSDavid du Colombier for (this_element = compile_stack.avail - 1;
3149*0b459c2cSDavid du Colombier this_element >= 0;
3150*0b459c2cSDavid du Colombier this_element--)
3151*0b459c2cSDavid du Colombier if (compile_stack.stack[this_element].regnum == regnum)
3152*0b459c2cSDavid du Colombier return true;
3153*0b459c2cSDavid du Colombier
3154*0b459c2cSDavid du Colombier return false;
3155*0b459c2cSDavid du Colombier }
3156*0b459c2cSDavid du Colombier
3157*0b459c2cSDavid du Colombier /* re_compile_fastmap computes a ``fastmap'' for the compiled pattern in
3158*0b459c2cSDavid du Colombier BUFP. A fastmap records which of the (1 << BYTEWIDTH) possible
3159*0b459c2cSDavid du Colombier characters can start a string that matches the pattern. This fastmap
3160*0b459c2cSDavid du Colombier is used by re_search to skip quickly over impossible starting points.
3161*0b459c2cSDavid du Colombier
3162*0b459c2cSDavid du Colombier The caller must supply the address of a (1 << BYTEWIDTH)-byte data
3163*0b459c2cSDavid du Colombier area as BUFP->fastmap.
3164*0b459c2cSDavid du Colombier
3165*0b459c2cSDavid du Colombier We set the `fastmap', `fastmap_accurate', and `can_be_null' fields in
3166*0b459c2cSDavid du Colombier the pattern buffer.
3167*0b459c2cSDavid du Colombier
3168*0b459c2cSDavid du Colombier Returns 0 if we succeed, -2 if an internal error. */
3169*0b459c2cSDavid du Colombier
3170*0b459c2cSDavid du Colombier int
re_compile_fastmap(bufp)3171*0b459c2cSDavid du Colombier re_compile_fastmap (bufp)
3172*0b459c2cSDavid du Colombier struct re_pattern_buffer *bufp;
3173*0b459c2cSDavid du Colombier {
3174*0b459c2cSDavid du Colombier int i, j, k;
3175*0b459c2cSDavid du Colombier #ifdef MATCH_MAY_ALLOCATE
3176*0b459c2cSDavid du Colombier fail_stack_type fail_stack;
3177*0b459c2cSDavid du Colombier #endif
3178*0b459c2cSDavid du Colombier #ifndef REGEX_MALLOC
3179*0b459c2cSDavid du Colombier char *destination;
3180*0b459c2cSDavid du Colombier #endif
3181*0b459c2cSDavid du Colombier /* We don't push any register information onto the failure stack. */
3182*0b459c2cSDavid du Colombier unsigned num_regs = 0;
3183*0b459c2cSDavid du Colombier
3184*0b459c2cSDavid du Colombier register char *fastmap = bufp->fastmap;
3185*0b459c2cSDavid du Colombier unsigned char *pattern = bufp->buffer;
3186*0b459c2cSDavid du Colombier unsigned long size = bufp->used;
3187*0b459c2cSDavid du Colombier unsigned char *p = pattern;
3188*0b459c2cSDavid du Colombier register unsigned char *pend = pattern + size;
3189*0b459c2cSDavid du Colombier
3190*0b459c2cSDavid du Colombier /* This holds the pointer to the failure stack, when
3191*0b459c2cSDavid du Colombier it is allocated relocatably. */
3192*0b459c2cSDavid du Colombier fail_stack_elt_t *failure_stack_ptr;
3193*0b459c2cSDavid du Colombier
3194*0b459c2cSDavid du Colombier /* Assume that each path through the pattern can be null until
3195*0b459c2cSDavid du Colombier proven otherwise. We set this false at the bottom of switch
3196*0b459c2cSDavid du Colombier statement, to which we get only if a particular path doesn't
3197*0b459c2cSDavid du Colombier match the empty string. */
3198*0b459c2cSDavid du Colombier boolean path_can_be_null = true;
3199*0b459c2cSDavid du Colombier
3200*0b459c2cSDavid du Colombier /* We aren't doing a `succeed_n' to begin with. */
3201*0b459c2cSDavid du Colombier boolean succeed_n_p = false;
3202*0b459c2cSDavid du Colombier
3203*0b459c2cSDavid du Colombier /* If all elements for base leading-codes in fastmap is set, this
3204*0b459c2cSDavid du Colombier flag is set true. */
3205*0b459c2cSDavid du Colombier boolean match_any_multibyte_characters = false;
3206*0b459c2cSDavid du Colombier
3207*0b459c2cSDavid du Colombier /* Maximum code of simple (single byte) character. */
3208*0b459c2cSDavid du Colombier int simple_char_max;
3209*0b459c2cSDavid du Colombier
3210*0b459c2cSDavid du Colombier assert (fastmap != NULL && p != NULL);
3211*0b459c2cSDavid du Colombier
3212*0b459c2cSDavid du Colombier INIT_FAIL_STACK ();
3213*0b459c2cSDavid du Colombier bzero (fastmap, 1 << BYTEWIDTH); /* Assume nothing's valid. */
3214*0b459c2cSDavid du Colombier bufp->fastmap_accurate = 1; /* It will be when we're done. */
3215*0b459c2cSDavid du Colombier bufp->can_be_null = 0;
3216*0b459c2cSDavid du Colombier
3217*0b459c2cSDavid du Colombier while (1)
3218*0b459c2cSDavid du Colombier {
3219*0b459c2cSDavid du Colombier if (p == pend || *p == succeed)
3220*0b459c2cSDavid du Colombier {
3221*0b459c2cSDavid du Colombier /* We have reached the (effective) end of pattern. */
3222*0b459c2cSDavid du Colombier if (!FAIL_STACK_EMPTY ())
3223*0b459c2cSDavid du Colombier {
3224*0b459c2cSDavid du Colombier bufp->can_be_null |= path_can_be_null;
3225*0b459c2cSDavid du Colombier
3226*0b459c2cSDavid du Colombier /* Reset for next path. */
3227*0b459c2cSDavid du Colombier path_can_be_null = true;
3228*0b459c2cSDavid du Colombier
3229*0b459c2cSDavid du Colombier p = fail_stack.stack[--fail_stack.avail].pointer;
3230*0b459c2cSDavid du Colombier
3231*0b459c2cSDavid du Colombier continue;
3232*0b459c2cSDavid du Colombier }
3233*0b459c2cSDavid du Colombier else
3234*0b459c2cSDavid du Colombier break;
3235*0b459c2cSDavid du Colombier }
3236*0b459c2cSDavid du Colombier
3237*0b459c2cSDavid du Colombier /* We should never be about to go beyond the end of the pattern. */
3238*0b459c2cSDavid du Colombier assert (p < pend);
3239*0b459c2cSDavid du Colombier
3240*0b459c2cSDavid du Colombier switch (SWITCH_ENUM_CAST ((re_opcode_t) *p++))
3241*0b459c2cSDavid du Colombier {
3242*0b459c2cSDavid du Colombier
3243*0b459c2cSDavid du Colombier /* I guess the idea here is to simply not bother with a fastmap
3244*0b459c2cSDavid du Colombier if a backreference is used, since it's too hard to figure out
3245*0b459c2cSDavid du Colombier the fastmap for the corresponding group. Setting
3246*0b459c2cSDavid du Colombier `can_be_null' stops `re_search_2' from using the fastmap, so
3247*0b459c2cSDavid du Colombier that is all we do. */
3248*0b459c2cSDavid du Colombier case duplicate:
3249*0b459c2cSDavid du Colombier bufp->can_be_null = 1;
3250*0b459c2cSDavid du Colombier goto done;
3251*0b459c2cSDavid du Colombier
3252*0b459c2cSDavid du Colombier
3253*0b459c2cSDavid du Colombier /* Following are the cases which match a character. These end
3254*0b459c2cSDavid du Colombier with `break'. */
3255*0b459c2cSDavid du Colombier
3256*0b459c2cSDavid du Colombier case exactn:
3257*0b459c2cSDavid du Colombier fastmap[p[1]] = 1;
3258*0b459c2cSDavid du Colombier break;
3259*0b459c2cSDavid du Colombier
3260*0b459c2cSDavid du Colombier
3261*0b459c2cSDavid du Colombier #ifndef emacs
3262*0b459c2cSDavid du Colombier case charset:
3263*0b459c2cSDavid du Colombier for (j = *p++ * BYTEWIDTH - 1; j >= 0; j--)
3264*0b459c2cSDavid du Colombier if (p[j / BYTEWIDTH] & (1 << (j % BYTEWIDTH)))
3265*0b459c2cSDavid du Colombier fastmap[j] = 1;
3266*0b459c2cSDavid du Colombier break;
3267*0b459c2cSDavid du Colombier
3268*0b459c2cSDavid du Colombier
3269*0b459c2cSDavid du Colombier case charset_not:
3270*0b459c2cSDavid du Colombier /* Chars beyond end of map must be allowed. */
3271*0b459c2cSDavid du Colombier for (j = *p * BYTEWIDTH; j < (1 << BYTEWIDTH); j++)
3272*0b459c2cSDavid du Colombier fastmap[j] = 1;
3273*0b459c2cSDavid du Colombier
3274*0b459c2cSDavid du Colombier for (j = *p++ * BYTEWIDTH - 1; j >= 0; j--)
3275*0b459c2cSDavid du Colombier if (!(p[j / BYTEWIDTH] & (1 << (j % BYTEWIDTH))))
3276*0b459c2cSDavid du Colombier fastmap[j] = 1;
3277*0b459c2cSDavid du Colombier break;
3278*0b459c2cSDavid du Colombier
3279*0b459c2cSDavid du Colombier
3280*0b459c2cSDavid du Colombier case wordchar:
3281*0b459c2cSDavid du Colombier for (j = 0; j < (1 << BYTEWIDTH); j++)
3282*0b459c2cSDavid du Colombier if (SYNTAX (j) == Sword)
3283*0b459c2cSDavid du Colombier fastmap[j] = 1;
3284*0b459c2cSDavid du Colombier break;
3285*0b459c2cSDavid du Colombier
3286*0b459c2cSDavid du Colombier
3287*0b459c2cSDavid du Colombier case notwordchar:
3288*0b459c2cSDavid du Colombier for (j = 0; j < (1 << BYTEWIDTH); j++)
3289*0b459c2cSDavid du Colombier if (SYNTAX (j) != Sword)
3290*0b459c2cSDavid du Colombier fastmap[j] = 1;
3291*0b459c2cSDavid du Colombier break;
3292*0b459c2cSDavid du Colombier #else /* emacs */
3293*0b459c2cSDavid du Colombier case charset:
3294*0b459c2cSDavid du Colombier for (j = CHARSET_BITMAP_SIZE (&p[-1]) * BYTEWIDTH - 1, p++;
3295*0b459c2cSDavid du Colombier j >= 0; j--)
3296*0b459c2cSDavid du Colombier if (p[j / BYTEWIDTH] & (1 << (j % BYTEWIDTH)))
3297*0b459c2cSDavid du Colombier fastmap[j] = 1;
3298*0b459c2cSDavid du Colombier
3299*0b459c2cSDavid du Colombier if (CHARSET_RANGE_TABLE_EXISTS_P (&p[-2])
3300*0b459c2cSDavid du Colombier && match_any_multibyte_characters == false)
3301*0b459c2cSDavid du Colombier {
3302*0b459c2cSDavid du Colombier /* Set fastmap[I] 1 where I is a base leading code of each
3303*0b459c2cSDavid du Colombier multibyte character in the range table. */
3304*0b459c2cSDavid du Colombier int c, count;
3305*0b459c2cSDavid du Colombier
3306*0b459c2cSDavid du Colombier /* Make P points the range table. */
3307*0b459c2cSDavid du Colombier p += CHARSET_BITMAP_SIZE (&p[-2]);
3308*0b459c2cSDavid du Colombier
3309*0b459c2cSDavid du Colombier /* Extract the number of ranges in range table into
3310*0b459c2cSDavid du Colombier COUNT. */
3311*0b459c2cSDavid du Colombier EXTRACT_NUMBER_AND_INCR (count, p);
3312*0b459c2cSDavid du Colombier for (; count > 0; count--, p += 2 * 3) /* XXX */
3313*0b459c2cSDavid du Colombier {
3314*0b459c2cSDavid du Colombier /* Extract the start of each range. */
3315*0b459c2cSDavid du Colombier EXTRACT_CHARACTER (c, p);
3316*0b459c2cSDavid du Colombier j = CHAR_CHARSET (c);
3317*0b459c2cSDavid du Colombier fastmap[CHARSET_LEADING_CODE_BASE (j)] = 1;
3318*0b459c2cSDavid du Colombier }
3319*0b459c2cSDavid du Colombier }
3320*0b459c2cSDavid du Colombier break;
3321*0b459c2cSDavid du Colombier
3322*0b459c2cSDavid du Colombier
3323*0b459c2cSDavid du Colombier case charset_not:
3324*0b459c2cSDavid du Colombier /* Chars beyond end of bitmap are possible matches.
3325*0b459c2cSDavid du Colombier All the single-byte codes can occur in multibyte buffers.
3326*0b459c2cSDavid du Colombier So any that are not listed in the charset
3327*0b459c2cSDavid du Colombier are possible matches, even in multibyte buffers. */
3328*0b459c2cSDavid du Colombier simple_char_max = (1 << BYTEWIDTH);
3329*0b459c2cSDavid du Colombier for (j = CHARSET_BITMAP_SIZE (&p[-1]) * BYTEWIDTH;
3330*0b459c2cSDavid du Colombier j < simple_char_max; j++)
3331*0b459c2cSDavid du Colombier fastmap[j] = 1;
3332*0b459c2cSDavid du Colombier
3333*0b459c2cSDavid du Colombier for (j = CHARSET_BITMAP_SIZE (&p[-1]) * BYTEWIDTH - 1, p++;
3334*0b459c2cSDavid du Colombier j >= 0; j--)
3335*0b459c2cSDavid du Colombier if (!(p[j / BYTEWIDTH] & (1 << (j % BYTEWIDTH))))
3336*0b459c2cSDavid du Colombier fastmap[j] = 1;
3337*0b459c2cSDavid du Colombier
3338*0b459c2cSDavid du Colombier if (bufp->multibyte)
3339*0b459c2cSDavid du Colombier /* Any character set can possibly contain a character
3340*0b459c2cSDavid du Colombier which doesn't match the specified set of characters. */
3341*0b459c2cSDavid du Colombier {
3342*0b459c2cSDavid du Colombier set_fastmap_for_multibyte_characters:
3343*0b459c2cSDavid du Colombier if (match_any_multibyte_characters == false)
3344*0b459c2cSDavid du Colombier {
3345*0b459c2cSDavid du Colombier for (j = 0x80; j < 0xA0; j++) /* XXX */
3346*0b459c2cSDavid du Colombier if (BASE_LEADING_CODE_P (j))
3347*0b459c2cSDavid du Colombier fastmap[j] = 1;
3348*0b459c2cSDavid du Colombier match_any_multibyte_characters = true;
3349*0b459c2cSDavid du Colombier }
3350*0b459c2cSDavid du Colombier }
3351*0b459c2cSDavid du Colombier break;
3352*0b459c2cSDavid du Colombier
3353*0b459c2cSDavid du Colombier
3354*0b459c2cSDavid du Colombier case wordchar:
3355*0b459c2cSDavid du Colombier /* All the single-byte codes can occur in multibyte buffers,
3356*0b459c2cSDavid du Colombier and they may have word syntax. So do consider them. */
3357*0b459c2cSDavid du Colombier simple_char_max = (1 << BYTEWIDTH);
3358*0b459c2cSDavid du Colombier for (j = 0; j < simple_char_max; j++)
3359*0b459c2cSDavid du Colombier if (SYNTAX (j) == Sword)
3360*0b459c2cSDavid du Colombier fastmap[j] = 1;
3361*0b459c2cSDavid du Colombier
3362*0b459c2cSDavid du Colombier if (bufp->multibyte)
3363*0b459c2cSDavid du Colombier /* Any character set can possibly contain a character
3364*0b459c2cSDavid du Colombier whose syntax is `Sword'. */
3365*0b459c2cSDavid du Colombier goto set_fastmap_for_multibyte_characters;
3366*0b459c2cSDavid du Colombier break;
3367*0b459c2cSDavid du Colombier
3368*0b459c2cSDavid du Colombier
3369*0b459c2cSDavid du Colombier case notwordchar:
3370*0b459c2cSDavid du Colombier /* All the single-byte codes can occur in multibyte buffers,
3371*0b459c2cSDavid du Colombier and they may not have word syntax. So do consider them. */
3372*0b459c2cSDavid du Colombier simple_char_max = (1 << BYTEWIDTH);
3373*0b459c2cSDavid du Colombier for (j = 0; j < simple_char_max; j++)
3374*0b459c2cSDavid du Colombier if (SYNTAX (j) != Sword)
3375*0b459c2cSDavid du Colombier fastmap[j] = 1;
3376*0b459c2cSDavid du Colombier
3377*0b459c2cSDavid du Colombier if (bufp->multibyte)
3378*0b459c2cSDavid du Colombier /* Any character set can possibly contain a character
3379*0b459c2cSDavid du Colombier whose syntax is not `Sword'. */
3380*0b459c2cSDavid du Colombier goto set_fastmap_for_multibyte_characters;
3381*0b459c2cSDavid du Colombier break;
3382*0b459c2cSDavid du Colombier #endif
3383*0b459c2cSDavid du Colombier
3384*0b459c2cSDavid du Colombier case anychar:
3385*0b459c2cSDavid du Colombier {
3386*0b459c2cSDavid du Colombier int fastmap_newline = fastmap['\n'];
3387*0b459c2cSDavid du Colombier
3388*0b459c2cSDavid du Colombier /* `.' matches anything, except perhaps newline.
3389*0b459c2cSDavid du Colombier Even in a multibyte buffer, it should match any
3390*0b459c2cSDavid du Colombier conceivable byte value for the fastmap. */
3391*0b459c2cSDavid du Colombier if (bufp->multibyte)
3392*0b459c2cSDavid du Colombier match_any_multibyte_characters = true;
3393*0b459c2cSDavid du Colombier
3394*0b459c2cSDavid du Colombier simple_char_max = (1 << BYTEWIDTH);
3395*0b459c2cSDavid du Colombier for (j = 0; j < simple_char_max; j++)
3396*0b459c2cSDavid du Colombier fastmap[j] = 1;
3397*0b459c2cSDavid du Colombier
3398*0b459c2cSDavid du Colombier /* ... except perhaps newline. */
3399*0b459c2cSDavid du Colombier if (!(bufp->syntax & RE_DOT_NEWLINE))
3400*0b459c2cSDavid du Colombier fastmap['\n'] = fastmap_newline;
3401*0b459c2cSDavid du Colombier
3402*0b459c2cSDavid du Colombier /* Return if we have already set `can_be_null'; if we have,
3403*0b459c2cSDavid du Colombier then the fastmap is irrelevant. Something's wrong here. */
3404*0b459c2cSDavid du Colombier else if (bufp->can_be_null)
3405*0b459c2cSDavid du Colombier goto done;
3406*0b459c2cSDavid du Colombier
3407*0b459c2cSDavid du Colombier /* Otherwise, have to check alternative paths. */
3408*0b459c2cSDavid du Colombier break;
3409*0b459c2cSDavid du Colombier }
3410*0b459c2cSDavid du Colombier
3411*0b459c2cSDavid du Colombier #ifdef emacs
3412*0b459c2cSDavid du Colombier case wordbound:
3413*0b459c2cSDavid du Colombier case notwordbound:
3414*0b459c2cSDavid du Colombier case wordbeg:
3415*0b459c2cSDavid du Colombier case wordend:
3416*0b459c2cSDavid du Colombier case notsyntaxspec:
3417*0b459c2cSDavid du Colombier case syntaxspec:
3418*0b459c2cSDavid du Colombier /* This match depends on text properties. These end with
3419*0b459c2cSDavid du Colombier aborting optimizations. */
3420*0b459c2cSDavid du Colombier bufp->can_be_null = 1;
3421*0b459c2cSDavid du Colombier goto done;
3422*0b459c2cSDavid du Colombier #if 0
3423*0b459c2cSDavid du Colombier k = *p++;
3424*0b459c2cSDavid du Colombier simple_char_max = bufp->multibyte ? 0x80 : (1 << BYTEWIDTH);
3425*0b459c2cSDavid du Colombier for (j = 0; j < simple_char_max; j++)
3426*0b459c2cSDavid du Colombier if (SYNTAX (j) == (enum syntaxcode) k)
3427*0b459c2cSDavid du Colombier fastmap[j] = 1;
3428*0b459c2cSDavid du Colombier
3429*0b459c2cSDavid du Colombier if (bufp->multibyte)
3430*0b459c2cSDavid du Colombier /* Any character set can possibly contain a character
3431*0b459c2cSDavid du Colombier whose syntax is K. */
3432*0b459c2cSDavid du Colombier goto set_fastmap_for_multibyte_characters;
3433*0b459c2cSDavid du Colombier break;
3434*0b459c2cSDavid du Colombier
3435*0b459c2cSDavid du Colombier case notsyntaxspec:
3436*0b459c2cSDavid du Colombier k = *p++;
3437*0b459c2cSDavid du Colombier simple_char_max = bufp->multibyte ? 0x80 : (1 << BYTEWIDTH);
3438*0b459c2cSDavid du Colombier for (j = 0; j < simple_char_max; j++)
3439*0b459c2cSDavid du Colombier if (SYNTAX (j) != (enum syntaxcode) k)
3440*0b459c2cSDavid du Colombier fastmap[j] = 1;
3441*0b459c2cSDavid du Colombier
3442*0b459c2cSDavid du Colombier if (bufp->multibyte)
3443*0b459c2cSDavid du Colombier /* Any character set can possibly contain a character
3444*0b459c2cSDavid du Colombier whose syntax is not K. */
3445*0b459c2cSDavid du Colombier goto set_fastmap_for_multibyte_characters;
3446*0b459c2cSDavid du Colombier break;
3447*0b459c2cSDavid du Colombier #endif
3448*0b459c2cSDavid du Colombier
3449*0b459c2cSDavid du Colombier
3450*0b459c2cSDavid du Colombier case categoryspec:
3451*0b459c2cSDavid du Colombier k = *p++;
3452*0b459c2cSDavid du Colombier simple_char_max = (1 << BYTEWIDTH);
3453*0b459c2cSDavid du Colombier for (j = 0; j < simple_char_max; j++)
3454*0b459c2cSDavid du Colombier if (CHAR_HAS_CATEGORY (j, k))
3455*0b459c2cSDavid du Colombier fastmap[j] = 1;
3456*0b459c2cSDavid du Colombier
3457*0b459c2cSDavid du Colombier if (bufp->multibyte)
3458*0b459c2cSDavid du Colombier /* Any character set can possibly contain a character
3459*0b459c2cSDavid du Colombier whose category is K. */
3460*0b459c2cSDavid du Colombier goto set_fastmap_for_multibyte_characters;
3461*0b459c2cSDavid du Colombier break;
3462*0b459c2cSDavid du Colombier
3463*0b459c2cSDavid du Colombier
3464*0b459c2cSDavid du Colombier case notcategoryspec:
3465*0b459c2cSDavid du Colombier k = *p++;
3466*0b459c2cSDavid du Colombier simple_char_max = (1 << BYTEWIDTH);
3467*0b459c2cSDavid du Colombier for (j = 0; j < simple_char_max; j++)
3468*0b459c2cSDavid du Colombier if (!CHAR_HAS_CATEGORY (j, k))
3469*0b459c2cSDavid du Colombier fastmap[j] = 1;
3470*0b459c2cSDavid du Colombier
3471*0b459c2cSDavid du Colombier if (bufp->multibyte)
3472*0b459c2cSDavid du Colombier /* Any character set can possibly contain a character
3473*0b459c2cSDavid du Colombier whose category is not K. */
3474*0b459c2cSDavid du Colombier goto set_fastmap_for_multibyte_characters;
3475*0b459c2cSDavid du Colombier break;
3476*0b459c2cSDavid du Colombier
3477*0b459c2cSDavid du Colombier /* All cases after this match the empty string. These end with
3478*0b459c2cSDavid du Colombier `continue'. */
3479*0b459c2cSDavid du Colombier
3480*0b459c2cSDavid du Colombier
3481*0b459c2cSDavid du Colombier case before_dot:
3482*0b459c2cSDavid du Colombier case at_dot:
3483*0b459c2cSDavid du Colombier case after_dot:
3484*0b459c2cSDavid du Colombier continue;
3485*0b459c2cSDavid du Colombier #endif /* emacs */
3486*0b459c2cSDavid du Colombier
3487*0b459c2cSDavid du Colombier
3488*0b459c2cSDavid du Colombier case no_op:
3489*0b459c2cSDavid du Colombier case begline:
3490*0b459c2cSDavid du Colombier case endline:
3491*0b459c2cSDavid du Colombier case begbuf:
3492*0b459c2cSDavid du Colombier case endbuf:
3493*0b459c2cSDavid du Colombier #ifndef emacs
3494*0b459c2cSDavid du Colombier case wordbound:
3495*0b459c2cSDavid du Colombier case notwordbound:
3496*0b459c2cSDavid du Colombier case wordbeg:
3497*0b459c2cSDavid du Colombier case wordend:
3498*0b459c2cSDavid du Colombier #endif
3499*0b459c2cSDavid du Colombier case push_dummy_failure:
3500*0b459c2cSDavid du Colombier continue;
3501*0b459c2cSDavid du Colombier
3502*0b459c2cSDavid du Colombier
3503*0b459c2cSDavid du Colombier case jump_n:
3504*0b459c2cSDavid du Colombier case pop_failure_jump:
3505*0b459c2cSDavid du Colombier case maybe_pop_jump:
3506*0b459c2cSDavid du Colombier case jump:
3507*0b459c2cSDavid du Colombier case jump_past_alt:
3508*0b459c2cSDavid du Colombier case dummy_failure_jump:
3509*0b459c2cSDavid du Colombier EXTRACT_NUMBER_AND_INCR (j, p);
3510*0b459c2cSDavid du Colombier p += j;
3511*0b459c2cSDavid du Colombier if (j > 0)
3512*0b459c2cSDavid du Colombier continue;
3513*0b459c2cSDavid du Colombier
3514*0b459c2cSDavid du Colombier /* Jump backward implies we just went through the body of a
3515*0b459c2cSDavid du Colombier loop and matched nothing. Opcode jumped to should be
3516*0b459c2cSDavid du Colombier `on_failure_jump' or `succeed_n'. Just treat it like an
3517*0b459c2cSDavid du Colombier ordinary jump. For a * loop, it has pushed its failure
3518*0b459c2cSDavid du Colombier point already; if so, discard that as redundant. */
3519*0b459c2cSDavid du Colombier if ((re_opcode_t) *p != on_failure_jump
3520*0b459c2cSDavid du Colombier && (re_opcode_t) *p != succeed_n)
3521*0b459c2cSDavid du Colombier continue;
3522*0b459c2cSDavid du Colombier
3523*0b459c2cSDavid du Colombier p++;
3524*0b459c2cSDavid du Colombier EXTRACT_NUMBER_AND_INCR (j, p);
3525*0b459c2cSDavid du Colombier p += j;
3526*0b459c2cSDavid du Colombier
3527*0b459c2cSDavid du Colombier /* If what's on the stack is where we are now, pop it. */
3528*0b459c2cSDavid du Colombier if (!FAIL_STACK_EMPTY ()
3529*0b459c2cSDavid du Colombier && fail_stack.stack[fail_stack.avail - 1].pointer == p)
3530*0b459c2cSDavid du Colombier fail_stack.avail--;
3531*0b459c2cSDavid du Colombier
3532*0b459c2cSDavid du Colombier continue;
3533*0b459c2cSDavid du Colombier
3534*0b459c2cSDavid du Colombier
3535*0b459c2cSDavid du Colombier case on_failure_jump:
3536*0b459c2cSDavid du Colombier case on_failure_keep_string_jump:
3537*0b459c2cSDavid du Colombier handle_on_failure_jump:
3538*0b459c2cSDavid du Colombier EXTRACT_NUMBER_AND_INCR (j, p);
3539*0b459c2cSDavid du Colombier
3540*0b459c2cSDavid du Colombier /* For some patterns, e.g., `(a?)?', `p+j' here points to the
3541*0b459c2cSDavid du Colombier end of the pattern. We don't want to push such a point,
3542*0b459c2cSDavid du Colombier since when we restore it above, entering the switch will
3543*0b459c2cSDavid du Colombier increment `p' past the end of the pattern. We don't need
3544*0b459c2cSDavid du Colombier to push such a point since we obviously won't find any more
3545*0b459c2cSDavid du Colombier fastmap entries beyond `pend'. Such a pattern can match
3546*0b459c2cSDavid du Colombier the null string, though. */
3547*0b459c2cSDavid du Colombier if (p + j < pend)
3548*0b459c2cSDavid du Colombier {
3549*0b459c2cSDavid du Colombier if (!PUSH_PATTERN_OP (p + j, fail_stack))
3550*0b459c2cSDavid du Colombier {
3551*0b459c2cSDavid du Colombier RESET_FAIL_STACK ();
3552*0b459c2cSDavid du Colombier return -2;
3553*0b459c2cSDavid du Colombier }
3554*0b459c2cSDavid du Colombier }
3555*0b459c2cSDavid du Colombier else
3556*0b459c2cSDavid du Colombier bufp->can_be_null = 1;
3557*0b459c2cSDavid du Colombier
3558*0b459c2cSDavid du Colombier if (succeed_n_p)
3559*0b459c2cSDavid du Colombier {
3560*0b459c2cSDavid du Colombier EXTRACT_NUMBER_AND_INCR (k, p); /* Skip the n. */
3561*0b459c2cSDavid du Colombier succeed_n_p = false;
3562*0b459c2cSDavid du Colombier }
3563*0b459c2cSDavid du Colombier
3564*0b459c2cSDavid du Colombier continue;
3565*0b459c2cSDavid du Colombier
3566*0b459c2cSDavid du Colombier
3567*0b459c2cSDavid du Colombier case succeed_n:
3568*0b459c2cSDavid du Colombier /* Get to the number of times to succeed. */
3569*0b459c2cSDavid du Colombier p += 2;
3570*0b459c2cSDavid du Colombier
3571*0b459c2cSDavid du Colombier /* Increment p past the n for when k != 0. */
3572*0b459c2cSDavid du Colombier EXTRACT_NUMBER_AND_INCR (k, p);
3573*0b459c2cSDavid du Colombier if (k == 0)
3574*0b459c2cSDavid du Colombier {
3575*0b459c2cSDavid du Colombier p -= 4;
3576*0b459c2cSDavid du Colombier succeed_n_p = true; /* Spaghetti code alert. */
3577*0b459c2cSDavid du Colombier goto handle_on_failure_jump;
3578*0b459c2cSDavid du Colombier }
3579*0b459c2cSDavid du Colombier continue;
3580*0b459c2cSDavid du Colombier
3581*0b459c2cSDavid du Colombier
3582*0b459c2cSDavid du Colombier case set_number_at:
3583*0b459c2cSDavid du Colombier p += 4;
3584*0b459c2cSDavid du Colombier continue;
3585*0b459c2cSDavid du Colombier
3586*0b459c2cSDavid du Colombier
3587*0b459c2cSDavid du Colombier case start_memory:
3588*0b459c2cSDavid du Colombier case stop_memory:
3589*0b459c2cSDavid du Colombier p += 2;
3590*0b459c2cSDavid du Colombier continue;
3591*0b459c2cSDavid du Colombier
3592*0b459c2cSDavid du Colombier
3593*0b459c2cSDavid du Colombier default:
3594*0b459c2cSDavid du Colombier abort (); /* We have listed all the cases. */
3595*0b459c2cSDavid du Colombier } /* switch *p++ */
3596*0b459c2cSDavid du Colombier
3597*0b459c2cSDavid du Colombier /* Getting here means we have found the possible starting
3598*0b459c2cSDavid du Colombier characters for one path of the pattern -- and that the empty
3599*0b459c2cSDavid du Colombier string does not match. We need not follow this path further.
3600*0b459c2cSDavid du Colombier Instead, look at the next alternative (remembered on the
3601*0b459c2cSDavid du Colombier stack), or quit if no more. The test at the top of the loop
3602*0b459c2cSDavid du Colombier does these things. */
3603*0b459c2cSDavid du Colombier path_can_be_null = false;
3604*0b459c2cSDavid du Colombier p = pend;
3605*0b459c2cSDavid du Colombier } /* while p */
3606*0b459c2cSDavid du Colombier
3607*0b459c2cSDavid du Colombier /* Set `can_be_null' for the last path (also the first path, if the
3608*0b459c2cSDavid du Colombier pattern is empty). */
3609*0b459c2cSDavid du Colombier bufp->can_be_null |= path_can_be_null;
3610*0b459c2cSDavid du Colombier
3611*0b459c2cSDavid du Colombier done:
3612*0b459c2cSDavid du Colombier RESET_FAIL_STACK ();
3613*0b459c2cSDavid du Colombier return 0;
3614*0b459c2cSDavid du Colombier } /* re_compile_fastmap */
3615*0b459c2cSDavid du Colombier
3616*0b459c2cSDavid du Colombier /* Set REGS to hold NUM_REGS registers, storing them in STARTS and
3617*0b459c2cSDavid du Colombier ENDS. Subsequent matches using PATTERN_BUFFER and REGS will use
3618*0b459c2cSDavid du Colombier this memory for recording register information. STARTS and ENDS
3619*0b459c2cSDavid du Colombier must be allocated using the malloc library routine, and must each
3620*0b459c2cSDavid du Colombier be at least NUM_REGS * sizeof (regoff_t) bytes long.
3621*0b459c2cSDavid du Colombier
3622*0b459c2cSDavid du Colombier If NUM_REGS == 0, then subsequent matches should allocate their own
3623*0b459c2cSDavid du Colombier register data.
3624*0b459c2cSDavid du Colombier
3625*0b459c2cSDavid du Colombier Unless this function is called, the first search or match using
3626*0b459c2cSDavid du Colombier PATTERN_BUFFER will allocate its own register data, without
3627*0b459c2cSDavid du Colombier freeing the old data. */
3628*0b459c2cSDavid du Colombier
3629*0b459c2cSDavid du Colombier void
re_set_registers(bufp,regs,num_regs,starts,ends)3630*0b459c2cSDavid du Colombier re_set_registers (bufp, regs, num_regs, starts, ends)
3631*0b459c2cSDavid du Colombier struct re_pattern_buffer *bufp;
3632*0b459c2cSDavid du Colombier struct re_registers *regs;
3633*0b459c2cSDavid du Colombier unsigned num_regs;
3634*0b459c2cSDavid du Colombier regoff_t *starts, *ends;
3635*0b459c2cSDavid du Colombier {
3636*0b459c2cSDavid du Colombier if (num_regs)
3637*0b459c2cSDavid du Colombier {
3638*0b459c2cSDavid du Colombier bufp->regs_allocated = REGS_REALLOCATE;
3639*0b459c2cSDavid du Colombier regs->num_regs = num_regs;
3640*0b459c2cSDavid du Colombier regs->start = starts;
3641*0b459c2cSDavid du Colombier regs->end = ends;
3642*0b459c2cSDavid du Colombier }
3643*0b459c2cSDavid du Colombier else
3644*0b459c2cSDavid du Colombier {
3645*0b459c2cSDavid du Colombier bufp->regs_allocated = REGS_UNALLOCATED;
3646*0b459c2cSDavid du Colombier regs->num_regs = 0;
3647*0b459c2cSDavid du Colombier regs->start = regs->end = (regoff_t *) 0;
3648*0b459c2cSDavid du Colombier }
3649*0b459c2cSDavid du Colombier }
3650*0b459c2cSDavid du Colombier
3651*0b459c2cSDavid du Colombier /* Searching routines. */
3652*0b459c2cSDavid du Colombier
/* Search a single string.  This is re_search_2 (below) called with an
   empty first string and STRING as the second one; the caller cannot
   say where to stop matching, because the stop position passed on is
   always SIZE.  Returns whatever re_search_2 returns.  */

int
re_search (bufp, string, size, startpos, range, regs)
     struct re_pattern_buffer *bufp;
     const char *string;
     int size, startpos, range;
     struct re_registers *regs;
{
  int result;

  /* STRING occupies the second-string slot; the first string is
     NULL with length 0.  */
  result = re_search_2 (bufp, NULL, 0, string, size, startpos, range,
			regs, size);
  return result;
}
3666*0b459c2cSDavid du Colombier
/* The two macros below expand to expressions that refer to the
   variables `string1', `size1', `string2' and `size2'; those names
   must be in scope wherever the macros are used.  */

/* End address of virtual concatenation of string: the end of string2
   when P is at or beyond size1, otherwise the end of string1.  */
#define STOP_ADDR_VSTRING(P)				\
  ((P) >= size1 ? string2 + size2 : string1 + size1)

/* Address of POS in the concatenation of virtual string.  The offset
   into string2 is computed as an integer before it is added to the
   pointer, so that no pointer in front of string2 is ever formed
   (such a pointer is undefined behavior even if never dereferenced).
   POS is evaluated twice.  */
#define POS_ADDR_VSTRING(POS)					\
  ((POS) >= size1 ? string2 + ((POS) - size1) : string1 + (POS))
3674*0b459c2cSDavid du Colombier
3675*0b459c2cSDavid du Colombier /* Using the compiled pattern in BUFP->buffer, first tries to match the
3676*0b459c2cSDavid du Colombier virtual concatenation of STRING1 and STRING2, starting first at index
3677*0b459c2cSDavid du Colombier STARTPOS, then at STARTPOS + 1, and so on.
3678*0b459c2cSDavid du Colombier
3679*0b459c2cSDavid du Colombier STRING1 and STRING2 have length SIZE1 and SIZE2, respectively.
3680*0b459c2cSDavid du Colombier
3681*0b459c2cSDavid du Colombier RANGE is how far to scan while trying to match. RANGE = 0 means try
3682*0b459c2cSDavid du Colombier only at STARTPOS; in general, the last start tried is STARTPOS +
3683*0b459c2cSDavid du Colombier RANGE.
3684*0b459c2cSDavid du Colombier
3685*0b459c2cSDavid du Colombier In REGS, return the indices of the virtual concatenation of STRING1
3686*0b459c2cSDavid du Colombier and STRING2 that matched the entire BUFP->buffer and its contained
3687*0b459c2cSDavid du Colombier subexpressions.
3688*0b459c2cSDavid du Colombier
3689*0b459c2cSDavid du Colombier Do not consider matching one past the index STOP in the virtual
3690*0b459c2cSDavid du Colombier concatenation of STRING1 and STRING2.
3691*0b459c2cSDavid du Colombier
3692*0b459c2cSDavid du Colombier We return either the position in the strings at which the match was
3693*0b459c2cSDavid du Colombier found, -1 if no match, or -2 if error (such as failure
3694*0b459c2cSDavid du Colombier stack overflow). */
3695*0b459c2cSDavid du Colombier
3696*0b459c2cSDavid du Colombier int
re_search_2(bufp,string1,size1,string2,size2,startpos,range,regs,stop)3697*0b459c2cSDavid du Colombier re_search_2 (bufp, string1, size1, string2, size2, startpos, range, regs, stop)
3698*0b459c2cSDavid du Colombier struct re_pattern_buffer *bufp;
3699*0b459c2cSDavid du Colombier const char *string1, *string2;
3700*0b459c2cSDavid du Colombier int size1, size2;
3701*0b459c2cSDavid du Colombier int startpos;
3702*0b459c2cSDavid du Colombier int range;
3703*0b459c2cSDavid du Colombier struct re_registers *regs;
3704*0b459c2cSDavid du Colombier int stop;
3705*0b459c2cSDavid du Colombier {
3706*0b459c2cSDavid du Colombier int val;
3707*0b459c2cSDavid du Colombier register char *fastmap = bufp->fastmap;
3708*0b459c2cSDavid du Colombier register RE_TRANSLATE_TYPE translate = bufp->translate;
3709*0b459c2cSDavid du Colombier int total_size = size1 + size2;
3710*0b459c2cSDavid du Colombier int endpos = startpos + range;
3711*0b459c2cSDavid du Colombier int anchored_start = 0;
3712*0b459c2cSDavid du Colombier
3713*0b459c2cSDavid du Colombier /* Nonzero if we have to concern multibyte character. */
3714*0b459c2cSDavid du Colombier int multibyte = bufp->multibyte;
3715*0b459c2cSDavid du Colombier
3716*0b459c2cSDavid du Colombier /* Check for out-of-range STARTPOS. */
3717*0b459c2cSDavid du Colombier if (startpos < 0 || startpos > total_size)
3718*0b459c2cSDavid du Colombier return -1;
3719*0b459c2cSDavid du Colombier
3720*0b459c2cSDavid du Colombier /* Fix up RANGE if it might eventually take us outside
3721*0b459c2cSDavid du Colombier the virtual concatenation of STRING1 and STRING2.
3722*0b459c2cSDavid du Colombier Make sure we won't move STARTPOS below 0 or above TOTAL_SIZE. */
3723*0b459c2cSDavid du Colombier if (endpos < 0)
3724*0b459c2cSDavid du Colombier range = 0 - startpos;
3725*0b459c2cSDavid du Colombier else if (endpos > total_size)
3726*0b459c2cSDavid du Colombier range = total_size - startpos;
3727*0b459c2cSDavid du Colombier
3728*0b459c2cSDavid du Colombier /* If the search isn't to be a backwards one, don't waste time in a
3729*0b459c2cSDavid du Colombier search for a pattern anchored at beginning of buffer. */
3730*0b459c2cSDavid du Colombier if (bufp->used > 0 && (re_opcode_t) bufp->buffer[0] == begbuf && range > 0)
3731*0b459c2cSDavid du Colombier {
3732*0b459c2cSDavid du Colombier if (startpos > 0)
3733*0b459c2cSDavid du Colombier return -1;
3734*0b459c2cSDavid du Colombier else
3735*0b459c2cSDavid du Colombier range = 0;
3736*0b459c2cSDavid du Colombier }
3737*0b459c2cSDavid du Colombier
3738*0b459c2cSDavid du Colombier #ifdef emacs
3739*0b459c2cSDavid du Colombier /* In a forward search for something that starts with \=.
3740*0b459c2cSDavid du Colombier don't keep searching past point. */
3741*0b459c2cSDavid du Colombier if (bufp->used > 0 && (re_opcode_t) bufp->buffer[0] == at_dot && range > 0)
3742*0b459c2cSDavid du Colombier {
3743*0b459c2cSDavid du Colombier range = PT_BYTE - BEGV_BYTE - startpos;
3744*0b459c2cSDavid du Colombier if (range < 0)
3745*0b459c2cSDavid du Colombier return -1;
3746*0b459c2cSDavid du Colombier }
3747*0b459c2cSDavid du Colombier #endif /* emacs */
3748*0b459c2cSDavid du Colombier
3749*0b459c2cSDavid du Colombier /* Update the fastmap now if not correct already. */
3750*0b459c2cSDavid du Colombier if (fastmap && !bufp->fastmap_accurate)
3751*0b459c2cSDavid du Colombier if (re_compile_fastmap (bufp) == -2)
3752*0b459c2cSDavid du Colombier return -2;
3753*0b459c2cSDavid du Colombier
3754*0b459c2cSDavid du Colombier /* See whether the pattern is anchored. */
3755*0b459c2cSDavid du Colombier if (bufp->buffer[0] == begline)
3756*0b459c2cSDavid du Colombier anchored_start = 1;
3757*0b459c2cSDavid du Colombier
3758*0b459c2cSDavid du Colombier #ifdef emacs
3759*0b459c2cSDavid du Colombier gl_state.object = re_match_object;
3760*0b459c2cSDavid du Colombier {
3761*0b459c2cSDavid du Colombier int adjpos = NILP (re_match_object) || BUFFERP (re_match_object);
3762*0b459c2cSDavid du Colombier int charpos = SYNTAX_TABLE_BYTE_TO_CHAR (startpos + adjpos);
3763*0b459c2cSDavid du Colombier
3764*0b459c2cSDavid du Colombier SETUP_SYNTAX_TABLE_FOR_OBJECT (re_match_object, charpos, 1);
3765*0b459c2cSDavid du Colombier }
3766*0b459c2cSDavid du Colombier #endif
3767*0b459c2cSDavid du Colombier
3768*0b459c2cSDavid du Colombier /* Loop through the string, looking for a place to start matching. */
3769*0b459c2cSDavid du Colombier for (;;)
3770*0b459c2cSDavid du Colombier {
3771*0b459c2cSDavid du Colombier /* If the pattern is anchored,
3772*0b459c2cSDavid du Colombier skip quickly past places we cannot match.
3773*0b459c2cSDavid du Colombier We don't bother to treat startpos == 0 specially
3774*0b459c2cSDavid du Colombier because that case doesn't repeat. */
3775*0b459c2cSDavid du Colombier if (anchored_start && startpos > 0)
3776*0b459c2cSDavid du Colombier {
3777*0b459c2cSDavid du Colombier if (! (bufp->newline_anchor
3778*0b459c2cSDavid du Colombier && ((startpos <= size1 ? string1[startpos - 1]
3779*0b459c2cSDavid du Colombier : string2[startpos - size1 - 1])
3780*0b459c2cSDavid du Colombier == '\n')))
3781*0b459c2cSDavid du Colombier goto advance;
3782*0b459c2cSDavid du Colombier }
3783*0b459c2cSDavid du Colombier
3784*0b459c2cSDavid du Colombier /* If a fastmap is supplied, skip quickly over characters that
3785*0b459c2cSDavid du Colombier cannot be the start of a match. If the pattern can match the
3786*0b459c2cSDavid du Colombier null string, however, we don't need to skip characters; we want
3787*0b459c2cSDavid du Colombier the first null string. */
3788*0b459c2cSDavid du Colombier if (fastmap && startpos < total_size && !bufp->can_be_null)
3789*0b459c2cSDavid du Colombier {
3790*0b459c2cSDavid du Colombier register const char *d;
3791*0b459c2cSDavid du Colombier register unsigned int buf_ch;
3792*0b459c2cSDavid du Colombier
3793*0b459c2cSDavid du Colombier d = POS_ADDR_VSTRING (startpos);
3794*0b459c2cSDavid du Colombier
3795*0b459c2cSDavid du Colombier if (range > 0) /* Searching forwards. */
3796*0b459c2cSDavid du Colombier {
3797*0b459c2cSDavid du Colombier register int lim = 0;
3798*0b459c2cSDavid du Colombier int irange = range;
3799*0b459c2cSDavid du Colombier
3800*0b459c2cSDavid du Colombier if (startpos < size1 && startpos + range >= size1)
3801*0b459c2cSDavid du Colombier lim = range - (size1 - startpos);
3802*0b459c2cSDavid du Colombier
3803*0b459c2cSDavid du Colombier /* Written out as an if-else to avoid testing `translate'
3804*0b459c2cSDavid du Colombier inside the loop. */
3805*0b459c2cSDavid du Colombier if (RE_TRANSLATE_P (translate))
3806*0b459c2cSDavid du Colombier {
3807*0b459c2cSDavid du Colombier if (multibyte)
3808*0b459c2cSDavid du Colombier while (range > lim)
3809*0b459c2cSDavid du Colombier {
3810*0b459c2cSDavid du Colombier int buf_charlen;
3811*0b459c2cSDavid du Colombier
3812*0b459c2cSDavid du Colombier buf_ch = STRING_CHAR_AND_LENGTH (d, range - lim,
3813*0b459c2cSDavid du Colombier buf_charlen);
3814*0b459c2cSDavid du Colombier
3815*0b459c2cSDavid du Colombier buf_ch = RE_TRANSLATE (translate, buf_ch);
3816*0b459c2cSDavid du Colombier if (buf_ch >= 0400
3817*0b459c2cSDavid du Colombier || fastmap[buf_ch])
3818*0b459c2cSDavid du Colombier break;
3819*0b459c2cSDavid du Colombier
3820*0b459c2cSDavid du Colombier range -= buf_charlen;
3821*0b459c2cSDavid du Colombier d += buf_charlen;
3822*0b459c2cSDavid du Colombier }
3823*0b459c2cSDavid du Colombier else
3824*0b459c2cSDavid du Colombier while (range > lim
3825*0b459c2cSDavid du Colombier && !fastmap[(unsigned char)
3826*0b459c2cSDavid du Colombier RE_TRANSLATE (translate, (unsigned char) *d)])
3827*0b459c2cSDavid du Colombier {
3828*0b459c2cSDavid du Colombier d++;
3829*0b459c2cSDavid du Colombier range--;
3830*0b459c2cSDavid du Colombier }
3831*0b459c2cSDavid du Colombier }
3832*0b459c2cSDavid du Colombier else
3833*0b459c2cSDavid du Colombier while (range > lim && !fastmap[(unsigned char) *d])
3834*0b459c2cSDavid du Colombier {
3835*0b459c2cSDavid du Colombier d++;
3836*0b459c2cSDavid du Colombier range--;
3837*0b459c2cSDavid du Colombier }
3838*0b459c2cSDavid du Colombier
3839*0b459c2cSDavid du Colombier startpos += irange - range;
3840*0b459c2cSDavid du Colombier }
3841*0b459c2cSDavid du Colombier else /* Searching backwards. */
3842*0b459c2cSDavid du Colombier {
3843*0b459c2cSDavid du Colombier int room = (size1 == 0 || startpos >= size1
3844*0b459c2cSDavid du Colombier ? size2 + size1 - startpos
3845*0b459c2cSDavid du Colombier : size1 - startpos);
3846*0b459c2cSDavid du Colombier
3847*0b459c2cSDavid du Colombier buf_ch = STRING_CHAR (d, room);
3848*0b459c2cSDavid du Colombier if (RE_TRANSLATE_P (translate))
3849*0b459c2cSDavid du Colombier buf_ch = RE_TRANSLATE (translate, buf_ch);
3850*0b459c2cSDavid du Colombier
3851*0b459c2cSDavid du Colombier if (! (buf_ch >= 0400
3852*0b459c2cSDavid du Colombier || fastmap[buf_ch]))
3853*0b459c2cSDavid du Colombier goto advance;
3854*0b459c2cSDavid du Colombier }
3855*0b459c2cSDavid du Colombier }
3856*0b459c2cSDavid du Colombier
3857*0b459c2cSDavid du Colombier /* If can't match the null string, and that's all we have left, fail. */
3858*0b459c2cSDavid du Colombier if (range >= 0 && startpos == total_size && fastmap
3859*0b459c2cSDavid du Colombier && !bufp->can_be_null)
3860*0b459c2cSDavid du Colombier return -1;
3861*0b459c2cSDavid du Colombier
3862*0b459c2cSDavid du Colombier val = re_match_2_internal (bufp, string1, size1, string2, size2,
3863*0b459c2cSDavid du Colombier startpos, regs, stop);
3864*0b459c2cSDavid du Colombier #ifndef REGEX_MALLOC
3865*0b459c2cSDavid du Colombier #ifdef C_ALLOCA
3866*0b459c2cSDavid du Colombier alloca (0);
3867*0b459c2cSDavid du Colombier #endif
3868*0b459c2cSDavid du Colombier #endif
3869*0b459c2cSDavid du Colombier
3870*0b459c2cSDavid du Colombier if (val >= 0)
3871*0b459c2cSDavid du Colombier return startpos;
3872*0b459c2cSDavid du Colombier
3873*0b459c2cSDavid du Colombier if (val == -2)
3874*0b459c2cSDavid du Colombier return -2;
3875*0b459c2cSDavid du Colombier
3876*0b459c2cSDavid du Colombier advance:
3877*0b459c2cSDavid du Colombier if (!range)
3878*0b459c2cSDavid du Colombier break;
3879*0b459c2cSDavid du Colombier else if (range > 0)
3880*0b459c2cSDavid du Colombier {
3881*0b459c2cSDavid du Colombier /* Update STARTPOS to the next character boundary. */
3882*0b459c2cSDavid du Colombier if (multibyte)
3883*0b459c2cSDavid du Colombier {
3884*0b459c2cSDavid du Colombier const unsigned char *p
3885*0b459c2cSDavid du Colombier = (const unsigned char *) POS_ADDR_VSTRING (startpos);
3886*0b459c2cSDavid du Colombier const unsigned char *pend
3887*0b459c2cSDavid du Colombier = (const unsigned char *) STOP_ADDR_VSTRING (startpos);
3888*0b459c2cSDavid du Colombier int len = MULTIBYTE_FORM_LENGTH (p, pend - p);
3889*0b459c2cSDavid du Colombier
3890*0b459c2cSDavid du Colombier range -= len;
3891*0b459c2cSDavid du Colombier if (range < 0)
3892*0b459c2cSDavid du Colombier break;
3893*0b459c2cSDavid du Colombier startpos += len;
3894*0b459c2cSDavid du Colombier }
3895*0b459c2cSDavid du Colombier else
3896*0b459c2cSDavid du Colombier {
3897*0b459c2cSDavid du Colombier range--;
3898*0b459c2cSDavid du Colombier startpos++;
3899*0b459c2cSDavid du Colombier }
3900*0b459c2cSDavid du Colombier }
3901*0b459c2cSDavid du Colombier else
3902*0b459c2cSDavid du Colombier {
3903*0b459c2cSDavid du Colombier range++;
3904*0b459c2cSDavid du Colombier startpos--;
3905*0b459c2cSDavid du Colombier
3906*0b459c2cSDavid du Colombier /* Update STARTPOS to the previous character boundary. */
3907*0b459c2cSDavid du Colombier if (multibyte)
3908*0b459c2cSDavid du Colombier {
3909*0b459c2cSDavid du Colombier const unsigned char *p
3910*0b459c2cSDavid du Colombier = (const unsigned char *) POS_ADDR_VSTRING (startpos);
3911*0b459c2cSDavid du Colombier int len = 0;
3912*0b459c2cSDavid du Colombier
3913*0b459c2cSDavid du Colombier /* Find the head of multibyte form. */
3914*0b459c2cSDavid du Colombier while (!CHAR_HEAD_P (*p))
3915*0b459c2cSDavid du Colombier p--, len++;
3916*0b459c2cSDavid du Colombier
3917*0b459c2cSDavid du Colombier /* Adjust it. */
3918*0b459c2cSDavid du Colombier #if 0 /* XXX */
3919*0b459c2cSDavid du Colombier if (MULTIBYTE_FORM_LENGTH (p, len + 1) != (len + 1))
3920*0b459c2cSDavid du Colombier ;
3921*0b459c2cSDavid du Colombier else
3922*0b459c2cSDavid du Colombier #endif
3923*0b459c2cSDavid du Colombier {
3924*0b459c2cSDavid du Colombier range += len;
3925*0b459c2cSDavid du Colombier if (range > 0)
3926*0b459c2cSDavid du Colombier break;
3927*0b459c2cSDavid du Colombier
3928*0b459c2cSDavid du Colombier startpos -= len;
3929*0b459c2cSDavid du Colombier }
3930*0b459c2cSDavid du Colombier }
3931*0b459c2cSDavid du Colombier }
3932*0b459c2cSDavid du Colombier }
3933*0b459c2cSDavid du Colombier return -1;
3934*0b459c2cSDavid du Colombier } /* re_search_2 */
3935*0b459c2cSDavid du Colombier
3936*0b459c2cSDavid du Colombier /* Declarations and macros for re_match_2. */
3937*0b459c2cSDavid du Colombier
3938*0b459c2cSDavid du Colombier static int bcmp_translate ();
3939*0b459c2cSDavid du Colombier static boolean alt_match_null_string_p (),
3940*0b459c2cSDavid du Colombier common_op_match_null_string_p (),
3941*0b459c2cSDavid du Colombier group_match_null_string_p ();
3942*0b459c2cSDavid du Colombier
/* Turn PTR, a pointer into `string1' or `string2', into an offset.
   A pointer into `string1' yields its distance from `string1'; a
   pointer into `string2' yields its distance from `string2' plus
   `size1', i.e. a position in the concatenation of the two.  */
#define POINTER_TO_OFFSET(ptr) \
  ((regoff_t) (FIRST_STRING_P (ptr) \
               ? (ptr) - string1 \
               : (ptr) - string2 + size1))
3949*0b459c2cSDavid du Colombier
3950*0b459c2cSDavid du Colombier /* Macros for dealing with the split strings in re_match_2. */
3951*0b459c2cSDavid du Colombier
/* Nonzero while the current limit `dend' is `end_match_1', the match
   limit of the first string.  */
#define MATCHING_IN_FIRST_STRING (end_match_1 == dend)
3953*0b459c2cSDavid du Colombier
/* Use this before fetching a character with *d.  While `d' sits on the
   current limit `dend', either give up (jump to `fail') because that
   limit is already `end_match_2', or move on to the start of `string2'
   and make `end_match_2' the new limit.  */
#define PREFETCH() \
  while (dend == d) \
    { \
      /* Already at the limit of string2: nothing more to read.  */ \
      if (end_match_2 == dend) \
        goto fail; \
      /* At the limit of string1: continue in string2.  */ \
      dend = end_match_2; \
      d = string2; \
    }
3966*0b459c2cSDavid du Colombier
3967*0b459c2cSDavid du Colombier
/* Tests for the two ends of the virtual concatenation of `string1' and
   `string2'.  AT_STRINGS_BEG holds when D is the start of `string1'
   (or of `string2' when `size1' is zero), and also whenever `size2' is
   zero.  AT_STRINGS_END holds when D is `end2'.  When there is only
   one string, it is `string2'.  */
#define AT_STRINGS_BEG(d) \
  ((d) == (size1 != 0 ? string1 : string2) || size2 == 0)
#define AT_STRINGS_END(d) (end2 == (d))
3972*0b459c2cSDavid du Colombier
3973*0b459c2cSDavid du Colombier
/* Nonzero if the character D refers to has syntax class Sword, i.e. is
   word-constituent.  Two positions are redirected before the lookup:
   D equal to `end1' (just past string1) reads the first character of
   string2, and D equal to `string2 - 1' (just before string2) reads
   the last character of string1.  */
#define WORDCHAR_P(d) \
  (Sword == SYNTAX ((d) == end1 ? string2[0] \
                    : (d) == string2 - 1 ? end1[-1] \
                    : *(d)))
3982*0b459c2cSDavid du Colombier
/* AT_WORD_BOUNDARY is disabled (compiled out below).  It was first
   disabled because of a compiler bug, described by the note that
   accompanied `case wordbound':

     The DEC Alpha C compiler 3.x generates incorrect code for the
     test WORDCHAR_P (d - 1) != WORDCHAR_P (d) in the expansion of
     AT_WORD_BOUNDARY, so this code is disabled.  Expanding the
     macro and introducing temporary variables works around the bug.

   The macro is no longer used at all, in order to support multibyte
   forms.  */

#if 0
/* True at either end of the strings, or when the character before D
   and the character at D differ in being word-constituent.  */
#define AT_WORD_BOUNDARY(d) \
  (AT_STRINGS_BEG (d) \
   || AT_STRINGS_END (d) \
   || WORDCHAR_P (d - 1) != WORDCHAR_P (d))
#endif
4000*0b459c2cSDavid du Colombier
/* Free everything we malloc.  */
#ifndef MATCH_MAY_ALLOCATE
/* Do nothing, but expand to an expression so that gcc does not warn.  */
#define FREE_VARIABLES() ((void) 0)
#else /* MATCH_MAY_ALLOCATE */
/* Free VAR if it is non-null, then reset it to NULL.  The expansion
   deliberately ends in a dangling `else', which the statement (or bare
   semicolon) following each use completes.  */
#define FREE_VAR(var) \
  if (var) \
    { \
      REGEX_FREE (var); \
      var = NULL; \
    } \
  else
/* Release the failure stack and every register vector of the matcher.  */
#define FREE_VARIABLES() \
  do \
    { \
      REGEX_FREE_STACK (fail_stack.stack); \
      FREE_VAR (regstart); \
      FREE_VAR (regend); \
      FREE_VAR (old_regstart); \
      FREE_VAR (old_regend); \
      FREE_VAR (best_regstart); \
      FREE_VAR (best_regend); \
      FREE_VAR (reg_info); \
      FREE_VAR (reg_dummy); \
      FREE_VAR (reg_info_dummy); \
    } \
  while (0)
#endif /* MATCH_MAY_ALLOCATE */
4020*0b459c2cSDavid du Colombier
/* Markers meaning that no register is active.  They have to satisfy
   several constraints:
   - neither may be a valid register number; register numbers occupy a
     single byte in the pattern, which limits us to 255 registers, so
     any number above 255 will do;
   - the two must differ by 1, because of NUM_FAILURE_ITEMS above;
   - the marker for the lowest register must be larger than the marker
     for the highest register, so that we do not try to actually save
     any registers when none are active.  */
#define NO_HIGHEST_ACTIVE_REG (1 << BYTEWIDTH)
#define NO_LOWEST_ACTIVE_REG (1 + NO_HIGHEST_ACTIVE_REG)
4030*0b459c2cSDavid du Colombier
4031*0b459c2cSDavid du Colombier /* Matching routines. */
4032*0b459c2cSDavid du Colombier
#ifndef emacs /* Emacs never uses this.  */
/* re_match is the single-string form of re_match_2: it matches the
   pattern in BUFP against STRING (of length SIZE) starting at POS, and
   returns whatever re_match_2_internal returns.  */

int
re_match (bufp, string, size, pos, regs)
     struct re_pattern_buffer *bufp;
     const char *string;
     int size, pos;
     struct re_registers *regs;
{
  int match_val;

  /* There is no first string (NULL, length 0); STRING is handed over as
     the second string and SIZE also serves as the stop position.  */
  match_val = re_match_2_internal (bufp, NULL, 0, string, size,
                                   pos, regs, size);
#if !defined (REGEX_MALLOC) && defined (C_ALLOCA)
  /* Force an alloca cleanup now that the matcher has returned.  */
  alloca (0);
#endif
  return match_val;
}
#endif /* not emacs */
4053*0b459c2cSDavid du Colombier
#if defined (emacs)
/* In Emacs, the string or buffer in which we are matching.  It is used
   for looking up syntax properties.  */
Lisp_Object re_match_object;
#endif /* emacs */
4059*0b459c2cSDavid du Colombier
/* re_match_2 matches the compiled pattern in BUFP against the (virtual)
   concatenation of STRING1 and STRING2 (of length SIZE1 and SIZE2,
   respectively).  Matching starts at POS and stops at STOP.

   If REGS is non-null, offsets for the substring each group matched
   may be stored in REGS, depending on the `no_sub' field of BUFP.  See
   the documentation for exactly how many groups we fill.
   NOTE(review): this comment used to say the offsets are stored when
   `no_sub' is nonzero, which reads inverted given the field's name --
   confirm against re_match_2_internal.

   We return -1 if there is no match and -2 on an internal error (such
   as the failure stack overflowing).  Otherwise, we return the length
   of the matched substring.  */

int
re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop)
     struct re_pattern_buffer *bufp;
     const char *string1, *string2;
     int size1, size2;
     int pos;
     struct re_registers *regs;
     int stop;
{
  int match_val;
#ifdef emacs
  int adjpos, charpos;

  /* ADJPOS is 1 when the object being matched is nil or a buffer, and
     0 otherwise; it is added to POS before the byte-to-char
     conversion that feeds the syntax-table setup.  */
  adjpos = NILP (re_match_object) || BUFFERP (re_match_object);
  gl_state.object = re_match_object;
  charpos = SYNTAX_TABLE_BYTE_TO_CHAR (pos + adjpos);
  SETUP_SYNTAX_TABLE_FOR_OBJECT (re_match_object, charpos, 1);
#endif

  match_val = re_match_2_internal (bufp, string1, size1, string2, size2,
                                   pos, regs, stop);
#if !defined (REGEX_MALLOC) && defined (C_ALLOCA)
  /* Force an alloca cleanup now that the matcher has returned.  */
  alloca (0);
#endif
  return match_val;
}
4101*0b459c2cSDavid du Colombier
4102*0b459c2cSDavid du Colombier /* This is a separate function so that we can force an alloca cleanup
4103*0b459c2cSDavid du Colombier afterwards. */
4104*0b459c2cSDavid du Colombier static int
re_match_2_internal(bufp,string1,size1,string2,size2,pos,regs,stop)4105*0b459c2cSDavid du Colombier re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop)
4106*0b459c2cSDavid du Colombier struct re_pattern_buffer *bufp;
4107*0b459c2cSDavid du Colombier const char *string1, *string2;
4108*0b459c2cSDavid du Colombier int size1, size2;
4109*0b459c2cSDavid du Colombier int pos;
4110*0b459c2cSDavid du Colombier struct re_registers *regs;
4111*0b459c2cSDavid du Colombier int stop;
4112*0b459c2cSDavid du Colombier {
4113*0b459c2cSDavid du Colombier /* General temporaries. */
4114*0b459c2cSDavid du Colombier int mcnt;
4115*0b459c2cSDavid du Colombier unsigned char *p1;
4116*0b459c2cSDavid du Colombier
4117*0b459c2cSDavid du Colombier /* Just past the end of the corresponding string. */
4118*0b459c2cSDavid du Colombier const char *end1, *end2;
4119*0b459c2cSDavid du Colombier
4120*0b459c2cSDavid du Colombier /* Pointers into string1 and string2, just past the last characters in
4121*0b459c2cSDavid du Colombier each to consider matching. */
4122*0b459c2cSDavid du Colombier const char *end_match_1, *end_match_2;
4123*0b459c2cSDavid du Colombier
4124*0b459c2cSDavid du Colombier /* Where we are in the data, and the end of the current string. */
4125*0b459c2cSDavid du Colombier const char *d, *dend;
4126*0b459c2cSDavid du Colombier
4127*0b459c2cSDavid du Colombier /* Where we are in the pattern, and the end of the pattern. */
4128*0b459c2cSDavid du Colombier unsigned char *p = bufp->buffer;
4129*0b459c2cSDavid du Colombier register unsigned char *pend = p + bufp->used;
4130*0b459c2cSDavid du Colombier
4131*0b459c2cSDavid du Colombier /* Mark the opcode just after a start_memory, so we can test for an
4132*0b459c2cSDavid du Colombier empty subpattern when we get to the stop_memory. */
4133*0b459c2cSDavid du Colombier unsigned char *just_past_start_mem = 0;
4134*0b459c2cSDavid du Colombier
4135*0b459c2cSDavid du Colombier /* We use this to map every character in the string. */
4136*0b459c2cSDavid du Colombier RE_TRANSLATE_TYPE translate = bufp->translate;
4137*0b459c2cSDavid du Colombier
4138*0b459c2cSDavid du Colombier /* Nonzero if we have to concern multibyte character. */
4139*0b459c2cSDavid du Colombier int multibyte = bufp->multibyte;
4140*0b459c2cSDavid du Colombier
4141*0b459c2cSDavid du Colombier /* Failure point stack. Each place that can handle a failure further
4142*0b459c2cSDavid du Colombier down the line pushes a failure point on this stack. It consists of
4143*0b459c2cSDavid du Colombier restart, regend, and reg_info for all registers corresponding to
4144*0b459c2cSDavid du Colombier the subexpressions we're currently inside, plus the number of such
4145*0b459c2cSDavid du Colombier registers, and, finally, two char *'s. The first char * is where
4146*0b459c2cSDavid du Colombier to resume scanning the pattern; the second one is where to resume
4147*0b459c2cSDavid du Colombier scanning the strings. If the latter is zero, the failure point is
4148*0b459c2cSDavid du Colombier a ``dummy''; if a failure happens and the failure point is a dummy,
4149*0b459c2cSDavid du Colombier it gets discarded and the next one is tried. */
4150*0b459c2cSDavid du Colombier #ifdef MATCH_MAY_ALLOCATE /* otherwise, this is global. */
4151*0b459c2cSDavid du Colombier fail_stack_type fail_stack;
4152*0b459c2cSDavid du Colombier #endif
4153*0b459c2cSDavid du Colombier #ifdef DEBUG
4154*0b459c2cSDavid du Colombier static unsigned failure_id = 0;
4155*0b459c2cSDavid du Colombier unsigned nfailure_points_pushed = 0, nfailure_points_popped = 0;
4156*0b459c2cSDavid du Colombier #endif
4157*0b459c2cSDavid du Colombier
4158*0b459c2cSDavid du Colombier /* This holds the pointer to the failure stack, when
4159*0b459c2cSDavid du Colombier it is allocated relocatably. */
4160*0b459c2cSDavid du Colombier fail_stack_elt_t *failure_stack_ptr;
4161*0b459c2cSDavid du Colombier
4162*0b459c2cSDavid du Colombier /* We fill all the registers internally, independent of what we
4163*0b459c2cSDavid du Colombier return, for use in backreferences. The number here includes
4164*0b459c2cSDavid du Colombier an element for register zero. */
4165*0b459c2cSDavid du Colombier unsigned num_regs = bufp->re_nsub + 1;
4166*0b459c2cSDavid du Colombier
4167*0b459c2cSDavid du Colombier /* The currently active registers. */
4168*0b459c2cSDavid du Colombier unsigned lowest_active_reg = NO_LOWEST_ACTIVE_REG;
4169*0b459c2cSDavid du Colombier unsigned highest_active_reg = NO_HIGHEST_ACTIVE_REG;
4170*0b459c2cSDavid du Colombier
4171*0b459c2cSDavid du Colombier /* Information on the contents of registers. These are pointers into
4172*0b459c2cSDavid du Colombier the input strings; they record just what was matched (on this
4173*0b459c2cSDavid du Colombier attempt) by a subexpression part of the pattern, that is, the
4174*0b459c2cSDavid du Colombier regnum-th regstart pointer points to where in the pattern we began
4175*0b459c2cSDavid du Colombier matching and the regnum-th regend points to right after where we
4176*0b459c2cSDavid du Colombier stopped matching the regnum-th subexpression. (The zeroth register
4177*0b459c2cSDavid du Colombier keeps track of what the whole pattern matches.) */
4178*0b459c2cSDavid du Colombier #ifdef MATCH_MAY_ALLOCATE /* otherwise, these are global. */
4179*0b459c2cSDavid du Colombier const char **regstart, **regend;
4180*0b459c2cSDavid du Colombier #endif
4181*0b459c2cSDavid du Colombier
4182*0b459c2cSDavid du Colombier /* If a group that's operated upon by a repetition operator fails to
4183*0b459c2cSDavid du Colombier match anything, then the register for its start will need to be
4184*0b459c2cSDavid du Colombier restored because it will have been set to wherever in the string we
4185*0b459c2cSDavid du Colombier are when we last see its open-group operator. Similarly for a
4186*0b459c2cSDavid du Colombier register's end. */
4187*0b459c2cSDavid du Colombier #ifdef MATCH_MAY_ALLOCATE /* otherwise, these are global. */
4188*0b459c2cSDavid du Colombier const char **old_regstart, **old_regend;
4189*0b459c2cSDavid du Colombier #endif
4190*0b459c2cSDavid du Colombier
4191*0b459c2cSDavid du Colombier /* The is_active field of reg_info helps us keep track of which (possibly
4192*0b459c2cSDavid du Colombier nested) subexpressions we are currently in. The matched_something
4193*0b459c2cSDavid du Colombier field of reg_info[reg_num] helps us tell whether or not we have
4194*0b459c2cSDavid du Colombier matched any of the pattern so far this time through the reg_num-th
4195*0b459c2cSDavid du Colombier subexpression. These two fields get reset each time through any
4196*0b459c2cSDavid du Colombier loop their register is in. */
4197*0b459c2cSDavid du Colombier #ifdef MATCH_MAY_ALLOCATE /* otherwise, this is global. */
4198*0b459c2cSDavid du Colombier register_info_type *reg_info;
4199*0b459c2cSDavid du Colombier #endif
4200*0b459c2cSDavid du Colombier
4201*0b459c2cSDavid du Colombier /* The following record the register info as found in the above
4202*0b459c2cSDavid du Colombier variables when we find a match better than any we've seen before.
4203*0b459c2cSDavid du Colombier This happens as we backtrack through the failure points, which in
4204*0b459c2cSDavid du Colombier turn happens only if we have not yet matched the entire string. */
4205*0b459c2cSDavid du Colombier unsigned best_regs_set = false;
4206*0b459c2cSDavid du Colombier #ifdef MATCH_MAY_ALLOCATE /* otherwise, these are global. */
4207*0b459c2cSDavid du Colombier const char **best_regstart, **best_regend;
4208*0b459c2cSDavid du Colombier #endif
4209*0b459c2cSDavid du Colombier
4210*0b459c2cSDavid du Colombier /* Logically, this is `best_regend[0]'. But we don't want to have to
4211*0b459c2cSDavid du Colombier allocate space for that if we're not allocating space for anything
4212*0b459c2cSDavid du Colombier else (see below). Also, we never need info about register 0 for
4213*0b459c2cSDavid du Colombier any of the other register vectors, and it seems rather a kludge to
4214*0b459c2cSDavid du Colombier treat `best_regend' differently than the rest. So we keep track of
4215*0b459c2cSDavid du Colombier the end of the best match so far in a separate variable. We
4216*0b459c2cSDavid du Colombier initialize this to NULL so that when we backtrack the first time
4217*0b459c2cSDavid du Colombier and need to test it, it's not garbage. */
4218*0b459c2cSDavid du Colombier const char *match_end = NULL;
4219*0b459c2cSDavid du Colombier
4220*0b459c2cSDavid du Colombier /* This helps SET_REGS_MATCHED avoid doing redundant work. */
4221*0b459c2cSDavid du Colombier int set_regs_matched_done = 0;
4222*0b459c2cSDavid du Colombier
4223*0b459c2cSDavid du Colombier /* Used when we pop values we don't care about. */
4224*0b459c2cSDavid du Colombier #ifdef MATCH_MAY_ALLOCATE /* otherwise, these are global. */
4225*0b459c2cSDavid du Colombier const char **reg_dummy;
4226*0b459c2cSDavid du Colombier register_info_type *reg_info_dummy;
4227*0b459c2cSDavid du Colombier #endif
4228*0b459c2cSDavid du Colombier
4229*0b459c2cSDavid du Colombier #ifdef DEBUG
4230*0b459c2cSDavid du Colombier /* Counts the total number of registers pushed. */
4231*0b459c2cSDavid du Colombier unsigned num_regs_pushed = 0;
4232*0b459c2cSDavid du Colombier #endif
4233*0b459c2cSDavid du Colombier
4234*0b459c2cSDavid du Colombier DEBUG_PRINT1 ("\n\nEntering re_match_2.\n");
4235*0b459c2cSDavid du Colombier
4236*0b459c2cSDavid du Colombier INIT_FAIL_STACK ();
4237*0b459c2cSDavid du Colombier
4238*0b459c2cSDavid du Colombier #ifdef MATCH_MAY_ALLOCATE
4239*0b459c2cSDavid du Colombier /* Do not bother to initialize all the register variables if there are
4240*0b459c2cSDavid du Colombier no groups in the pattern, as it takes a fair amount of time. If
4241*0b459c2cSDavid du Colombier there are groups, we include space for register 0 (the whole
4242*0b459c2cSDavid du Colombier pattern), even though we never use it, since it simplifies the
4243*0b459c2cSDavid du Colombier array indexing. We should fix this. */
4244*0b459c2cSDavid du Colombier if (bufp->re_nsub)
4245*0b459c2cSDavid du Colombier {
4246*0b459c2cSDavid du Colombier regstart = REGEX_TALLOC (num_regs, const char *);
4247*0b459c2cSDavid du Colombier regend = REGEX_TALLOC (num_regs, const char *);
4248*0b459c2cSDavid du Colombier old_regstart = REGEX_TALLOC (num_regs, const char *);
4249*0b459c2cSDavid du Colombier old_regend = REGEX_TALLOC (num_regs, const char *);
4250*0b459c2cSDavid du Colombier best_regstart = REGEX_TALLOC (num_regs, const char *);
4251*0b459c2cSDavid du Colombier best_regend = REGEX_TALLOC (num_regs, const char *);
4252*0b459c2cSDavid du Colombier reg_info = REGEX_TALLOC (num_regs, register_info_type);
4253*0b459c2cSDavid du Colombier reg_dummy = REGEX_TALLOC (num_regs, const char *);
4254*0b459c2cSDavid du Colombier reg_info_dummy = REGEX_TALLOC (num_regs, register_info_type);
4255*0b459c2cSDavid du Colombier
4256*0b459c2cSDavid du Colombier if (!(regstart && regend && old_regstart && old_regend && reg_info
4257*0b459c2cSDavid du Colombier && best_regstart && best_regend && reg_dummy && reg_info_dummy))
4258*0b459c2cSDavid du Colombier {
4259*0b459c2cSDavid du Colombier FREE_VARIABLES ();
4260*0b459c2cSDavid du Colombier return -2;
4261*0b459c2cSDavid du Colombier }
4262*0b459c2cSDavid du Colombier }
4263*0b459c2cSDavid du Colombier else
4264*0b459c2cSDavid du Colombier {
4265*0b459c2cSDavid du Colombier /* We must initialize all our variables to NULL, so that
4266*0b459c2cSDavid du Colombier `FREE_VARIABLES' doesn't try to free them. */
4267*0b459c2cSDavid du Colombier regstart = regend = old_regstart = old_regend = best_regstart
4268*0b459c2cSDavid du Colombier = best_regend = reg_dummy = NULL;
4269*0b459c2cSDavid du Colombier reg_info = reg_info_dummy = (register_info_type *) NULL;
4270*0b459c2cSDavid du Colombier }
4271*0b459c2cSDavid du Colombier #endif /* MATCH_MAY_ALLOCATE */
4272*0b459c2cSDavid du Colombier
4273*0b459c2cSDavid du Colombier /* The starting position is bogus. */
4274*0b459c2cSDavid du Colombier if (pos < 0 || pos > size1 + size2)
4275*0b459c2cSDavid du Colombier {
4276*0b459c2cSDavid du Colombier FREE_VARIABLES ();
4277*0b459c2cSDavid du Colombier return -1;
4278*0b459c2cSDavid du Colombier }
4279*0b459c2cSDavid du Colombier
4280*0b459c2cSDavid du Colombier /* Initialize subexpression text positions to -1 to mark ones that no
4281*0b459c2cSDavid du Colombier start_memory/stop_memory has been seen for. Also initialize the
4282*0b459c2cSDavid du Colombier register information struct. */
4283*0b459c2cSDavid du Colombier for (mcnt = 1; mcnt < num_regs; mcnt++)
4284*0b459c2cSDavid du Colombier {
4285*0b459c2cSDavid du Colombier regstart[mcnt] = regend[mcnt]
4286*0b459c2cSDavid du Colombier = old_regstart[mcnt] = old_regend[mcnt] = REG_UNSET_VALUE;
4287*0b459c2cSDavid du Colombier
4288*0b459c2cSDavid du Colombier REG_MATCH_NULL_STRING_P (reg_info[mcnt]) = MATCH_NULL_UNSET_VALUE;
4289*0b459c2cSDavid du Colombier IS_ACTIVE (reg_info[mcnt]) = 0;
4290*0b459c2cSDavid du Colombier MATCHED_SOMETHING (reg_info[mcnt]) = 0;
4291*0b459c2cSDavid du Colombier EVER_MATCHED_SOMETHING (reg_info[mcnt]) = 0;
4292*0b459c2cSDavid du Colombier }
4293*0b459c2cSDavid du Colombier
4294*0b459c2cSDavid du Colombier /* We move `string1' into `string2' if the latter's empty -- but not if
4295*0b459c2cSDavid du Colombier `string1' is null. */
4296*0b459c2cSDavid du Colombier if (size2 == 0 && string1 != NULL)
4297*0b459c2cSDavid du Colombier {
4298*0b459c2cSDavid du Colombier string2 = string1;
4299*0b459c2cSDavid du Colombier size2 = size1;
4300*0b459c2cSDavid du Colombier string1 = 0;
4301*0b459c2cSDavid du Colombier size1 = 0;
4302*0b459c2cSDavid du Colombier }
4303*0b459c2cSDavid du Colombier end1 = string1 + size1;
4304*0b459c2cSDavid du Colombier end2 = string2 + size2;
4305*0b459c2cSDavid du Colombier
4306*0b459c2cSDavid du Colombier /* Compute where to stop matching, within the two strings. */
4307*0b459c2cSDavid du Colombier if (stop <= size1)
4308*0b459c2cSDavid du Colombier {
4309*0b459c2cSDavid du Colombier end_match_1 = string1 + stop;
4310*0b459c2cSDavid du Colombier end_match_2 = string2;
4311*0b459c2cSDavid du Colombier }
4312*0b459c2cSDavid du Colombier else
4313*0b459c2cSDavid du Colombier {
4314*0b459c2cSDavid du Colombier end_match_1 = end1;
4315*0b459c2cSDavid du Colombier end_match_2 = string2 + stop - size1;
4316*0b459c2cSDavid du Colombier }
4317*0b459c2cSDavid du Colombier
4318*0b459c2cSDavid du Colombier /* `p' scans through the pattern as `d' scans through the data.
4319*0b459c2cSDavid du Colombier `dend' is the end of the input string that `d' points within. `d'
4320*0b459c2cSDavid du Colombier is advanced into the following input string whenever necessary, but
4321*0b459c2cSDavid du Colombier this happens before fetching; therefore, at the beginning of the
4322*0b459c2cSDavid du Colombier loop, `d' can be pointing at the end of a string, but it cannot
4323*0b459c2cSDavid du Colombier equal `string2'. */
4324*0b459c2cSDavid du Colombier if (size1 > 0 && pos <= size1)
4325*0b459c2cSDavid du Colombier {
4326*0b459c2cSDavid du Colombier d = string1 + pos;
4327*0b459c2cSDavid du Colombier dend = end_match_1;
4328*0b459c2cSDavid du Colombier }
4329*0b459c2cSDavid du Colombier else
4330*0b459c2cSDavid du Colombier {
4331*0b459c2cSDavid du Colombier d = string2 + pos - size1;
4332*0b459c2cSDavid du Colombier dend = end_match_2;
4333*0b459c2cSDavid du Colombier }
4334*0b459c2cSDavid du Colombier
4335*0b459c2cSDavid du Colombier DEBUG_PRINT1 ("The compiled pattern is: ");
4336*0b459c2cSDavid du Colombier DEBUG_PRINT_COMPILED_PATTERN (bufp, p, pend);
4337*0b459c2cSDavid du Colombier DEBUG_PRINT1 ("The string to match is: `");
4338*0b459c2cSDavid du Colombier DEBUG_PRINT_DOUBLE_STRING (d, string1, size1, string2, size2);
4339*0b459c2cSDavid du Colombier DEBUG_PRINT1 ("'\n");
4340*0b459c2cSDavid du Colombier
4341*0b459c2cSDavid du Colombier /* This loops over pattern commands. It exits by returning from the
4342*0b459c2cSDavid du Colombier function if the match is complete, or it drops through if the match
4343*0b459c2cSDavid du Colombier fails at this starting point in the input data. */
4344*0b459c2cSDavid du Colombier for (;;)
4345*0b459c2cSDavid du Colombier {
4346*0b459c2cSDavid du Colombier DEBUG_PRINT2 ("\n0x%x: ", p);
4347*0b459c2cSDavid du Colombier
4348*0b459c2cSDavid du Colombier if (p == pend)
4349*0b459c2cSDavid du Colombier { /* End of pattern means we might have succeeded. */
4350*0b459c2cSDavid du Colombier DEBUG_PRINT1 ("end of pattern ... ");
4351*0b459c2cSDavid du Colombier
4352*0b459c2cSDavid du Colombier /* If we haven't matched the entire string, and we want the
4353*0b459c2cSDavid du Colombier longest match, try backtracking. */
4354*0b459c2cSDavid du Colombier if (d != end_match_2)
4355*0b459c2cSDavid du Colombier {
4356*0b459c2cSDavid du Colombier /* 1 if this match ends in the same string (string1 or string2)
4357*0b459c2cSDavid du Colombier as the best previous match. */
4358*0b459c2cSDavid du Colombier boolean same_str_p = (FIRST_STRING_P (match_end)
4359*0b459c2cSDavid du Colombier == MATCHING_IN_FIRST_STRING);
4360*0b459c2cSDavid du Colombier /* 1 if this match is the best seen so far. */
4361*0b459c2cSDavid du Colombier boolean best_match_p;
4362*0b459c2cSDavid du Colombier
4363*0b459c2cSDavid du Colombier /* AIX compiler got confused when this was combined
4364*0b459c2cSDavid du Colombier with the previous declaration. */
4365*0b459c2cSDavid du Colombier if (same_str_p)
4366*0b459c2cSDavid du Colombier best_match_p = d > match_end;
4367*0b459c2cSDavid du Colombier else
4368*0b459c2cSDavid du Colombier best_match_p = !MATCHING_IN_FIRST_STRING;
4369*0b459c2cSDavid du Colombier
4370*0b459c2cSDavid du Colombier DEBUG_PRINT1 ("backtracking.\n");
4371*0b459c2cSDavid du Colombier
4372*0b459c2cSDavid du Colombier if (!FAIL_STACK_EMPTY ())
4373*0b459c2cSDavid du Colombier { /* More failure points to try. */
4374*0b459c2cSDavid du Colombier
4375*0b459c2cSDavid du Colombier /* If exceeds best match so far, save it. */
4376*0b459c2cSDavid du Colombier if (!best_regs_set || best_match_p)
4377*0b459c2cSDavid du Colombier {
4378*0b459c2cSDavid du Colombier best_regs_set = true;
4379*0b459c2cSDavid du Colombier match_end = d;
4380*0b459c2cSDavid du Colombier
4381*0b459c2cSDavid du Colombier DEBUG_PRINT1 ("\nSAVING match as best so far.\n");
4382*0b459c2cSDavid du Colombier
4383*0b459c2cSDavid du Colombier for (mcnt = 1; mcnt < num_regs; mcnt++)
4384*0b459c2cSDavid du Colombier {
4385*0b459c2cSDavid du Colombier best_regstart[mcnt] = regstart[mcnt];
4386*0b459c2cSDavid du Colombier best_regend[mcnt] = regend[mcnt];
4387*0b459c2cSDavid du Colombier }
4388*0b459c2cSDavid du Colombier }
4389*0b459c2cSDavid du Colombier goto fail;
4390*0b459c2cSDavid du Colombier }
4391*0b459c2cSDavid du Colombier
4392*0b459c2cSDavid du Colombier /* If no failure points, don't restore garbage. And if
4393*0b459c2cSDavid du Colombier last match is real best match, don't restore second
4394*0b459c2cSDavid du Colombier best one. */
4395*0b459c2cSDavid du Colombier else if (best_regs_set && !best_match_p)
4396*0b459c2cSDavid du Colombier {
4397*0b459c2cSDavid du Colombier restore_best_regs:
4398*0b459c2cSDavid du Colombier /* Restore best match. It may happen that `dend ==
4399*0b459c2cSDavid du Colombier end_match_1' while the restored d is in string2.
4400*0b459c2cSDavid du Colombier For example, the pattern `x.*y.*z' against the
4401*0b459c2cSDavid du Colombier strings `x-' and `y-z-', if the two strings are
4402*0b459c2cSDavid du Colombier not consecutive in memory. */
4403*0b459c2cSDavid du Colombier DEBUG_PRINT1 ("Restoring best registers.\n");
4404*0b459c2cSDavid du Colombier
4405*0b459c2cSDavid du Colombier d = match_end;
4406*0b459c2cSDavid du Colombier dend = ((d >= string1 && d <= end1)
4407*0b459c2cSDavid du Colombier ? end_match_1 : end_match_2);
4408*0b459c2cSDavid du Colombier
4409*0b459c2cSDavid du Colombier for (mcnt = 1; mcnt < num_regs; mcnt++)
4410*0b459c2cSDavid du Colombier {
4411*0b459c2cSDavid du Colombier regstart[mcnt] = best_regstart[mcnt];
4412*0b459c2cSDavid du Colombier regend[mcnt] = best_regend[mcnt];
4413*0b459c2cSDavid du Colombier }
4414*0b459c2cSDavid du Colombier }
4415*0b459c2cSDavid du Colombier } /* d != end_match_2 */
4416*0b459c2cSDavid du Colombier
4417*0b459c2cSDavid du Colombier succeed_label:
4418*0b459c2cSDavid du Colombier DEBUG_PRINT1 ("Accepting match.\n");
4419*0b459c2cSDavid du Colombier
4420*0b459c2cSDavid du Colombier /* If caller wants register contents data back, do it. */
4421*0b459c2cSDavid du Colombier if (regs && !bufp->no_sub)
4422*0b459c2cSDavid du Colombier {
4423*0b459c2cSDavid du Colombier /* Have the register data arrays been allocated? */
4424*0b459c2cSDavid du Colombier if (bufp->regs_allocated == REGS_UNALLOCATED)
4425*0b459c2cSDavid du Colombier { /* No. So allocate them with malloc. We need one
4426*0b459c2cSDavid du Colombier extra element beyond `num_regs' for the `-1' marker
4427*0b459c2cSDavid du Colombier GNU code uses. */
4428*0b459c2cSDavid du Colombier regs->num_regs = MAX (RE_NREGS, num_regs + 1);
4429*0b459c2cSDavid du Colombier regs->start = TALLOC (regs->num_regs, regoff_t);
4430*0b459c2cSDavid du Colombier regs->end = TALLOC (regs->num_regs, regoff_t);
4431*0b459c2cSDavid du Colombier if (regs->start == NULL || regs->end == NULL)
4432*0b459c2cSDavid du Colombier {
4433*0b459c2cSDavid du Colombier FREE_VARIABLES ();
4434*0b459c2cSDavid du Colombier return -2;
4435*0b459c2cSDavid du Colombier }
4436*0b459c2cSDavid du Colombier bufp->regs_allocated = REGS_REALLOCATE;
4437*0b459c2cSDavid du Colombier }
4438*0b459c2cSDavid du Colombier else if (bufp->regs_allocated == REGS_REALLOCATE)
4439*0b459c2cSDavid du Colombier { /* Yes. If we need more elements than were already
4440*0b459c2cSDavid du Colombier allocated, reallocate them. If we need fewer, just
4441*0b459c2cSDavid du Colombier leave it alone. */
4442*0b459c2cSDavid du Colombier if (regs->num_regs < num_regs + 1)
4443*0b459c2cSDavid du Colombier {
4444*0b459c2cSDavid du Colombier regs->num_regs = num_regs + 1;
4445*0b459c2cSDavid du Colombier RETALLOC (regs->start, regs->num_regs, regoff_t);
4446*0b459c2cSDavid du Colombier RETALLOC (regs->end, regs->num_regs, regoff_t);
4447*0b459c2cSDavid du Colombier if (regs->start == NULL || regs->end == NULL)
4448*0b459c2cSDavid du Colombier {
4449*0b459c2cSDavid du Colombier FREE_VARIABLES ();
4450*0b459c2cSDavid du Colombier return -2;
4451*0b459c2cSDavid du Colombier }
4452*0b459c2cSDavid du Colombier }
4453*0b459c2cSDavid du Colombier }
4454*0b459c2cSDavid du Colombier else
4455*0b459c2cSDavid du Colombier {
4456*0b459c2cSDavid du Colombier /* These braces fend off an "empty body in an else-statement"
4457*0b459c2cSDavid du Colombier warning under GCC when assert expands to nothing. */
4458*0b459c2cSDavid du Colombier assert (bufp->regs_allocated == REGS_FIXED);
4459*0b459c2cSDavid du Colombier }
4460*0b459c2cSDavid du Colombier
4461*0b459c2cSDavid du Colombier /* Convert the pointer data in `regstart' and `regend' to
4462*0b459c2cSDavid du Colombier indices. Register zero has to be set differently,
4463*0b459c2cSDavid du Colombier since we haven't kept track of any info for it. */
4464*0b459c2cSDavid du Colombier if (regs->num_regs > 0)
4465*0b459c2cSDavid du Colombier {
4466*0b459c2cSDavid du Colombier regs->start[0] = pos;
4467*0b459c2cSDavid du Colombier regs->end[0] = (MATCHING_IN_FIRST_STRING
4468*0b459c2cSDavid du Colombier ? ((regoff_t) (d - string1))
4469*0b459c2cSDavid du Colombier : ((regoff_t) (d - string2 + size1)));
4470*0b459c2cSDavid du Colombier }
4471*0b459c2cSDavid du Colombier
4472*0b459c2cSDavid du Colombier /* Go through the first `min (num_regs, regs->num_regs)'
4473*0b459c2cSDavid du Colombier registers, since that is all we initialized. */
4474*0b459c2cSDavid du Colombier for (mcnt = 1; mcnt < MIN (num_regs, regs->num_regs); mcnt++)
4475*0b459c2cSDavid du Colombier {
4476*0b459c2cSDavid du Colombier if (REG_UNSET (regstart[mcnt]) || REG_UNSET (regend[mcnt]))
4477*0b459c2cSDavid du Colombier regs->start[mcnt] = regs->end[mcnt] = -1;
4478*0b459c2cSDavid du Colombier else
4479*0b459c2cSDavid du Colombier {
4480*0b459c2cSDavid du Colombier regs->start[mcnt]
4481*0b459c2cSDavid du Colombier = (regoff_t) POINTER_TO_OFFSET (regstart[mcnt]);
4482*0b459c2cSDavid du Colombier regs->end[mcnt]
4483*0b459c2cSDavid du Colombier = (regoff_t) POINTER_TO_OFFSET (regend[mcnt]);
4484*0b459c2cSDavid du Colombier }
4485*0b459c2cSDavid du Colombier }
4486*0b459c2cSDavid du Colombier
4487*0b459c2cSDavid du Colombier /* If the regs structure we return has more elements than
4488*0b459c2cSDavid du Colombier were in the pattern, set the extra elements to -1. If
4489*0b459c2cSDavid du Colombier we (re)allocated the registers, this is the case,
4490*0b459c2cSDavid du Colombier because we always allocate enough to have at least one
4491*0b459c2cSDavid du Colombier -1 at the end. */
4492*0b459c2cSDavid du Colombier for (mcnt = num_regs; mcnt < regs->num_regs; mcnt++)
4493*0b459c2cSDavid du Colombier regs->start[mcnt] = regs->end[mcnt] = -1;
4494*0b459c2cSDavid du Colombier } /* regs && !bufp->no_sub */
4495*0b459c2cSDavid du Colombier
4496*0b459c2cSDavid du Colombier DEBUG_PRINT4 ("%u failure points pushed, %u popped (%u remain).\n",
4497*0b459c2cSDavid du Colombier nfailure_points_pushed, nfailure_points_popped,
4498*0b459c2cSDavid du Colombier nfailure_points_pushed - nfailure_points_popped);
4499*0b459c2cSDavid du Colombier DEBUG_PRINT2 ("%u registers pushed.\n", num_regs_pushed);
4500*0b459c2cSDavid du Colombier
4501*0b459c2cSDavid du Colombier mcnt = d - pos - (MATCHING_IN_FIRST_STRING
4502*0b459c2cSDavid du Colombier ? string1
4503*0b459c2cSDavid du Colombier : string2 - size1);
4504*0b459c2cSDavid du Colombier
4505*0b459c2cSDavid du Colombier DEBUG_PRINT2 ("Returning %d from re_match_2.\n", mcnt);
4506*0b459c2cSDavid du Colombier
4507*0b459c2cSDavid du Colombier FREE_VARIABLES ();
4508*0b459c2cSDavid du Colombier return mcnt;
4509*0b459c2cSDavid du Colombier }
4510*0b459c2cSDavid du Colombier
4511*0b459c2cSDavid du Colombier /* Otherwise match next pattern command. */
4512*0b459c2cSDavid du Colombier switch (SWITCH_ENUM_CAST ((re_opcode_t) *p++))
4513*0b459c2cSDavid du Colombier {
4514*0b459c2cSDavid du Colombier /* Ignore these. Used to ignore the n of succeed_n's which
4515*0b459c2cSDavid du Colombier currently have n == 0. */
4516*0b459c2cSDavid du Colombier case no_op:
4517*0b459c2cSDavid du Colombier DEBUG_PRINT1 ("EXECUTING no_op.\n");
4518*0b459c2cSDavid du Colombier break;
4519*0b459c2cSDavid du Colombier
4520*0b459c2cSDavid du Colombier case succeed:
4521*0b459c2cSDavid du Colombier DEBUG_PRINT1 ("EXECUTING succeed.\n");
4522*0b459c2cSDavid du Colombier goto succeed_label;
4523*0b459c2cSDavid du Colombier
4524*0b459c2cSDavid du Colombier /* Match the next n pattern characters exactly. The following
4525*0b459c2cSDavid du Colombier byte in the pattern defines n, and the n bytes after that
4526*0b459c2cSDavid du Colombier are the characters to match. */
4527*0b459c2cSDavid du Colombier case exactn:
4528*0b459c2cSDavid du Colombier mcnt = *p++;
4529*0b459c2cSDavid du Colombier DEBUG_PRINT2 ("EXECUTING exactn %d.\n", mcnt);
4530*0b459c2cSDavid du Colombier
4531*0b459c2cSDavid du Colombier /* This is written out as an if-else so we don't waste time
4532*0b459c2cSDavid du Colombier testing `translate' inside the loop. */
4533*0b459c2cSDavid du Colombier if (RE_TRANSLATE_P (translate))
4534*0b459c2cSDavid du Colombier {
4535*0b459c2cSDavid du Colombier #ifdef emacs
4536*0b459c2cSDavid du Colombier if (multibyte)
4537*0b459c2cSDavid du Colombier do
4538*0b459c2cSDavid du Colombier {
4539*0b459c2cSDavid du Colombier int pat_charlen, buf_charlen;
4540*0b459c2cSDavid du Colombier unsigned int pat_ch, buf_ch;
4541*0b459c2cSDavid du Colombier
4542*0b459c2cSDavid du Colombier PREFETCH ();
4543*0b459c2cSDavid du Colombier pat_ch = STRING_CHAR_AND_LENGTH (p, pend - p, pat_charlen);
4544*0b459c2cSDavid du Colombier buf_ch = STRING_CHAR_AND_LENGTH (d, dend - d, buf_charlen);
4545*0b459c2cSDavid du Colombier
4546*0b459c2cSDavid du Colombier if (RE_TRANSLATE (translate, buf_ch)
4547*0b459c2cSDavid du Colombier != pat_ch)
4548*0b459c2cSDavid du Colombier goto fail;
4549*0b459c2cSDavid du Colombier
4550*0b459c2cSDavid du Colombier p += pat_charlen;
4551*0b459c2cSDavid du Colombier d += buf_charlen;
4552*0b459c2cSDavid du Colombier mcnt -= pat_charlen;
4553*0b459c2cSDavid du Colombier }
4554*0b459c2cSDavid du Colombier while (mcnt > 0);
4555*0b459c2cSDavid du Colombier else
4556*0b459c2cSDavid du Colombier #endif /* emacs */
4557*0b459c2cSDavid du Colombier do
4558*0b459c2cSDavid du Colombier {
4559*0b459c2cSDavid du Colombier PREFETCH ();
4560*0b459c2cSDavid du Colombier if ((unsigned char) RE_TRANSLATE (translate, (unsigned char) *d)
4561*0b459c2cSDavid du Colombier != (unsigned char) *p++)
4562*0b459c2cSDavid du Colombier goto fail;
4563*0b459c2cSDavid du Colombier d++;
4564*0b459c2cSDavid du Colombier }
4565*0b459c2cSDavid du Colombier while (--mcnt);
4566*0b459c2cSDavid du Colombier }
4567*0b459c2cSDavid du Colombier else
4568*0b459c2cSDavid du Colombier {
4569*0b459c2cSDavid du Colombier do
4570*0b459c2cSDavid du Colombier {
4571*0b459c2cSDavid du Colombier PREFETCH ();
4572*0b459c2cSDavid du Colombier if (*d++ != (char) *p++) goto fail;
4573*0b459c2cSDavid du Colombier }
4574*0b459c2cSDavid du Colombier while (--mcnt);
4575*0b459c2cSDavid du Colombier }
4576*0b459c2cSDavid du Colombier SET_REGS_MATCHED ();
4577*0b459c2cSDavid du Colombier break;
4578*0b459c2cSDavid du Colombier
4579*0b459c2cSDavid du Colombier
4580*0b459c2cSDavid du Colombier /* Match any character except possibly a newline or a null. */
4581*0b459c2cSDavid du Colombier case anychar:
4582*0b459c2cSDavid du Colombier {
4583*0b459c2cSDavid du Colombier int buf_charlen;
4584*0b459c2cSDavid du Colombier unsigned int buf_ch;
4585*0b459c2cSDavid du Colombier
4586*0b459c2cSDavid du Colombier DEBUG_PRINT1 ("EXECUTING anychar.\n");
4587*0b459c2cSDavid du Colombier
4588*0b459c2cSDavid du Colombier PREFETCH ();
4589*0b459c2cSDavid du Colombier
4590*0b459c2cSDavid du Colombier #ifdef emacs
4591*0b459c2cSDavid du Colombier if (multibyte)
4592*0b459c2cSDavid du Colombier buf_ch = STRING_CHAR_AND_LENGTH (d, dend - d, buf_charlen);
4593*0b459c2cSDavid du Colombier else
4594*0b459c2cSDavid du Colombier #endif /* emacs */
4595*0b459c2cSDavid du Colombier {
4596*0b459c2cSDavid du Colombier buf_ch = (unsigned char) *d;
4597*0b459c2cSDavid du Colombier buf_charlen = 1;
4598*0b459c2cSDavid du Colombier }
4599*0b459c2cSDavid du Colombier
4600*0b459c2cSDavid du Colombier buf_ch = TRANSLATE (buf_ch);
4601*0b459c2cSDavid du Colombier
4602*0b459c2cSDavid du Colombier if ((!(bufp->syntax & RE_DOT_NEWLINE)
4603*0b459c2cSDavid du Colombier && buf_ch == '\n')
4604*0b459c2cSDavid du Colombier || ((bufp->syntax & RE_DOT_NOT_NULL)
4605*0b459c2cSDavid du Colombier && buf_ch == '\000'))
4606*0b459c2cSDavid du Colombier goto fail;
4607*0b459c2cSDavid du Colombier
4608*0b459c2cSDavid du Colombier SET_REGS_MATCHED ();
4609*0b459c2cSDavid du Colombier DEBUG_PRINT2 (" Matched `%d'.\n", *d);
4610*0b459c2cSDavid du Colombier d += buf_charlen;
4611*0b459c2cSDavid du Colombier }
4612*0b459c2cSDavid du Colombier break;
4613*0b459c2cSDavid du Colombier
4614*0b459c2cSDavid du Colombier
4615*0b459c2cSDavid du Colombier case charset:
4616*0b459c2cSDavid du Colombier case charset_not:
4617*0b459c2cSDavid du Colombier {
4618*0b459c2cSDavid du Colombier register unsigned int c;
4619*0b459c2cSDavid du Colombier boolean not = (re_opcode_t) *(p - 1) == charset_not;
4620*0b459c2cSDavid du Colombier int len;
4621*0b459c2cSDavid du Colombier
4622*0b459c2cSDavid du Colombier /* Start of actual range_table, or end of bitmap if there is no
4623*0b459c2cSDavid du Colombier range table. */
4624*0b459c2cSDavid du Colombier unsigned char *range_table;
4625*0b459c2cSDavid du Colombier
4626*0b459c2cSDavid du Colombier /* Nonzero if there is range table. */
4627*0b459c2cSDavid du Colombier int range_table_exists;
4628*0b459c2cSDavid du Colombier
4629*0b459c2cSDavid du Colombier /* Number of ranges of range table. Not in bytes. */
4630*0b459c2cSDavid du Colombier int count;
4631*0b459c2cSDavid du Colombier
4632*0b459c2cSDavid du Colombier DEBUG_PRINT2 ("EXECUTING charset%s.\n", not ? "_not" : "");
4633*0b459c2cSDavid du Colombier
4634*0b459c2cSDavid du Colombier PREFETCH ();
4635*0b459c2cSDavid du Colombier c = (unsigned char) *d;
4636*0b459c2cSDavid du Colombier
4637*0b459c2cSDavid du Colombier range_table = CHARSET_RANGE_TABLE (&p[-1]); /* Past the bitmap. */
4638*0b459c2cSDavid du Colombier range_table_exists = CHARSET_RANGE_TABLE_EXISTS_P (&p[-1]);
4639*0b459c2cSDavid du Colombier if (range_table_exists)
4640*0b459c2cSDavid du Colombier EXTRACT_NUMBER_AND_INCR (count, range_table);
4641*0b459c2cSDavid du Colombier else
4642*0b459c2cSDavid du Colombier count = 0;
4643*0b459c2cSDavid du Colombier
4644*0b459c2cSDavid du Colombier if (multibyte && BASE_LEADING_CODE_P (c))
4645*0b459c2cSDavid du Colombier c = STRING_CHAR_AND_LENGTH (d, dend - d, len);
4646*0b459c2cSDavid du Colombier
4647*0b459c2cSDavid du Colombier if (SINGLE_BYTE_CHAR_P (c))
4648*0b459c2cSDavid du Colombier { /* Lookup bitmap. */
4649*0b459c2cSDavid du Colombier c = TRANSLATE (c); /* The character to match. */
4650*0b459c2cSDavid du Colombier len = 1;
4651*0b459c2cSDavid du Colombier
4652*0b459c2cSDavid du Colombier /* Cast to `unsigned' instead of `unsigned char' in
4653*0b459c2cSDavid du Colombier case the bit list is a full 32 bytes long. */
4654*0b459c2cSDavid du Colombier if (c < (unsigned) (CHARSET_BITMAP_SIZE (&p[-1]) * BYTEWIDTH)
4655*0b459c2cSDavid du Colombier && p[1 + c / BYTEWIDTH] & (1 << (c % BYTEWIDTH)))
4656*0b459c2cSDavid du Colombier not = !not;
4657*0b459c2cSDavid du Colombier }
4658*0b459c2cSDavid du Colombier else if (range_table_exists)
4659*0b459c2cSDavid du Colombier CHARSET_LOOKUP_RANGE_TABLE_RAW (not, c, range_table, count);
4660*0b459c2cSDavid du Colombier
4661*0b459c2cSDavid du Colombier p = CHARSET_RANGE_TABLE_END (range_table, count);
4662*0b459c2cSDavid du Colombier
4663*0b459c2cSDavid du Colombier if (!not) goto fail;
4664*0b459c2cSDavid du Colombier
4665*0b459c2cSDavid du Colombier SET_REGS_MATCHED ();
4666*0b459c2cSDavid du Colombier d += len;
4667*0b459c2cSDavid du Colombier break;
4668*0b459c2cSDavid du Colombier }
4669*0b459c2cSDavid du Colombier
4670*0b459c2cSDavid du Colombier
4671*0b459c2cSDavid du Colombier /* The beginning of a group is represented by start_memory.
4672*0b459c2cSDavid du Colombier The arguments are the register number in the next byte, and the
4673*0b459c2cSDavid du Colombier number of groups inner to this one in the next. The text
4674*0b459c2cSDavid du Colombier matched within the group is recorded (in the internal
4675*0b459c2cSDavid du Colombier registers data structure) under the register number. */
4676*0b459c2cSDavid du Colombier case start_memory:
4677*0b459c2cSDavid du Colombier DEBUG_PRINT3 ("EXECUTING start_memory %d (%d):\n", *p, p[1]);
4678*0b459c2cSDavid du Colombier
4679*0b459c2cSDavid du Colombier /* Find out if this group can match the empty string. */
4680*0b459c2cSDavid du Colombier p1 = p; /* To send to group_match_null_string_p. */
4681*0b459c2cSDavid du Colombier
4682*0b459c2cSDavid du Colombier if (REG_MATCH_NULL_STRING_P (reg_info[*p]) == MATCH_NULL_UNSET_VALUE)
4683*0b459c2cSDavid du Colombier REG_MATCH_NULL_STRING_P (reg_info[*p])
4684*0b459c2cSDavid du Colombier = group_match_null_string_p (&p1, pend, reg_info);
4685*0b459c2cSDavid du Colombier
4686*0b459c2cSDavid du Colombier /* Save the position in the string where we were the last time
4687*0b459c2cSDavid du Colombier we were at this open-group operator in case the group is
4688*0b459c2cSDavid du Colombier operated upon by a repetition operator, e.g., with `(a*)*b'
4689*0b459c2cSDavid du Colombier against `ab'; then we want to ignore where we are now in
4690*0b459c2cSDavid du Colombier the string in case this attempt to match fails. */
4691*0b459c2cSDavid du Colombier old_regstart[*p] = REG_MATCH_NULL_STRING_P (reg_info[*p])
4692*0b459c2cSDavid du Colombier ? REG_UNSET (regstart[*p]) ? d : regstart[*p]
4693*0b459c2cSDavid du Colombier : regstart[*p];
4694*0b459c2cSDavid du Colombier DEBUG_PRINT2 (" old_regstart: %d\n",
4695*0b459c2cSDavid du Colombier POINTER_TO_OFFSET (old_regstart[*p]));
4696*0b459c2cSDavid du Colombier
4697*0b459c2cSDavid du Colombier regstart[*p] = d;
4698*0b459c2cSDavid du Colombier DEBUG_PRINT2 (" regstart: %d\n", POINTER_TO_OFFSET (regstart[*p]));
4699*0b459c2cSDavid du Colombier
4700*0b459c2cSDavid du Colombier IS_ACTIVE (reg_info[*p]) = 1;
4701*0b459c2cSDavid du Colombier MATCHED_SOMETHING (reg_info[*p]) = 0;
4702*0b459c2cSDavid du Colombier
4703*0b459c2cSDavid du Colombier /* Clear this whenever we change the register activity status. */
4704*0b459c2cSDavid du Colombier set_regs_matched_done = 0;
4705*0b459c2cSDavid du Colombier
4706*0b459c2cSDavid du Colombier /* This is the new highest active register. */
4707*0b459c2cSDavid du Colombier highest_active_reg = *p;
4708*0b459c2cSDavid du Colombier
4709*0b459c2cSDavid du Colombier /* If nothing was active before, this is the new lowest active
4710*0b459c2cSDavid du Colombier register. */
4711*0b459c2cSDavid du Colombier if (lowest_active_reg == NO_LOWEST_ACTIVE_REG)
4712*0b459c2cSDavid du Colombier lowest_active_reg = *p;
4713*0b459c2cSDavid du Colombier
4714*0b459c2cSDavid du Colombier /* Move past the register number and inner group count. */
4715*0b459c2cSDavid du Colombier p += 2;
4716*0b459c2cSDavid du Colombier just_past_start_mem = p;
4717*0b459c2cSDavid du Colombier
4718*0b459c2cSDavid du Colombier break;
4719*0b459c2cSDavid du Colombier
4720*0b459c2cSDavid du Colombier
4721*0b459c2cSDavid du Colombier /* The stop_memory opcode represents the end of a group. Its
4722*0b459c2cSDavid du Colombier arguments are the same as start_memory's: the register
4723*0b459c2cSDavid du Colombier number, and the number of inner groups. */
4724*0b459c2cSDavid du Colombier case stop_memory:
4725*0b459c2cSDavid du Colombier DEBUG_PRINT3 ("EXECUTING stop_memory %d (%d):\n", *p, p[1]);
4726*0b459c2cSDavid du Colombier
4727*0b459c2cSDavid du Colombier /* We need to save the string position the last time we were at
4728*0b459c2cSDavid du Colombier this close-group operator in case the group is operated
4729*0b459c2cSDavid du Colombier upon by a repetition operator, e.g., with `((a*)*(b*)*)*'
4730*0b459c2cSDavid du Colombier against `aba'; then we want to ignore where we are now in
4731*0b459c2cSDavid du Colombier the string in case this attempt to match fails. */
4732*0b459c2cSDavid du Colombier old_regend[*p] = REG_MATCH_NULL_STRING_P (reg_info[*p])
4733*0b459c2cSDavid du Colombier ? REG_UNSET (regend[*p]) ? d : regend[*p]
4734*0b459c2cSDavid du Colombier : regend[*p];
4735*0b459c2cSDavid du Colombier DEBUG_PRINT2 (" old_regend: %d\n",
4736*0b459c2cSDavid du Colombier POINTER_TO_OFFSET (old_regend[*p]));
4737*0b459c2cSDavid du Colombier
4738*0b459c2cSDavid du Colombier regend[*p] = d;
4739*0b459c2cSDavid du Colombier DEBUG_PRINT2 (" regend: %d\n", POINTER_TO_OFFSET (regend[*p]));
4740*0b459c2cSDavid du Colombier
4741*0b459c2cSDavid du Colombier /* This register isn't active anymore. */
4742*0b459c2cSDavid du Colombier IS_ACTIVE (reg_info[*p]) = 0;
4743*0b459c2cSDavid du Colombier
4744*0b459c2cSDavid du Colombier /* Clear this whenever we change the register activity status. */
4745*0b459c2cSDavid du Colombier set_regs_matched_done = 0;
4746*0b459c2cSDavid du Colombier
4747*0b459c2cSDavid du Colombier /* If this was the only register active, nothing is active
4748*0b459c2cSDavid du Colombier anymore. */
4749*0b459c2cSDavid du Colombier if (lowest_active_reg == highest_active_reg)
4750*0b459c2cSDavid du Colombier {
4751*0b459c2cSDavid du Colombier lowest_active_reg = NO_LOWEST_ACTIVE_REG;
4752*0b459c2cSDavid du Colombier highest_active_reg = NO_HIGHEST_ACTIVE_REG;
4753*0b459c2cSDavid du Colombier }
4754*0b459c2cSDavid du Colombier else
4755*0b459c2cSDavid du Colombier { /* We must scan for the new highest active register, since
4756*0b459c2cSDavid du Colombier it isn't necessarily one less than now: consider
4757*0b459c2cSDavid du Colombier (a(b)c(d(e)f)g). When group 3 ends, after the f), the
4758*0b459c2cSDavid du Colombier new highest active register is 1. */
4759*0b459c2cSDavid du Colombier unsigned char r = *p - 1;
4760*0b459c2cSDavid du Colombier while (r > 0 && !IS_ACTIVE (reg_info[r]))
4761*0b459c2cSDavid du Colombier r--;
4762*0b459c2cSDavid du Colombier
4763*0b459c2cSDavid du Colombier /* If we end up at register zero, that means that we saved
4764*0b459c2cSDavid du Colombier the registers as the result of an `on_failure_jump', not
4765*0b459c2cSDavid du Colombier a `start_memory', and we jumped past the innermost
4766*0b459c2cSDavid du Colombier `stop_memory'. For example, in ((.)*) we save
4767*0b459c2cSDavid du Colombier registers 1 and 2 as a result of the *, but when we pop
4768*0b459c2cSDavid du Colombier back to the second ), we are at the stop_memory 1.
4769*0b459c2cSDavid du Colombier Thus, nothing is active. */
4770*0b459c2cSDavid du Colombier if (r == 0)
4771*0b459c2cSDavid du Colombier {
4772*0b459c2cSDavid du Colombier lowest_active_reg = NO_LOWEST_ACTIVE_REG;
4773*0b459c2cSDavid du Colombier highest_active_reg = NO_HIGHEST_ACTIVE_REG;
4774*0b459c2cSDavid du Colombier }
4775*0b459c2cSDavid du Colombier else
4776*0b459c2cSDavid du Colombier highest_active_reg = r;
4777*0b459c2cSDavid du Colombier }
4778*0b459c2cSDavid du Colombier
4779*0b459c2cSDavid du Colombier /* If we just failed to match something this time around with a
4780*0b459c2cSDavid du Colombier group that's operated on by a repetition operator, try to
4781*0b459c2cSDavid du Colombier force exit from the ``loop'', and restore the register
4782*0b459c2cSDavid du Colombier information for this group that we had before trying this
4783*0b459c2cSDavid du Colombier last match. */
4784*0b459c2cSDavid du Colombier if ((!MATCHED_SOMETHING (reg_info[*p])
4785*0b459c2cSDavid du Colombier || just_past_start_mem == p - 1)
4786*0b459c2cSDavid du Colombier && (p + 2) < pend)
4787*0b459c2cSDavid du Colombier {
4788*0b459c2cSDavid du Colombier boolean is_a_jump_n = false;
4789*0b459c2cSDavid du Colombier
4790*0b459c2cSDavid du Colombier p1 = p + 2;
4791*0b459c2cSDavid du Colombier mcnt = 0;
4792*0b459c2cSDavid du Colombier switch ((re_opcode_t) *p1++)
4793*0b459c2cSDavid du Colombier {
4794*0b459c2cSDavid du Colombier case jump_n:
4795*0b459c2cSDavid du Colombier is_a_jump_n = true;
4796*0b459c2cSDavid du Colombier case pop_failure_jump:
4797*0b459c2cSDavid du Colombier case maybe_pop_jump:
4798*0b459c2cSDavid du Colombier case jump:
4799*0b459c2cSDavid du Colombier case dummy_failure_jump:
4800*0b459c2cSDavid du Colombier EXTRACT_NUMBER_AND_INCR (mcnt, p1);
4801*0b459c2cSDavid du Colombier if (is_a_jump_n)
4802*0b459c2cSDavid du Colombier p1 += 2;
4803*0b459c2cSDavid du Colombier break;
4804*0b459c2cSDavid du Colombier
4805*0b459c2cSDavid du Colombier default:
4806*0b459c2cSDavid du Colombier /* do nothing */ ;
4807*0b459c2cSDavid du Colombier }
4808*0b459c2cSDavid du Colombier p1 += mcnt;
4809*0b459c2cSDavid du Colombier
4810*0b459c2cSDavid du Colombier /* If the next operation is a jump backwards in the pattern
4811*0b459c2cSDavid du Colombier to an on_failure_jump right before the start_memory
4812*0b459c2cSDavid du Colombier corresponding to this stop_memory, exit from the loop
4813*0b459c2cSDavid du Colombier by forcing a failure after pushing on the stack the
4814*0b459c2cSDavid du Colombier on_failure_jump's jump in the pattern, and d. */
4815*0b459c2cSDavid du Colombier if (mcnt < 0 && (re_opcode_t) *p1 == on_failure_jump
4816*0b459c2cSDavid du Colombier && (re_opcode_t) p1[3] == start_memory && p1[4] == *p)
4817*0b459c2cSDavid du Colombier {
4818*0b459c2cSDavid du Colombier /* If this group ever matched anything, then restore
4819*0b459c2cSDavid du Colombier what its registers were before trying this last
4820*0b459c2cSDavid du Colombier failed match, e.g., with `(a*)*b' against `ab' for
4821*0b459c2cSDavid du Colombier regstart[1], and, e.g., with `((a*)*(b*)*)*'
4822*0b459c2cSDavid du Colombier against `aba' for regend[3].
4823*0b459c2cSDavid du Colombier
4824*0b459c2cSDavid du Colombier Also restore the registers for inner groups for,
4825*0b459c2cSDavid du Colombier e.g., `((a*)(b*))*' against `aba' (register 3 would
4826*0b459c2cSDavid du Colombier otherwise get trashed). */
4827*0b459c2cSDavid du Colombier
4828*0b459c2cSDavid du Colombier if (EVER_MATCHED_SOMETHING (reg_info[*p]))
4829*0b459c2cSDavid du Colombier {
4830*0b459c2cSDavid du Colombier unsigned r;
4831*0b459c2cSDavid du Colombier
4832*0b459c2cSDavid du Colombier EVER_MATCHED_SOMETHING (reg_info[*p]) = 0;
4833*0b459c2cSDavid du Colombier
4834*0b459c2cSDavid du Colombier /* Restore this and inner groups' (if any) registers. */
4835*0b459c2cSDavid du Colombier for (r = *p; r < *p + *(p + 1); r++)
4836*0b459c2cSDavid du Colombier {
4837*0b459c2cSDavid du Colombier regstart[r] = old_regstart[r];
4838*0b459c2cSDavid du Colombier
4839*0b459c2cSDavid du Colombier /* xx why this test? */
4840*0b459c2cSDavid du Colombier if (old_regend[r] >= regstart[r])
4841*0b459c2cSDavid du Colombier regend[r] = old_regend[r];
4842*0b459c2cSDavid du Colombier }
4843*0b459c2cSDavid du Colombier }
4844*0b459c2cSDavid du Colombier p1++;
4845*0b459c2cSDavid du Colombier EXTRACT_NUMBER_AND_INCR (mcnt, p1);
4846*0b459c2cSDavid du Colombier PUSH_FAILURE_POINT (p1 + mcnt, d, -2);
4847*0b459c2cSDavid du Colombier
4848*0b459c2cSDavid du Colombier goto fail;
4849*0b459c2cSDavid du Colombier }
4850*0b459c2cSDavid du Colombier }
4851*0b459c2cSDavid du Colombier
4852*0b459c2cSDavid du Colombier /* Move past the register number and the inner group count. */
4853*0b459c2cSDavid du Colombier p += 2;
4854*0b459c2cSDavid du Colombier break;
4855*0b459c2cSDavid du Colombier
4856*0b459c2cSDavid du Colombier
4857*0b459c2cSDavid du Colombier /* \<digit> has been turned into a `duplicate' command which is
4858*0b459c2cSDavid du Colombier followed by the numeric value of <digit> as the register number. */
4859*0b459c2cSDavid du Colombier case duplicate:
4860*0b459c2cSDavid du Colombier {
4861*0b459c2cSDavid du Colombier register const char *d2, *dend2;
4862*0b459c2cSDavid du Colombier int regno = *p++; /* Get which register to match against. */
4863*0b459c2cSDavid du Colombier DEBUG_PRINT2 ("EXECUTING duplicate %d.\n", regno);
4864*0b459c2cSDavid du Colombier
4865*0b459c2cSDavid du Colombier /* Can't back reference a group which we've never matched. */
4866*0b459c2cSDavid du Colombier if (REG_UNSET (regstart[regno]) || REG_UNSET (regend[regno]))
4867*0b459c2cSDavid du Colombier goto fail;
4868*0b459c2cSDavid du Colombier
4869*0b459c2cSDavid du Colombier /* Where in input to try to start matching. */
4870*0b459c2cSDavid du Colombier d2 = regstart[regno];
4871*0b459c2cSDavid du Colombier
4872*0b459c2cSDavid du Colombier /* Where to stop matching; if both the place to start and
4873*0b459c2cSDavid du Colombier the place to stop matching are in the same string, then
4874*0b459c2cSDavid du Colombier set to the place to stop, otherwise, for now have to use
4875*0b459c2cSDavid du Colombier the end of the first string. */
4876*0b459c2cSDavid du Colombier
4877*0b459c2cSDavid du Colombier dend2 = ((FIRST_STRING_P (regstart[regno])
4878*0b459c2cSDavid du Colombier == FIRST_STRING_P (regend[regno]))
4879*0b459c2cSDavid du Colombier ? regend[regno] : end_match_1);
4880*0b459c2cSDavid du Colombier for (;;)
4881*0b459c2cSDavid du Colombier {
4882*0b459c2cSDavid du Colombier /* If necessary, advance to next segment in register
4883*0b459c2cSDavid du Colombier contents. */
4884*0b459c2cSDavid du Colombier while (d2 == dend2)
4885*0b459c2cSDavid du Colombier {
4886*0b459c2cSDavid du Colombier if (dend2 == end_match_2) break;
4887*0b459c2cSDavid du Colombier if (dend2 == regend[regno]) break;
4888*0b459c2cSDavid du Colombier
4889*0b459c2cSDavid du Colombier /* End of string1 => advance to string2. */
4890*0b459c2cSDavid du Colombier d2 = string2;
4891*0b459c2cSDavid du Colombier dend2 = regend[regno];
4892*0b459c2cSDavid du Colombier }
4893*0b459c2cSDavid du Colombier /* At end of register contents => success */
4894*0b459c2cSDavid du Colombier if (d2 == dend2) break;
4895*0b459c2cSDavid du Colombier
4896*0b459c2cSDavid du Colombier /* If necessary, advance to next segment in data. */
4897*0b459c2cSDavid du Colombier PREFETCH ();
4898*0b459c2cSDavid du Colombier
4899*0b459c2cSDavid du Colombier /* How many characters left in this segment to match. */
4900*0b459c2cSDavid du Colombier mcnt = dend - d;
4901*0b459c2cSDavid du Colombier
4902*0b459c2cSDavid du Colombier /* Want how many consecutive characters we can match in
4903*0b459c2cSDavid du Colombier one shot, so, if necessary, adjust the count. */
4904*0b459c2cSDavid du Colombier if (mcnt > dend2 - d2)
4905*0b459c2cSDavid du Colombier mcnt = dend2 - d2;
4906*0b459c2cSDavid du Colombier
4907*0b459c2cSDavid du Colombier /* Compare that many; failure if mismatch, else move
4908*0b459c2cSDavid du Colombier past them. */
4909*0b459c2cSDavid du Colombier if (RE_TRANSLATE_P (translate)
4910*0b459c2cSDavid du Colombier ? bcmp_translate (d, d2, mcnt, translate)
4911*0b459c2cSDavid du Colombier : bcmp (d, d2, mcnt))
4912*0b459c2cSDavid du Colombier goto fail;
4913*0b459c2cSDavid du Colombier d += mcnt, d2 += mcnt;
4914*0b459c2cSDavid du Colombier
4915*0b459c2cSDavid du Colombier /* Do this because we've matched some characters. */
4916*0b459c2cSDavid du Colombier SET_REGS_MATCHED ();
4917*0b459c2cSDavid du Colombier }
4918*0b459c2cSDavid du Colombier }
4919*0b459c2cSDavid du Colombier break;
4920*0b459c2cSDavid du Colombier
4921*0b459c2cSDavid du Colombier
4922*0b459c2cSDavid du Colombier /* begline matches the empty string at the beginning of the string
4923*0b459c2cSDavid du Colombier (unless `not_bol' is set in `bufp'), and, if
4924*0b459c2cSDavid du Colombier `newline_anchor' is set, after newlines. */
4925*0b459c2cSDavid du Colombier case begline:
4926*0b459c2cSDavid du Colombier DEBUG_PRINT1 ("EXECUTING begline.\n");
4927*0b459c2cSDavid du Colombier
4928*0b459c2cSDavid du Colombier if (AT_STRINGS_BEG (d))
4929*0b459c2cSDavid du Colombier {
4930*0b459c2cSDavid du Colombier if (!bufp->not_bol) break;
4931*0b459c2cSDavid du Colombier }
4932*0b459c2cSDavid du Colombier else if (d[-1] == '\n' && bufp->newline_anchor)
4933*0b459c2cSDavid du Colombier {
4934*0b459c2cSDavid du Colombier break;
4935*0b459c2cSDavid du Colombier }
4936*0b459c2cSDavid du Colombier /* In all other cases, we fail. */
4937*0b459c2cSDavid du Colombier goto fail;
4938*0b459c2cSDavid du Colombier
4939*0b459c2cSDavid du Colombier
4940*0b459c2cSDavid du Colombier /* endline is the dual of begline. */
4941*0b459c2cSDavid du Colombier case endline:
4942*0b459c2cSDavid du Colombier DEBUG_PRINT1 ("EXECUTING endline.\n");
4943*0b459c2cSDavid du Colombier
4944*0b459c2cSDavid du Colombier if (AT_STRINGS_END (d))
4945*0b459c2cSDavid du Colombier {
4946*0b459c2cSDavid du Colombier if (!bufp->not_eol) break;
4947*0b459c2cSDavid du Colombier }
4948*0b459c2cSDavid du Colombier
4949*0b459c2cSDavid du Colombier /* We have to ``prefetch'' the next character. */
4950*0b459c2cSDavid du Colombier else if ((d == end1 ? *string2 : *d) == '\n'
4951*0b459c2cSDavid du Colombier && bufp->newline_anchor)
4952*0b459c2cSDavid du Colombier {
4953*0b459c2cSDavid du Colombier break;
4954*0b459c2cSDavid du Colombier }
4955*0b459c2cSDavid du Colombier goto fail;
4956*0b459c2cSDavid du Colombier
4957*0b459c2cSDavid du Colombier
4958*0b459c2cSDavid du Colombier /* Match at the very beginning of the data. */
4959*0b459c2cSDavid du Colombier case begbuf:
4960*0b459c2cSDavid du Colombier DEBUG_PRINT1 ("EXECUTING begbuf.\n");
4961*0b459c2cSDavid du Colombier if (AT_STRINGS_BEG (d))
4962*0b459c2cSDavid du Colombier break;
4963*0b459c2cSDavid du Colombier goto fail;
4964*0b459c2cSDavid du Colombier
4965*0b459c2cSDavid du Colombier
4966*0b459c2cSDavid du Colombier /* Match at the very end of the data. */
4967*0b459c2cSDavid du Colombier case endbuf:
4968*0b459c2cSDavid du Colombier DEBUG_PRINT1 ("EXECUTING endbuf.\n");
4969*0b459c2cSDavid du Colombier if (AT_STRINGS_END (d))
4970*0b459c2cSDavid du Colombier break;
4971*0b459c2cSDavid du Colombier goto fail;
4972*0b459c2cSDavid du Colombier
4973*0b459c2cSDavid du Colombier
4974*0b459c2cSDavid du Colombier /* on_failure_keep_string_jump is used to optimize `.*\n'. It
4975*0b459c2cSDavid du Colombier pushes NULL as the value for the string on the stack. Then
4976*0b459c2cSDavid du Colombier `pop_failure_point' will keep the current value for the
4977*0b459c2cSDavid du Colombier string, instead of restoring it. To see why, consider
4978*0b459c2cSDavid du Colombier matching `foo\nbar' against `.*\n'. The .* matches the foo;
4979*0b459c2cSDavid du Colombier then the . fails against the \n. But the next thing we want
4980*0b459c2cSDavid du Colombier to do is match the \n against the \n; if we restored the
4981*0b459c2cSDavid du Colombier string value, we would be back at the foo.
4982*0b459c2cSDavid du Colombier
4983*0b459c2cSDavid du Colombier Because this is used only in specific cases, we don't need to
4984*0b459c2cSDavid du Colombier check all the things that `on_failure_jump' does, to make
4985*0b459c2cSDavid du Colombier sure the right things get saved on the stack. Hence we don't
4986*0b459c2cSDavid du Colombier share its code. The only reason to push anything on the
4987*0b459c2cSDavid du Colombier stack at all is that otherwise we would have to change
4988*0b459c2cSDavid du Colombier `anychar's code to do something besides goto fail in this
4989*0b459c2cSDavid du Colombier case; that seems worse than this. */
4990*0b459c2cSDavid du Colombier case on_failure_keep_string_jump:
4991*0b459c2cSDavid du Colombier DEBUG_PRINT1 ("EXECUTING on_failure_keep_string_jump");
4992*0b459c2cSDavid du Colombier
4993*0b459c2cSDavid du Colombier EXTRACT_NUMBER_AND_INCR (mcnt, p);
4994*0b459c2cSDavid du Colombier DEBUG_PRINT3 (" %d (to 0x%x):\n", mcnt, p + mcnt);
4995*0b459c2cSDavid du Colombier
4996*0b459c2cSDavid du Colombier PUSH_FAILURE_POINT (p + mcnt, NULL, -2);
4997*0b459c2cSDavid du Colombier break;
4998*0b459c2cSDavid du Colombier
4999*0b459c2cSDavid du Colombier
5000*0b459c2cSDavid du Colombier /* Uses of on_failure_jump:
5001*0b459c2cSDavid du Colombier
5002*0b459c2cSDavid du Colombier Each alternative starts with an on_failure_jump that points
5003*0b459c2cSDavid du Colombier to the beginning of the next alternative. Each alternative
5004*0b459c2cSDavid du Colombier except the last ends with a jump that in effect jumps past
5005*0b459c2cSDavid du Colombier the rest of the alternatives. (They really jump to the
5006*0b459c2cSDavid du Colombier ending jump of the following alternative, because tensioning
5007*0b459c2cSDavid du Colombier these jumps is a hassle.)
5008*0b459c2cSDavid du Colombier
5009*0b459c2cSDavid du Colombier Repeats start with an on_failure_jump that points past both
5010*0b459c2cSDavid du Colombier the repetition text and either the following jump or
5011*0b459c2cSDavid du Colombier pop_failure_jump back to this on_failure_jump. */
5012*0b459c2cSDavid du Colombier case on_failure_jump:
5013*0b459c2cSDavid du Colombier on_failure:
5014*0b459c2cSDavid du Colombier DEBUG_PRINT1 ("EXECUTING on_failure_jump");
5015*0b459c2cSDavid du Colombier
5016*0b459c2cSDavid du Colombier #if defined (WINDOWSNT) && defined (emacs)
5017*0b459c2cSDavid du Colombier QUIT;
5018*0b459c2cSDavid du Colombier #endif
5019*0b459c2cSDavid du Colombier
5020*0b459c2cSDavid du Colombier EXTRACT_NUMBER_AND_INCR (mcnt, p);
5021*0b459c2cSDavid du Colombier DEBUG_PRINT3 (" %d (to 0x%x)", mcnt, p + mcnt);
5022*0b459c2cSDavid du Colombier
5023*0b459c2cSDavid du Colombier /* If this on_failure_jump comes right before a group (i.e.,
5024*0b459c2cSDavid du Colombier the original * applied to a group), save the information
5025*0b459c2cSDavid du Colombier for that group and all inner ones, so that if we fail back
5026*0b459c2cSDavid du Colombier to this point, the group's information will be correct.
5027*0b459c2cSDavid du Colombier For example, in \(a*\)*\1, we need the preceding group,
5028*0b459c2cSDavid du Colombier and in \(zz\(a*\)b*\)\2, we need the inner group. */
5029*0b459c2cSDavid du Colombier
5030*0b459c2cSDavid du Colombier /* We can't use `p' to check ahead because we push
5031*0b459c2cSDavid du Colombier a failure point to `p + mcnt' after we do this. */
5032*0b459c2cSDavid du Colombier p1 = p;
5033*0b459c2cSDavid du Colombier
5034*0b459c2cSDavid du Colombier /* We need to skip no_op's before we look for the
5035*0b459c2cSDavid du Colombier start_memory in case this on_failure_jump is happening as
5036*0b459c2cSDavid du Colombier the result of a completed succeed_n, as in \(a\)\{1,3\}b\1
5037*0b459c2cSDavid du Colombier against aba. */
5038*0b459c2cSDavid du Colombier while (p1 < pend && (re_opcode_t) *p1 == no_op)
5039*0b459c2cSDavid du Colombier p1++;
5040*0b459c2cSDavid du Colombier
5041*0b459c2cSDavid du Colombier if (p1 < pend && (re_opcode_t) *p1 == start_memory)
5042*0b459c2cSDavid du Colombier {
5043*0b459c2cSDavid du Colombier /* We have a new highest active register now. This will
5044*0b459c2cSDavid du Colombier get reset at the start_memory we are about to get to,
5045*0b459c2cSDavid du Colombier but we will have saved all the registers relevant to
5046*0b459c2cSDavid du Colombier this repetition op, as described above. */
5047*0b459c2cSDavid du Colombier highest_active_reg = *(p1 + 1) + *(p1 + 2);
5048*0b459c2cSDavid du Colombier if (lowest_active_reg == NO_LOWEST_ACTIVE_REG)
5049*0b459c2cSDavid du Colombier lowest_active_reg = *(p1 + 1);
5050*0b459c2cSDavid du Colombier }
5051*0b459c2cSDavid du Colombier
5052*0b459c2cSDavid du Colombier DEBUG_PRINT1 (":\n");
5053*0b459c2cSDavid du Colombier PUSH_FAILURE_POINT (p + mcnt, d, -2);
5054*0b459c2cSDavid du Colombier break;
5055*0b459c2cSDavid du Colombier
5056*0b459c2cSDavid du Colombier
5057*0b459c2cSDavid du Colombier /* A smart repeat ends with `maybe_pop_jump'.
5058*0b459c2cSDavid du Colombier We change it to either `pop_failure_jump' or `jump'. */
5059*0b459c2cSDavid du Colombier case maybe_pop_jump:
5060*0b459c2cSDavid du Colombier #if defined (WINDOWSNT) && defined (emacs)
5061*0b459c2cSDavid du Colombier QUIT;
5062*0b459c2cSDavid du Colombier #endif
5063*0b459c2cSDavid du Colombier EXTRACT_NUMBER_AND_INCR (mcnt, p);
5064*0b459c2cSDavid du Colombier DEBUG_PRINT2 ("EXECUTING maybe_pop_jump %d.\n", mcnt);
5065*0b459c2cSDavid du Colombier {
5066*0b459c2cSDavid du Colombier register unsigned char *p2 = p;
5067*0b459c2cSDavid du Colombier
5068*0b459c2cSDavid du Colombier /* Compare the beginning of the repeat with what follows
5069*0b459c2cSDavid du Colombier its end in the pattern. If we can establish that there
5070*0b459c2cSDavid du Colombier is nothing that they would both match, i.e., nothing that
5071*0b459c2cSDavid du Colombier would force us to backtrack (as in, e.g., `a*a'),
5072*0b459c2cSDavid du Colombier then we can change to pop_failure_jump, because we'll
5073*0b459c2cSDavid du Colombier never have to backtrack.
5074*0b459c2cSDavid du Colombier
5075*0b459c2cSDavid du Colombier This is not true in the case of alternatives: in
5076*0b459c2cSDavid du Colombier `(a|ab)*' we do need to backtrack to the `ab' alternative
5077*0b459c2cSDavid du Colombier (e.g., if the string was `ab'). But instead of trying to
5078*0b459c2cSDavid du Colombier detect that here, the alternative has put on a dummy
5079*0b459c2cSDavid du Colombier failure point which is what we will end up popping. */
5080*0b459c2cSDavid du Colombier
5081*0b459c2cSDavid du Colombier /* Skip over open/close-group commands.
5082*0b459c2cSDavid du Colombier If what follows this loop is a ...+ construct,
5083*0b459c2cSDavid du Colombier look at what begins its body, since we will have to
5084*0b459c2cSDavid du Colombier match at least one of that. */
5085*0b459c2cSDavid du Colombier while (1)
5086*0b459c2cSDavid du Colombier {
5087*0b459c2cSDavid du Colombier if (p2 + 2 < pend
5088*0b459c2cSDavid du Colombier && ((re_opcode_t) *p2 == stop_memory
5089*0b459c2cSDavid du Colombier || (re_opcode_t) *p2 == start_memory))
5090*0b459c2cSDavid du Colombier p2 += 3;
5091*0b459c2cSDavid du Colombier else if (p2 + 6 < pend
5092*0b459c2cSDavid du Colombier && (re_opcode_t) *p2 == dummy_failure_jump)
5093*0b459c2cSDavid du Colombier p2 += 6;
5094*0b459c2cSDavid du Colombier else
5095*0b459c2cSDavid du Colombier break;
5096*0b459c2cSDavid du Colombier }
5097*0b459c2cSDavid du Colombier
5098*0b459c2cSDavid du Colombier p1 = p + mcnt;
5099*0b459c2cSDavid du Colombier /* p1[0] ... p1[2] are the `on_failure_jump' corresponding
5100*0b459c2cSDavid du Colombier to the `maybe_pop_jump' of this case. Examine what
5101*0b459c2cSDavid du Colombier follows. */
5102*0b459c2cSDavid du Colombier
5103*0b459c2cSDavid du Colombier /* If we're at the end of the pattern, we can change. */
5104*0b459c2cSDavid du Colombier if (p2 == pend)
5105*0b459c2cSDavid du Colombier {
5106*0b459c2cSDavid du Colombier /* Consider what happens when matching ":\(.*\)"
5107*0b459c2cSDavid du Colombier against ":/". I don't really understand this code
5108*0b459c2cSDavid du Colombier yet. */
5109*0b459c2cSDavid du Colombier p[-3] = (unsigned char) pop_failure_jump;
5110*0b459c2cSDavid du Colombier DEBUG_PRINT1
5111*0b459c2cSDavid du Colombier (" End of pattern: change to `pop_failure_jump'.\n");
5112*0b459c2cSDavid du Colombier }
5113*0b459c2cSDavid du Colombier
5114*0b459c2cSDavid du Colombier else if ((re_opcode_t) *p2 == exactn
5115*0b459c2cSDavid du Colombier || (bufp->newline_anchor && (re_opcode_t) *p2 == endline))
5116*0b459c2cSDavid du Colombier {
5117*0b459c2cSDavid du Colombier register unsigned int c
5118*0b459c2cSDavid du Colombier = *p2 == (unsigned char) endline ? '\n' : p2[2];
5119*0b459c2cSDavid du Colombier
5120*0b459c2cSDavid du Colombier if ((re_opcode_t) p1[3] == exactn)
5121*0b459c2cSDavid du Colombier {
5122*0b459c2cSDavid du Colombier if (!(multibyte /* && (c != '\n') */
5123*0b459c2cSDavid du Colombier && BASE_LEADING_CODE_P (c))
5124*0b459c2cSDavid du Colombier ? c != p1[5]
5125*0b459c2cSDavid du Colombier : (STRING_CHAR (&p2[2], pend - &p2[2])
5126*0b459c2cSDavid du Colombier != STRING_CHAR (&p1[5], pend - &p1[5])))
5127*0b459c2cSDavid du Colombier {
5128*0b459c2cSDavid du Colombier p[-3] = (unsigned char) pop_failure_jump;
5129*0b459c2cSDavid du Colombier DEBUG_PRINT3 (" %c != %c => pop_failure_jump.\n",
5130*0b459c2cSDavid du Colombier c, p1[5]);
5131*0b459c2cSDavid du Colombier }
5132*0b459c2cSDavid du Colombier }
5133*0b459c2cSDavid du Colombier
5134*0b459c2cSDavid du Colombier else if ((re_opcode_t) p1[3] == charset
5135*0b459c2cSDavid du Colombier || (re_opcode_t) p1[3] == charset_not)
5136*0b459c2cSDavid du Colombier {
5137*0b459c2cSDavid du Colombier int not = (re_opcode_t) p1[3] == charset_not;
5138*0b459c2cSDavid du Colombier
5139*0b459c2cSDavid du Colombier if (multibyte /* && (c != '\n') */
5140*0b459c2cSDavid du Colombier && BASE_LEADING_CODE_P (c))
5141*0b459c2cSDavid du Colombier c = STRING_CHAR (&p2[2], pend - &p2[2]);
5142*0b459c2cSDavid du Colombier
5143*0b459c2cSDavid du Colombier /* Test if C is listed in charset (or charset_not)
5144*0b459c2cSDavid du Colombier at `&p1[3]'. */
5145*0b459c2cSDavid du Colombier if (SINGLE_BYTE_CHAR_P (c))
5146*0b459c2cSDavid du Colombier {
5147*0b459c2cSDavid du Colombier if (c < CHARSET_BITMAP_SIZE (&p1[3]) * BYTEWIDTH
5148*0b459c2cSDavid du Colombier && p1[5 + c / BYTEWIDTH] & (1 << (c % BYTEWIDTH)))
5149*0b459c2cSDavid du Colombier not = !not;
5150*0b459c2cSDavid du Colombier }
5151*0b459c2cSDavid du Colombier else if (CHARSET_RANGE_TABLE_EXISTS_P (&p1[3]))
5152*0b459c2cSDavid du Colombier CHARSET_LOOKUP_RANGE_TABLE (not, c, &p1[3]);
5153*0b459c2cSDavid du Colombier
5154*0b459c2cSDavid du Colombier /* `not' is equal to 1 if c would match, which means
5155*0b459c2cSDavid du Colombier that we can't change to pop_failure_jump. */
5156*0b459c2cSDavid du Colombier if (!not)
5157*0b459c2cSDavid du Colombier {
5158*0b459c2cSDavid du Colombier p[-3] = (unsigned char) pop_failure_jump;
5159*0b459c2cSDavid du Colombier DEBUG_PRINT1 (" No match => pop_failure_jump.\n");
5160*0b459c2cSDavid du Colombier }
5161*0b459c2cSDavid du Colombier }
5162*0b459c2cSDavid du Colombier }
5163*0b459c2cSDavid du Colombier else if ((re_opcode_t) *p2 == charset)
5164*0b459c2cSDavid du Colombier {
5165*0b459c2cSDavid du Colombier if ((re_opcode_t) p1[3] == exactn)
5166*0b459c2cSDavid du Colombier {
5167*0b459c2cSDavid du Colombier register unsigned int c = p1[5];
5168*0b459c2cSDavid du Colombier int not = 0;
5169*0b459c2cSDavid du Colombier
5170*0b459c2cSDavid du Colombier if (multibyte && BASE_LEADING_CODE_P (c))
5171*0b459c2cSDavid du Colombier c = STRING_CHAR (&p1[5], pend - &p1[5]);
5172*0b459c2cSDavid du Colombier
5173*0b459c2cSDavid du Colombier /* Test if C is listed in charset at `p2'. */
5174*0b459c2cSDavid du Colombier if (SINGLE_BYTE_CHAR_P (c))
5175*0b459c2cSDavid du Colombier {
5176*0b459c2cSDavid du Colombier if (c < CHARSET_BITMAP_SIZE (p2) * BYTEWIDTH
5177*0b459c2cSDavid du Colombier && (p2[2 + c / BYTEWIDTH]
5178*0b459c2cSDavid du Colombier & (1 << (c % BYTEWIDTH))))
5179*0b459c2cSDavid du Colombier not = !not;
5180*0b459c2cSDavid du Colombier }
5181*0b459c2cSDavid du Colombier else if (CHARSET_RANGE_TABLE_EXISTS_P (p2))
5182*0b459c2cSDavid du Colombier CHARSET_LOOKUP_RANGE_TABLE (not, c, p2);
5183*0b459c2cSDavid du Colombier
5184*0b459c2cSDavid du Colombier if (!not)
5185*0b459c2cSDavid du Colombier {
5186*0b459c2cSDavid du Colombier p[-3] = (unsigned char) pop_failure_jump;
5187*0b459c2cSDavid du Colombier DEBUG_PRINT1 (" No match => pop_failure_jump.\n");
5188*0b459c2cSDavid du Colombier }
5189*0b459c2cSDavid du Colombier }
5190*0b459c2cSDavid du Colombier
5191*0b459c2cSDavid du Colombier /* It is hard to enumerate all the characters in charset
5192*0b459c2cSDavid du Colombier P2 if it includes multibyte characters. Give up in
5193*0b459c2cSDavid du Colombier that case. */
5194*0b459c2cSDavid du Colombier else if (!multibyte || !CHARSET_RANGE_TABLE_EXISTS_P (p2))
5195*0b459c2cSDavid du Colombier {
5196*0b459c2cSDavid du Colombier /* Now, we are sure that P2 has no range table.
5197*0b459c2cSDavid du Colombier So, for the size of bitmap in P2, `p2[1]' is
5198*0b459c2cSDavid du Colombier enough. But P1 may have range table, so the
5199*0b459c2cSDavid du Colombier size of bitmap table of P1 is extracted by
5200*0b459c2cSDavid du Colombier using macro `CHARSET_BITMAP_SIZE'.
5201*0b459c2cSDavid du Colombier
5202*0b459c2cSDavid du Colombier Since we know that all the characters listed in
5203*0b459c2cSDavid du Colombier P2 are ASCII, it is enough to test only the bitmap
5204*0b459c2cSDavid du Colombier table of P1. */
5205*0b459c2cSDavid du Colombier
5206*0b459c2cSDavid du Colombier if ((re_opcode_t) p1[3] == charset_not)
5207*0b459c2cSDavid du Colombier {
5208*0b459c2cSDavid du Colombier int idx;
5209*0b459c2cSDavid du Colombier /* We win if the charset_not inside the loop lists
5210*0b459c2cSDavid du Colombier every character listed in the charset after. */
5211*0b459c2cSDavid du Colombier for (idx = 0; idx < (int) p2[1]; idx++)
5212*0b459c2cSDavid du Colombier if (! (p2[2 + idx] == 0
5213*0b459c2cSDavid du Colombier || (idx < CHARSET_BITMAP_SIZE (&p1[3])
5214*0b459c2cSDavid du Colombier && ((p2[2 + idx] & ~ p1[5 + idx]) == 0))))
5215*0b459c2cSDavid du Colombier break;
5216*0b459c2cSDavid du Colombier
5217*0b459c2cSDavid du Colombier if (idx == p2[1])
5218*0b459c2cSDavid du Colombier {
5219*0b459c2cSDavid du Colombier p[-3] = (unsigned char) pop_failure_jump;
5220*0b459c2cSDavid du Colombier DEBUG_PRINT1 (" No match => pop_failure_jump.\n");
5221*0b459c2cSDavid du Colombier }
5222*0b459c2cSDavid du Colombier }
5223*0b459c2cSDavid du Colombier else if ((re_opcode_t) p1[3] == charset)
5224*0b459c2cSDavid du Colombier {
5225*0b459c2cSDavid du Colombier int idx;
5226*0b459c2cSDavid du Colombier /* We win if the charset inside the loop
5227*0b459c2cSDavid du Colombier has no overlap with the one after the loop. */
5228*0b459c2cSDavid du Colombier for (idx = 0;
5229*0b459c2cSDavid du Colombier (idx < (int) p2[1]
5230*0b459c2cSDavid du Colombier && idx < CHARSET_BITMAP_SIZE (&p1[3]));
5231*0b459c2cSDavid du Colombier idx++)
5232*0b459c2cSDavid du Colombier if ((p2[2 + idx] & p1[5 + idx]) != 0)
5233*0b459c2cSDavid du Colombier break;
5234*0b459c2cSDavid du Colombier
5235*0b459c2cSDavid du Colombier if (idx == p2[1]
5236*0b459c2cSDavid du Colombier || idx == CHARSET_BITMAP_SIZE (&p1[3]))
5237*0b459c2cSDavid du Colombier {
5238*0b459c2cSDavid du Colombier p[-3] = (unsigned char) pop_failure_jump;
5239*0b459c2cSDavid du Colombier DEBUG_PRINT1 (" No match => pop_failure_jump.\n");
5240*0b459c2cSDavid du Colombier }
5241*0b459c2cSDavid du Colombier }
5242*0b459c2cSDavid du Colombier }
5243*0b459c2cSDavid du Colombier }
5244*0b459c2cSDavid du Colombier }
5245*0b459c2cSDavid du Colombier p -= 2; /* Point at relative address again. */
5246*0b459c2cSDavid du Colombier if ((re_opcode_t) p[-1] != pop_failure_jump)
5247*0b459c2cSDavid du Colombier {
5248*0b459c2cSDavid du Colombier p[-1] = (unsigned char) jump;
5249*0b459c2cSDavid du Colombier DEBUG_PRINT1 (" Match => jump.\n");
5250*0b459c2cSDavid du Colombier goto unconditional_jump;
5251*0b459c2cSDavid du Colombier }
5252*0b459c2cSDavid du Colombier /* Note fall through. */
5253*0b459c2cSDavid du Colombier
5254*0b459c2cSDavid du Colombier
5255*0b459c2cSDavid du Colombier /* The end of a simple repeat has a pop_failure_jump back to
5256*0b459c2cSDavid du Colombier its matching on_failure_jump, where the latter will push a
5257*0b459c2cSDavid du Colombier failure point. The pop_failure_jump takes off failure
5258*0b459c2cSDavid du Colombier points put on by this pop_failure_jump's matching
5259*0b459c2cSDavid du Colombier on_failure_jump; we got through the pattern to here from the
5260*0b459c2cSDavid du Colombier matching on_failure_jump, so didn't fail. */
5261*0b459c2cSDavid du Colombier case pop_failure_jump:
5262*0b459c2cSDavid du Colombier {
5263*0b459c2cSDavid du Colombier /* We need to pass separate storage for the lowest and
5264*0b459c2cSDavid du Colombier highest registers, even though we don't care about the
5265*0b459c2cSDavid du Colombier actual values. Otherwise, we will restore only one
5266*0b459c2cSDavid du Colombier register from the stack, since lowest will == highest in
5267*0b459c2cSDavid du Colombier `pop_failure_point'. */
5268*0b459c2cSDavid du Colombier unsigned dummy_low_reg, dummy_high_reg;
5269*0b459c2cSDavid du Colombier unsigned char *pdummy;
5270*0b459c2cSDavid du Colombier const char *sdummy;
5271*0b459c2cSDavid du Colombier
5272*0b459c2cSDavid du Colombier DEBUG_PRINT1 ("EXECUTING pop_failure_jump.\n");
5273*0b459c2cSDavid du Colombier POP_FAILURE_POINT (sdummy, pdummy,
5274*0b459c2cSDavid du Colombier dummy_low_reg, dummy_high_reg,
5275*0b459c2cSDavid du Colombier reg_dummy, reg_dummy, reg_info_dummy);
5276*0b459c2cSDavid du Colombier }
5277*0b459c2cSDavid du Colombier /* Note fall through. */
5278*0b459c2cSDavid du Colombier
5279*0b459c2cSDavid du Colombier
5280*0b459c2cSDavid du Colombier /* Unconditionally jump (without popping any failure points). */
5281*0b459c2cSDavid du Colombier case jump:
5282*0b459c2cSDavid du Colombier unconditional_jump:
5283*0b459c2cSDavid du Colombier #if defined (WINDOWSNT) && defined (emacs)
5284*0b459c2cSDavid du Colombier QUIT;
5285*0b459c2cSDavid du Colombier #endif
5286*0b459c2cSDavid du Colombier EXTRACT_NUMBER_AND_INCR (mcnt, p); /* Get the amount to jump. */
5287*0b459c2cSDavid du Colombier DEBUG_PRINT2 ("EXECUTING jump %d ", mcnt);
5288*0b459c2cSDavid du Colombier p += mcnt; /* Do the jump. */
5289*0b459c2cSDavid du Colombier DEBUG_PRINT2 ("(to 0x%x).\n", p);
5290*0b459c2cSDavid du Colombier break;
5291*0b459c2cSDavid du Colombier
5292*0b459c2cSDavid du Colombier
5293*0b459c2cSDavid du Colombier /* We need this opcode so we can detect where alternatives end
5294*0b459c2cSDavid du Colombier in `group_match_null_string_p' et al. */
5295*0b459c2cSDavid du Colombier case jump_past_alt:
5296*0b459c2cSDavid du Colombier DEBUG_PRINT1 ("EXECUTING jump_past_alt.\n");
5297*0b459c2cSDavid du Colombier goto unconditional_jump;
5298*0b459c2cSDavid du Colombier
5299*0b459c2cSDavid du Colombier
5300*0b459c2cSDavid du Colombier /* Normally, the on_failure_jump pushes a failure point, which
5301*0b459c2cSDavid du Colombier then gets popped at pop_failure_jump. We will end up at
5302*0b459c2cSDavid du Colombier pop_failure_jump, also, and with a pattern of, say, `a+', we
5303*0b459c2cSDavid du Colombier are skipping over the on_failure_jump, so we have to push
5304*0b459c2cSDavid du Colombier something meaningless for pop_failure_jump to pop. */
5305*0b459c2cSDavid du Colombier case dummy_failure_jump:
5306*0b459c2cSDavid du Colombier DEBUG_PRINT1 ("EXECUTING dummy_failure_jump.\n");
5307*0b459c2cSDavid du Colombier /* It doesn't matter what we push for the string here. What
5308*0b459c2cSDavid du Colombier the code at `fail' tests is the value for the pattern. */
5309*0b459c2cSDavid du Colombier PUSH_FAILURE_POINT (0, 0, -2);
5310*0b459c2cSDavid du Colombier goto unconditional_jump;
5311*0b459c2cSDavid du Colombier
5312*0b459c2cSDavid du Colombier
5313*0b459c2cSDavid du Colombier /* At the end of an alternative, we need to push a dummy failure
5314*0b459c2cSDavid du Colombier point in case we are followed by a `pop_failure_jump', because
5315*0b459c2cSDavid du Colombier we don't want the failure point for the alternative to be
5316*0b459c2cSDavid du Colombier popped. For example, matching `(a|ab)*' against `aab'
5317*0b459c2cSDavid du Colombier requires that we match the `ab' alternative. */
5318*0b459c2cSDavid du Colombier case push_dummy_failure:
5319*0b459c2cSDavid du Colombier DEBUG_PRINT1 ("EXECUTING push_dummy_failure.\n");
5320*0b459c2cSDavid du Colombier /* See comments just above at `dummy_failure_jump' about the
5321*0b459c2cSDavid du Colombier two zeroes. */
5322*0b459c2cSDavid du Colombier PUSH_FAILURE_POINT (0, 0, -2);
5323*0b459c2cSDavid du Colombier break;
5324*0b459c2cSDavid du Colombier
5325*0b459c2cSDavid du Colombier /* Have to succeed matching what follows at least n times.
5326*0b459c2cSDavid du Colombier After that, handle like `on_failure_jump'. */
5327*0b459c2cSDavid du Colombier case succeed_n:
5328*0b459c2cSDavid du Colombier EXTRACT_NUMBER (mcnt, p + 2);
5329*0b459c2cSDavid du Colombier DEBUG_PRINT2 ("EXECUTING succeed_n %d.\n", mcnt);
5330*0b459c2cSDavid du Colombier
5331*0b459c2cSDavid du Colombier assert (mcnt >= 0);
5332*0b459c2cSDavid du Colombier /* Originally, this is how many times we HAVE to succeed. */
5333*0b459c2cSDavid du Colombier if (mcnt > 0)
5334*0b459c2cSDavid du Colombier {
5335*0b459c2cSDavid du Colombier mcnt--;
5336*0b459c2cSDavid du Colombier p += 2;
5337*0b459c2cSDavid du Colombier STORE_NUMBER_AND_INCR (p, mcnt);
5338*0b459c2cSDavid du Colombier DEBUG_PRINT3 (" Setting 0x%x to %d.\n", p, mcnt);
5339*0b459c2cSDavid du Colombier }
5340*0b459c2cSDavid du Colombier else if (mcnt == 0)
5341*0b459c2cSDavid du Colombier {
5342*0b459c2cSDavid du Colombier DEBUG_PRINT2 (" Setting two bytes from 0x%x to no_op.\n", p+2);
5343*0b459c2cSDavid du Colombier p[2] = (unsigned char) no_op;
5344*0b459c2cSDavid du Colombier p[3] = (unsigned char) no_op;
5345*0b459c2cSDavid du Colombier goto on_failure;
5346*0b459c2cSDavid du Colombier }
5347*0b459c2cSDavid du Colombier break;
5348*0b459c2cSDavid du Colombier
5349*0b459c2cSDavid du Colombier case jump_n:
5350*0b459c2cSDavid du Colombier EXTRACT_NUMBER (mcnt, p + 2);
5351*0b459c2cSDavid du Colombier DEBUG_PRINT2 ("EXECUTING jump_n %d.\n", mcnt);
5352*0b459c2cSDavid du Colombier
5353*0b459c2cSDavid du Colombier /* Originally, this is how many times we CAN jump. */
5354*0b459c2cSDavid du Colombier if (mcnt)
5355*0b459c2cSDavid du Colombier {
5356*0b459c2cSDavid du Colombier mcnt--;
5357*0b459c2cSDavid du Colombier STORE_NUMBER (p + 2, mcnt);
5358*0b459c2cSDavid du Colombier goto unconditional_jump;
5359*0b459c2cSDavid du Colombier }
5360*0b459c2cSDavid du Colombier /* If we don't have to jump any more, skip over the rest of the command. */
5361*0b459c2cSDavid du Colombier else
5362*0b459c2cSDavid du Colombier p += 4;
5363*0b459c2cSDavid du Colombier break;
5364*0b459c2cSDavid du Colombier
5365*0b459c2cSDavid du Colombier case set_number_at:
5366*0b459c2cSDavid du Colombier {
5367*0b459c2cSDavid du Colombier DEBUG_PRINT1 ("EXECUTING set_number_at.\n");
5368*0b459c2cSDavid du Colombier
5369*0b459c2cSDavid du Colombier EXTRACT_NUMBER_AND_INCR (mcnt, p);
5370*0b459c2cSDavid du Colombier p1 = p + mcnt;
5371*0b459c2cSDavid du Colombier EXTRACT_NUMBER_AND_INCR (mcnt, p);
5372*0b459c2cSDavid du Colombier DEBUG_PRINT3 (" Setting 0x%x to %d.\n", p1, mcnt);
5373*0b459c2cSDavid du Colombier STORE_NUMBER (p1, mcnt);
5374*0b459c2cSDavid du Colombier break;
5375*0b459c2cSDavid du Colombier }
5376*0b459c2cSDavid du Colombier
5377*0b459c2cSDavid du Colombier case wordbound:
5378*0b459c2cSDavid du Colombier DEBUG_PRINT1 ("EXECUTING wordbound.\n");
5379*0b459c2cSDavid du Colombier
5380*0b459c2cSDavid du Colombier /* We SUCCEED in one of the following cases: */
5381*0b459c2cSDavid du Colombier
5382*0b459c2cSDavid du Colombier /* Case 1: D is at the beginning or the end of string. */
5383*0b459c2cSDavid du Colombier if (AT_STRINGS_BEG (d) || AT_STRINGS_END (d))
5384*0b459c2cSDavid du Colombier break;
5385*0b459c2cSDavid du Colombier else
5386*0b459c2cSDavid du Colombier {
5387*0b459c2cSDavid du Colombier /* C1 is the character before D, S1 is the syntax of C1, C2
5388*0b459c2cSDavid du Colombier is the character at D, and S2 is the syntax of C2. */
5389*0b459c2cSDavid du Colombier int c1, c2, s1, s2;
5390*0b459c2cSDavid du Colombier int pos1 = PTR_TO_OFFSET (d - 1);
5391*0b459c2cSDavid du Colombier int charpos;
5392*0b459c2cSDavid du Colombier
5393*0b459c2cSDavid du Colombier GET_CHAR_BEFORE_2 (c1, d, string1, end1, string2, end2);
5394*0b459c2cSDavid du Colombier GET_CHAR_AFTER_2 (c2, d, string1, end1, string2, end2);
5395*0b459c2cSDavid du Colombier #ifdef emacs
5396*0b459c2cSDavid du Colombier charpos = SYNTAX_TABLE_BYTE_TO_CHAR (pos1);
5397*0b459c2cSDavid du Colombier UPDATE_SYNTAX_TABLE (charpos);
5398*0b459c2cSDavid du Colombier #endif
5399*0b459c2cSDavid du Colombier s1 = SYNTAX (c1);
5400*0b459c2cSDavid du Colombier #ifdef emacs
5401*0b459c2cSDavid du Colombier UPDATE_SYNTAX_TABLE_FORWARD (charpos + 1);
5402*0b459c2cSDavid du Colombier #endif
5403*0b459c2cSDavid du Colombier s2 = SYNTAX (c2);
5404*0b459c2cSDavid du Colombier
5405*0b459c2cSDavid du Colombier if (/* Case 2: Only one of S1 and S2 is Sword. */
5406*0b459c2cSDavid du Colombier ((s1 == Sword) != (s2 == Sword))
5407*0b459c2cSDavid du Colombier /* Case 3: Both of S1 and S2 are Sword, and macro
5408*0b459c2cSDavid du Colombier WORD_BOUNDARY_P (C1, C2) returns nonzero. */
5409*0b459c2cSDavid du Colombier || ((s1 == Sword) && WORD_BOUNDARY_P (c1, c2)))
5410*0b459c2cSDavid du Colombier break;
5411*0b459c2cSDavid du Colombier }
5412*0b459c2cSDavid du Colombier goto fail;
5413*0b459c2cSDavid du Colombier
5414*0b459c2cSDavid du Colombier case notwordbound:
5415*0b459c2cSDavid du Colombier DEBUG_PRINT1 ("EXECUTING notwordbound.\n");
5416*0b459c2cSDavid du Colombier
5417*0b459c2cSDavid du Colombier /* We FAIL in one of the following cases: */
5418*0b459c2cSDavid du Colombier
5419*0b459c2cSDavid du Colombier /* Case 1: D is at the beginning or the end of string. */
5420*0b459c2cSDavid du Colombier if (AT_STRINGS_BEG (d) || AT_STRINGS_END (d))
5421*0b459c2cSDavid du Colombier goto fail;
5422*0b459c2cSDavid du Colombier else
5423*0b459c2cSDavid du Colombier {
5424*0b459c2cSDavid du Colombier /* C1 is the character before D, S1 is the syntax of C1, C2
5425*0b459c2cSDavid du Colombier is the character at D, and S2 is the syntax of C2. */
5426*0b459c2cSDavid du Colombier int c1, c2, s1, s2;
5427*0b459c2cSDavid du Colombier int pos1 = PTR_TO_OFFSET (d - 1);
5428*0b459c2cSDavid du Colombier int charpos;
5429*0b459c2cSDavid du Colombier
5430*0b459c2cSDavid du Colombier GET_CHAR_BEFORE_2 (c1, d, string1, end1, string2, end2);
5431*0b459c2cSDavid du Colombier GET_CHAR_AFTER_2 (c2, d, string1, end1, string2, end2);
5432*0b459c2cSDavid du Colombier #ifdef emacs
5433*0b459c2cSDavid du Colombier charpos = SYNTAX_TABLE_BYTE_TO_CHAR (pos1);
5434*0b459c2cSDavid du Colombier UPDATE_SYNTAX_TABLE (charpos);
5435*0b459c2cSDavid du Colombier #endif
5436*0b459c2cSDavid du Colombier s1 = SYNTAX (c1);
5437*0b459c2cSDavid du Colombier #ifdef emacs
5438*0b459c2cSDavid du Colombier UPDATE_SYNTAX_TABLE_FORWARD (charpos + 1);
5439*0b459c2cSDavid du Colombier #endif
5440*0b459c2cSDavid du Colombier s2 = SYNTAX (c2);
5441*0b459c2cSDavid du Colombier
5442*0b459c2cSDavid du Colombier if (/* Case 2: Only one of S1 and S2 is Sword. */
5443*0b459c2cSDavid du Colombier ((s1 == Sword) != (s2 == Sword))
5444*0b459c2cSDavid du Colombier /* Case 3: Both of S1 and S2 are Sword, and macro
5445*0b459c2cSDavid du Colombier WORD_BOUNDARY_P (C1, C2) returns nonzero. */
5446*0b459c2cSDavid du Colombier || ((s1 == Sword) && WORD_BOUNDARY_P (c1, c2)))
5447*0b459c2cSDavid du Colombier goto fail;
5448*0b459c2cSDavid du Colombier }
5449*0b459c2cSDavid du Colombier break;
5450*0b459c2cSDavid du Colombier
5451*0b459c2cSDavid du Colombier case wordbeg:
5452*0b459c2cSDavid du Colombier DEBUG_PRINT1 ("EXECUTING wordbeg.\n");
5453*0b459c2cSDavid du Colombier
5454*0b459c2cSDavid du Colombier /* We FAIL in one of the following cases: */
5455*0b459c2cSDavid du Colombier
5456*0b459c2cSDavid du Colombier /* Case 1: D is at the end of string. */
5457*0b459c2cSDavid du Colombier if (AT_STRINGS_END (d))
5458*0b459c2cSDavid du Colombier goto fail;
5459*0b459c2cSDavid du Colombier else
5460*0b459c2cSDavid du Colombier {
5461*0b459c2cSDavid du Colombier /* C1 is the character before D, S1 is the syntax of C1, C2
5462*0b459c2cSDavid du Colombier is the character at D, and S2 is the syntax of C2. */
5463*0b459c2cSDavid du Colombier int c1, c2, s1, s2;
5464*0b459c2cSDavid du Colombier int pos1 = PTR_TO_OFFSET (d);
5465*0b459c2cSDavid du Colombier int charpos;
5466*0b459c2cSDavid du Colombier
5467*0b459c2cSDavid du Colombier GET_CHAR_AFTER_2 (c2, d, string1, end1, string2, end2);
5468*0b459c2cSDavid du Colombier #ifdef emacs
5469*0b459c2cSDavid du Colombier charpos = SYNTAX_TABLE_BYTE_TO_CHAR (pos1);
5470*0b459c2cSDavid du Colombier UPDATE_SYNTAX_TABLE (charpos);
5471*0b459c2cSDavid du Colombier #endif
5472*0b459c2cSDavid du Colombier s2 = SYNTAX (c2);
5473*0b459c2cSDavid du Colombier
5474*0b459c2cSDavid du Colombier /* Case 2: S2 is not Sword. */
5475*0b459c2cSDavid du Colombier if (s2 != Sword)
5476*0b459c2cSDavid du Colombier goto fail;
5477*0b459c2cSDavid du Colombier
5478*0b459c2cSDavid du Colombier /* Case 3: D is not at the beginning of string ... */
5479*0b459c2cSDavid du Colombier if (!AT_STRINGS_BEG (d))
5480*0b459c2cSDavid du Colombier {
5481*0b459c2cSDavid du Colombier GET_CHAR_BEFORE_2 (c1, d, string1, end1, string2, end2);
5482*0b459c2cSDavid du Colombier #ifdef emacs
5483*0b459c2cSDavid du Colombier UPDATE_SYNTAX_TABLE_BACKWARD (charpos - 1);
5484*0b459c2cSDavid du Colombier #endif
5485*0b459c2cSDavid du Colombier s1 = SYNTAX (c1);
5486*0b459c2cSDavid du Colombier
5487*0b459c2cSDavid du Colombier /* ... and S1 is Sword, and WORD_BOUNDARY_P (C1, C2)
5488*0b459c2cSDavid du Colombier returns 0. */
5489*0b459c2cSDavid du Colombier if ((s1 == Sword) && !WORD_BOUNDARY_P (c1, c2))
5490*0b459c2cSDavid du Colombier goto fail;
5491*0b459c2cSDavid du Colombier }
5492*0b459c2cSDavid du Colombier }
5493*0b459c2cSDavid du Colombier break;
5494*0b459c2cSDavid du Colombier
5495*0b459c2cSDavid du Colombier case wordend:
5496*0b459c2cSDavid du Colombier DEBUG_PRINT1 ("EXECUTING wordend.\n");
5497*0b459c2cSDavid du Colombier
5498*0b459c2cSDavid du Colombier /* We FAIL in one of the following cases: */
5499*0b459c2cSDavid du Colombier
5500*0b459c2cSDavid du Colombier /* Case 1: D is at the beginning of string. */
5501*0b459c2cSDavid du Colombier if (AT_STRINGS_BEG (d))
5502*0b459c2cSDavid du Colombier goto fail;
5503*0b459c2cSDavid du Colombier else
5504*0b459c2cSDavid du Colombier {
5505*0b459c2cSDavid du Colombier /* C1 is the character before D, S1 is the syntax of C1, C2
5506*0b459c2cSDavid du Colombier is the character at D, and S2 is the syntax of C2. */
5507*0b459c2cSDavid du Colombier int c1, c2, s1, s2;
5508*0b459c2cSDavid du Colombier int pos1 = PTR_TO_OFFSET (d);
5509*0b459c2cSDavid du Colombier int charpos;
5510*0b459c2cSDavid du Colombier
5511*0b459c2cSDavid du Colombier GET_CHAR_BEFORE_2 (c1, d, string1, end1, string2, end2);
5512*0b459c2cSDavid du Colombier #ifdef emacs
5513*0b459c2cSDavid du Colombier charpos = SYNTAX_TABLE_BYTE_TO_CHAR (pos1 - 1);
5514*0b459c2cSDavid du Colombier UPDATE_SYNTAX_TABLE (charpos);
5515*0b459c2cSDavid du Colombier #endif
5516*0b459c2cSDavid du Colombier s1 = SYNTAX (c1);
5517*0b459c2cSDavid du Colombier
5518*0b459c2cSDavid du Colombier /* Case 2: S1 is not Sword. */
5519*0b459c2cSDavid du Colombier if (s1 != Sword)
5520*0b459c2cSDavid du Colombier goto fail;
5521*0b459c2cSDavid du Colombier
5522*0b459c2cSDavid du Colombier /* Case 3: D is not at the end of string ... */
5523*0b459c2cSDavid du Colombier if (!AT_STRINGS_END (d))
5524*0b459c2cSDavid du Colombier {
5525*0b459c2cSDavid du Colombier GET_CHAR_AFTER_2 (c2, d, string1, end1, string2, end2);
5526*0b459c2cSDavid du Colombier #ifdef emacs
5527*0b459c2cSDavid du Colombier UPDATE_SYNTAX_TABLE_FORWARD (charpos);
5528*0b459c2cSDavid du Colombier #endif
5529*0b459c2cSDavid du Colombier s2 = SYNTAX (c2);
5530*0b459c2cSDavid du Colombier
5531*0b459c2cSDavid du Colombier /* ... and S2 is Sword, and WORD_BOUNDARY_P (C1, C2)
5532*0b459c2cSDavid du Colombier returns 0. */
5533*0b459c2cSDavid du Colombier if ((s2 == Sword) && !WORD_BOUNDARY_P (c1, c2))
5534*0b459c2cSDavid du Colombier goto fail;
5535*0b459c2cSDavid du Colombier }
5536*0b459c2cSDavid du Colombier }
5537*0b459c2cSDavid du Colombier break;
5538*0b459c2cSDavid du Colombier
5539*0b459c2cSDavid du Colombier #ifdef emacs
5540*0b459c2cSDavid du Colombier case before_dot:
5541*0b459c2cSDavid du Colombier DEBUG_PRINT1 ("EXECUTING before_dot.\n");
5542*0b459c2cSDavid du Colombier if (PTR_BYTE_POS ((unsigned char *) d) >= PT_BYTE)
5543*0b459c2cSDavid du Colombier goto fail;
5544*0b459c2cSDavid du Colombier break;
5545*0b459c2cSDavid du Colombier
5546*0b459c2cSDavid du Colombier case at_dot:
5547*0b459c2cSDavid du Colombier DEBUG_PRINT1 ("EXECUTING at_dot.\n");
5548*0b459c2cSDavid du Colombier if (PTR_BYTE_POS ((unsigned char *) d) != PT_BYTE)
5549*0b459c2cSDavid du Colombier goto fail;
5550*0b459c2cSDavid du Colombier break;
5551*0b459c2cSDavid du Colombier
5552*0b459c2cSDavid du Colombier case after_dot:
5553*0b459c2cSDavid du Colombier DEBUG_PRINT1 ("EXECUTING after_dot.\n");
5554*0b459c2cSDavid du Colombier if (PTR_BYTE_POS ((unsigned char *) d) <= PT_BYTE)
5555*0b459c2cSDavid du Colombier goto fail;
5556*0b459c2cSDavid du Colombier break;
5557*0b459c2cSDavid du Colombier
5558*0b459c2cSDavid du Colombier case syntaxspec:
5559*0b459c2cSDavid du Colombier DEBUG_PRINT2 ("EXECUTING syntaxspec %d.\n", mcnt);
5560*0b459c2cSDavid du Colombier mcnt = *p++;
5561*0b459c2cSDavid du Colombier goto matchsyntax;
5562*0b459c2cSDavid du Colombier
5563*0b459c2cSDavid du Colombier case wordchar:
5564*0b459c2cSDavid du Colombier DEBUG_PRINT1 ("EXECUTING Emacs wordchar.\n");
5565*0b459c2cSDavid du Colombier mcnt = (int) Sword;
5566*0b459c2cSDavid du Colombier matchsyntax:
5567*0b459c2cSDavid du Colombier PREFETCH ();
5568*0b459c2cSDavid du Colombier #ifdef emacs
5569*0b459c2cSDavid du Colombier {
5570*0b459c2cSDavid du Colombier int pos1 = SYNTAX_TABLE_BYTE_TO_CHAR (PTR_TO_OFFSET (d));
5571*0b459c2cSDavid du Colombier UPDATE_SYNTAX_TABLE (pos1);
5572*0b459c2cSDavid du Colombier }
5573*0b459c2cSDavid du Colombier #endif
5574*0b459c2cSDavid du Colombier {
5575*0b459c2cSDavid du Colombier int c, len;
5576*0b459c2cSDavid du Colombier
5577*0b459c2cSDavid du Colombier if (multibyte)
5578*0b459c2cSDavid du Colombier /* we must concern about multibyte form, ... */
5579*0b459c2cSDavid du Colombier c = STRING_CHAR_AND_LENGTH (d, dend - d, len);
5580*0b459c2cSDavid du Colombier else
5581*0b459c2cSDavid du Colombier /* everything should be handled as ASCII, even though it
5582*0b459c2cSDavid du Colombier looks like multibyte form. */
5583*0b459c2cSDavid du Colombier c = *d, len = 1;
5584*0b459c2cSDavid du Colombier
5585*0b459c2cSDavid du Colombier if (SYNTAX (c) != (enum syntaxcode) mcnt)
5586*0b459c2cSDavid du Colombier goto fail;
5587*0b459c2cSDavid du Colombier d += len;
5588*0b459c2cSDavid du Colombier }
5589*0b459c2cSDavid du Colombier SET_REGS_MATCHED ();
5590*0b459c2cSDavid du Colombier break;
5591*0b459c2cSDavid du Colombier
5592*0b459c2cSDavid du Colombier case notsyntaxspec:
5593*0b459c2cSDavid du Colombier DEBUG_PRINT2 ("EXECUTING notsyntaxspec %d.\n", mcnt);
5594*0b459c2cSDavid du Colombier mcnt = *p++;
5595*0b459c2cSDavid du Colombier goto matchnotsyntax;
5596*0b459c2cSDavid du Colombier
5597*0b459c2cSDavid du Colombier case notwordchar:
5598*0b459c2cSDavid du Colombier DEBUG_PRINT1 ("EXECUTING Emacs notwordchar.\n");
5599*0b459c2cSDavid du Colombier mcnt = (int) Sword;
5600*0b459c2cSDavid du Colombier matchnotsyntax:
5601*0b459c2cSDavid du Colombier PREFETCH ();
5602*0b459c2cSDavid du Colombier #ifdef emacs
5603*0b459c2cSDavid du Colombier {
5604*0b459c2cSDavid du Colombier int pos1 = SYNTAX_TABLE_BYTE_TO_CHAR (PTR_TO_OFFSET (d));
5605*0b459c2cSDavid du Colombier UPDATE_SYNTAX_TABLE (pos1);
5606*0b459c2cSDavid du Colombier }
5607*0b459c2cSDavid du Colombier #endif
5608*0b459c2cSDavid du Colombier {
5609*0b459c2cSDavid du Colombier int c, len;
5610*0b459c2cSDavid du Colombier
5611*0b459c2cSDavid du Colombier if (multibyte)
5612*0b459c2cSDavid du Colombier c = STRING_CHAR_AND_LENGTH (d, dend - d, len);
5613*0b459c2cSDavid du Colombier else
5614*0b459c2cSDavid du Colombier c = *d, len = 1;
5615*0b459c2cSDavid du Colombier
5616*0b459c2cSDavid du Colombier if (SYNTAX (c) == (enum syntaxcode) mcnt)
5617*0b459c2cSDavid du Colombier goto fail;
5618*0b459c2cSDavid du Colombier d += len;
5619*0b459c2cSDavid du Colombier }
5620*0b459c2cSDavid du Colombier SET_REGS_MATCHED ();
5621*0b459c2cSDavid du Colombier break;
5622*0b459c2cSDavid du Colombier
5623*0b459c2cSDavid du Colombier case categoryspec:
5624*0b459c2cSDavid du Colombier DEBUG_PRINT2 ("EXECUTING categoryspec %d.\n", *p);
5625*0b459c2cSDavid du Colombier mcnt = *p++;
5626*0b459c2cSDavid du Colombier PREFETCH ();
5627*0b459c2cSDavid du Colombier {
5628*0b459c2cSDavid du Colombier int c, len;
5629*0b459c2cSDavid du Colombier
5630*0b459c2cSDavid du Colombier if (multibyte)
5631*0b459c2cSDavid du Colombier c = STRING_CHAR_AND_LENGTH (d, dend - d, len);
5632*0b459c2cSDavid du Colombier else
5633*0b459c2cSDavid du Colombier c = *d, len = 1;
5634*0b459c2cSDavid du Colombier
5635*0b459c2cSDavid du Colombier if (!CHAR_HAS_CATEGORY (c, mcnt))
5636*0b459c2cSDavid du Colombier goto fail;
5637*0b459c2cSDavid du Colombier d += len;
5638*0b459c2cSDavid du Colombier }
5639*0b459c2cSDavid du Colombier SET_REGS_MATCHED ();
5640*0b459c2cSDavid du Colombier break;
5641*0b459c2cSDavid du Colombier
5642*0b459c2cSDavid du Colombier case notcategoryspec:
5643*0b459c2cSDavid du Colombier DEBUG_PRINT2 ("EXECUTING notcategoryspec %d.\n", *p);
5644*0b459c2cSDavid du Colombier mcnt = *p++;
5645*0b459c2cSDavid du Colombier PREFETCH ();
5646*0b459c2cSDavid du Colombier {
5647*0b459c2cSDavid du Colombier int c, len;
5648*0b459c2cSDavid du Colombier
5649*0b459c2cSDavid du Colombier if (multibyte)
5650*0b459c2cSDavid du Colombier c = STRING_CHAR_AND_LENGTH (d, dend - d, len);
5651*0b459c2cSDavid du Colombier else
5652*0b459c2cSDavid du Colombier c = *d, len = 1;
5653*0b459c2cSDavid du Colombier
5654*0b459c2cSDavid du Colombier if (CHAR_HAS_CATEGORY (c, mcnt))
5655*0b459c2cSDavid du Colombier goto fail;
5656*0b459c2cSDavid du Colombier d += len;
5657*0b459c2cSDavid du Colombier }
5658*0b459c2cSDavid du Colombier SET_REGS_MATCHED ();
5659*0b459c2cSDavid du Colombier break;
5660*0b459c2cSDavid du Colombier
5661*0b459c2cSDavid du Colombier #else /* not emacs */
5662*0b459c2cSDavid du Colombier case wordchar:
5663*0b459c2cSDavid du Colombier DEBUG_PRINT1 ("EXECUTING non-Emacs wordchar.\n");
5664*0b459c2cSDavid du Colombier PREFETCH ();
5665*0b459c2cSDavid du Colombier if (!WORDCHAR_P (d))
5666*0b459c2cSDavid du Colombier goto fail;
5667*0b459c2cSDavid du Colombier SET_REGS_MATCHED ();
5668*0b459c2cSDavid du Colombier d++;
5669*0b459c2cSDavid du Colombier break;
5670*0b459c2cSDavid du Colombier
5671*0b459c2cSDavid du Colombier case notwordchar:
5672*0b459c2cSDavid du Colombier DEBUG_PRINT1 ("EXECUTING non-Emacs notwordchar.\n");
5673*0b459c2cSDavid du Colombier PREFETCH ();
5674*0b459c2cSDavid du Colombier if (WORDCHAR_P (d))
5675*0b459c2cSDavid du Colombier goto fail;
5676*0b459c2cSDavid du Colombier SET_REGS_MATCHED ();
5677*0b459c2cSDavid du Colombier d++;
5678*0b459c2cSDavid du Colombier break;
5679*0b459c2cSDavid du Colombier #endif /* not emacs */
5680*0b459c2cSDavid du Colombier
5681*0b459c2cSDavid du Colombier default:
5682*0b459c2cSDavid du Colombier abort ();
5683*0b459c2cSDavid du Colombier }
5684*0b459c2cSDavid du Colombier continue; /* Successfully executed one pattern command; keep going. */
5685*0b459c2cSDavid du Colombier
5686*0b459c2cSDavid du Colombier
5687*0b459c2cSDavid du Colombier /* We goto here if a matching operation fails. */
5688*0b459c2cSDavid du Colombier fail:
5689*0b459c2cSDavid du Colombier #if defined (WINDOWSNT) && defined (emacs)
5690*0b459c2cSDavid du Colombier QUIT;
5691*0b459c2cSDavid du Colombier #endif
5692*0b459c2cSDavid du Colombier if (!FAIL_STACK_EMPTY ())
5693*0b459c2cSDavid du Colombier { /* A restart point is known. Restore to that state. */
5694*0b459c2cSDavid du Colombier DEBUG_PRINT1 ("\nFAIL:\n");
5695*0b459c2cSDavid du Colombier POP_FAILURE_POINT (d, p,
5696*0b459c2cSDavid du Colombier lowest_active_reg, highest_active_reg,
5697*0b459c2cSDavid du Colombier regstart, regend, reg_info);
5698*0b459c2cSDavid du Colombier
5699*0b459c2cSDavid du Colombier /* If this failure point is a dummy, try the next one. */
5700*0b459c2cSDavid du Colombier if (!p)
5701*0b459c2cSDavid du Colombier goto fail;
5702*0b459c2cSDavid du Colombier
5703*0b459c2cSDavid du Colombier /* If we failed to the end of the pattern, don't examine *p. */
5704*0b459c2cSDavid du Colombier assert (p <= pend);
5705*0b459c2cSDavid du Colombier if (p < pend)
5706*0b459c2cSDavid du Colombier {
5707*0b459c2cSDavid du Colombier boolean is_a_jump_n = false;
5708*0b459c2cSDavid du Colombier
5709*0b459c2cSDavid du Colombier /* If failed to a backwards jump that's part of a repetition
5710*0b459c2cSDavid du Colombier loop, need to pop this failure point and use the next one. */
5711*0b459c2cSDavid du Colombier switch ((re_opcode_t) *p)
5712*0b459c2cSDavid du Colombier {
5713*0b459c2cSDavid du Colombier case jump_n:
5714*0b459c2cSDavid du Colombier is_a_jump_n = true;
5715*0b459c2cSDavid du Colombier case maybe_pop_jump:
5716*0b459c2cSDavid du Colombier case pop_failure_jump:
5717*0b459c2cSDavid du Colombier case jump:
5718*0b459c2cSDavid du Colombier p1 = p + 1;
5719*0b459c2cSDavid du Colombier EXTRACT_NUMBER_AND_INCR (mcnt, p1);
5720*0b459c2cSDavid du Colombier p1 += mcnt;
5721*0b459c2cSDavid du Colombier
5722*0b459c2cSDavid du Colombier if ((is_a_jump_n && (re_opcode_t) *p1 == succeed_n)
5723*0b459c2cSDavid du Colombier || (!is_a_jump_n
5724*0b459c2cSDavid du Colombier && (re_opcode_t) *p1 == on_failure_jump))
5725*0b459c2cSDavid du Colombier goto fail;
5726*0b459c2cSDavid du Colombier break;
5727*0b459c2cSDavid du Colombier default:
5728*0b459c2cSDavid du Colombier /* do nothing */ ;
5729*0b459c2cSDavid du Colombier }
5730*0b459c2cSDavid du Colombier }
5731*0b459c2cSDavid du Colombier
5732*0b459c2cSDavid du Colombier if (d >= string1 && d <= end1)
5733*0b459c2cSDavid du Colombier dend = end_match_1;
5734*0b459c2cSDavid du Colombier }
5735*0b459c2cSDavid du Colombier else
5736*0b459c2cSDavid du Colombier break; /* Matching at this starting point really fails. */
5737*0b459c2cSDavid du Colombier } /* for (;;) */
5738*0b459c2cSDavid du Colombier
5739*0b459c2cSDavid du Colombier if (best_regs_set)
5740*0b459c2cSDavid du Colombier goto restore_best_regs;
5741*0b459c2cSDavid du Colombier
5742*0b459c2cSDavid du Colombier FREE_VARIABLES ();
5743*0b459c2cSDavid du Colombier
5744*0b459c2cSDavid du Colombier return -1; /* Failure to match. */
5745*0b459c2cSDavid du Colombier } /* re_match_2 */
5746*0b459c2cSDavid du Colombier
5747*0b459c2cSDavid du Colombier /* Subroutine definitions for re_match_2. */
5748*0b459c2cSDavid du Colombier
5749*0b459c2cSDavid du Colombier
5750*0b459c2cSDavid du Colombier /* We are passed P pointing to a register number after a start_memory.
5751*0b459c2cSDavid du Colombier
5752*0b459c2cSDavid du Colombier Return true if the pattern up to the corresponding stop_memory can
5753*0b459c2cSDavid du Colombier match the empty string, and false otherwise.
5754*0b459c2cSDavid du Colombier
5755*0b459c2cSDavid du Colombier If we find the matching stop_memory, sets P to point to one past its number.
5756*0b459c2cSDavid du Colombier Otherwise, sets P to an undefined byte less than or equal to END.
5757*0b459c2cSDavid du Colombier
5758*0b459c2cSDavid du Colombier We don't handle duplicates properly (yet). */
5759*0b459c2cSDavid du Colombier
5760*0b459c2cSDavid du Colombier static boolean
group_match_null_string_p(p,end,reg_info)5761*0b459c2cSDavid du Colombier group_match_null_string_p (p, end, reg_info)
5762*0b459c2cSDavid du Colombier unsigned char **p, *end;
5763*0b459c2cSDavid du Colombier register_info_type *reg_info;
5764*0b459c2cSDavid du Colombier {
5765*0b459c2cSDavid du Colombier int mcnt;
5766*0b459c2cSDavid du Colombier /* Point to after the args to the start_memory. */
5767*0b459c2cSDavid du Colombier unsigned char *p1 = *p + 2;
5768*0b459c2cSDavid du Colombier
5769*0b459c2cSDavid du Colombier while (p1 < end)
5770*0b459c2cSDavid du Colombier {
5771*0b459c2cSDavid du Colombier /* Skip over opcodes that can match nothing, and return true or
5772*0b459c2cSDavid du Colombier false, as appropriate, when we get to one that can't, or to the
5773*0b459c2cSDavid du Colombier matching stop_memory. */
5774*0b459c2cSDavid du Colombier
5775*0b459c2cSDavid du Colombier switch ((re_opcode_t) *p1)
5776*0b459c2cSDavid du Colombier {
5777*0b459c2cSDavid du Colombier /* Could be either a loop or a series of alternatives. */
5778*0b459c2cSDavid du Colombier case on_failure_jump:
5779*0b459c2cSDavid du Colombier p1++;
5780*0b459c2cSDavid du Colombier EXTRACT_NUMBER_AND_INCR (mcnt, p1);
5781*0b459c2cSDavid du Colombier
5782*0b459c2cSDavid du Colombier /* If the next operation is not a jump backwards in the
5783*0b459c2cSDavid du Colombier pattern. */
5784*0b459c2cSDavid du Colombier
5785*0b459c2cSDavid du Colombier if (mcnt >= 0)
5786*0b459c2cSDavid du Colombier {
5787*0b459c2cSDavid du Colombier /* Go through the on_failure_jumps of the alternatives,
5788*0b459c2cSDavid du Colombier seeing if any of the alternatives cannot match nothing.
5789*0b459c2cSDavid du Colombier The last alternative starts with only a jump,
5790*0b459c2cSDavid du Colombier whereas the rest start with on_failure_jump and end
5791*0b459c2cSDavid du Colombier with a jump, e.g., here is the pattern for `a|b|c':
5792*0b459c2cSDavid du Colombier
5793*0b459c2cSDavid du Colombier /on_failure_jump/0/6/exactn/1/a/jump_past_alt/0/6
5794*0b459c2cSDavid du Colombier /on_failure_jump/0/6/exactn/1/b/jump_past_alt/0/3
5795*0b459c2cSDavid du Colombier /exactn/1/c
5796*0b459c2cSDavid du Colombier
5797*0b459c2cSDavid du Colombier So, we have to first go through the first (n-1)
5798*0b459c2cSDavid du Colombier alternatives and then deal with the last one separately. */
5799*0b459c2cSDavid du Colombier
5800*0b459c2cSDavid du Colombier
5801*0b459c2cSDavid du Colombier /* Deal with the first (n-1) alternatives, which start
5802*0b459c2cSDavid du Colombier with an on_failure_jump (see above) that jumps to right
5803*0b459c2cSDavid du Colombier past a jump_past_alt. */
5804*0b459c2cSDavid du Colombier
5805*0b459c2cSDavid du Colombier while ((re_opcode_t) p1[mcnt-3] == jump_past_alt)
5806*0b459c2cSDavid du Colombier {
5807*0b459c2cSDavid du Colombier /* `mcnt' holds how many bytes long the alternative
5808*0b459c2cSDavid du Colombier is, including the ending `jump_past_alt' and
5809*0b459c2cSDavid du Colombier its number. */
5810*0b459c2cSDavid du Colombier
5811*0b459c2cSDavid du Colombier if (!alt_match_null_string_p (p1, p1 + mcnt - 3,
5812*0b459c2cSDavid du Colombier reg_info))
5813*0b459c2cSDavid du Colombier return false;
5814*0b459c2cSDavid du Colombier
5815*0b459c2cSDavid du Colombier /* Move to right after this alternative, including the
5816*0b459c2cSDavid du Colombier jump_past_alt. */
5817*0b459c2cSDavid du Colombier p1 += mcnt;
5818*0b459c2cSDavid du Colombier
5819*0b459c2cSDavid du Colombier /* Break if it's the beginning of an n-th alternative
5820*0b459c2cSDavid du Colombier that doesn't begin with an on_failure_jump. */
5821*0b459c2cSDavid du Colombier if ((re_opcode_t) *p1 != on_failure_jump)
5822*0b459c2cSDavid du Colombier break;
5823*0b459c2cSDavid du Colombier
5824*0b459c2cSDavid du Colombier /* Still have to check that it's not an n-th
5825*0b459c2cSDavid du Colombier alternative that starts with an on_failure_jump. */
5826*0b459c2cSDavid du Colombier p1++;
5827*0b459c2cSDavid du Colombier EXTRACT_NUMBER_AND_INCR (mcnt, p1);
5828*0b459c2cSDavid du Colombier if ((re_opcode_t) p1[mcnt-3] != jump_past_alt)
5829*0b459c2cSDavid du Colombier {
5830*0b459c2cSDavid du Colombier /* Get to the beginning of the n-th alternative. */
5831*0b459c2cSDavid du Colombier p1 -= 3;
5832*0b459c2cSDavid du Colombier break;
5833*0b459c2cSDavid du Colombier }
5834*0b459c2cSDavid du Colombier }
5835*0b459c2cSDavid du Colombier
5836*0b459c2cSDavid du Colombier /* Deal with the last alternative: go back and get number
5837*0b459c2cSDavid du Colombier of the `jump_past_alt' just before it. `mcnt' contains
5838*0b459c2cSDavid du Colombier the length of the alternative. */
5839*0b459c2cSDavid du Colombier EXTRACT_NUMBER (mcnt, p1 - 2);
5840*0b459c2cSDavid du Colombier
5841*0b459c2cSDavid du Colombier if (!alt_match_null_string_p (p1, p1 + mcnt, reg_info))
5842*0b459c2cSDavid du Colombier return false;
5843*0b459c2cSDavid du Colombier
5844*0b459c2cSDavid du Colombier p1 += mcnt; /* Get past the n-th alternative. */
5845*0b459c2cSDavid du Colombier } /* if mcnt > 0 */
5846*0b459c2cSDavid du Colombier break;
5847*0b459c2cSDavid du Colombier
5848*0b459c2cSDavid du Colombier
5849*0b459c2cSDavid du Colombier case stop_memory:
5850*0b459c2cSDavid du Colombier assert (p1[1] == **p);
5851*0b459c2cSDavid du Colombier *p = p1 + 2;
5852*0b459c2cSDavid du Colombier return true;
5853*0b459c2cSDavid du Colombier
5854*0b459c2cSDavid du Colombier
5855*0b459c2cSDavid du Colombier default:
5856*0b459c2cSDavid du Colombier if (!common_op_match_null_string_p (&p1, end, reg_info))
5857*0b459c2cSDavid du Colombier return false;
5858*0b459c2cSDavid du Colombier }
5859*0b459c2cSDavid du Colombier } /* while p1 < end */
5860*0b459c2cSDavid du Colombier
5861*0b459c2cSDavid du Colombier return false;
5862*0b459c2cSDavid du Colombier } /* group_match_null_string_p */
5863*0b459c2cSDavid du Colombier
5864*0b459c2cSDavid du Colombier
5865*0b459c2cSDavid du Colombier /* Similar to group_match_null_string_p, but doesn't deal with alternatives:
5866*0b459c2cSDavid du Colombier It expects P to be the first byte of a single alternative and END one
5867*0b459c2cSDavid du Colombier byte past the last. The alternative can contain groups. */
5868*0b459c2cSDavid du Colombier
5869*0b459c2cSDavid du Colombier static boolean
alt_match_null_string_p(p,end,reg_info)5870*0b459c2cSDavid du Colombier alt_match_null_string_p (p, end, reg_info)
5871*0b459c2cSDavid du Colombier unsigned char *p, *end;
5872*0b459c2cSDavid du Colombier register_info_type *reg_info;
5873*0b459c2cSDavid du Colombier {
5874*0b459c2cSDavid du Colombier int mcnt;
5875*0b459c2cSDavid du Colombier unsigned char *p1 = p;
5876*0b459c2cSDavid du Colombier
5877*0b459c2cSDavid du Colombier while (p1 < end)
5878*0b459c2cSDavid du Colombier {
5879*0b459c2cSDavid du Colombier /* Skip over opcodes that can match nothing, and break when we get
5880*0b459c2cSDavid du Colombier to one that can't. */
5881*0b459c2cSDavid du Colombier
5882*0b459c2cSDavid du Colombier switch ((re_opcode_t) *p1)
5883*0b459c2cSDavid du Colombier {
5884*0b459c2cSDavid du Colombier /* It's a loop. */
5885*0b459c2cSDavid du Colombier case on_failure_jump:
5886*0b459c2cSDavid du Colombier p1++;
5887*0b459c2cSDavid du Colombier EXTRACT_NUMBER_AND_INCR (mcnt, p1);
5888*0b459c2cSDavid du Colombier p1 += mcnt;
5889*0b459c2cSDavid du Colombier break;
5890*0b459c2cSDavid du Colombier
5891*0b459c2cSDavid du Colombier default:
5892*0b459c2cSDavid du Colombier if (!common_op_match_null_string_p (&p1, end, reg_info))
5893*0b459c2cSDavid du Colombier return false;
5894*0b459c2cSDavid du Colombier }
5895*0b459c2cSDavid du Colombier } /* while p1 < end */
5896*0b459c2cSDavid du Colombier
5897*0b459c2cSDavid du Colombier return true;
5898*0b459c2cSDavid du Colombier } /* alt_match_null_string_p */
5899*0b459c2cSDavid du Colombier
5900*0b459c2cSDavid du Colombier
5901*0b459c2cSDavid du Colombier /* Deals with the ops common to group_match_null_string_p and
5902*0b459c2cSDavid du Colombier alt_match_null_string_p.
5903*0b459c2cSDavid du Colombier
5904*0b459c2cSDavid du Colombier Sets P to one after the op and its arguments, if any. */
5905*0b459c2cSDavid du Colombier
5906*0b459c2cSDavid du Colombier static boolean
common_op_match_null_string_p(p,end,reg_info)5907*0b459c2cSDavid du Colombier common_op_match_null_string_p (p, end, reg_info)
5908*0b459c2cSDavid du Colombier unsigned char **p, *end;
5909*0b459c2cSDavid du Colombier register_info_type *reg_info;
5910*0b459c2cSDavid du Colombier {
5911*0b459c2cSDavid du Colombier int mcnt;
5912*0b459c2cSDavid du Colombier boolean ret;
5913*0b459c2cSDavid du Colombier int reg_no;
5914*0b459c2cSDavid du Colombier unsigned char *p1 = *p;
5915*0b459c2cSDavid du Colombier
5916*0b459c2cSDavid du Colombier switch ((re_opcode_t) *p1++)
5917*0b459c2cSDavid du Colombier {
5918*0b459c2cSDavid du Colombier case no_op:
5919*0b459c2cSDavid du Colombier case begline:
5920*0b459c2cSDavid du Colombier case endline:
5921*0b459c2cSDavid du Colombier case begbuf:
5922*0b459c2cSDavid du Colombier case endbuf:
5923*0b459c2cSDavid du Colombier case wordbeg:
5924*0b459c2cSDavid du Colombier case wordend:
5925*0b459c2cSDavid du Colombier case wordbound:
5926*0b459c2cSDavid du Colombier case notwordbound:
5927*0b459c2cSDavid du Colombier #ifdef emacs
5928*0b459c2cSDavid du Colombier case before_dot:
5929*0b459c2cSDavid du Colombier case at_dot:
5930*0b459c2cSDavid du Colombier case after_dot:
5931*0b459c2cSDavid du Colombier #endif
5932*0b459c2cSDavid du Colombier break;
5933*0b459c2cSDavid du Colombier
5934*0b459c2cSDavid du Colombier case start_memory:
5935*0b459c2cSDavid du Colombier reg_no = *p1;
5936*0b459c2cSDavid du Colombier assert (reg_no > 0 && reg_no <= MAX_REGNUM);
5937*0b459c2cSDavid du Colombier ret = group_match_null_string_p (&p1, end, reg_info);
5938*0b459c2cSDavid du Colombier
5939*0b459c2cSDavid du Colombier /* Have to set this here in case we're checking a group which
5940*0b459c2cSDavid du Colombier contains a group and a back reference to it. */
5941*0b459c2cSDavid du Colombier
5942*0b459c2cSDavid du Colombier if (REG_MATCH_NULL_STRING_P (reg_info[reg_no]) == MATCH_NULL_UNSET_VALUE)
5943*0b459c2cSDavid du Colombier REG_MATCH_NULL_STRING_P (reg_info[reg_no]) = ret;
5944*0b459c2cSDavid du Colombier
5945*0b459c2cSDavid du Colombier if (!ret)
5946*0b459c2cSDavid du Colombier return false;
5947*0b459c2cSDavid du Colombier break;
5948*0b459c2cSDavid du Colombier
5949*0b459c2cSDavid du Colombier /* If this is an optimized succeed_n for zero times, make the jump. */
5950*0b459c2cSDavid du Colombier case jump:
5951*0b459c2cSDavid du Colombier EXTRACT_NUMBER_AND_INCR (mcnt, p1);
5952*0b459c2cSDavid du Colombier if (mcnt >= 0)
5953*0b459c2cSDavid du Colombier p1 += mcnt;
5954*0b459c2cSDavid du Colombier else
5955*0b459c2cSDavid du Colombier return false;
5956*0b459c2cSDavid du Colombier break;
5957*0b459c2cSDavid du Colombier
5958*0b459c2cSDavid du Colombier case succeed_n:
5959*0b459c2cSDavid du Colombier /* Get to the number of times to succeed. */
5960*0b459c2cSDavid du Colombier p1 += 2;
5961*0b459c2cSDavid du Colombier EXTRACT_NUMBER_AND_INCR (mcnt, p1);
5962*0b459c2cSDavid du Colombier
5963*0b459c2cSDavid du Colombier if (mcnt == 0)
5964*0b459c2cSDavid du Colombier {
5965*0b459c2cSDavid du Colombier p1 -= 4;
5966*0b459c2cSDavid du Colombier EXTRACT_NUMBER_AND_INCR (mcnt, p1);
5967*0b459c2cSDavid du Colombier p1 += mcnt;
5968*0b459c2cSDavid du Colombier }
5969*0b459c2cSDavid du Colombier else
5970*0b459c2cSDavid du Colombier return false;
5971*0b459c2cSDavid du Colombier break;
5972*0b459c2cSDavid du Colombier
5973*0b459c2cSDavid du Colombier case duplicate:
5974*0b459c2cSDavid du Colombier if (!REG_MATCH_NULL_STRING_P (reg_info[*p1]))
5975*0b459c2cSDavid du Colombier return false;
5976*0b459c2cSDavid du Colombier break;
5977*0b459c2cSDavid du Colombier
5978*0b459c2cSDavid du Colombier case set_number_at:
5979*0b459c2cSDavid du Colombier p1 += 4;
5980*0b459c2cSDavid du Colombier
5981*0b459c2cSDavid du Colombier default:
5982*0b459c2cSDavid du Colombier /* All other opcodes mean we cannot match the empty string. */
5983*0b459c2cSDavid du Colombier return false;
5984*0b459c2cSDavid du Colombier }
5985*0b459c2cSDavid du Colombier
5986*0b459c2cSDavid du Colombier *p = p1;
5987*0b459c2cSDavid du Colombier return true;
5988*0b459c2cSDavid du Colombier } /* common_op_match_null_string_p */
5989*0b459c2cSDavid du Colombier
5990*0b459c2cSDavid du Colombier
5991*0b459c2cSDavid du Colombier /* Return zero if TRANSLATE[S1] and TRANSLATE[S2] are identical for LEN
5992*0b459c2cSDavid du Colombier bytes; nonzero otherwise. */
5993*0b459c2cSDavid du Colombier
5994*0b459c2cSDavid du Colombier static int
bcmp_translate(s1,s2,len,translate)5995*0b459c2cSDavid du Colombier bcmp_translate (s1, s2, len, translate)
5996*0b459c2cSDavid du Colombier unsigned char *s1, *s2;
5997*0b459c2cSDavid du Colombier register int len;
5998*0b459c2cSDavid du Colombier RE_TRANSLATE_TYPE translate;
5999*0b459c2cSDavid du Colombier {
6000*0b459c2cSDavid du Colombier register unsigned char *p1 = s1, *p2 = s2;
6001*0b459c2cSDavid du Colombier unsigned char *p1_end = s1 + len;
6002*0b459c2cSDavid du Colombier unsigned char *p2_end = s2 + len;
6003*0b459c2cSDavid du Colombier
6004*0b459c2cSDavid du Colombier while (p1 != p1_end && p2 != p2_end)
6005*0b459c2cSDavid du Colombier {
6006*0b459c2cSDavid du Colombier int p1_charlen, p2_charlen;
6007*0b459c2cSDavid du Colombier int p1_ch, p2_ch;
6008*0b459c2cSDavid du Colombier
6009*0b459c2cSDavid du Colombier p1_ch = STRING_CHAR_AND_LENGTH (p1, p1_end - p1, p1_charlen);
6010*0b459c2cSDavid du Colombier p2_ch = STRING_CHAR_AND_LENGTH (p2, p2_end - p2, p2_charlen);
6011*0b459c2cSDavid du Colombier
6012*0b459c2cSDavid du Colombier if (RE_TRANSLATE (translate, p1_ch)
6013*0b459c2cSDavid du Colombier != RE_TRANSLATE (translate, p2_ch))
6014*0b459c2cSDavid du Colombier return 1;
6015*0b459c2cSDavid du Colombier
6016*0b459c2cSDavid du Colombier p1 += p1_charlen, p2 += p2_charlen;
6017*0b459c2cSDavid du Colombier }
6018*0b459c2cSDavid du Colombier
6019*0b459c2cSDavid du Colombier if (p1 != p1_end || p2 != p2_end)
6020*0b459c2cSDavid du Colombier return 1;
6021*0b459c2cSDavid du Colombier
6022*0b459c2cSDavid du Colombier return 0;
6023*0b459c2cSDavid du Colombier }
6024*0b459c2cSDavid du Colombier
/* Entry points for GNU code.  */
6026*0b459c2cSDavid du Colombier
6027*0b459c2cSDavid du Colombier /* re_compile_pattern is the GNU regular expression compiler: it
6028*0b459c2cSDavid du Colombier compiles PATTERN (of length SIZE) and puts the result in BUFP.
6029*0b459c2cSDavid du Colombier Returns 0 if the pattern was valid, otherwise an error string.
6030*0b459c2cSDavid du Colombier
6031*0b459c2cSDavid du Colombier Assumes the `allocated' (and perhaps `buffer') and `translate' fields
6032*0b459c2cSDavid du Colombier are set in BUFP on entry.
6033*0b459c2cSDavid du Colombier
6034*0b459c2cSDavid du Colombier We call regex_compile to do the actual compilation. */
6035*0b459c2cSDavid du Colombier
6036*0b459c2cSDavid du Colombier const char *
re_compile_pattern(pattern,length,bufp)6037*0b459c2cSDavid du Colombier re_compile_pattern (pattern, length, bufp)
6038*0b459c2cSDavid du Colombier const char *pattern;
6039*0b459c2cSDavid du Colombier int length;
6040*0b459c2cSDavid du Colombier struct re_pattern_buffer *bufp;
6041*0b459c2cSDavid du Colombier {
6042*0b459c2cSDavid du Colombier reg_errcode_t ret;
6043*0b459c2cSDavid du Colombier
6044*0b459c2cSDavid du Colombier /* GNU code is written to assume at least RE_NREGS registers will be set
6045*0b459c2cSDavid du Colombier (and at least one extra will be -1). */
6046*0b459c2cSDavid du Colombier bufp->regs_allocated = REGS_UNALLOCATED;
6047*0b459c2cSDavid du Colombier
6048*0b459c2cSDavid du Colombier /* And GNU code determines whether or not to get register information
6049*0b459c2cSDavid du Colombier by passing null for the REGS argument to re_match, etc., not by
6050*0b459c2cSDavid du Colombier setting no_sub. */
6051*0b459c2cSDavid du Colombier bufp->no_sub = 0;
6052*0b459c2cSDavid du Colombier
6053*0b459c2cSDavid du Colombier /* Match anchors at newline. */
6054*0b459c2cSDavid du Colombier bufp->newline_anchor = 1;
6055*0b459c2cSDavid du Colombier
6056*0b459c2cSDavid du Colombier ret = regex_compile (pattern, length, re_syntax_options, bufp);
6057*0b459c2cSDavid du Colombier
6058*0b459c2cSDavid du Colombier if (!ret)
6059*0b459c2cSDavid du Colombier return NULL;
6060*0b459c2cSDavid du Colombier return gettext (re_error_msgid[(int) ret]);
6061*0b459c2cSDavid du Colombier }
6062*0b459c2cSDavid du Colombier
/* Entry points compatible with 4.2 BSD regex library.  We don't define
   them unless specifically requested.  */
6065*0b459c2cSDavid du Colombier
6066*0b459c2cSDavid du Colombier #if defined (_REGEX_RE_COMP) || defined (_LIBC)
6067*0b459c2cSDavid du Colombier
6068*0b459c2cSDavid du Colombier /* BSD has one and only one pattern buffer. */
6069*0b459c2cSDavid du Colombier static struct re_pattern_buffer re_comp_buf;
6070*0b459c2cSDavid du Colombier
6071*0b459c2cSDavid du Colombier char *
6072*0b459c2cSDavid du Colombier #ifdef _LIBC
6073*0b459c2cSDavid du Colombier /* Make these definitions weak in libc, so POSIX programs can redefine
6074*0b459c2cSDavid du Colombier these names if they don't use our functions, and still use
6075*0b459c2cSDavid du Colombier regcomp/regexec below without link errors. */
6076*0b459c2cSDavid du Colombier weak_function
6077*0b459c2cSDavid du Colombier #endif
re_comp(s)6078*0b459c2cSDavid du Colombier re_comp (s)
6079*0b459c2cSDavid du Colombier const char *s;
6080*0b459c2cSDavid du Colombier {
6081*0b459c2cSDavid du Colombier reg_errcode_t ret;
6082*0b459c2cSDavid du Colombier
6083*0b459c2cSDavid du Colombier if (!s)
6084*0b459c2cSDavid du Colombier {
6085*0b459c2cSDavid du Colombier if (!re_comp_buf.buffer)
6086*0b459c2cSDavid du Colombier return gettext ("No previous regular expression");
6087*0b459c2cSDavid du Colombier return 0;
6088*0b459c2cSDavid du Colombier }
6089*0b459c2cSDavid du Colombier
6090*0b459c2cSDavid du Colombier if (!re_comp_buf.buffer)
6091*0b459c2cSDavid du Colombier {
6092*0b459c2cSDavid du Colombier re_comp_buf.buffer = (unsigned char *) malloc (200);
6093*0b459c2cSDavid du Colombier if (re_comp_buf.buffer == NULL)
6094*0b459c2cSDavid du Colombier /* CVS: Yes, we're discarding `const' here if !HAVE_LIBINTL. */
6095*0b459c2cSDavid du Colombier return (char *) gettext (re_error_msgid[(int) REG_ESPACE]);
6096*0b459c2cSDavid du Colombier re_comp_buf.allocated = 200;
6097*0b459c2cSDavid du Colombier
6098*0b459c2cSDavid du Colombier re_comp_buf.fastmap = (char *) malloc (1 << BYTEWIDTH);
6099*0b459c2cSDavid du Colombier if (re_comp_buf.fastmap == NULL)
6100*0b459c2cSDavid du Colombier /* CVS: Yes, we're discarding `const' here if !HAVE_LIBINTL. */
6101*0b459c2cSDavid du Colombier return (char *) gettext (re_error_msgid[(int) REG_ESPACE]);
6102*0b459c2cSDavid du Colombier }
6103*0b459c2cSDavid du Colombier
6104*0b459c2cSDavid du Colombier /* Since `re_exec' always passes NULL for the `regs' argument, we
6105*0b459c2cSDavid du Colombier don't need to initialize the pattern buffer fields which affect it. */
6106*0b459c2cSDavid du Colombier
6107*0b459c2cSDavid du Colombier /* Match anchors at newlines. */
6108*0b459c2cSDavid du Colombier re_comp_buf.newline_anchor = 1;
6109*0b459c2cSDavid du Colombier
6110*0b459c2cSDavid du Colombier ret = regex_compile (s, strlen (s), re_syntax_options, &re_comp_buf);
6111*0b459c2cSDavid du Colombier
6112*0b459c2cSDavid du Colombier if (!ret)
6113*0b459c2cSDavid du Colombier return NULL;
6114*0b459c2cSDavid du Colombier
6115*0b459c2cSDavid du Colombier /* Yes, we're discarding `const' here if !HAVE_LIBINTL. */
6116*0b459c2cSDavid du Colombier return (char *) gettext (re_error_msgid[(int) ret]);
6117*0b459c2cSDavid du Colombier }
6118*0b459c2cSDavid du Colombier
6119*0b459c2cSDavid du Colombier
6120*0b459c2cSDavid du Colombier int
6121*0b459c2cSDavid du Colombier #ifdef _LIBC
6122*0b459c2cSDavid du Colombier weak_function
6123*0b459c2cSDavid du Colombier #endif
re_exec(s)6124*0b459c2cSDavid du Colombier re_exec (s)
6125*0b459c2cSDavid du Colombier const char *s;
6126*0b459c2cSDavid du Colombier {
6127*0b459c2cSDavid du Colombier const int len = strlen (s);
6128*0b459c2cSDavid du Colombier return
6129*0b459c2cSDavid du Colombier 0 <= re_search (&re_comp_buf, s, len, 0, len, (struct re_registers *) 0);
6130*0b459c2cSDavid du Colombier }
6131*0b459c2cSDavid du Colombier #endif /* _REGEX_RE_COMP */
6132*0b459c2cSDavid du Colombier
/* POSIX.2 functions.  Don't define these for Emacs.  */
6134*0b459c2cSDavid du Colombier
6135*0b459c2cSDavid du Colombier #ifndef emacs
6136*0b459c2cSDavid du Colombier
6137*0b459c2cSDavid du Colombier /* regcomp takes a regular expression as a string and compiles it.
6138*0b459c2cSDavid du Colombier
6139*0b459c2cSDavid du Colombier PREG is a regex_t *. We do not expect any fields to be initialized,
6140*0b459c2cSDavid du Colombier since POSIX says we shouldn't. Thus, we set
6141*0b459c2cSDavid du Colombier
6142*0b459c2cSDavid du Colombier `buffer' to the compiled pattern;
6143*0b459c2cSDavid du Colombier `used' to the length of the compiled pattern;
6144*0b459c2cSDavid du Colombier `syntax' to RE_SYNTAX_POSIX_EXTENDED if the
6145*0b459c2cSDavid du Colombier REG_EXTENDED bit in CFLAGS is set; otherwise, to
6146*0b459c2cSDavid du Colombier RE_SYNTAX_POSIX_BASIC;
6147*0b459c2cSDavid du Colombier `newline_anchor' to REG_NEWLINE being set in CFLAGS;
6148*0b459c2cSDavid du Colombier `fastmap' and `fastmap_accurate' to zero;
6149*0b459c2cSDavid du Colombier `re_nsub' to the number of subexpressions in PATTERN.
6150*0b459c2cSDavid du Colombier
6151*0b459c2cSDavid du Colombier PATTERN is the address of the pattern string.
6152*0b459c2cSDavid du Colombier
6153*0b459c2cSDavid du Colombier CFLAGS is a series of bits which affect compilation.
6154*0b459c2cSDavid du Colombier
6155*0b459c2cSDavid du Colombier If REG_EXTENDED is set, we use POSIX extended syntax; otherwise, we
6156*0b459c2cSDavid du Colombier use POSIX basic syntax.
6157*0b459c2cSDavid du Colombier
6158*0b459c2cSDavid du Colombier If REG_NEWLINE is set, then . and [^...] don't match newline.
6159*0b459c2cSDavid du Colombier Also, regexec will try a match beginning after every newline.
6160*0b459c2cSDavid du Colombier
6161*0b459c2cSDavid du Colombier If REG_ICASE is set, then we considers upper- and lowercase
6162*0b459c2cSDavid du Colombier versions of letters to be equivalent when matching.
6163*0b459c2cSDavid du Colombier
6164*0b459c2cSDavid du Colombier If REG_NOSUB is set, then when PREG is passed to regexec, that
6165*0b459c2cSDavid du Colombier routine will report only success or failure, and nothing about the
6166*0b459c2cSDavid du Colombier registers.
6167*0b459c2cSDavid du Colombier
6168*0b459c2cSDavid du Colombier It returns 0 if it succeeds, nonzero if it doesn't. (See regex.h for
6169*0b459c2cSDavid du Colombier the return codes and their meanings.) */
6170*0b459c2cSDavid du Colombier
6171*0b459c2cSDavid du Colombier int
regcomp(preg,pattern,cflags)6172*0b459c2cSDavid du Colombier regcomp (preg, pattern, cflags)
6173*0b459c2cSDavid du Colombier regex_t *preg;
6174*0b459c2cSDavid du Colombier const char *pattern;
6175*0b459c2cSDavid du Colombier int cflags;
6176*0b459c2cSDavid du Colombier {
6177*0b459c2cSDavid du Colombier reg_errcode_t ret;
6178*0b459c2cSDavid du Colombier unsigned syntax
6179*0b459c2cSDavid du Colombier = (cflags & REG_EXTENDED) ?
6180*0b459c2cSDavid du Colombier RE_SYNTAX_POSIX_EXTENDED : RE_SYNTAX_POSIX_BASIC;
6181*0b459c2cSDavid du Colombier
6182*0b459c2cSDavid du Colombier /* regex_compile will allocate the space for the compiled pattern. */
6183*0b459c2cSDavid du Colombier preg->buffer = 0;
6184*0b459c2cSDavid du Colombier preg->allocated = 0;
6185*0b459c2cSDavid du Colombier preg->used = 0;
6186*0b459c2cSDavid du Colombier
6187*0b459c2cSDavid du Colombier /* Don't bother to use a fastmap when searching. This simplifies the
6188*0b459c2cSDavid du Colombier REG_NEWLINE case: if we used a fastmap, we'd have to put all the
6189*0b459c2cSDavid du Colombier characters after newlines into the fastmap. This way, we just try
6190*0b459c2cSDavid du Colombier every character. */
6191*0b459c2cSDavid du Colombier preg->fastmap = 0;
6192*0b459c2cSDavid du Colombier
6193*0b459c2cSDavid du Colombier if (cflags & REG_ICASE)
6194*0b459c2cSDavid du Colombier {
6195*0b459c2cSDavid du Colombier unsigned i;
6196*0b459c2cSDavid du Colombier
6197*0b459c2cSDavid du Colombier preg->translate
6198*0b459c2cSDavid du Colombier = (RE_TRANSLATE_TYPE) malloc (CHAR_SET_SIZE
6199*0b459c2cSDavid du Colombier * sizeof (*(RE_TRANSLATE_TYPE)0));
6200*0b459c2cSDavid du Colombier if (preg->translate == NULL)
6201*0b459c2cSDavid du Colombier return (int) REG_ESPACE;
6202*0b459c2cSDavid du Colombier
6203*0b459c2cSDavid du Colombier /* Map uppercase characters to corresponding lowercase ones. */
6204*0b459c2cSDavid du Colombier for (i = 0; i < CHAR_SET_SIZE; i++)
6205*0b459c2cSDavid du Colombier preg->translate[i] = ISUPPER (i) ? tolower (i) : i;
6206*0b459c2cSDavid du Colombier }
6207*0b459c2cSDavid du Colombier else
6208*0b459c2cSDavid du Colombier preg->translate = NULL;
6209*0b459c2cSDavid du Colombier
6210*0b459c2cSDavid du Colombier /* If REG_NEWLINE is set, newlines are treated differently. */
6211*0b459c2cSDavid du Colombier if (cflags & REG_NEWLINE)
6212*0b459c2cSDavid du Colombier { /* REG_NEWLINE implies neither . nor [^...] match newline. */
6213*0b459c2cSDavid du Colombier syntax &= ~RE_DOT_NEWLINE;
6214*0b459c2cSDavid du Colombier syntax |= RE_HAT_LISTS_NOT_NEWLINE;
6215*0b459c2cSDavid du Colombier /* It also changes the matching behavior. */
6216*0b459c2cSDavid du Colombier preg->newline_anchor = 1;
6217*0b459c2cSDavid du Colombier }
6218*0b459c2cSDavid du Colombier else
6219*0b459c2cSDavid du Colombier preg->newline_anchor = 0;
6220*0b459c2cSDavid du Colombier
6221*0b459c2cSDavid du Colombier preg->no_sub = !!(cflags & REG_NOSUB);
6222*0b459c2cSDavid du Colombier
6223*0b459c2cSDavid du Colombier /* POSIX says a null character in the pattern terminates it, so we
6224*0b459c2cSDavid du Colombier can use strlen here in compiling the pattern. */
6225*0b459c2cSDavid du Colombier ret = regex_compile (pattern, strlen (pattern), syntax, preg);
6226*0b459c2cSDavid du Colombier
6227*0b459c2cSDavid du Colombier /* POSIX doesn't distinguish between an unmatched open-group and an
6228*0b459c2cSDavid du Colombier unmatched close-group: both are REG_EPAREN. */
6229*0b459c2cSDavid du Colombier if (ret == REG_ERPAREN) ret = REG_EPAREN;
6230*0b459c2cSDavid du Colombier
6231*0b459c2cSDavid du Colombier return (int) ret;
6232*0b459c2cSDavid du Colombier }
6233*0b459c2cSDavid du Colombier
6234*0b459c2cSDavid du Colombier
6235*0b459c2cSDavid du Colombier /* regexec searches for a given pattern, specified by PREG, in the
6236*0b459c2cSDavid du Colombier string STRING.
6237*0b459c2cSDavid du Colombier
6238*0b459c2cSDavid du Colombier If NMATCH is zero or REG_NOSUB was set in the cflags argument to
6239*0b459c2cSDavid du Colombier `regcomp', we ignore PMATCH. Otherwise, we assume PMATCH has at
6240*0b459c2cSDavid du Colombier least NMATCH elements, and we set them to the offsets of the
6241*0b459c2cSDavid du Colombier corresponding matched substrings.
6242*0b459c2cSDavid du Colombier
6243*0b459c2cSDavid du Colombier EFLAGS specifies `execution flags' which affect matching: if
6244*0b459c2cSDavid du Colombier REG_NOTBOL is set, then ^ does not match at the beginning of the
6245*0b459c2cSDavid du Colombier string; if REG_NOTEOL is set, then $ does not match at the end.
6246*0b459c2cSDavid du Colombier
6247*0b459c2cSDavid du Colombier We return 0 if we find a match and REG_NOMATCH if not. */
6248*0b459c2cSDavid du Colombier
6249*0b459c2cSDavid du Colombier int
regexec(preg,string,nmatch,pmatch,eflags)6250*0b459c2cSDavid du Colombier regexec (preg, string, nmatch, pmatch, eflags)
6251*0b459c2cSDavid du Colombier const regex_t *preg;
6252*0b459c2cSDavid du Colombier const char *string;
6253*0b459c2cSDavid du Colombier size_t nmatch;
6254*0b459c2cSDavid du Colombier regmatch_t pmatch[];
6255*0b459c2cSDavid du Colombier int eflags;
6256*0b459c2cSDavid du Colombier {
6257*0b459c2cSDavid du Colombier int ret;
6258*0b459c2cSDavid du Colombier struct re_registers regs;
6259*0b459c2cSDavid du Colombier regex_t private_preg;
6260*0b459c2cSDavid du Colombier int len = strlen (string);
6261*0b459c2cSDavid du Colombier boolean want_reg_info = !preg->no_sub && nmatch > 0;
6262*0b459c2cSDavid du Colombier
6263*0b459c2cSDavid du Colombier private_preg = *preg;
6264*0b459c2cSDavid du Colombier
6265*0b459c2cSDavid du Colombier private_preg.not_bol = !!(eflags & REG_NOTBOL);
6266*0b459c2cSDavid du Colombier private_preg.not_eol = !!(eflags & REG_NOTEOL);
6267*0b459c2cSDavid du Colombier
6268*0b459c2cSDavid du Colombier /* The user has told us exactly how many registers to return
6269*0b459c2cSDavid du Colombier information about, via `nmatch'. We have to pass that on to the
6270*0b459c2cSDavid du Colombier matching routines. */
6271*0b459c2cSDavid du Colombier private_preg.regs_allocated = REGS_FIXED;
6272*0b459c2cSDavid du Colombier
6273*0b459c2cSDavid du Colombier if (want_reg_info)
6274*0b459c2cSDavid du Colombier {
6275*0b459c2cSDavid du Colombier regs.num_regs = nmatch;
6276*0b459c2cSDavid du Colombier regs.start = TALLOC (nmatch, regoff_t);
6277*0b459c2cSDavid du Colombier regs.end = TALLOC (nmatch, regoff_t);
6278*0b459c2cSDavid du Colombier if (regs.start == NULL || regs.end == NULL)
6279*0b459c2cSDavid du Colombier return (int) REG_NOMATCH;
6280*0b459c2cSDavid du Colombier }
6281*0b459c2cSDavid du Colombier
6282*0b459c2cSDavid du Colombier /* Perform the searching operation. */
6283*0b459c2cSDavid du Colombier ret = re_search (&private_preg, string, len,
6284*0b459c2cSDavid du Colombier /* start: */ 0, /* range: */ len,
6285*0b459c2cSDavid du Colombier want_reg_info ? ®s : (struct re_registers *) 0);
6286*0b459c2cSDavid du Colombier
6287*0b459c2cSDavid du Colombier /* Copy the register information to the POSIX structure. */
6288*0b459c2cSDavid du Colombier if (want_reg_info)
6289*0b459c2cSDavid du Colombier {
6290*0b459c2cSDavid du Colombier if (ret >= 0)
6291*0b459c2cSDavid du Colombier {
6292*0b459c2cSDavid du Colombier unsigned r;
6293*0b459c2cSDavid du Colombier
6294*0b459c2cSDavid du Colombier for (r = 0; r < nmatch; r++)
6295*0b459c2cSDavid du Colombier {
6296*0b459c2cSDavid du Colombier pmatch[r].rm_so = regs.start[r];
6297*0b459c2cSDavid du Colombier pmatch[r].rm_eo = regs.end[r];
6298*0b459c2cSDavid du Colombier }
6299*0b459c2cSDavid du Colombier }
6300*0b459c2cSDavid du Colombier
6301*0b459c2cSDavid du Colombier /* If we needed the temporary register info, free the space now. */
6302*0b459c2cSDavid du Colombier free (regs.start);
6303*0b459c2cSDavid du Colombier free (regs.end);
6304*0b459c2cSDavid du Colombier }
6305*0b459c2cSDavid du Colombier
6306*0b459c2cSDavid du Colombier /* We want zero return to mean success, unlike `re_search'. */
6307*0b459c2cSDavid du Colombier return ret >= 0 ? (int) REG_NOERROR : (int) REG_NOMATCH;
6308*0b459c2cSDavid du Colombier }
6309*0b459c2cSDavid du Colombier
6310*0b459c2cSDavid du Colombier
6311*0b459c2cSDavid du Colombier /* Returns a message corresponding to an error code, ERRCODE, returned
6312*0b459c2cSDavid du Colombier from either regcomp or regexec. We don't use PREG here. */
6313*0b459c2cSDavid du Colombier
6314*0b459c2cSDavid du Colombier size_t
regerror(errcode,preg,errbuf,errbuf_size)6315*0b459c2cSDavid du Colombier regerror (errcode, preg, errbuf, errbuf_size)
6316*0b459c2cSDavid du Colombier int errcode;
6317*0b459c2cSDavid du Colombier const regex_t *preg;
6318*0b459c2cSDavid du Colombier char *errbuf;
6319*0b459c2cSDavid du Colombier size_t errbuf_size;
6320*0b459c2cSDavid du Colombier {
6321*0b459c2cSDavid du Colombier const char *msg;
6322*0b459c2cSDavid du Colombier size_t msg_size;
6323*0b459c2cSDavid du Colombier
6324*0b459c2cSDavid du Colombier if (errcode < 0
6325*0b459c2cSDavid du Colombier || errcode >= (sizeof (re_error_msgid) / sizeof (re_error_msgid[0])))
6326*0b459c2cSDavid du Colombier /* Only error codes returned by the rest of the code should be passed
6327*0b459c2cSDavid du Colombier to this routine. If we are given anything else, or if other regex
6328*0b459c2cSDavid du Colombier code generates an invalid error code, then the program has a bug.
6329*0b459c2cSDavid du Colombier Dump core so we can fix it. */
6330*0b459c2cSDavid du Colombier abort ();
6331*0b459c2cSDavid du Colombier
6332*0b459c2cSDavid du Colombier msg = gettext (re_error_msgid[errcode]);
6333*0b459c2cSDavid du Colombier
6334*0b459c2cSDavid du Colombier msg_size = strlen (msg) + 1; /* Includes the null. */
6335*0b459c2cSDavid du Colombier
6336*0b459c2cSDavid du Colombier if (errbuf_size != 0)
6337*0b459c2cSDavid du Colombier {
6338*0b459c2cSDavid du Colombier if (msg_size > errbuf_size)
6339*0b459c2cSDavid du Colombier {
6340*0b459c2cSDavid du Colombier strncpy (errbuf, msg, errbuf_size - 1);
6341*0b459c2cSDavid du Colombier errbuf[errbuf_size - 1] = 0;
6342*0b459c2cSDavid du Colombier }
6343*0b459c2cSDavid du Colombier else
6344*0b459c2cSDavid du Colombier strcpy (errbuf, msg);
6345*0b459c2cSDavid du Colombier }
6346*0b459c2cSDavid du Colombier
6347*0b459c2cSDavid du Colombier return msg_size;
6348*0b459c2cSDavid du Colombier }
6349*0b459c2cSDavid du Colombier
6350*0b459c2cSDavid du Colombier
6351*0b459c2cSDavid du Colombier /* Free dynamically allocated space used by PREG. */
6352*0b459c2cSDavid du Colombier
6353*0b459c2cSDavid du Colombier void
regfree(preg)6354*0b459c2cSDavid du Colombier regfree (preg)
6355*0b459c2cSDavid du Colombier regex_t *preg;
6356*0b459c2cSDavid du Colombier {
6357*0b459c2cSDavid du Colombier if (preg->buffer != NULL)
6358*0b459c2cSDavid du Colombier free (preg->buffer);
6359*0b459c2cSDavid du Colombier preg->buffer = NULL;
6360*0b459c2cSDavid du Colombier
6361*0b459c2cSDavid du Colombier preg->allocated = 0;
6362*0b459c2cSDavid du Colombier preg->used = 0;
6363*0b459c2cSDavid du Colombier
6364*0b459c2cSDavid du Colombier if (preg->fastmap != NULL)
6365*0b459c2cSDavid du Colombier free (preg->fastmap);
6366*0b459c2cSDavid du Colombier preg->fastmap = NULL;
6367*0b459c2cSDavid du Colombier preg->fastmap_accurate = 0;
6368*0b459c2cSDavid du Colombier
6369*0b459c2cSDavid du Colombier if (preg->translate != NULL)
6370*0b459c2cSDavid du Colombier free (preg->translate);
6371*0b459c2cSDavid du Colombier preg->translate = NULL;
6372*0b459c2cSDavid du Colombier }
6373*0b459c2cSDavid du Colombier
6374*0b459c2cSDavid du Colombier #endif /* not emacs */
6375