xref: /netbsd-src/external/gpl2/diffutils/dist/lib/regex.c (revision 75f6d617e282811cb173c2ccfbf5df0dd71f7045)
1*75f6d617Schristos /*	$NetBSD: regex.c,v 1.1.1.1 2016/01/13 03:15:30 christos Exp $	*/
2*75f6d617Schristos 
3*75f6d617Schristos /* Extended regular expression matching and search library,
4*75f6d617Schristos    version 0.12.
5*75f6d617Schristos    (Implements POSIX draft P1003.2/D11.2, except for some of the
6*75f6d617Schristos    internationalization features.)
7*75f6d617Schristos    Copyright (C) 1993-1999, 2000, 2001 Free Software Foundation, Inc.
8*75f6d617Schristos 
9*75f6d617Schristos    This program is free software; you can redistribute it and/or modify
10*75f6d617Schristos    it under the terms of the GNU General Public License as published by
11*75f6d617Schristos    the Free Software Foundation; either version 2, or (at your option)
12*75f6d617Schristos    any later version.
13*75f6d617Schristos 
14*75f6d617Schristos    This program is distributed in the hope that it will be useful,
15*75f6d617Schristos    but WITHOUT ANY WARRANTY; without even the implied warranty of
16*75f6d617Schristos    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
17*75f6d617Schristos    GNU General Public License for more details.
18*75f6d617Schristos 
19*75f6d617Schristos    You should have received a copy of the GNU General Public License
20*75f6d617Schristos    along with this program; if not, write to the Free Software Foundation,
21*75f6d617Schristos    Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.  */
22*75f6d617Schristos 
23*75f6d617Schristos /* AIX requires this to be the first thing in the file. */
24*75f6d617Schristos #if defined _AIX && !defined REGEX_MALLOC
25*75f6d617Schristos   #pragma alloca
26*75f6d617Schristos #endif
27*75f6d617Schristos 
28*75f6d617Schristos #undef	_GNU_SOURCE
29*75f6d617Schristos #define _GNU_SOURCE
30*75f6d617Schristos 
31*75f6d617Schristos #ifdef HAVE_CONFIG_H
32*75f6d617Schristos # include <config.h>
33*75f6d617Schristos #endif
34*75f6d617Schristos 
35*75f6d617Schristos #ifndef PARAMS
36*75f6d617Schristos # if defined __GNUC__ || (defined __STDC__ && __STDC__)
37*75f6d617Schristos #  define PARAMS(args) args
38*75f6d617Schristos # else
39*75f6d617Schristos #  define PARAMS(args) ()
40*75f6d617Schristos # endif  /* GCC.  */
41*75f6d617Schristos #endif  /* Not PARAMS.  */
42*75f6d617Schristos 
43*75f6d617Schristos #ifndef INSIDE_RECURSION
44*75f6d617Schristos 
45*75f6d617Schristos # if defined STDC_HEADERS && !defined emacs
46*75f6d617Schristos #  include <stddef.h>
47*75f6d617Schristos # else
48*75f6d617Schristos /* We need this for `regex.h', and perhaps for the Emacs include files.  */
49*75f6d617Schristos #  include <sys/types.h>
50*75f6d617Schristos # endif
51*75f6d617Schristos 
52*75f6d617Schristos # define WIDE_CHAR_SUPPORT (HAVE_WCTYPE_H && HAVE_WCHAR_H && HAVE_BTOWC)
53*75f6d617Schristos 
54*75f6d617Schristos /* For platforms which support the ISO C amendment 1 functionality we
55*75f6d617Schristos    support user defined character classes.  */
56*75f6d617Schristos # if defined _LIBC || WIDE_CHAR_SUPPORT
57*75f6d617Schristos /* Solaris 2.5 has a bug: <wchar.h> must be included before <wctype.h>.  */
58*75f6d617Schristos #  include <wchar.h>
59*75f6d617Schristos #  include <wctype.h>
60*75f6d617Schristos # endif
61*75f6d617Schristos 
62*75f6d617Schristos # ifdef _LIBC
63*75f6d617Schristos /* We have to keep the namespace clean.  */
64*75f6d617Schristos #  define regfree(preg) __regfree (preg)
65*75f6d617Schristos #  define regexec(pr, st, nm, pm, ef) __regexec (pr, st, nm, pm, ef)
66*75f6d617Schristos #  define regcomp(preg, pattern, cflags) __regcomp (preg, pattern, cflags)
67*75f6d617Schristos #  define regerror(errcode, preg, errbuf, errbuf_size) \
68*75f6d617Schristos 	__regerror(errcode, preg, errbuf, errbuf_size)
69*75f6d617Schristos #  define re_set_registers(bu, re, nu, st, en) \
70*75f6d617Schristos 	__re_set_registers (bu, re, nu, st, en)
71*75f6d617Schristos #  define re_match_2(bufp, string1, size1, string2, size2, pos, regs, stop) \
72*75f6d617Schristos 	__re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop)
73*75f6d617Schristos #  define re_match(bufp, string, size, pos, regs) \
74*75f6d617Schristos 	__re_match (bufp, string, size, pos, regs)
75*75f6d617Schristos #  define re_search(bufp, string, size, startpos, range, regs) \
76*75f6d617Schristos 	__re_search (bufp, string, size, startpos, range, regs)
77*75f6d617Schristos #  define re_compile_pattern(pattern, length, bufp) \
78*75f6d617Schristos 	__re_compile_pattern (pattern, length, bufp)
79*75f6d617Schristos #  define re_set_syntax(syntax) __re_set_syntax (syntax)
80*75f6d617Schristos #  define re_search_2(bufp, st1, s1, st2, s2, startpos, range, regs, stop) \
81*75f6d617Schristos 	__re_search_2 (bufp, st1, s1, st2, s2, startpos, range, regs, stop)
82*75f6d617Schristos #  define re_compile_fastmap(bufp) __re_compile_fastmap (bufp)
83*75f6d617Schristos 
84*75f6d617Schristos #  define btowc __btowc
85*75f6d617Schristos #  define iswctype __iswctype
86*75f6d617Schristos #  define mbrtowc __mbrtowc
87*75f6d617Schristos #  define wcslen __wcslen
88*75f6d617Schristos #  define wcscoll __wcscoll
89*75f6d617Schristos #  define wcrtomb __wcrtomb
90*75f6d617Schristos 
91*75f6d617Schristos /* We are also using some library internals.  */
92*75f6d617Schristos #  include <locale/localeinfo.h>
93*75f6d617Schristos #  include <locale/elem-hash.h>
94*75f6d617Schristos #  include <langinfo.h>
95*75f6d617Schristos #  include <locale/coll-lookup.h>
96*75f6d617Schristos # endif
97*75f6d617Schristos 
98*75f6d617Schristos /* This is for other GNU distributions with internationalized messages.  */
99*75f6d617Schristos # if (HAVE_LIBINTL_H && ENABLE_NLS) || defined _LIBC
100*75f6d617Schristos #  include <libintl.h>
101*75f6d617Schristos #  ifdef _LIBC
102*75f6d617Schristos #   undef gettext
103*75f6d617Schristos #   define gettext(msgid) __dcgettext ("libc", msgid, LC_MESSAGES)
104*75f6d617Schristos #  endif
105*75f6d617Schristos # else
106*75f6d617Schristos #  define gettext(msgid) (msgid)
107*75f6d617Schristos # endif
108*75f6d617Schristos 
109*75f6d617Schristos # ifndef gettext_noop
110*75f6d617Schristos /* This define is so xgettext can find the internationalizable
111*75f6d617Schristos    strings.  */
112*75f6d617Schristos #  define gettext_noop(String) String
113*75f6d617Schristos # endif
114*75f6d617Schristos 
115*75f6d617Schristos /* Support for bounded pointers.  */
116*75f6d617Schristos # if !defined _LIBC && !defined __BOUNDED_POINTERS__
117*75f6d617Schristos #  define __bounded	/* nothing */
118*75f6d617Schristos #  define __unbounded	/* nothing */
119*75f6d617Schristos #  define __ptrvalue	/* nothing */
120*75f6d617Schristos # endif
121*75f6d617Schristos 
122*75f6d617Schristos /* The `emacs' switch turns on certain matching commands
123*75f6d617Schristos    that make sense only in Emacs. */
124*75f6d617Schristos # ifdef emacs
125*75f6d617Schristos 
126*75f6d617Schristos #  include "lisp.h"
127*75f6d617Schristos #  include "buffer.h"
128*75f6d617Schristos #  include "syntax.h"
129*75f6d617Schristos 
130*75f6d617Schristos # else  /* not emacs */
131*75f6d617Schristos 
132*75f6d617Schristos /* If we are not linking with Emacs proper,
133*75f6d617Schristos    we can't use the relocating allocator
134*75f6d617Schristos    even if config.h says that we can.  */
135*75f6d617Schristos #  undef REL_ALLOC
136*75f6d617Schristos 
137*75f6d617Schristos #  if defined STDC_HEADERS || defined _LIBC
138*75f6d617Schristos #   include <stdlib.h>
139*75f6d617Schristos #  else
140*75f6d617Schristos char *malloc ();
141*75f6d617Schristos char *realloc ();
142*75f6d617Schristos #  endif
143*75f6d617Schristos 
144*75f6d617Schristos /* When used in Emacs's lib-src, we need to get bzero and bcopy somehow.
145*75f6d617Schristos    If nothing else has been done, use the method below.  */
146*75f6d617Schristos #  ifdef INHIBIT_STRING_HEADER
147*75f6d617Schristos #   if !(defined HAVE_BZERO && defined HAVE_BCOPY)
148*75f6d617Schristos #    if !defined bzero && !defined bcopy
149*75f6d617Schristos #     undef INHIBIT_STRING_HEADER
150*75f6d617Schristos #    endif
151*75f6d617Schristos #   endif
152*75f6d617Schristos #  endif
153*75f6d617Schristos 
154*75f6d617Schristos /* This is the normal way of making sure we have a bcopy and a bzero.
155*75f6d617Schristos    This is used in most programs--a few other programs avoid this
156*75f6d617Schristos    by defining INHIBIT_STRING_HEADER.  */
157*75f6d617Schristos #  ifndef INHIBIT_STRING_HEADER
158*75f6d617Schristos #   if defined HAVE_STRING_H || defined STDC_HEADERS || defined _LIBC
159*75f6d617Schristos #    include <string.h>
160*75f6d617Schristos #    ifndef bzero
161*75f6d617Schristos #     ifndef _LIBC
162*75f6d617Schristos #      define bzero(s, n)	(memset (s, '\0', n), (s))
163*75f6d617Schristos #     else
164*75f6d617Schristos #      define bzero(s, n)	__bzero (s, n)
165*75f6d617Schristos #     endif
166*75f6d617Schristos #    endif
167*75f6d617Schristos #   else
168*75f6d617Schristos #    include <strings.h>
169*75f6d617Schristos #    ifndef memcmp
170*75f6d617Schristos #     define memcmp(s1, s2, n)	bcmp (s1, s2, n)
171*75f6d617Schristos #    endif
172*75f6d617Schristos #    ifndef memcpy
173*75f6d617Schristos #     define memcpy(d, s, n)	(bcopy (s, d, n), (d))
174*75f6d617Schristos #    endif
175*75f6d617Schristos #   endif
176*75f6d617Schristos #  endif
177*75f6d617Schristos 
178*75f6d617Schristos /* Define the syntax stuff for \<, \>, etc.  */
179*75f6d617Schristos 
180*75f6d617Schristos /* This must be nonzero for the wordchar and notwordchar pattern
181*75f6d617Schristos    commands in re_match_2.  */
182*75f6d617Schristos #  ifndef Sword
183*75f6d617Schristos #   define Sword 1
184*75f6d617Schristos #  endif
185*75f6d617Schristos 
186*75f6d617Schristos #  ifdef SWITCH_ENUM_BUG
187*75f6d617Schristos #   define SWITCH_ENUM_CAST(x) ((int)(x))
188*75f6d617Schristos #  else
189*75f6d617Schristos #   define SWITCH_ENUM_CAST(x) (x)
190*75f6d617Schristos #  endif
191*75f6d617Schristos 
192*75f6d617Schristos # endif /* not emacs */
193*75f6d617Schristos 
194*75f6d617Schristos # if defined _LIBC || HAVE_LIMITS_H
195*75f6d617Schristos #  include <limits.h>
196*75f6d617Schristos # endif
197*75f6d617Schristos 
198*75f6d617Schristos # ifndef MB_LEN_MAX
199*75f6d617Schristos #  define MB_LEN_MAX 1
200*75f6d617Schristos # endif
201*75f6d617Schristos 
202*75f6d617Schristos /* Get the interface, including the syntax bits.  */
203*75f6d617Schristos # include <regex.h>
204*75f6d617Schristos 
205*75f6d617Schristos /* isalpha etc. are used for the character classes.  */
206*75f6d617Schristos # include <ctype.h>
207*75f6d617Schristos 
208*75f6d617Schristos /* Jim Meyering writes:
209*75f6d617Schristos 
210*75f6d617Schristos    "... Some ctype macros are valid only for character codes that
211*75f6d617Schristos    isascii says are ASCII (SGI's IRIX-4.0.5 is one such system --when
212*75f6d617Schristos    using /bin/cc or gcc but without giving an ansi option).  So, all
213*75f6d617Schristos    ctype uses should be through macros like ISPRINT...  If
214*75f6d617Schristos    STDC_HEADERS is defined, then autoconf has verified that the ctype
215*75f6d617Schristos    macros don't need to be guarded with references to isascii. ...
216*75f6d617Schristos    Defining isascii to 1 should let any compiler worth its salt
217*75f6d617Schristos    eliminate the && through constant folding."
218*75f6d617Schristos    Solaris defines some of these symbols so we must undefine them first.  */
219*75f6d617Schristos 
220*75f6d617Schristos # if defined STDC_HEADERS || (!defined isascii && !defined HAVE_ISASCII)
221*75f6d617Schristos #  define IN_CTYPE_DOMAIN(c) 1
222*75f6d617Schristos # else
223*75f6d617Schristos #  define IN_CTYPE_DOMAIN(c) isascii(c)
224*75f6d617Schristos # endif
225*75f6d617Schristos 
226*75f6d617Schristos # ifdef isblank
227*75f6d617Schristos #  define ISBLANK(c) (IN_CTYPE_DOMAIN (c) && isblank (c))
228*75f6d617Schristos # else
229*75f6d617Schristos #  define ISBLANK(c) ((c) == ' ' || (c) == '\t')
230*75f6d617Schristos # endif
231*75f6d617Schristos # ifdef isgraph
232*75f6d617Schristos #  define ISGRAPH(c) (IN_CTYPE_DOMAIN (c) && isgraph (c))
233*75f6d617Schristos # else
234*75f6d617Schristos #  define ISGRAPH(c) (IN_CTYPE_DOMAIN (c) && isprint (c) && !isspace (c))
235*75f6d617Schristos # endif
236*75f6d617Schristos 
237*75f6d617Schristos # undef ISPRINT
238*75f6d617Schristos # define ISPRINT(c) (IN_CTYPE_DOMAIN (c) && isprint (c))
239*75f6d617Schristos # define ISDIGIT(c) (IN_CTYPE_DOMAIN (c) && isdigit (c))
240*75f6d617Schristos # define ISALNUM(c) (IN_CTYPE_DOMAIN (c) && isalnum (c))
241*75f6d617Schristos # define ISALPHA(c) (IN_CTYPE_DOMAIN (c) && isalpha (c))
242*75f6d617Schristos # define ISCNTRL(c) (IN_CTYPE_DOMAIN (c) && iscntrl (c))
243*75f6d617Schristos # define ISLOWER(c) (IN_CTYPE_DOMAIN (c) && islower (c))
244*75f6d617Schristos # define ISPUNCT(c) (IN_CTYPE_DOMAIN (c) && ispunct (c))
245*75f6d617Schristos # define ISSPACE(c) (IN_CTYPE_DOMAIN (c) && isspace (c))
246*75f6d617Schristos # define ISUPPER(c) (IN_CTYPE_DOMAIN (c) && isupper (c))
247*75f6d617Schristos # define ISXDIGIT(c) (IN_CTYPE_DOMAIN (c) && isxdigit (c))
248*75f6d617Schristos 
249*75f6d617Schristos # ifdef _tolower
250*75f6d617Schristos #  define TOLOWER(c) _tolower(c)
251*75f6d617Schristos # else
252*75f6d617Schristos #  define TOLOWER(c) tolower(c)
253*75f6d617Schristos # endif
254*75f6d617Schristos 
/* Fallback for environments whose headers did not already supply NULL.
   The expansion is fully parenthesized so that it behaves as a single
   primary expression wherever it is substituted (for example as the
   operand of `sizeof', where an unparenthesized `(void *)0' would be
   parsed as `sizeof (void *)' followed by a stray `0').  */
# ifndef NULL
#  define NULL ((void *) 0)
# endif
258*75f6d617Schristos 
259*75f6d617Schristos /* We remove any previous definition of `SIGN_EXTEND_CHAR',
260*75f6d617Schristos    since ours (we hope) works properly with all combinations of
261*75f6d617Schristos    machines, compilers, `char' and `unsigned char' argument types.
262*75f6d617Schristos    (Per Bothner suggested the basic approach.)  */
263*75f6d617Schristos # undef SIGN_EXTEND_CHAR
264*75f6d617Schristos # if __STDC__
265*75f6d617Schristos #  define SIGN_EXTEND_CHAR(c) ((signed char) (c))
266*75f6d617Schristos # else  /* not __STDC__ */
267*75f6d617Schristos /* As in Harbison and Steele.  */
268*75f6d617Schristos #  define SIGN_EXTEND_CHAR(c) ((((unsigned char) (c)) ^ 128) - 128)
269*75f6d617Schristos # endif
270*75f6d617Schristos 
271*75f6d617Schristos # ifndef emacs
272*75f6d617Schristos /* How many characters in the character set.  */
273*75f6d617Schristos #  define CHAR_SET_SIZE 256
274*75f6d617Schristos 
275*75f6d617Schristos #  ifdef SYNTAX_TABLE
276*75f6d617Schristos 
277*75f6d617Schristos extern char *re_syntax_table;
278*75f6d617Schristos 
279*75f6d617Schristos #  else /* not SYNTAX_TABLE */
280*75f6d617Schristos 
281*75f6d617Schristos static char re_syntax_table[CHAR_SET_SIZE];
282*75f6d617Schristos 
283*75f6d617Schristos static void init_syntax_once PARAMS ((void));
284*75f6d617Schristos 
285*75f6d617Schristos static void
init_syntax_once()286*75f6d617Schristos init_syntax_once ()
287*75f6d617Schristos {
288*75f6d617Schristos    register int c;
289*75f6d617Schristos    static int done = 0;
290*75f6d617Schristos 
291*75f6d617Schristos    if (done)
292*75f6d617Schristos      return;
293*75f6d617Schristos    bzero (re_syntax_table, sizeof re_syntax_table);
294*75f6d617Schristos 
295*75f6d617Schristos    for (c = 0; c < CHAR_SET_SIZE; ++c)
296*75f6d617Schristos      if (ISALNUM (c))
297*75f6d617Schristos 	re_syntax_table[c] = Sword;
298*75f6d617Schristos 
299*75f6d617Schristos    re_syntax_table['_'] = Sword;
300*75f6d617Schristos 
301*75f6d617Schristos    done = 1;
302*75f6d617Schristos }
303*75f6d617Schristos 
304*75f6d617Schristos #  endif /* not SYNTAX_TABLE */
305*75f6d617Schristos 
306*75f6d617Schristos #  define SYNTAX(c) re_syntax_table[(unsigned char) (c)]
307*75f6d617Schristos 
308*75f6d617Schristos # endif /* emacs */
309*75f6d617Schristos 
310*75f6d617Schristos /* Should we use malloc or alloca?  If REGEX_MALLOC is not defined, we
311*75f6d617Schristos    use `alloca' instead of `malloc'.  This is because using malloc in
312*75f6d617Schristos    re_search* or re_match* could cause memory leaks when C-g is used in
313*75f6d617Schristos    Emacs; also, malloc is slower and causes storage fragmentation.  On
314*75f6d617Schristos    the other hand, malloc is more portable, and easier to debug.
315*75f6d617Schristos 
316*75f6d617Schristos    Because we sometimes use alloca, some routines have to be macros,
317*75f6d617Schristos    not functions -- `alloca'-allocated space disappears at the end of the
318*75f6d617Schristos    function it is called in.  */
319*75f6d617Schristos 
320*75f6d617Schristos # ifdef REGEX_MALLOC
321*75f6d617Schristos 
322*75f6d617Schristos #  define REGEX_ALLOCATE malloc
323*75f6d617Schristos #  define REGEX_REALLOCATE(source, osize, nsize) realloc (source, nsize)
324*75f6d617Schristos #  define REGEX_FREE free
325*75f6d617Schristos 
326*75f6d617Schristos # else /* not REGEX_MALLOC  */
327*75f6d617Schristos 
328*75f6d617Schristos /* Emacs already defines alloca, sometimes.  */
329*75f6d617Schristos #  ifndef alloca
330*75f6d617Schristos 
331*75f6d617Schristos /* Make alloca work the best possible way.  */
332*75f6d617Schristos #   ifdef __GNUC__
333*75f6d617Schristos #    define alloca __builtin_alloca
334*75f6d617Schristos #   else /* not __GNUC__ */
335*75f6d617Schristos #    if HAVE_ALLOCA_H
336*75f6d617Schristos #     include <alloca.h>
337*75f6d617Schristos #    endif /* HAVE_ALLOCA_H */
338*75f6d617Schristos #   endif /* not __GNUC__ */
339*75f6d617Schristos 
340*75f6d617Schristos #  endif /* not alloca */
341*75f6d617Schristos 
342*75f6d617Schristos #  define REGEX_ALLOCATE alloca
343*75f6d617Schristos 
344*75f6d617Schristos /* Assumes a `char *destination' variable.  */
345*75f6d617Schristos #  define REGEX_REALLOCATE(source, osize, nsize)			\
346*75f6d617Schristos   (destination = (char *) alloca (nsize),				\
347*75f6d617Schristos    memcpy (destination, source, osize))
348*75f6d617Schristos 
349*75f6d617Schristos /* No need to do anything to free, after alloca.  */
350*75f6d617Schristos #  define REGEX_FREE(arg) ((void)0) /* Do nothing!  But inhibit gcc warning.  */
351*75f6d617Schristos 
352*75f6d617Schristos # endif /* not REGEX_MALLOC */
353*75f6d617Schristos 
354*75f6d617Schristos /* Define how to allocate the failure stack.  */
355*75f6d617Schristos 
356*75f6d617Schristos # if defined REL_ALLOC && defined REGEX_MALLOC
357*75f6d617Schristos 
358*75f6d617Schristos #  define REGEX_ALLOCATE_STACK(size)				\
359*75f6d617Schristos   r_alloc (&failure_stack_ptr, (size))
360*75f6d617Schristos #  define REGEX_REALLOCATE_STACK(source, osize, nsize)		\
361*75f6d617Schristos   r_re_alloc (&failure_stack_ptr, (nsize))
362*75f6d617Schristos #  define REGEX_FREE_STACK(ptr)					\
363*75f6d617Schristos   r_alloc_free (&failure_stack_ptr)
364*75f6d617Schristos 
365*75f6d617Schristos # else /* not using relocating allocator */
366*75f6d617Schristos 
367*75f6d617Schristos #  ifdef REGEX_MALLOC
368*75f6d617Schristos 
369*75f6d617Schristos #   define REGEX_ALLOCATE_STACK malloc
370*75f6d617Schristos #   define REGEX_REALLOCATE_STACK(source, osize, nsize) realloc (source, nsize)
371*75f6d617Schristos #   define REGEX_FREE_STACK free
372*75f6d617Schristos 
373*75f6d617Schristos #  else /* not REGEX_MALLOC */
374*75f6d617Schristos 
375*75f6d617Schristos #   define REGEX_ALLOCATE_STACK alloca
376*75f6d617Schristos 
377*75f6d617Schristos #   define REGEX_REALLOCATE_STACK(source, osize, nsize)			\
378*75f6d617Schristos    REGEX_REALLOCATE (source, osize, nsize)
379*75f6d617Schristos /* No need to explicitly free anything.  */
380*75f6d617Schristos #   define REGEX_FREE_STACK(arg)
381*75f6d617Schristos 
382*75f6d617Schristos #  endif /* not REGEX_MALLOC */
383*75f6d617Schristos # endif /* not using relocating allocator */
384*75f6d617Schristos 
385*75f6d617Schristos 
386*75f6d617Schristos /* True if `size1' is nonzero and PTR is pointing anywhere inside
387*75f6d617Schristos    `string1' or just past its end.  This works if PTR is NULL, which is
388*75f6d617Schristos    a good thing.  */
389*75f6d617Schristos # define FIRST_STRING_P(ptr) 					\
390*75f6d617Schristos   (size1 && string1 <= (ptr) && (ptr) <= string1 + size1)
391*75f6d617Schristos 
392*75f6d617Schristos /* (Re)Allocate N items of type T using malloc, or fail.  */
/* TALLOC: malloc room for N objects of type T and cast the result to
   `T *'.  The product N * sizeof (T) is not checked for overflow.  */
393*75f6d617Schristos # define TALLOC(n, t) ((t *) malloc ((n) * sizeof (t)))
/* RETALLOC: resize ADDR to hold N objects of type T and store the result
   back into ADDR.  Because the result of realloc is assigned straight to
   ADDR, a failed realloc leaves ADDR null and this macro keeps no copy of
   the previous pointer.  ADDR is evaluated more than once.  */
394*75f6d617Schristos # define RETALLOC(addr, n, t) ((addr) = (t *) realloc (addr, (n) * sizeof (t)))
/* RETALLOC_IF: RETALLOC when ADDR is already non-null, TALLOC otherwise.
   This expands to an if/else statement without a trailing semicolon, so
   it can only be used where a statement is allowed, not inside an
   expression.  */
395*75f6d617Schristos # define RETALLOC_IF(addr, n, t) \
396*75f6d617Schristos   if (addr) RETALLOC((addr), (n), t); else (addr) = TALLOC ((n), t)
/* REGEX_TALLOC: like TALLOC, but goes through REGEX_ALLOCATE, which is
   defined earlier in this file as either malloc or alloca depending on
   REGEX_MALLOC.  */
397*75f6d617Schristos # define REGEX_TALLOC(n, t) ((t *) REGEX_ALLOCATE ((n) * sizeof (t)))
398*75f6d617Schristos 
399*75f6d617Schristos # define BYTEWIDTH 8 /* In bits.  */
400*75f6d617Schristos 
401*75f6d617Schristos # define STREQ(s1, s2) ((strcmp (s1, s2) == 0))
402*75f6d617Schristos 
403*75f6d617Schristos # undef MAX
404*75f6d617Schristos # undef MIN
405*75f6d617Schristos # define MAX(a, b) ((a) > (b) ? (a) : (b))
406*75f6d617Schristos # define MIN(a, b) ((a) < (b) ? (a) : (b))
407*75f6d617Schristos 
408*75f6d617Schristos typedef char boolean;
409*75f6d617Schristos # define false 0
410*75f6d617Schristos # define true 1
411*75f6d617Schristos 
/* Forward declarations for the `byte_' family of internal routines
   (pattern compilation, matching, searching and fastmap construction).
   Their definitions are not part of this section of the file.
   NOTE(review): byte_regex_compile is declared with _RE_ARGS while the
   other three use PARAMS; _RE_ARGS is not defined in this section and is
   presumably supplied by <regex.h> -- confirm.  */
412*75f6d617Schristos static reg_errcode_t byte_regex_compile _RE_ARGS ((const char *pattern, size_t size,
413*75f6d617Schristos                                                    reg_syntax_t syntax,
414*75f6d617Schristos                                                    struct re_pattern_buffer *bufp));
415*75f6d617Schristos 
416*75f6d617Schristos static int byte_re_match_2_internal PARAMS ((struct re_pattern_buffer *bufp,
417*75f6d617Schristos 					     const char *string1, int size1,
418*75f6d617Schristos 					     const char *string2, int size2,
419*75f6d617Schristos 					     int pos,
420*75f6d617Schristos 					     struct re_registers *regs,
421*75f6d617Schristos 					     int stop));
422*75f6d617Schristos static int byte_re_search_2 PARAMS ((struct re_pattern_buffer *bufp,
423*75f6d617Schristos 				     const char *string1, int size1,
424*75f6d617Schristos 				     const char *string2, int size2,
425*75f6d617Schristos 				     int startpos, int range,
426*75f6d617Schristos 				     struct re_registers *regs, int stop));
427*75f6d617Schristos static int byte_re_compile_fastmap PARAMS ((struct re_pattern_buffer *bufp));
428*75f6d617Schristos 
/* `wcs_' counterparts of the `byte_' forward declarations, visible only
   when MBS_SUPPORT is defined.  Compared with its byte_ sibling,
   wcs_re_match_2_internal takes the two input strings both as
   `const char *' (cstring1/cstring2 with csize1/csize2) and as
   `wchar_t *' (string1/string2 with size1/size2), plus two int arrays,
   mbs_offset1 and mbs_offset2.
   NOTE(review): the offset arrays presumably relate wide-character
   positions to byte offsets in cstring1/cstring2 -- confirm against the
   definition, which is outside this section.  */
429*75f6d617Schristos #ifdef MBS_SUPPORT
430*75f6d617Schristos static reg_errcode_t wcs_regex_compile _RE_ARGS ((const char *pattern, size_t size,
431*75f6d617Schristos                                                    reg_syntax_t syntax,
432*75f6d617Schristos                                                    struct re_pattern_buffer *bufp));
433*75f6d617Schristos 
434*75f6d617Schristos 
435*75f6d617Schristos static int wcs_re_match_2_internal PARAMS ((struct re_pattern_buffer *bufp,
436*75f6d617Schristos 					    const char *cstring1, int csize1,
437*75f6d617Schristos 					    const char *cstring2, int csize2,
438*75f6d617Schristos 					    int pos,
439*75f6d617Schristos 					    struct re_registers *regs,
440*75f6d617Schristos 					    int stop,
441*75f6d617Schristos 					    wchar_t *string1, int size1,
442*75f6d617Schristos 					    wchar_t *string2, int size2,
443*75f6d617Schristos 					    int *mbs_offset1, int *mbs_offset2));
444*75f6d617Schristos static int wcs_re_search_2 PARAMS ((struct re_pattern_buffer *bufp,
445*75f6d617Schristos 				    const char *string1, int size1,
446*75f6d617Schristos 				    const char *string2, int size2,
447*75f6d617Schristos 				    int startpos, int range,
448*75f6d617Schristos 				    struct re_registers *regs, int stop));
449*75f6d617Schristos static int wcs_re_compile_fastmap PARAMS ((struct re_pattern_buffer *bufp));
450*75f6d617Schristos #endif
451*75f6d617Schristos 
452*75f6d617Schristos /* These are the command codes that appear in compiled regular
453*75f6d617Schristos    expressions.  Some opcodes are followed by argument bytes.  A
454*75f6d617Schristos    command code can specify any interpretation whatsoever for its
455*75f6d617Schristos    arguments.  Zero bytes may appear in the compiled regular expression.  */
456*75f6d617Schristos 
/* Opcode type for compiled patterns.  Only `no_op' has an explicit value
   (0); every other enumerator takes the next value in declaration order.
   Consequently the numeric values of all opcodes after `exactn' are one
   higher when MBS_SUPPORT is defined (because `exactn_bin' is inserted),
   and the Emacs-only opcodes are appended after `notwordbound' when
   `emacs' is defined.  */
457*75f6d617Schristos typedef enum
458*75f6d617Schristos {
459*75f6d617Schristos   no_op = 0,
460*75f6d617Schristos 
461*75f6d617Schristos   /* Succeed right away--no more backtracking.  */
462*75f6d617Schristos   succeed,
463*75f6d617Schristos 
464*75f6d617Schristos         /* Followed by one byte giving n, then by n literal bytes.  */
465*75f6d617Schristos   exactn,
466*75f6d617Schristos 
467*75f6d617Schristos # ifdef MBS_SUPPORT
468*75f6d617Schristos 	/* Same as exactn, but contains binary data.  */
469*75f6d617Schristos   exactn_bin,
470*75f6d617Schristos # endif
471*75f6d617Schristos 
472*75f6d617Schristos         /* Matches any (more or less) character.  */
473*75f6d617Schristos   anychar,
474*75f6d617Schristos 
475*75f6d617Schristos         /* Matches any one char belonging to specified set.  First
476*75f6d617Schristos            following byte is number of bitmap bytes.  Then come bytes
477*75f6d617Schristos            for a bitmap saying which chars are in.  Bits in each byte
478*75f6d617Schristos            are ordered low-bit-first.  A character is in the set if its
479*75f6d617Schristos            bit is 1.  A character too large to have a bit in the map is
480*75f6d617Schristos            automatically not in the set.  */
481*75f6d617Schristos         /* ifdef MBS_SUPPORT, following element is length of character
482*75f6d617Schristos 	   classes, length of collating symbols, length of equivalence
483*75f6d617Schristos 	   classes, length of character ranges, and length of characters.
484*75f6d617Schristos 	   Next, character class element, collating symbols elements,
485*75f6d617Schristos 	   equivalence class elements, range elements, and character
486*75f6d617Schristos 	   elements follow.
487*75f6d617Schristos 	   See regex_compile function.  */
488*75f6d617Schristos   charset,
489*75f6d617Schristos 
490*75f6d617Schristos         /* Same parameters as charset, but match any character that is
491*75f6d617Schristos            not one of those specified.  */
492*75f6d617Schristos   charset_not,
493*75f6d617Schristos 
494*75f6d617Schristos         /* Start remembering the text that is matched, for storing in a
495*75f6d617Schristos            register.  Followed by one byte with the register number, in
496*75f6d617Schristos            the range 0 to one less than the pattern buffer's re_nsub
497*75f6d617Schristos            field.  Then followed by one byte with the number of groups
498*75f6d617Schristos            inner to this one.  (This last has to be part of the
499*75f6d617Schristos            start_memory only because we need it in the on_failure_jump
500*75f6d617Schristos            of re_match_2.)  */
501*75f6d617Schristos   start_memory,
502*75f6d617Schristos 
503*75f6d617Schristos         /* Stop remembering the text that is matched and store it in a
504*75f6d617Schristos            memory register.  Followed by one byte with the register
505*75f6d617Schristos            number, in the range 0 to one less than `re_nsub' in the
506*75f6d617Schristos            pattern buffer, and one byte with the number of inner groups,
507*75f6d617Schristos            just like `start_memory'.  (We need the number of inner
508*75f6d617Schristos            groups here because we don't have any easy way of finding the
509*75f6d617Schristos            corresponding start_memory when we're at a stop_memory.)  */
510*75f6d617Schristos   stop_memory,
511*75f6d617Schristos 
512*75f6d617Schristos         /* Match a duplicate of something remembered. Followed by one
513*75f6d617Schristos            byte containing the register number.  */
514*75f6d617Schristos   duplicate,
515*75f6d617Schristos 
516*75f6d617Schristos         /* Fail unless at beginning of line.  */
517*75f6d617Schristos   begline,
518*75f6d617Schristos 
519*75f6d617Schristos         /* Fail unless at end of line.  */
520*75f6d617Schristos   endline,
521*75f6d617Schristos 
522*75f6d617Schristos         /* Succeeds if at beginning of buffer (if emacs) or at beginning
523*75f6d617Schristos            of string to be matched (if not).  */
524*75f6d617Schristos   begbuf,
525*75f6d617Schristos 
526*75f6d617Schristos         /* Analogously, for end of buffer/string.  */
527*75f6d617Schristos   endbuf,
528*75f6d617Schristos 
529*75f6d617Schristos         /* Followed by two byte relative address to which to jump.  */
530*75f6d617Schristos   jump,
531*75f6d617Schristos 
532*75f6d617Schristos 	/* Same as jump, but marks the end of an alternative.  */
533*75f6d617Schristos   jump_past_alt,
534*75f6d617Schristos 
535*75f6d617Schristos         /* Followed by two-byte relative address of place to resume at
536*75f6d617Schristos            in case of failure.  */
537*75f6d617Schristos         /* ifdef MBS_SUPPORT, the size of address is 1.  */
538*75f6d617Schristos   on_failure_jump,
539*75f6d617Schristos 
540*75f6d617Schristos         /* Like on_failure_jump, but pushes a placeholder instead of the
541*75f6d617Schristos            current string position when executed.  */
542*75f6d617Schristos   on_failure_keep_string_jump,
543*75f6d617Schristos 
544*75f6d617Schristos         /* Throw away latest failure point and then jump to following
545*75f6d617Schristos            two-byte relative address.  */
546*75f6d617Schristos         /* ifdef MBS_SUPPORT, the size of address is 1.  */
547*75f6d617Schristos   pop_failure_jump,
548*75f6d617Schristos 
549*75f6d617Schristos         /* Change to pop_failure_jump if we know we won't have to backtrack
550*75f6d617Schristos            to match; otherwise change to jump.  This is used to jump
551*75f6d617Schristos            back to the beginning of a repeat.  If what follows this jump
552*75f6d617Schristos            clearly won't match what the repeat does, such that we can be
553*75f6d617Schristos            sure that there is no use backtracking out of repetitions
554*75f6d617Schristos            already matched, then we change it to a pop_failure_jump.
555*75f6d617Schristos            Followed by two-byte address.  */
556*75f6d617Schristos         /* ifdef MBS_SUPPORT, the size of address is 1.  */
557*75f6d617Schristos   maybe_pop_jump,
558*75f6d617Schristos 
559*75f6d617Schristos         /* Jump to following two-byte address, and push a dummy failure
560*75f6d617Schristos            point. This failure point will be thrown away if an attempt
561*75f6d617Schristos            is made to use it for a failure.  A `+' construct makes this
562*75f6d617Schristos            before the first repeat.  Also used as an intermediary kind
563*75f6d617Schristos            of jump when compiling an alternative.  */
564*75f6d617Schristos         /* ifdef MBS_SUPPORT, the size of address is 1.  */
565*75f6d617Schristos   dummy_failure_jump,
566*75f6d617Schristos 
567*75f6d617Schristos 	/* Push a dummy failure point and continue.  Used at the end of
568*75f6d617Schristos 	   alternatives.  */
569*75f6d617Schristos   push_dummy_failure,
570*75f6d617Schristos 
571*75f6d617Schristos         /* Followed by two-byte relative address and two-byte number n.
572*75f6d617Schristos            After matching N times, jump to the address upon failure.  */
573*75f6d617Schristos         /* ifdef MBS_SUPPORT, the size of address is 1.  */
574*75f6d617Schristos   succeed_n,
575*75f6d617Schristos 
576*75f6d617Schristos         /* Followed by two-byte relative address, and two-byte number n.
577*75f6d617Schristos            Jump to the address N times, then fail.  */
578*75f6d617Schristos         /* ifdef MBS_SUPPORT, the size of address is 1.  */
579*75f6d617Schristos   jump_n,
580*75f6d617Schristos 
581*75f6d617Schristos         /* Set the following two-byte relative address to the
582*75f6d617Schristos            subsequent two-byte number.  The address *includes* the two
583*75f6d617Schristos            bytes of number.  */
584*75f6d617Schristos         /* ifdef MBS_SUPPORT, the size of address is 1.  */
585*75f6d617Schristos   set_number_at,
586*75f6d617Schristos 
587*75f6d617Schristos   wordchar,	/* Matches any word-constituent character.  */
588*75f6d617Schristos   notwordchar,	/* Matches any char that is not a word-constituent.  */
589*75f6d617Schristos 
590*75f6d617Schristos   wordbeg,	/* Succeeds if at word beginning.  */
591*75f6d617Schristos   wordend,	/* Succeeds if at word end.  */
592*75f6d617Schristos 
593*75f6d617Schristos   wordbound,	/* Succeeds if at a word boundary.  */
594*75f6d617Schristos   notwordbound	/* Succeeds if not at a word boundary.  */
595*75f6d617Schristos 
596*75f6d617Schristos # ifdef emacs
597*75f6d617Schristos   ,before_dot,	/* Succeeds if before point.  */
598*75f6d617Schristos   at_dot,	/* Succeeds if at point.  */
599*75f6d617Schristos   after_dot,	/* Succeeds if after point.  */
600*75f6d617Schristos 
601*75f6d617Schristos 	/* Matches any character whose syntax is specified.  Followed by
602*75f6d617Schristos            a byte which contains a syntax code, e.g., Sword.  */
603*75f6d617Schristos   syntaxspec,
604*75f6d617Schristos 
605*75f6d617Schristos 	/* Matches any character whose syntax is not that specified.  */
606*75f6d617Schristos   notsyntaxspec
607*75f6d617Schristos # endif /* emacs */
608*75f6d617Schristos } re_opcode_t;
609*75f6d617Schristos #endif /* not INSIDE_RECURSION */
610*75f6d617Schristos 
611*75f6d617Schristos 
/* Per-instantiation character configuration.

   This file compiles its engine by including itself with
   INSIDE_RECURSION defined: once with BYTE defined (functions are named
   byte_*, characters are `char') and, when MBS_SUPPORT is defined, once
   with WCHAR defined (functions are named wcs_*, characters are
   `wchar_t').  When neither BYTE nor WCHAR is defined yet, the last
   branch below performs those inclusions.  */
#ifdef BYTE
# define CHAR_T char
# define UCHAR_T unsigned char
# define COMPILED_BUFFER_VAR bufp->buffer
# define OFFSET_ADDRESS_SIZE 2	/* elements used by STORE_NUMBER */
# define PREFIX(name) byte_##name
# define ARG_PREFIX(name) name
# define PUT_CHAR(c) putchar (c)
#else
# ifdef WCHAR
#  define CHAR_T wchar_t
#  define UCHAR_T wchar_t
#  define COMPILED_BUFFER_VAR wc_buffer
#  define OFFSET_ADDRESS_SIZE 1 /* the size which STORE_NUMBER macro use */
#  define CHAR_CLASS_SIZE ((__alignof__(wctype_t)+sizeof(wctype_t))/sizeof(CHAR_T)+1)
#  define PREFIX(name) wcs_##name
#  define ARG_PREFIX(name) c##name
/* Should we use wide stream??  */
/* Like the byte variant, this expands to a single expression with no
   trailing semicolon, so `PUT_CHAR (c);' is exactly one statement and
   is safe inside an unbraced if/else.  The argument is converted to
   wint_t, the type the %C conversion takes.  */
#  define PUT_CHAR(c) printf ("%C", (wint_t) (c))
#  define TRUE 1
#  define FALSE 0
# else
#  ifdef MBS_SUPPORT
#   define WCHAR
#   define INSIDE_RECURSION
#   include "regex.c"
#   undef INSIDE_RECURSION
#  endif
#  define BYTE
#  define INSIDE_RECURSION
#  include "regex.c"
#  undef INSIDE_RECURSION
# endif
#endif
646*75f6d617Schristos #include "unlocked-io.h"
647*75f6d617Schristos 
648*75f6d617Schristos #ifdef INSIDE_RECURSION
649*75f6d617Schristos /* Common operations on the compiled pattern.  */
650*75f6d617Schristos 
/* Store NUMBER in two contiguous bytes starting at DESTINATION.  */
/* ifdef MBS_SUPPORT, we store NUMBER in 1 element.  */

# ifdef WCHAR
/* Wide build: NUMBER is converted to UCHAR_T and stored in the single
   element DESTINATION points at.  */
#  define STORE_NUMBER(destination, number)				\
  do {									\
    *(destination) = (UCHAR_T)(number);				\
  } while (0)
# else /* BYTE */
/* Byte build: the low-order eight bits go in the first byte and the
   remaining bits in the second.  Both arguments are evaluated twice,
   so they must not have side effects.  */
#  define STORE_NUMBER(destination, number)				\
  do {									\
    (destination)[0] = (number) & 0377;					\
    (destination)[1] = (number) >> 8;					\
  } while (0)
# endif /* WCHAR */
666*75f6d617Schristos 
/* Same as STORE_NUMBER, except increment DESTINATION to
   the byte after where the number is stored.  Therefore, DESTINATION
   must be an lvalue.  */
/* ifdef MBS_SUPPORT, we store NUMBER in 1 element.  */

/* DESTINATION advances by OFFSET_ADDRESS_SIZE elements.  It is
   evaluated more than once, so it must not have side effects.  */
# define STORE_NUMBER_AND_INCR(destination, number)			\
  do {									\
    STORE_NUMBER (destination, number);					\
    (destination) += OFFSET_ADDRESS_SIZE;				\
  } while (0)
677*75f6d617Schristos 
/* Put into DESTINATION a number stored in two contiguous bytes starting
   at SOURCE.  */
/* ifdef MBS_SUPPORT, we store NUMBER in 1 element.  */

# ifdef WCHAR
/* Wide build: the number is the single element SOURCE points at.  */
#  define EXTRACT_NUMBER(destination, source)				\
  do {									\
    (destination) = *(source);						\
  } while (0)
# else /* BYTE */
/* Byte build: the first byte supplies the low-order eight bits; the
   second byte is sign-extended and supplies the rest.  The high part
   is scaled by multiplication rather than `<<' because it is negative
   for negative numbers, and left-shifting a negative value is undefined
   in ISO C.  */
#  define EXTRACT_NUMBER(destination, source)				\
  do {									\
    (destination) = *(source) & 0377;					\
    (destination) += SIGN_EXTEND_CHAR (*((source) + 1)) * (1 << 8);	\
  } while (0)
# endif
694*75f6d617Schristos 
# ifdef DEBUG
/* Function form of EXTRACT_NUMBER: store in *DEST the number encoded at
   SOURCE.  In the wide build that is the single element at SOURCE; in
   the byte build it is the first byte (low-order bits) plus the
   sign-extended second byte times 256.  */
static void PREFIX(extract_number) _RE_ARGS ((int *dest, UCHAR_T *source));
static void
PREFIX(extract_number) (dest, source)
    int *dest;
    UCHAR_T *source;
{
#  ifdef WCHAR
  *dest = *source;
#  else /* BYTE */
  int temp = SIGN_EXTEND_CHAR (*(source + 1));
  *dest = *source & 0377;
  /* Multiply instead of shifting: TEMP may be negative, and
     left-shifting a negative value is undefined in ISO C.  */
  *dest += temp * (1 << 8);
#  endif
}

/* Unless EXTRACT_MACROS is defined, DEBUG builds route EXTRACT_NUMBER
   through the function above.  */
#  ifndef EXTRACT_MACROS /* To debug the macros.  */
#   undef EXTRACT_NUMBER
#   define EXTRACT_NUMBER(dest, src) PREFIX(extract_number) (&dest, src)
#  endif /* not EXTRACT_MACROS */

# endif /* DEBUG */
717*75f6d617Schristos 
/* Same as EXTRACT_NUMBER, except increment SOURCE to after the number.
   SOURCE must be an lvalue.  */

/* SOURCE advances by OFFSET_ADDRESS_SIZE elements.  It is evaluated
   more than once, so it must not have side effects.  */
# define EXTRACT_NUMBER_AND_INCR(destination, source)			\
  do {									\
    EXTRACT_NUMBER (destination, source);				\
    (source) += OFFSET_ADDRESS_SIZE; 					\
  } while (0)
726*75f6d617Schristos 
# ifdef DEBUG
/* Function form of EXTRACT_NUMBER_AND_INCR: store in *DESTINATION the
   number encoded at *SOURCE, then advance *SOURCE past it by
   OFFSET_ADDRESS_SIZE elements.  */
static void PREFIX(extract_number_and_incr) _RE_ARGS ((int *destination,
						       UCHAR_T **source));
static void
PREFIX(extract_number_and_incr) (destination, source)
    int *destination;
    UCHAR_T **source;
{
  PREFIX(extract_number) (destination, *source);
  *source += OFFSET_ADDRESS_SIZE;
}

/* Unless EXTRACT_MACROS is defined, DEBUG builds route
   EXTRACT_NUMBER_AND_INCR through the function above.  */
#  ifndef EXTRACT_MACROS
#   undef EXTRACT_NUMBER_AND_INCR
#   define EXTRACT_NUMBER_AND_INCR(dest, src) \
  PREFIX(extract_number_and_incr) (&dest, &src)
#  endif /* not EXTRACT_MACROS */

# endif /* DEBUG */
746*75f6d617Schristos 
747*75f6d617Schristos 
748*75f6d617Schristos 
749*75f6d617Schristos /* If DEBUG is defined, Regex prints many voluminous messages about what
750*75f6d617Schristos    it is doing (if the variable `debug' is nonzero).  If linked with the
751*75f6d617Schristos    main program in `iregex.c', you can enter patterns and strings
752*75f6d617Schristos    interactively.  And if linked with the main program in `main.c' and
753*75f6d617Schristos    the other test files, you can run the already-written tests.  */
754*75f6d617Schristos 
755*75f6d617Schristos # ifdef DEBUG
756*75f6d617Schristos 
#  ifndef DEFINED_ONCE

/* We use standard I/O for debugging.  */
#   include <stdio.h>

/* It is useful to test things that ``must'' be true when debugging.  */
#   include <assert.h>

/* The DEBUG_PRINT* macros below print only while this is nonzero.  */
static int debug;

/* Expands to E unchanged, so E is compiled in debugging builds.  */
#   define DEBUG_STATEMENT(e) e

/* printf wrappers taking one to four arguments (format included).
   Each is wrapped in do/while (0) so that it is a single statement:
   written as a bare `if (debug) ...', an `else' following the macro in
   the caller would silently attach to the macro's own `if'.  */
#   define DEBUG_PRINT1(x) \
  do { if (debug) printf (x); } while (0)
#   define DEBUG_PRINT2(x1, x2) \
  do { if (debug) printf (x1, x2); } while (0)
#   define DEBUG_PRINT3(x1, x2, x3) \
  do { if (debug) printf (x1, x2, x3); } while (0)
#   define DEBUG_PRINT4(x1, x2, x3, x4) \
  do { if (debug) printf (x1, x2, x3, x4); } while (0)
#  endif /* not DEFINED_ONCE */
773*75f6d617Schristos 
/* Dump the compiled pattern between S and E, or the remainder of a
   string pair starting at W, but only while `debug' is nonzero.  The
   P argument of DEBUG_PRINT_COMPILED_PATTERN is not used.  Both macros
   are wrapped in do/while (0) so that an `else' following them in the
   caller cannot attach to the macro's own `if'.  */
#  define DEBUG_PRINT_COMPILED_PATTERN(p, s, e) 			\
  do {									\
    if (debug) PREFIX(print_partial_compiled_pattern) (s, e);		\
  } while (0)
#  define DEBUG_PRINT_DOUBLE_STRING(w, s1, sz1, s2, sz2)		\
  do {									\
    if (debug) PREFIX(print_double_string) (w, s1, sz1, s2, sz2);	\
  } while (0)
778*75f6d617Schristos 
779*75f6d617Schristos 
780*75f6d617Schristos /* Print the fastmap in human-readable form.  */
781*75f6d617Schristos 
782*75f6d617Schristos #  ifndef DEFINED_ONCE
783*75f6d617Schristos void
print_fastmap(fastmap)784*75f6d617Schristos print_fastmap (fastmap)
785*75f6d617Schristos     char *fastmap;
786*75f6d617Schristos {
787*75f6d617Schristos   unsigned was_a_range = 0;
788*75f6d617Schristos   unsigned i = 0;
789*75f6d617Schristos 
790*75f6d617Schristos   while (i < (1 << BYTEWIDTH))
791*75f6d617Schristos     {
792*75f6d617Schristos       if (fastmap[i++])
793*75f6d617Schristos 	{
794*75f6d617Schristos 	  was_a_range = 0;
795*75f6d617Schristos           putchar (i - 1);
796*75f6d617Schristos           while (i < (1 << BYTEWIDTH)  &&  fastmap[i])
797*75f6d617Schristos             {
798*75f6d617Schristos               was_a_range = 1;
799*75f6d617Schristos               i++;
800*75f6d617Schristos             }
801*75f6d617Schristos 	  if (was_a_range)
802*75f6d617Schristos             {
803*75f6d617Schristos               printf ("-");
804*75f6d617Schristos               putchar (i - 1);
805*75f6d617Schristos             }
806*75f6d617Schristos         }
807*75f6d617Schristos     }
808*75f6d617Schristos   putchar ('\n');
809*75f6d617Schristos }
810*75f6d617Schristos #  endif /* not DEFINED_ONCE */
811*75f6d617Schristos 
812*75f6d617Schristos 
813*75f6d617Schristos /* Print a compiled pattern string in human-readable form, starting at
814*75f6d617Schristos    the START pointer into it and ending just before the pointer END.  */
815*75f6d617Schristos 
816*75f6d617Schristos void
817*75f6d617Schristos PREFIX(print_partial_compiled_pattern) (start, end)
818*75f6d617Schristos     UCHAR_T *start;
819*75f6d617Schristos     UCHAR_T *end;
820*75f6d617Schristos {
821*75f6d617Schristos   int mcnt, mcnt2;
822*75f6d617Schristos   UCHAR_T *p1;
823*75f6d617Schristos   UCHAR_T *p = start;
824*75f6d617Schristos   UCHAR_T *pend = end;
825*75f6d617Schristos 
826*75f6d617Schristos   if (start == NULL)
827*75f6d617Schristos     {
828*75f6d617Schristos       printf ("(null)\n");
829*75f6d617Schristos       return;
830*75f6d617Schristos     }
831*75f6d617Schristos 
832*75f6d617Schristos   /* Loop over pattern commands.  */
833*75f6d617Schristos   while (p < pend)
834*75f6d617Schristos     {
835*75f6d617Schristos #  ifdef _LIBC
836*75f6d617Schristos       printf ("%td:\t", p - start);
837*75f6d617Schristos #  else
838*75f6d617Schristos       printf ("%ld:\t", (long int) (p - start));
839*75f6d617Schristos #  endif
840*75f6d617Schristos 
841*75f6d617Schristos       switch ((re_opcode_t) *p++)
842*75f6d617Schristos 	{
843*75f6d617Schristos         case no_op:
844*75f6d617Schristos           printf ("/no_op");
845*75f6d617Schristos           break;
846*75f6d617Schristos 
847*75f6d617Schristos 	case exactn:
848*75f6d617Schristos 	  mcnt = *p++;
849*75f6d617Schristos           printf ("/exactn/%d", mcnt);
850*75f6d617Schristos           do
851*75f6d617Schristos 	    {
852*75f6d617Schristos               putchar ('/');
853*75f6d617Schristos 	      PUT_CHAR (*p++);
854*75f6d617Schristos             }
855*75f6d617Schristos           while (--mcnt);
856*75f6d617Schristos           break;
857*75f6d617Schristos 
858*75f6d617Schristos #  ifdef MBS_SUPPORT
859*75f6d617Schristos 	case exactn_bin:
860*75f6d617Schristos 	  mcnt = *p++;
861*75f6d617Schristos 	  printf ("/exactn_bin/%d", mcnt);
862*75f6d617Schristos           do
863*75f6d617Schristos 	    {
864*75f6d617Schristos 	      printf("/%lx", (long int) *p++);
865*75f6d617Schristos             }
866*75f6d617Schristos           while (--mcnt);
867*75f6d617Schristos           break;
868*75f6d617Schristos #  endif /* MBS_SUPPORT */
869*75f6d617Schristos 
870*75f6d617Schristos 	case start_memory:
871*75f6d617Schristos           mcnt = *p++;
872*75f6d617Schristos           printf ("/start_memory/%d/%ld", mcnt, (long int) *p++);
873*75f6d617Schristos           break;
874*75f6d617Schristos 
875*75f6d617Schristos 	case stop_memory:
876*75f6d617Schristos           mcnt = *p++;
877*75f6d617Schristos 	  printf ("/stop_memory/%d/%ld", mcnt, (long int) *p++);
878*75f6d617Schristos           break;
879*75f6d617Schristos 
880*75f6d617Schristos 	case duplicate:
881*75f6d617Schristos 	  printf ("/duplicate/%ld", (long int) *p++);
882*75f6d617Schristos 	  break;
883*75f6d617Schristos 
884*75f6d617Schristos 	case anychar:
885*75f6d617Schristos 	  printf ("/anychar");
886*75f6d617Schristos 	  break;
887*75f6d617Schristos 
888*75f6d617Schristos 	case charset:
889*75f6d617Schristos         case charset_not:
890*75f6d617Schristos           {
891*75f6d617Schristos #  ifdef WCHAR
892*75f6d617Schristos 	    int i, length;
893*75f6d617Schristos 	    wchar_t *workp = p;
894*75f6d617Schristos 	    printf ("/charset [%s",
895*75f6d617Schristos 	            (re_opcode_t) *(workp - 1) == charset_not ? "^" : "");
896*75f6d617Schristos 	    p += 5;
897*75f6d617Schristos 	    length = *workp++; /* the length of char_classes */
898*75f6d617Schristos 	    for (i=0 ; i<length ; i++)
899*75f6d617Schristos 	      printf("[:%lx:]", (long int) *p++);
900*75f6d617Schristos 	    length = *workp++; /* the length of collating_symbol */
901*75f6d617Schristos 	    for (i=0 ; i<length ;)
902*75f6d617Schristos 	      {
903*75f6d617Schristos 		printf("[.");
904*75f6d617Schristos 		while(*p != 0)
905*75f6d617Schristos 		  PUT_CHAR((i++,*p++));
906*75f6d617Schristos 		i++,p++;
907*75f6d617Schristos 		printf(".]");
908*75f6d617Schristos 	      }
909*75f6d617Schristos 	    length = *workp++; /* the length of equivalence_class */
910*75f6d617Schristos 	    for (i=0 ; i<length ;)
911*75f6d617Schristos 	      {
912*75f6d617Schristos 		printf("[=");
913*75f6d617Schristos 		while(*p != 0)
914*75f6d617Schristos 		  PUT_CHAR((i++,*p++));
915*75f6d617Schristos 		i++,p++;
916*75f6d617Schristos 		printf("=]");
917*75f6d617Schristos 	      }
918*75f6d617Schristos 	    length = *workp++; /* the length of char_range */
919*75f6d617Schristos 	    for (i=0 ; i<length ; i++)
920*75f6d617Schristos 	      {
921*75f6d617Schristos 		wchar_t range_start = *p++;
922*75f6d617Schristos 		wchar_t range_end = *p++;
923*75f6d617Schristos 		printf("%C-%C", range_start, range_end);
924*75f6d617Schristos 	      }
925*75f6d617Schristos 	    length = *workp++; /* the length of char */
926*75f6d617Schristos 	    for (i=0 ; i<length ; i++)
927*75f6d617Schristos 	      printf("%C", *p++);
928*75f6d617Schristos 	    putchar (']');
929*75f6d617Schristos #  else
930*75f6d617Schristos             register int c, last = -100;
931*75f6d617Schristos 	    register int in_range = 0;
932*75f6d617Schristos 
933*75f6d617Schristos 	    printf ("/charset [%s",
934*75f6d617Schristos 	            (re_opcode_t) *(p - 1) == charset_not ? "^" : "");
935*75f6d617Schristos 
936*75f6d617Schristos             assert (p + *p < pend);
937*75f6d617Schristos 
938*75f6d617Schristos             for (c = 0; c < 256; c++)
939*75f6d617Schristos 	      if (c / 8 < *p
940*75f6d617Schristos 		  && (p[1 + (c/8)] & (1 << (c % 8))))
941*75f6d617Schristos 		{
942*75f6d617Schristos 		  /* Are we starting a range?  */
943*75f6d617Schristos 		  if (last + 1 == c && ! in_range)
944*75f6d617Schristos 		    {
945*75f6d617Schristos 		      putchar ('-');
946*75f6d617Schristos 		      in_range = 1;
947*75f6d617Schristos 		    }
948*75f6d617Schristos 		  /* Have we broken a range?  */
949*75f6d617Schristos 		  else if (last + 1 != c && in_range)
950*75f6d617Schristos               {
951*75f6d617Schristos 		      putchar (last);
952*75f6d617Schristos 		      in_range = 0;
953*75f6d617Schristos 		    }
954*75f6d617Schristos 
955*75f6d617Schristos 		  if (! in_range)
956*75f6d617Schristos 		    putchar (c);
957*75f6d617Schristos 
958*75f6d617Schristos 		  last = c;
959*75f6d617Schristos               }
960*75f6d617Schristos 
961*75f6d617Schristos 	    if (in_range)
962*75f6d617Schristos 	      putchar (last);
963*75f6d617Schristos 
964*75f6d617Schristos 	    putchar (']');
965*75f6d617Schristos 
966*75f6d617Schristos 	    p += 1 + *p;
967*75f6d617Schristos #  endif /* WCHAR */
968*75f6d617Schristos 	  }
969*75f6d617Schristos 	  break;
970*75f6d617Schristos 
971*75f6d617Schristos 	case begline:
972*75f6d617Schristos 	  printf ("/begline");
973*75f6d617Schristos           break;
974*75f6d617Schristos 
975*75f6d617Schristos 	case endline:
976*75f6d617Schristos           printf ("/endline");
977*75f6d617Schristos           break;
978*75f6d617Schristos 
979*75f6d617Schristos 	case on_failure_jump:
980*75f6d617Schristos           PREFIX(extract_number_and_incr) (&mcnt, &p);
981*75f6d617Schristos #  ifdef _LIBC
982*75f6d617Schristos   	  printf ("/on_failure_jump to %td", p + mcnt - start);
983*75f6d617Schristos #  else
984*75f6d617Schristos   	  printf ("/on_failure_jump to %ld", (long int) (p + mcnt - start));
985*75f6d617Schristos #  endif
986*75f6d617Schristos           break;
987*75f6d617Schristos 
988*75f6d617Schristos 	case on_failure_keep_string_jump:
989*75f6d617Schristos           PREFIX(extract_number_and_incr) (&mcnt, &p);
990*75f6d617Schristos #  ifdef _LIBC
991*75f6d617Schristos   	  printf ("/on_failure_keep_string_jump to %td", p + mcnt - start);
992*75f6d617Schristos #  else
993*75f6d617Schristos   	  printf ("/on_failure_keep_string_jump to %ld",
994*75f6d617Schristos 		  (long int) (p + mcnt - start));
995*75f6d617Schristos #  endif
996*75f6d617Schristos           break;
997*75f6d617Schristos 
998*75f6d617Schristos 	case dummy_failure_jump:
999*75f6d617Schristos           PREFIX(extract_number_and_incr) (&mcnt, &p);
1000*75f6d617Schristos #  ifdef _LIBC
1001*75f6d617Schristos   	  printf ("/dummy_failure_jump to %td", p + mcnt - start);
1002*75f6d617Schristos #  else
1003*75f6d617Schristos   	  printf ("/dummy_failure_jump to %ld", (long int) (p + mcnt - start));
1004*75f6d617Schristos #  endif
1005*75f6d617Schristos           break;
1006*75f6d617Schristos 
1007*75f6d617Schristos 	case push_dummy_failure:
1008*75f6d617Schristos           printf ("/push_dummy_failure");
1009*75f6d617Schristos           break;
1010*75f6d617Schristos 
1011*75f6d617Schristos         case maybe_pop_jump:
1012*75f6d617Schristos           PREFIX(extract_number_and_incr) (&mcnt, &p);
1013*75f6d617Schristos #  ifdef _LIBC
1014*75f6d617Schristos   	  printf ("/maybe_pop_jump to %td", p + mcnt - start);
1015*75f6d617Schristos #  else
1016*75f6d617Schristos   	  printf ("/maybe_pop_jump to %ld", (long int) (p + mcnt - start));
1017*75f6d617Schristos #  endif
1018*75f6d617Schristos 	  break;
1019*75f6d617Schristos 
1020*75f6d617Schristos         case pop_failure_jump:
1021*75f6d617Schristos 	  PREFIX(extract_number_and_incr) (&mcnt, &p);
1022*75f6d617Schristos #  ifdef _LIBC
1023*75f6d617Schristos   	  printf ("/pop_failure_jump to %td", p + mcnt - start);
1024*75f6d617Schristos #  else
1025*75f6d617Schristos   	  printf ("/pop_failure_jump to %ld", (long int) (p + mcnt - start));
1026*75f6d617Schristos #  endif
1027*75f6d617Schristos 	  break;
1028*75f6d617Schristos 
1029*75f6d617Schristos         case jump_past_alt:
1030*75f6d617Schristos 	  PREFIX(extract_number_and_incr) (&mcnt, &p);
1031*75f6d617Schristos #  ifdef _LIBC
1032*75f6d617Schristos   	  printf ("/jump_past_alt to %td", p + mcnt - start);
1033*75f6d617Schristos #  else
1034*75f6d617Schristos   	  printf ("/jump_past_alt to %ld", (long int) (p + mcnt - start));
1035*75f6d617Schristos #  endif
1036*75f6d617Schristos 	  break;
1037*75f6d617Schristos 
1038*75f6d617Schristos         case jump:
1039*75f6d617Schristos 	  PREFIX(extract_number_and_incr) (&mcnt, &p);
1040*75f6d617Schristos #  ifdef _LIBC
1041*75f6d617Schristos   	  printf ("/jump to %td", p + mcnt - start);
1042*75f6d617Schristos #  else
1043*75f6d617Schristos   	  printf ("/jump to %ld", (long int) (p + mcnt - start));
1044*75f6d617Schristos #  endif
1045*75f6d617Schristos 	  break;
1046*75f6d617Schristos 
1047*75f6d617Schristos         case succeed_n:
1048*75f6d617Schristos           PREFIX(extract_number_and_incr) (&mcnt, &p);
1049*75f6d617Schristos 	  p1 = p + mcnt;
1050*75f6d617Schristos           PREFIX(extract_number_and_incr) (&mcnt2, &p);
1051*75f6d617Schristos #  ifdef _LIBC
1052*75f6d617Schristos 	  printf ("/succeed_n to %td, %d times", p1 - start, mcnt2);
1053*75f6d617Schristos #  else
1054*75f6d617Schristos 	  printf ("/succeed_n to %ld, %d times",
1055*75f6d617Schristos 		  (long int) (p1 - start), mcnt2);
1056*75f6d617Schristos #  endif
1057*75f6d617Schristos           break;
1058*75f6d617Schristos 
1059*75f6d617Schristos         case jump_n:
1060*75f6d617Schristos           PREFIX(extract_number_and_incr) (&mcnt, &p);
1061*75f6d617Schristos 	  p1 = p + mcnt;
1062*75f6d617Schristos           PREFIX(extract_number_and_incr) (&mcnt2, &p);
1063*75f6d617Schristos 	  printf ("/jump_n to %d, %d times", p1 - start, mcnt2);
1064*75f6d617Schristos           break;
1065*75f6d617Schristos 
1066*75f6d617Schristos         case set_number_at:
1067*75f6d617Schristos           PREFIX(extract_number_and_incr) (&mcnt, &p);
1068*75f6d617Schristos 	  p1 = p + mcnt;
1069*75f6d617Schristos           PREFIX(extract_number_and_incr) (&mcnt2, &p);
1070*75f6d617Schristos #  ifdef _LIBC
1071*75f6d617Schristos 	  printf ("/set_number_at location %td to %d", p1 - start, mcnt2);
1072*75f6d617Schristos #  else
1073*75f6d617Schristos 	  printf ("/set_number_at location %ld to %d",
1074*75f6d617Schristos 		  (long int) (p1 - start), mcnt2);
1075*75f6d617Schristos #  endif
1076*75f6d617Schristos           break;
1077*75f6d617Schristos 
1078*75f6d617Schristos         case wordbound:
1079*75f6d617Schristos 	  printf ("/wordbound");
1080*75f6d617Schristos 	  break;
1081*75f6d617Schristos 
1082*75f6d617Schristos 	case notwordbound:
1083*75f6d617Schristos 	  printf ("/notwordbound");
1084*75f6d617Schristos           break;
1085*75f6d617Schristos 
1086*75f6d617Schristos 	case wordbeg:
1087*75f6d617Schristos 	  printf ("/wordbeg");
1088*75f6d617Schristos 	  break;
1089*75f6d617Schristos 
1090*75f6d617Schristos 	case wordend:
1091*75f6d617Schristos 	  printf ("/wordend");
1092*75f6d617Schristos 	  break;
1093*75f6d617Schristos 
1094*75f6d617Schristos #  ifdef emacs
1095*75f6d617Schristos 	case before_dot:
1096*75f6d617Schristos 	  printf ("/before_dot");
1097*75f6d617Schristos           break;
1098*75f6d617Schristos 
1099*75f6d617Schristos 	case at_dot:
1100*75f6d617Schristos 	  printf ("/at_dot");
1101*75f6d617Schristos           break;
1102*75f6d617Schristos 
1103*75f6d617Schristos 	case after_dot:
1104*75f6d617Schristos 	  printf ("/after_dot");
1105*75f6d617Schristos           break;
1106*75f6d617Schristos 
1107*75f6d617Schristos 	case syntaxspec:
1108*75f6d617Schristos           printf ("/syntaxspec");
1109*75f6d617Schristos 	  mcnt = *p++;
1110*75f6d617Schristos 	  printf ("/%d", mcnt);
1111*75f6d617Schristos           break;
1112*75f6d617Schristos 
1113*75f6d617Schristos 	case notsyntaxspec:
1114*75f6d617Schristos           printf ("/notsyntaxspec");
1115*75f6d617Schristos 	  mcnt = *p++;
1116*75f6d617Schristos 	  printf ("/%d", mcnt);
1117*75f6d617Schristos 	  break;
1118*75f6d617Schristos #  endif /* emacs */
1119*75f6d617Schristos 
1120*75f6d617Schristos 	case wordchar:
1121*75f6d617Schristos 	  printf ("/wordchar");
1122*75f6d617Schristos           break;
1123*75f6d617Schristos 
1124*75f6d617Schristos 	case notwordchar:
1125*75f6d617Schristos 	  printf ("/notwordchar");
1126*75f6d617Schristos           break;
1127*75f6d617Schristos 
1128*75f6d617Schristos 	case begbuf:
1129*75f6d617Schristos 	  printf ("/begbuf");
1130*75f6d617Schristos           break;
1131*75f6d617Schristos 
1132*75f6d617Schristos 	case endbuf:
1133*75f6d617Schristos 	  printf ("/endbuf");
1134*75f6d617Schristos           break;
1135*75f6d617Schristos 
1136*75f6d617Schristos         default:
1137*75f6d617Schristos           printf ("?%ld", (long int) *(p-1));
1138*75f6d617Schristos 	}
1139*75f6d617Schristos 
1140*75f6d617Schristos       putchar ('\n');
1141*75f6d617Schristos     }
1142*75f6d617Schristos 
1143*75f6d617Schristos #  ifdef _LIBC
1144*75f6d617Schristos   printf ("%td:\tend of pattern.\n", p - start);
1145*75f6d617Schristos #  else
1146*75f6d617Schristos   printf ("%ld:\tend of pattern.\n", (long int) (p - start));
1147*75f6d617Schristos #  endif
1148*75f6d617Schristos }
1149*75f6d617Schristos 
1150*75f6d617Schristos 
1151*75f6d617Schristos void
1152*75f6d617Schristos PREFIX(print_compiled_pattern) (bufp)
1153*75f6d617Schristos     struct re_pattern_buffer *bufp;
1154*75f6d617Schristos {
1155*75f6d617Schristos   UCHAR_T *buffer = (UCHAR_T*) bufp->buffer;
1156*75f6d617Schristos 
1157*75f6d617Schristos   PREFIX(print_partial_compiled_pattern) (buffer, buffer
1158*75f6d617Schristos 				  + bufp->used / sizeof(UCHAR_T));
1159*75f6d617Schristos   printf ("%ld bytes used/%ld bytes allocated.\n",
1160*75f6d617Schristos 	  bufp->used, bufp->allocated);
1161*75f6d617Schristos 
1162*75f6d617Schristos   if (bufp->fastmap_accurate && bufp->fastmap)
1163*75f6d617Schristos     {
1164*75f6d617Schristos       printf ("fastmap: ");
1165*75f6d617Schristos       print_fastmap (bufp->fastmap);
1166*75f6d617Schristos     }
1167*75f6d617Schristos 
1168*75f6d617Schristos #  ifdef _LIBC
1169*75f6d617Schristos   printf ("re_nsub: %Zd\t", bufp->re_nsub);
1170*75f6d617Schristos #  else
1171*75f6d617Schristos   printf ("re_nsub: %ld\t", (long int) bufp->re_nsub);
1172*75f6d617Schristos #  endif
1173*75f6d617Schristos   printf ("regs_alloc: %d\t", bufp->regs_allocated);
1174*75f6d617Schristos   printf ("can_be_null: %d\t", bufp->can_be_null);
1175*75f6d617Schristos   printf ("newline_anchor: %d\n", bufp->newline_anchor);
1176*75f6d617Schristos   printf ("no_sub: %d\t", bufp->no_sub);
1177*75f6d617Schristos   printf ("not_bol: %d\t", bufp->not_bol);
1178*75f6d617Schristos   printf ("not_eol: %d\t", bufp->not_eol);
1179*75f6d617Schristos   printf ("syntax: %lx\n", bufp->syntax);
1180*75f6d617Schristos   /* Perhaps we should print the translate table?  */
1181*75f6d617Schristos }
1182*75f6d617Schristos 
1183*75f6d617Schristos 
1184*75f6d617Schristos void
1185*75f6d617Schristos PREFIX(print_double_string) (where, string1, size1, string2, size2)
1186*75f6d617Schristos     const CHAR_T *where;
1187*75f6d617Schristos     const CHAR_T *string1;
1188*75f6d617Schristos     const CHAR_T *string2;
1189*75f6d617Schristos     int size1;
1190*75f6d617Schristos     int size2;
1191*75f6d617Schristos {
1192*75f6d617Schristos   int this_char;
1193*75f6d617Schristos 
1194*75f6d617Schristos   if (where == NULL)
1195*75f6d617Schristos     printf ("(null)");
1196*75f6d617Schristos   else
1197*75f6d617Schristos     {
1198*75f6d617Schristos       int cnt;
1199*75f6d617Schristos 
1200*75f6d617Schristos       if (FIRST_STRING_P (where))
1201*75f6d617Schristos         {
1202*75f6d617Schristos           for (this_char = where - string1; this_char < size1; this_char++)
1203*75f6d617Schristos 	    PUT_CHAR (string1[this_char]);
1204*75f6d617Schristos 
1205*75f6d617Schristos           where = string2;
1206*75f6d617Schristos         }
1207*75f6d617Schristos 
1208*75f6d617Schristos       cnt = 0;
1209*75f6d617Schristos       for (this_char = where - string2; this_char < size2; this_char++)
1210*75f6d617Schristos 	{
1211*75f6d617Schristos 	  PUT_CHAR (string2[this_char]);
1212*75f6d617Schristos 	  if (++cnt > 100)
1213*75f6d617Schristos 	    {
1214*75f6d617Schristos 	      fputs ("...", stdout);
1215*75f6d617Schristos 	      break;
1216*75f6d617Schristos 	    }
1217*75f6d617Schristos 	}
1218*75f6d617Schristos     }
1219*75f6d617Schristos }
1220*75f6d617Schristos 
#  ifndef DEFINED_ONCE
/* Write the character C to stderr.  */
void
printchar (c)
     int c;
{
  putc (c, stderr);
}
#  endif
1229*75f6d617Schristos 
1230*75f6d617Schristos # else /* not DEBUG */
1231*75f6d617Schristos 
1232*75f6d617Schristos #  ifndef DEFINED_ONCE
1233*75f6d617Schristos #   undef assert
1234*75f6d617Schristos #   define assert(e)
1235*75f6d617Schristos 
1236*75f6d617Schristos #   define DEBUG_STATEMENT(e)
1237*75f6d617Schristos #   define DEBUG_PRINT1(x)
1238*75f6d617Schristos #   define DEBUG_PRINT2(x1, x2)
1239*75f6d617Schristos #   define DEBUG_PRINT3(x1, x2, x3)
1240*75f6d617Schristos #   define DEBUG_PRINT4(x1, x2, x3, x4)
1241*75f6d617Schristos #  endif /* not DEFINED_ONCE */
1242*75f6d617Schristos #  define DEBUG_PRINT_COMPILED_PATTERN(p, s, e)
1243*75f6d617Schristos #  define DEBUG_PRINT_DOUBLE_STRING(w, s1, sz1, s2, sz2)
1244*75f6d617Schristos 
1245*75f6d617Schristos # endif /* not DEBUG */
1246*75f6d617Schristos 
1247*75f6d617Schristos 
1248*75f6d617Schristos 
1249*75f6d617Schristos # ifdef WCHAR
/* Convert a multibyte string to a wide character string.
   Write the correspondences between the two to offset_buffer (see
   below), and record in is_binary whether each wchar_t is binary data.
   Invalid multibyte sequences are treated as binary data.
   We assume that offset_buffer and is_binary have already been
   allocated with enough space.  */
1256*75f6d617Schristos 
1257*75f6d617Schristos static size_t convert_mbs_to_wcs (CHAR_T *dest, const unsigned char* src,
1258*75f6d617Schristos 				  size_t len, int *offset_buffer,
1259*75f6d617Schristos 				  char *is_binary);
1260*75f6d617Schristos static size_t
convert_mbs_to_wcs(dest,src,len,offset_buffer,is_binary)1261*75f6d617Schristos convert_mbs_to_wcs (dest, src, len, offset_buffer, is_binary)
1262*75f6d617Schristos      CHAR_T *dest;
1263*75f6d617Schristos      const unsigned char* src;
1264*75f6d617Schristos      size_t len; /* the length of multibyte string.  */
1265*75f6d617Schristos 
1266*75f6d617Schristos      /* It hold correspondances between src(char string) and
1267*75f6d617Schristos 	dest(wchar_t string) for optimization.
1268*75f6d617Schristos 	e.g. src  = "xxxyzz"
1269*75f6d617Schristos              dest = {'X', 'Y', 'Z'}
1270*75f6d617Schristos 	      (each "xxx", "y" and "zz" represent one multibyte character
1271*75f6d617Schristos 	       corresponding to 'X', 'Y' and 'Z'.)
1272*75f6d617Schristos 	  offset_buffer = {0, 0+3("xxx"), 0+3+1("y"), 0+3+1+2("zz")}
1273*75f6d617Schristos 	  	        = {0, 3, 4, 6}
1274*75f6d617Schristos      */
1275*75f6d617Schristos      int *offset_buffer;
1276*75f6d617Schristos      char *is_binary;
1277*75f6d617Schristos {
1278*75f6d617Schristos   wchar_t *pdest = dest;
1279*75f6d617Schristos   const unsigned char *psrc = src;
1280*75f6d617Schristos   size_t wc_count = 0;
1281*75f6d617Schristos 
1282*75f6d617Schristos   mbstate_t mbs;
1283*75f6d617Schristos   int i, consumed;
1284*75f6d617Schristos   size_t mb_remain = len;
1285*75f6d617Schristos   size_t mb_count = 0;
1286*75f6d617Schristos 
1287*75f6d617Schristos   /* Initialize the conversion state.  */
1288*75f6d617Schristos   memset (&mbs, 0, sizeof (mbstate_t));
1289*75f6d617Schristos 
1290*75f6d617Schristos   offset_buffer[0] = 0;
1291*75f6d617Schristos   for( ; mb_remain > 0 ; ++wc_count, ++pdest, mb_remain -= consumed,
1292*75f6d617Schristos 	 psrc += consumed)
1293*75f6d617Schristos     {
1294*75f6d617Schristos       consumed = mbrtowc (pdest, psrc, mb_remain, &mbs);
1295*75f6d617Schristos 
1296*75f6d617Schristos       if (consumed <= 0)
1297*75f6d617Schristos 	/* failed to convert. maybe src contains binary data.
1298*75f6d617Schristos 	   So we consume 1 byte manualy.  */
1299*75f6d617Schristos 	{
1300*75f6d617Schristos 	  *pdest = *psrc;
1301*75f6d617Schristos 	  consumed = 1;
1302*75f6d617Schristos 	  is_binary[wc_count] = TRUE;
1303*75f6d617Schristos 	}
1304*75f6d617Schristos       else
1305*75f6d617Schristos 	is_binary[wc_count] = FALSE;
1306*75f6d617Schristos       /* In sjis encoding, we use yen sign as escape character in
1307*75f6d617Schristos 	 place of reverse solidus. So we convert 0x5c(yen sign in
1308*75f6d617Schristos 	 sjis) to not 0xa5(yen sign in UCS2) but 0x5c(reverse
1309*75f6d617Schristos 	 solidus in UCS2).  */
1310*75f6d617Schristos       if (consumed == 1 && (int) *psrc == 0x5c && (int) *pdest == 0xa5)
1311*75f6d617Schristos 	*pdest = (wchar_t) *psrc;
1312*75f6d617Schristos 
1313*75f6d617Schristos       offset_buffer[wc_count + 1] = mb_count += consumed;
1314*75f6d617Schristos     }
1315*75f6d617Schristos 
1316*75f6d617Schristos   /* Fill remain of the buffer with sentinel.  */
1317*75f6d617Schristos   for (i = wc_count + 1 ; i <= len ; i++)
1318*75f6d617Schristos     offset_buffer[i] = mb_count + 1;
1319*75f6d617Schristos 
1320*75f6d617Schristos   return wc_count;
1321*75f6d617Schristos }
1322*75f6d617Schristos 
1323*75f6d617Schristos # endif /* WCHAR */
1324*75f6d617Schristos 
1325*75f6d617Schristos #else /* not INSIDE_RECURSION */
1326*75f6d617Schristos 
1327*75f6d617Schristos /* Set by `re_set_syntax' to the current regexp syntax to recognize.  Can
1328*75f6d617Schristos    also be assigned to arbitrarily: each pattern buffer stores its own
1329*75f6d617Schristos    syntax, so it can be changed between regex compilations.  */
1330*75f6d617Schristos /* This has no initializer because initialized variables in Emacs
1331*75f6d617Schristos    become read-only after dumping.  */
1332*75f6d617Schristos reg_syntax_t re_syntax_options;
1333*75f6d617Schristos 
1334*75f6d617Schristos 
1335*75f6d617Schristos /* Specify the precise syntax of regexps for compilation.  This provides
1336*75f6d617Schristos    for compatibility for various utilities which historically have
1337*75f6d617Schristos    different, incompatible syntaxes.
1338*75f6d617Schristos 
1339*75f6d617Schristos    The argument SYNTAX is a bit mask comprised of the various bits
1340*75f6d617Schristos    defined in regex.h.  We return the old syntax.  */
1341*75f6d617Schristos 
1342*75f6d617Schristos reg_syntax_t
re_set_syntax(syntax)1343*75f6d617Schristos re_set_syntax (syntax)
1344*75f6d617Schristos     reg_syntax_t syntax;
1345*75f6d617Schristos {
1346*75f6d617Schristos   reg_syntax_t ret = re_syntax_options;
1347*75f6d617Schristos 
1348*75f6d617Schristos   re_syntax_options = syntax;
1349*75f6d617Schristos # ifdef DEBUG
1350*75f6d617Schristos   if (syntax & RE_DEBUG)
1351*75f6d617Schristos     debug = 1;
1352*75f6d617Schristos   else if (debug) /* was on but now is not */
1353*75f6d617Schristos     debug = 0;
1354*75f6d617Schristos # endif /* DEBUG */
1355*75f6d617Schristos   return ret;
1356*75f6d617Schristos }
1357*75f6d617Schristos # ifdef _LIBC
1358*75f6d617Schristos weak_alias (__re_set_syntax, re_set_syntax)
1359*75f6d617Schristos # endif
1360*75f6d617Schristos 
1361*75f6d617Schristos /* This table gives an error message for each of the error codes listed
1362*75f6d617Schristos    in regex.h.  Obviously the order here has to be same as there.
1363*75f6d617Schristos    POSIX doesn't require that we do anything for REG_NOERROR,
1364*75f6d617Schristos    but why not be nice?  */
1365*75f6d617Schristos 
/* All messages are packed into a single char array.  Each message
   literal is followed by an explicit "\0" separator, and each
   REG_xxx_IDX macro is the byte offset of its message: the previous
   offset plus `sizeof' of the previous message literal, which counts
   that message's terminating NUL.  The literal given to `sizeof' must
   therefore match the preceding message text exactly.  */
1366*75f6d617Schristos static const char re_error_msgid[] =
1367*75f6d617Schristos   {
1368*75f6d617Schristos # define REG_NOERROR_IDX	0
1369*75f6d617Schristos     gettext_noop ("Success")	/* REG_NOERROR */
1370*75f6d617Schristos     "\0"
1371*75f6d617Schristos # define REG_NOMATCH_IDX (REG_NOERROR_IDX + sizeof "Success")
1372*75f6d617Schristos     gettext_noop ("No match")	/* REG_NOMATCH */
1373*75f6d617Schristos     "\0"
1374*75f6d617Schristos # define REG_BADPAT_IDX	(REG_NOMATCH_IDX + sizeof "No match")
1375*75f6d617Schristos     gettext_noop ("Invalid regular expression") /* REG_BADPAT */
1376*75f6d617Schristos     "\0"
1377*75f6d617Schristos # define REG_ECOLLATE_IDX (REG_BADPAT_IDX + sizeof "Invalid regular expression")
1378*75f6d617Schristos     gettext_noop ("Invalid collation character") /* REG_ECOLLATE */
1379*75f6d617Schristos     "\0"
1380*75f6d617Schristos # define REG_ECTYPE_IDX	(REG_ECOLLATE_IDX + sizeof "Invalid collation character")
1381*75f6d617Schristos     gettext_noop ("Invalid character class name") /* REG_ECTYPE */
1382*75f6d617Schristos     "\0"
1383*75f6d617Schristos # define REG_EESCAPE_IDX	(REG_ECTYPE_IDX + sizeof "Invalid character class name")
1384*75f6d617Schristos     gettext_noop ("Trailing backslash") /* REG_EESCAPE */
1385*75f6d617Schristos     "\0"
1386*75f6d617Schristos # define REG_ESUBREG_IDX	(REG_EESCAPE_IDX + sizeof "Trailing backslash")
1387*75f6d617Schristos     gettext_noop ("Invalid back reference") /* REG_ESUBREG */
1388*75f6d617Schristos     "\0"
1389*75f6d617Schristos # define REG_EBRACK_IDX	(REG_ESUBREG_IDX + sizeof "Invalid back reference")
1390*75f6d617Schristos     gettext_noop ("Unmatched [ or [^")	/* REG_EBRACK */
1391*75f6d617Schristos     "\0"
1392*75f6d617Schristos # define REG_EPAREN_IDX	(REG_EBRACK_IDX + sizeof "Unmatched [ or [^")
1393*75f6d617Schristos     gettext_noop ("Unmatched ( or \\(") /* REG_EPAREN */
1394*75f6d617Schristos     "\0"
1395*75f6d617Schristos # define REG_EBRACE_IDX	(REG_EPAREN_IDX + sizeof "Unmatched ( or \\(")
1396*75f6d617Schristos     gettext_noop ("Unmatched \\{") /* REG_EBRACE */
1397*75f6d617Schristos     "\0"
1398*75f6d617Schristos # define REG_BADBR_IDX	(REG_EBRACE_IDX + sizeof "Unmatched \\{")
1399*75f6d617Schristos     gettext_noop ("Invalid content of \\{\\}") /* REG_BADBR */
1400*75f6d617Schristos     "\0"
1401*75f6d617Schristos # define REG_ERANGE_IDX	(REG_BADBR_IDX + sizeof "Invalid content of \\{\\}")
1402*75f6d617Schristos     gettext_noop ("Invalid range end")	/* REG_ERANGE */
1403*75f6d617Schristos     "\0"
1404*75f6d617Schristos # define REG_ESPACE_IDX	(REG_ERANGE_IDX + sizeof "Invalid range end")
1405*75f6d617Schristos     gettext_noop ("Memory exhausted") /* REG_ESPACE */
1406*75f6d617Schristos     "\0"
1407*75f6d617Schristos # define REG_BADRPT_IDX	(REG_ESPACE_IDX + sizeof "Memory exhausted")
1408*75f6d617Schristos     gettext_noop ("Invalid preceding regular expression") /* REG_BADRPT */
1409*75f6d617Schristos     "\0"
1410*75f6d617Schristos # define REG_EEND_IDX	(REG_BADRPT_IDX + sizeof "Invalid preceding regular expression")
1411*75f6d617Schristos     gettext_noop ("Premature end of regular expression") /* REG_EEND */
1412*75f6d617Schristos     "\0"
1413*75f6d617Schristos # define REG_ESIZE_IDX	(REG_EEND_IDX + sizeof "Premature end of regular expression")
1414*75f6d617Schristos     gettext_noop ("Regular expression too big") /* REG_ESIZE */
1415*75f6d617Schristos     "\0"
1416*75f6d617Schristos # define REG_ERPAREN_IDX	(REG_ESIZE_IDX + sizeof "Regular expression too big")
1417*75f6d617Schristos     gettext_noop ("Unmatched ) or \\)") /* REG_ERPAREN */
1418*75f6d617Schristos   };
1419*75f6d617Schristos 
/* Byte offset into re_error_msgid of each message, listed in the same
   order in which the REG_xxx_IDX macros are defined.  Presumably
   indexed by the regex.h error code value -- confirm against the code
   that reads this table.  */
1420*75f6d617Schristos static const size_t re_error_msgid_idx[] =
1421*75f6d617Schristos   {
1422*75f6d617Schristos     REG_NOERROR_IDX,
1423*75f6d617Schristos     REG_NOMATCH_IDX,
1424*75f6d617Schristos     REG_BADPAT_IDX,
1425*75f6d617Schristos     REG_ECOLLATE_IDX,
1426*75f6d617Schristos     REG_ECTYPE_IDX,
1427*75f6d617Schristos     REG_EESCAPE_IDX,
1428*75f6d617Schristos     REG_ESUBREG_IDX,
1429*75f6d617Schristos     REG_EBRACK_IDX,
1430*75f6d617Schristos     REG_EPAREN_IDX,
1431*75f6d617Schristos     REG_EBRACE_IDX,
1432*75f6d617Schristos     REG_BADBR_IDX,
1433*75f6d617Schristos     REG_ERANGE_IDX,
1434*75f6d617Schristos     REG_ESPACE_IDX,
1435*75f6d617Schristos     REG_BADRPT_IDX,
1436*75f6d617Schristos     REG_EEND_IDX,
1437*75f6d617Schristos     REG_ESIZE_IDX,
1438*75f6d617Schristos     REG_ERPAREN_IDX
1439*75f6d617Schristos   };
1440*75f6d617Schristos 
1441*75f6d617Schristos #endif /* INSIDE_RECURSION */
1442*75f6d617Schristos 
1443*75f6d617Schristos #ifndef DEFINED_ONCE
1444*75f6d617Schristos /* Avoiding alloca during matching, to placate r_alloc.  */
1445*75f6d617Schristos 
1446*75f6d617Schristos /* Define MATCH_MAY_ALLOCATE unless we need to make sure that the
1447*75f6d617Schristos    searching and matching functions should not call alloca.  On some
1448*75f6d617Schristos    systems, alloca is implemented in terms of malloc, and if we're
1449*75f6d617Schristos    using the relocating allocator routines, then malloc could cause a
1450*75f6d617Schristos    relocation, which might (if the strings being searched are in the
1451*75f6d617Schristos    ralloc heap) shift the data out from underneath the regexp
1452*75f6d617Schristos    routines.
1453*75f6d617Schristos 
1454*75f6d617Schristos    Here's another reason to avoid allocation: Emacs
1455*75f6d617Schristos    processes input from X in a signal handler; processing X input may
1456*75f6d617Schristos    call malloc; if input arrives while a matching routine is calling
1457*75f6d617Schristos    malloc, then we're scrod.  But Emacs can't just block input while
1458*75f6d617Schristos    calling matching routines; then we don't notice interrupts when
1459*75f6d617Schristos    they come in.  So, Emacs blocks input around all regexp calls
1460*75f6d617Schristos    except the matching calls, which it leaves unprotected, in the
1461*75f6d617Schristos    faith that they will not malloc.  */
1462*75f6d617Schristos 
1463*75f6d617Schristos /* Normally, this is fine.  */
1464*75f6d617Schristos # define MATCH_MAY_ALLOCATE
1465*75f6d617Schristos 
1466*75f6d617Schristos /* When using GNU C, we are not REALLY using the C alloca, no matter
1467*75f6d617Schristos    what config.h may say.  So don't take precautions for it.  */
1468*75f6d617Schristos # ifdef __GNUC__
1469*75f6d617Schristos #  undef C_ALLOCA
1470*75f6d617Schristos # endif
1471*75f6d617Schristos 
1472*75f6d617Schristos /* The match routines may not allocate if (1) they would do it with malloc
1473*75f6d617Schristos    and (2) it's not safe for them to use malloc.
1474*75f6d617Schristos    Note that if REL_ALLOC is defined, matching would not use malloc for the
1475*75f6d617Schristos    failure stack, but we would still use it for the register vectors;
1476*75f6d617Schristos    so REL_ALLOC should not affect this.  */
1477*75f6d617Schristos # if (defined C_ALLOCA || defined REGEX_MALLOC) && defined emacs
1478*75f6d617Schristos #  undef MATCH_MAY_ALLOCATE
1479*75f6d617Schristos # endif
1480*75f6d617Schristos #endif /* not DEFINED_ONCE */
1481*75f6d617Schristos 
1482*75f6d617Schristos #ifdef INSIDE_RECURSION
1483*75f6d617Schristos /* Failure stack declarations and macros; both re_compile_fastmap and
1484*75f6d617Schristos    re_match_2 use a failure stack.  These have to be macros because of
1485*75f6d617Schristos    REGEX_ALLOCATE_STACK.  */
1486*75f6d617Schristos 
1487*75f6d617Schristos 
1488*75f6d617Schristos /* Number of failure points for which to initially allocate space
1489*75f6d617Schristos    when matching.  If this number is exceeded, we allocate more
1490*75f6d617Schristos    space, so it is not a hard limit.  */
1491*75f6d617Schristos # ifndef INIT_FAILURE_ALLOC
1492*75f6d617Schristos #  define INIT_FAILURE_ALLOC 5
1493*75f6d617Schristos # endif
1494*75f6d617Schristos 
1495*75f6d617Schristos /* Roughly the maximum number of failure points on the stack.  Would be
1496*75f6d617Schristos    exactly that if always used MAX_FAILURE_ITEMS items each time we failed.
1497*75f6d617Schristos    This is a variable only so users of regex can assign to it; we never
1498*75f6d617Schristos    change it ourselves.  */
1499*75f6d617Schristos 
1500*75f6d617Schristos # ifdef INT_IS_16BIT
1501*75f6d617Schristos 
1502*75f6d617Schristos #  ifndef DEFINED_ONCE
1503*75f6d617Schristos #   if defined MATCH_MAY_ALLOCATE
1504*75f6d617Schristos /* 4400 was enough to cause a crash on Alpha OSF/1,
1505*75f6d617Schristos    whose default stack limit is 2mb.  */
1506*75f6d617Schristos long int re_max_failures = 4000;
1507*75f6d617Schristos #   else
1508*75f6d617Schristos long int re_max_failures = 2000;
1509*75f6d617Schristos #   endif
1510*75f6d617Schristos #  endif
1511*75f6d617Schristos 
PREFIX(fail_stack_elt)1512*75f6d617Schristos union PREFIX(fail_stack_elt)
1513*75f6d617Schristos {
1514*75f6d617Schristos   UCHAR_T *pointer;
1515*75f6d617Schristos   long int integer;
1516*75f6d617Schristos };
1517*75f6d617Schristos 
1518*75f6d617Schristos typedef union PREFIX(fail_stack_elt) PREFIX(fail_stack_elt_t);
1519*75f6d617Schristos 
1520*75f6d617Schristos typedef struct
1521*75f6d617Schristos {
1522*75f6d617Schristos   PREFIX(fail_stack_elt_t) *stack;
1523*75f6d617Schristos   unsigned long int size;
1524*75f6d617Schristos   unsigned long int avail;		/* Offset of next open position.  */
1525*75f6d617Schristos } PREFIX(fail_stack_type);
1526*75f6d617Schristos 
1527*75f6d617Schristos # else /* not INT_IS_16BIT */
1528*75f6d617Schristos 
1529*75f6d617Schristos #  ifndef DEFINED_ONCE
1530*75f6d617Schristos #   if defined MATCH_MAY_ALLOCATE
1531*75f6d617Schristos /* 4400 was enough to cause a crash on Alpha OSF/1,
1532*75f6d617Schristos    whose default stack limit is 2mb.  */
1533*75f6d617Schristos int re_max_failures = 4000;
1534*75f6d617Schristos #   else
1535*75f6d617Schristos int re_max_failures = 2000;
1536*75f6d617Schristos #   endif
1537*75f6d617Schristos #  endif
1538*75f6d617Schristos 
PREFIX(fail_stack_elt)1539*75f6d617Schristos union PREFIX(fail_stack_elt)
1540*75f6d617Schristos {
1541*75f6d617Schristos   UCHAR_T *pointer;
1542*75f6d617Schristos   int integer;
1543*75f6d617Schristos };
1544*75f6d617Schristos 
1545*75f6d617Schristos typedef union PREFIX(fail_stack_elt) PREFIX(fail_stack_elt_t);
1546*75f6d617Schristos 
1547*75f6d617Schristos typedef struct
1548*75f6d617Schristos {
1549*75f6d617Schristos   PREFIX(fail_stack_elt_t) *stack;
1550*75f6d617Schristos   unsigned size;
1551*75f6d617Schristos   unsigned avail;			/* Offset of next open position.  */
1552*75f6d617Schristos } PREFIX(fail_stack_type);
1553*75f6d617Schristos 
1554*75f6d617Schristos # endif /* INT_IS_16BIT */
1555*75f6d617Schristos 
1556*75f6d617Schristos # ifndef DEFINED_ONCE
1557*75f6d617Schristos #  define FAIL_STACK_EMPTY()     (fail_stack.avail == 0)
1558*75f6d617Schristos #  define FAIL_STACK_PTR_EMPTY() (fail_stack_ptr->avail == 0)
1559*75f6d617Schristos #  define FAIL_STACK_FULL()      (fail_stack.avail == fail_stack.size)
1560*75f6d617Schristos # endif
1561*75f6d617Schristos 
1562*75f6d617Schristos 
1563*75f6d617Schristos /* Define macros to initialize and free the failure stack.
1564*75f6d617Schristos    Do `return -2' if the alloc fails.

   Both macros operate on a variable named `fail_stack' in the function
   that expands them.  With MATCH_MAY_ALLOCATE, INIT_FAIL_STACK obtains
   room for INIT_FAILURE_ALLOC elements from REGEX_ALLOCATE_STACK, and
   its `return' leaves the function that expands the macro;
   RESET_FAIL_STACK hands the stack to REGEX_FREE_STACK.  Without
   MATCH_MAY_ALLOCATE nothing is allocated here: INIT_FAIL_STACK only
   resets `avail' to 0 and RESET_FAIL_STACK expands to nothing.  */
1565*75f6d617Schristos 
1566*75f6d617Schristos # ifdef MATCH_MAY_ALLOCATE
1567*75f6d617Schristos #  define INIT_FAIL_STACK()						\
1568*75f6d617Schristos   do {									\
1569*75f6d617Schristos     fail_stack.stack = (PREFIX(fail_stack_elt_t) *)		\
1570*75f6d617Schristos       REGEX_ALLOCATE_STACK (INIT_FAILURE_ALLOC * sizeof (PREFIX(fail_stack_elt_t))); \
1571*75f6d617Schristos 									\
1572*75f6d617Schristos     if (fail_stack.stack == NULL)				\
1573*75f6d617Schristos       return -2;							\
1574*75f6d617Schristos 									\
1575*75f6d617Schristos     fail_stack.size = INIT_FAILURE_ALLOC;			\
1576*75f6d617Schristos     fail_stack.avail = 0;					\
1577*75f6d617Schristos   } while (0)
1578*75f6d617Schristos 
1579*75f6d617Schristos #  define RESET_FAIL_STACK()  REGEX_FREE_STACK (fail_stack.stack)
1580*75f6d617Schristos # else
1581*75f6d617Schristos #  define INIT_FAIL_STACK()						\
1582*75f6d617Schristos   do {									\
1583*75f6d617Schristos     fail_stack.avail = 0;					\
1584*75f6d617Schristos   } while (0)
1585*75f6d617Schristos 
1586*75f6d617Schristos #  define RESET_FAIL_STACK()
1587*75f6d617Schristos # endif
1588*75f6d617Schristos 
1589*75f6d617Schristos 
1590*75f6d617Schristos /* Double the size of FAIL_STACK, up to approximately `re_max_failures' items.
1591*75f6d617Schristos 
1592*75f6d617Schristos    Return 1 if succeeds, and 0 if either ran out of memory
1593*75f6d617Schristos    allocating space for it or it was already too large.
1594*75f6d617Schristos 
1595*75f6d617Schristos    REGEX_REALLOCATE_STACK requires `destination' be declared.

   The limit is tested before doubling, so growth stops only once the
   current size already exceeds re_max_failures * MAX_FAILURE_ITEMS.
   On allocation failure (fail_stack).stack has already been overwritten
   with NULL and (fail_stack).size is left unchanged.
   NOTE(review): if REGEX_REALLOCATE_STACK is realloc-based, the old
   buffer is then no longer reachable from FAIL_STACK -- confirm against
   its definition.  The argument is evaluated several times, so it must
   have no side effects.  */
1596*75f6d617Schristos 
1597*75f6d617Schristos # define DOUBLE_FAIL_STACK(fail_stack)					\
1598*75f6d617Schristos   ((fail_stack).size > (unsigned) (re_max_failures * MAX_FAILURE_ITEMS)	\
1599*75f6d617Schristos    ? 0									\
1600*75f6d617Schristos    : ((fail_stack).stack = (PREFIX(fail_stack_elt_t) *)			\
1601*75f6d617Schristos         REGEX_REALLOCATE_STACK ((fail_stack).stack, 			\
1602*75f6d617Schristos           (fail_stack).size * sizeof (PREFIX(fail_stack_elt_t)),	\
1603*75f6d617Schristos           ((fail_stack).size << 1) * sizeof (PREFIX(fail_stack_elt_t))),\
1604*75f6d617Schristos 									\
1605*75f6d617Schristos       (fail_stack).stack == NULL					\
1606*75f6d617Schristos       ? 0								\
1607*75f6d617Schristos       : ((fail_stack).size <<= 1, 					\
1608*75f6d617Schristos          1)))
1609*75f6d617Schristos 
1610*75f6d617Schristos 
/* Push pointer POINTER on FAIL_STACK.
   Return 1 if was able to do so and 0 if the stack was full and
   DOUBLE_FAIL_STACK could not enlarge it.

   The fullness test is applied to the FAIL_STACK argument itself, so
   the macro works for any stack object and not only for a variable
   that happens to be named `fail_stack'.  DOUBLE_FAIL_STACK requires
   `destination' be declared.  POINTER is evaluated at most once;
   FAIL_STACK is evaluated several times and must have no side
   effects.  */
# define PUSH_PATTERN_OP(POINTER, FAIL_STACK)				\
  (((FAIL_STACK).avail == (FAIL_STACK).size				\
    && !DOUBLE_FAIL_STACK (FAIL_STACK))					\
   ? 0									\
   : ((FAIL_STACK).stack[(FAIL_STACK).avail++].pointer = (POINTER),	\
      1))
1620*75f6d617Schristos 
1621*75f6d617Schristos /* Push a pointer value onto the failure stack.
1622*75f6d617Schristos    Assumes the variable `fail_stack'.  Probably should only
1623*75f6d617Schristos    be called from within `PUSH_FAILURE_POINT'.  */
1624*75f6d617Schristos # define PUSH_FAILURE_POINTER(item)					\
1625*75f6d617Schristos   fail_stack.stack[fail_stack.avail++].pointer = (UCHAR_T *) (item)
1626*75f6d617Schristos 
1627*75f6d617Schristos /* This pushes an integer-valued item onto the failure stack.
1628*75f6d617Schristos    Assumes the variable `fail_stack'.  Probably should only
1629*75f6d617Schristos    be called from within `PUSH_FAILURE_POINT'.  */
1630*75f6d617Schristos # define PUSH_FAILURE_INT(item)					\
1631*75f6d617Schristos   fail_stack.stack[fail_stack.avail++].integer = (item)
1632*75f6d617Schristos 
1633*75f6d617Schristos /* Push a fail_stack_elt_t value onto the failure stack.
1634*75f6d617Schristos    Assumes the variable `fail_stack'.  Probably should only
1635*75f6d617Schristos    be called from within `PUSH_FAILURE_POINT'.  */
1636*75f6d617Schristos # define PUSH_FAILURE_ELT(item)					\
1637*75f6d617Schristos   fail_stack.stack[fail_stack.avail++] =  (item)
1638*75f6d617Schristos 
1639*75f6d617Schristos /* These three POP... operations complement the three PUSH... operations.
1640*75f6d617Schristos    All assume that `fail_stack' is nonempty.  */
1641*75f6d617Schristos # define POP_FAILURE_POINTER() fail_stack.stack[--fail_stack.avail].pointer
1642*75f6d617Schristos # define POP_FAILURE_INT() fail_stack.stack[--fail_stack.avail].integer
1643*75f6d617Schristos # define POP_FAILURE_ELT() fail_stack.stack[--fail_stack.avail]
1644*75f6d617Schristos 
1645*75f6d617Schristos /* Used to omit pushing failure point id's when we're not debugging.  */
1646*75f6d617Schristos # ifdef DEBUG
1647*75f6d617Schristos #  define DEBUG_PUSH PUSH_FAILURE_INT
1648*75f6d617Schristos #  define DEBUG_POP(item_addr) *(item_addr) = POP_FAILURE_INT ()
1649*75f6d617Schristos # else
1650*75f6d617Schristos #  define DEBUG_PUSH(item)
1651*75f6d617Schristos #  define DEBUG_POP(item_addr)
1652*75f6d617Schristos # endif
1653*75f6d617Schristos 
1654*75f6d617Schristos 
1655*75f6d617Schristos /* Push the information about the state we will need
1656*75f6d617Schristos    if we ever fail back to it.
1657*75f6d617Schristos 
1658*75f6d617Schristos    Requires variables fail_stack, regstart, regend, reg_info, and
1659*75f6d617Schristos    num_regs_pushed be declared.  DOUBLE_FAIL_STACK requires `destination'
1660*75f6d617Schristos    be declared.
1661*75f6d617Schristos 
1662*75f6d617Schristos    Does `return FAILURE_CODE' if runs out of memory.  */
1663*75f6d617Schristos 
1664*75f6d617Schristos # define PUSH_FAILURE_POINT(pattern_place, string_place, failure_code)	\
1665*75f6d617Schristos   do {									\
1666*75f6d617Schristos     char *destination;							\
1667*75f6d617Schristos     /* Must be int, so when we don't save any registers, the arithmetic	\
1668*75f6d617Schristos        of 0 + -1 isn't done as unsigned.  */				\
1669*75f6d617Schristos     /* Can't be int, since there is not a shred of a guarantee that int	\
1670*75f6d617Schristos        is wide enough to hold a value of something to which pointer can	\
1671*75f6d617Schristos        be assigned */							\
1672*75f6d617Schristos     active_reg_t this_reg;						\
1673*75f6d617Schristos     									\
1674*75f6d617Schristos     DEBUG_STATEMENT (failure_id++);					\
1675*75f6d617Schristos     DEBUG_STATEMENT (nfailure_points_pushed++);				\
1676*75f6d617Schristos     DEBUG_PRINT2 ("\nPUSH_FAILURE_POINT #%u:\n", failure_id);		\
1677*75f6d617Schristos     DEBUG_PRINT2 ("  Before push, next avail: %d\n", (fail_stack).avail);\
1678*75f6d617Schristos     DEBUG_PRINT2 ("                     size: %d\n", (fail_stack).size);\
1679*75f6d617Schristos 									\
1680*75f6d617Schristos     DEBUG_PRINT2 ("  slots needed: %ld\n", NUM_FAILURE_ITEMS);		\
1681*75f6d617Schristos     DEBUG_PRINT2 ("     available: %d\n", REMAINING_AVAIL_SLOTS);	\
1682*75f6d617Schristos 									\
1683*75f6d617Schristos     /* Ensure we have enough space allocated for what we will push.  */	\
1684*75f6d617Schristos     while (REMAINING_AVAIL_SLOTS < NUM_FAILURE_ITEMS)			\
1685*75f6d617Schristos       {									\
1686*75f6d617Schristos         if (!DOUBLE_FAIL_STACK (fail_stack))				\
1687*75f6d617Schristos           return failure_code;						\
1688*75f6d617Schristos 									\
1689*75f6d617Schristos         DEBUG_PRINT2 ("\n  Doubled stack; size now: %d\n",		\
1690*75f6d617Schristos 		       (fail_stack).size);				\
1691*75f6d617Schristos         DEBUG_PRINT2 ("  slots available: %d\n", REMAINING_AVAIL_SLOTS);\
1692*75f6d617Schristos       }									\
1693*75f6d617Schristos 									\
1694*75f6d617Schristos     /* Push the info, starting with the registers.  */			\
1695*75f6d617Schristos     DEBUG_PRINT1 ("\n");						\
1696*75f6d617Schristos 									\
1697*75f6d617Schristos     if (1)								\
1698*75f6d617Schristos       for (this_reg = lowest_active_reg; this_reg <= highest_active_reg; \
1699*75f6d617Schristos 	   this_reg++)							\
1700*75f6d617Schristos 	{								\
1701*75f6d617Schristos 	  DEBUG_PRINT2 ("  Pushing reg: %lu\n", this_reg);		\
1702*75f6d617Schristos 	  DEBUG_STATEMENT (num_regs_pushed++);				\
1703*75f6d617Schristos 									\
1704*75f6d617Schristos 	  DEBUG_PRINT2 ("    start: %p\n", regstart[this_reg]);		\
1705*75f6d617Schristos 	  PUSH_FAILURE_POINTER (regstart[this_reg]);			\
1706*75f6d617Schristos 									\
1707*75f6d617Schristos 	  DEBUG_PRINT2 ("    end: %p\n", regend[this_reg]);		\
1708*75f6d617Schristos 	  PUSH_FAILURE_POINTER (regend[this_reg]);			\
1709*75f6d617Schristos 									\
1710*75f6d617Schristos 	  DEBUG_PRINT2 ("    info: %p\n      ",				\
1711*75f6d617Schristos 			reg_info[this_reg].word.pointer);		\
1712*75f6d617Schristos 	  DEBUG_PRINT2 (" match_null=%d",				\
1713*75f6d617Schristos 			REG_MATCH_NULL_STRING_P (reg_info[this_reg]));	\
1714*75f6d617Schristos 	  DEBUG_PRINT2 (" active=%d", IS_ACTIVE (reg_info[this_reg]));	\
1715*75f6d617Schristos 	  DEBUG_PRINT2 (" matched_something=%d",			\
1716*75f6d617Schristos 			MATCHED_SOMETHING (reg_info[this_reg]));	\
1717*75f6d617Schristos 	  DEBUG_PRINT2 (" ever_matched=%d",				\
1718*75f6d617Schristos 			EVER_MATCHED_SOMETHING (reg_info[this_reg]));	\
1719*75f6d617Schristos 	  DEBUG_PRINT1 ("\n");						\
1720*75f6d617Schristos 	  PUSH_FAILURE_ELT (reg_info[this_reg].word);			\
1721*75f6d617Schristos 	}								\
1722*75f6d617Schristos 									\
1723*75f6d617Schristos     DEBUG_PRINT2 ("  Pushing  low active reg: %ld\n", lowest_active_reg);\
1724*75f6d617Schristos     PUSH_FAILURE_INT (lowest_active_reg);				\
1725*75f6d617Schristos 									\
1726*75f6d617Schristos     DEBUG_PRINT2 ("  Pushing high active reg: %ld\n", highest_active_reg);\
1727*75f6d617Schristos     PUSH_FAILURE_INT (highest_active_reg);				\
1728*75f6d617Schristos 									\
1729*75f6d617Schristos     DEBUG_PRINT2 ("  Pushing pattern %p:\n", pattern_place);		\
1730*75f6d617Schristos     DEBUG_PRINT_COMPILED_PATTERN (bufp, pattern_place, pend);		\
1731*75f6d617Schristos     PUSH_FAILURE_POINTER (pattern_place);				\
1732*75f6d617Schristos 									\
1733*75f6d617Schristos     DEBUG_PRINT2 ("  Pushing string %p: `", string_place);		\
1734*75f6d617Schristos     DEBUG_PRINT_DOUBLE_STRING (string_place, string1, size1, string2,   \
1735*75f6d617Schristos 				 size2);				\
1736*75f6d617Schristos     DEBUG_PRINT1 ("'\n");						\
1737*75f6d617Schristos     PUSH_FAILURE_POINTER (string_place);				\
1738*75f6d617Schristos 									\
1739*75f6d617Schristos     DEBUG_PRINT2 ("  Pushing failure id: %u\n", failure_id);		\
1740*75f6d617Schristos     DEBUG_PUSH (failure_id);						\
1741*75f6d617Schristos   } while (0)
1742*75f6d617Schristos 
# ifndef DEFINED_ONCE
/* Stack slots taken by each saved register: PUSH_FAILURE_POINT pushes
   its start pointer, its end pointer and its info word.  */
#  define NUM_REG_ITEMS  3

/* Stack slots taken by a failure point apart from the registers: the
   lowest and highest active register numbers, the pattern position and
   the string position.  */
#  ifndef DEBUG
#   define NUM_NONREG_ITEMS 4
#  else
#   define NUM_NONREG_ITEMS 5 /* One more for the failure point id.  */
#  endif

/* Upper bound assumed for the items pushed by one failure point.  */
/* This used to be based on (num_regs - 1), the number of registers
   this regexp will save; a fixed 5 registers is used instead to avoid
   stack overflow for a regexp with lots of parens.  */
#  define MAX_FAILURE_ITEMS (NUM_NONREG_ITEMS + 5 * NUM_REG_ITEMS)

/* Items actually pushed for the current range of active registers.
   Reads the variables `lowest_active_reg' and `highest_active_reg'.  */
#  define NUM_FAILURE_ITEMS						\
  (NUM_REG_ITEMS * (highest_active_reg - lowest_active_reg + 1)		\
   + NUM_NONREG_ITEMS)

/* Free slots left on the variable `fail_stack' before it must grow.  */
#  define REMAINING_AVAIL_SLOTS ((fail_stack).size - (fail_stack).avail)
# endif /* not DEFINED_ONCE */
1771*75f6d617Schristos 
1772*75f6d617Schristos 
1773*75f6d617Schristos /* Pops what PUSH_FAILURE_POINT pushes.
1774*75f6d617Schristos 
1775*75f6d617Schristos    We restore into the parameters, all of which should be lvalues:
1776*75f6d617Schristos      STR -- the saved data position.
1777*75f6d617Schristos      PAT -- the saved pattern position.
1778*75f6d617Schristos      LOW_REG, HIGH_REG -- the lowest and highest active registers.
1779*75f6d617Schristos      REGSTART, REGEND -- arrays of string positions.
1780*75f6d617Schristos      REG_INFO -- array of information about each subexpression.
1781*75f6d617Schristos 
1782*75f6d617Schristos    Also assumes the variables `fail_stack' and (if debugging), `bufp',
1783*75f6d617Schristos    `pend', `string1', `size1', `string2', and `size2'.  */
/* Pop the topmost failure point and restore the state saved in it.

   STR, PAT, LOW_REG and HIGH_REG must be lvalues; they receive the saved
   string position, pattern position and lowest/highest active register
   numbers.  STR is left unchanged when the saved string position is
   NULL.  REGSTART, REGEND and REG_INFO are arrays indexed by register
   number; entries HIGH_REG down to LOW_REG are overwritten.

   Items are popped in this order: the failure id (through DEBUG_POP
   only), the string position, the pattern position, HIGH_REG, LOW_REG,
   and then, for each register from HIGH_REG down to LOW_REG, the info
   word, the end pointer and the start pointer.

   The expansion is a bare brace block that declares its own locals
   (`this_reg', `string_temp'), and it clears `set_regs_matched_done'.
   The register loop is unconditional: the old `if (1) ... else' wrapper
   around it had an else arm that could never run.  */
# define POP_FAILURE_POINT(str, pat, low_reg, high_reg, regstart, regend, reg_info)\
{									\
  DEBUG_STATEMENT (unsigned failure_id;)				\
  active_reg_t this_reg;						\
  const UCHAR_T *string_temp;						\
									\
  assert (!FAIL_STACK_EMPTY ());					\
									\
  /* Remove failure points and point to how many regs pushed.  */	\
  DEBUG_PRINT1 ("POP_FAILURE_POINT:\n");				\
  DEBUG_PRINT2 ("  Before pop, next avail: %d\n", fail_stack.avail);	\
  DEBUG_PRINT2 ("                    size: %d\n", fail_stack.size);	\
									\
  assert (fail_stack.avail >= NUM_NONREG_ITEMS);			\
									\
  DEBUG_POP (&failure_id);						\
  DEBUG_PRINT2 ("  Popping failure id: %u\n", failure_id);		\
									\
  /* If the saved string location is NULL, it came from an		\
     on_failure_keep_string_jump opcode, and we want to throw away the	\
     saved NULL, thus retaining our current position in the string.  */	\
  string_temp = POP_FAILURE_POINTER ();					\
  if (string_temp != NULL)						\
    str = (const CHAR_T *) string_temp;					\
									\
  DEBUG_PRINT2 ("  Popping string %p: `", str);				\
  DEBUG_PRINT_DOUBLE_STRING (str, string1, size1, string2, size2);	\
  DEBUG_PRINT1 ("'\n");							\
									\
  pat = (UCHAR_T *) POP_FAILURE_POINTER ();				\
  DEBUG_PRINT2 ("  Popping pattern %p:\n", pat);			\
  DEBUG_PRINT_COMPILED_PATTERN (bufp, pat, pend);			\
									\
  /* Restore register info.  */						\
  high_reg = (active_reg_t) POP_FAILURE_INT ();				\
  DEBUG_PRINT2 ("  Popping high active reg: %ld\n", high_reg);		\
									\
  low_reg = (active_reg_t) POP_FAILURE_INT ();				\
  DEBUG_PRINT2 ("  Popping  low active reg: %ld\n", low_reg);		\
									\
  /* Registers were pushed lowest first, so pop them highest first.  */	\
  for (this_reg = high_reg; this_reg >= low_reg; this_reg--)		\
    {									\
      DEBUG_PRINT2 ("    Popping reg: %ld\n", this_reg);		\
									\
      reg_info[this_reg].word = POP_FAILURE_ELT ();			\
      DEBUG_PRINT2 ("      info: %p\n",					\
		    reg_info[this_reg].word.pointer);			\
									\
      regend[this_reg] = (const CHAR_T *) POP_FAILURE_POINTER ();	\
      DEBUG_PRINT2 ("      end: %p\n", regend[this_reg]);		\
									\
      regstart[this_reg] = (const CHAR_T *) POP_FAILURE_POINTER ();	\
      DEBUG_PRINT2 ("      start: %p\n", regstart[this_reg]);		\
    }									\
									\
  set_regs_matched_done = 0;						\
  DEBUG_STATEMENT (nfailure_points_popped++);				\
} /* POP_FAILURE_POINT */
1853*75f6d617Schristos 
1854*75f6d617Schristos /* Structure for per-register (a.k.a. per-group) information.
1855*75f6d617Schristos    Other register information, such as the
1856*75f6d617Schristos    starting and ending positions (which are addresses), and the list of
1857*75f6d617Schristos    inner groups (which is a bits list) are maintained in separate
1858*75f6d617Schristos    variables.
1859*75f6d617Schristos 
1860*75f6d617Schristos    We are making a (strictly speaking) nonportable assumption here: that
1861*75f6d617Schristos    the compiler will pack our bit fields into something that fits into
1862*75f6d617Schristos    the type of `word', i.e., is something that fits into one item on the
1863*75f6d617Schristos    failure stack.  */
1864*75f6d617Schristos 
1865*75f6d617Schristos 
1866*75f6d617Schristos /* Declarations and macros for re_match_2.  */
1867*75f6d617Schristos 
1868*75f6d617Schristos typedef union
1869*75f6d617Schristos {
1870*75f6d617Schristos   PREFIX(fail_stack_elt_t) word;
1871*75f6d617Schristos   struct
1872*75f6d617Schristos   {
1873*75f6d617Schristos       /* This field is one if this group can match the empty string,
1874*75f6d617Schristos          zero if not.  If not yet determined,  `MATCH_NULL_UNSET_VALUE'.  */
1875*75f6d617Schristos # define MATCH_NULL_UNSET_VALUE 3
1876*75f6d617Schristos     unsigned match_null_string_p : 2;
1877*75f6d617Schristos     unsigned is_active : 1;
1878*75f6d617Schristos     unsigned matched_something : 1;
1879*75f6d617Schristos     unsigned ever_matched_something : 1;
1880*75f6d617Schristos   } bits;
1881*75f6d617Schristos } PREFIX(register_info_type);
1882*75f6d617Schristos 
# ifndef DEFINED_ONCE
/* Lvalue accessors for the flag bits of a register-info record R.  */
#  define REG_MATCH_NULL_STRING_P(R)	((R).bits.match_null_string_p)
#  define IS_ACTIVE(R)			((R).bits.is_active)
#  define MATCHED_SOMETHING(R)		((R).bits.matched_something)
#  define EVER_MATCHED_SOMETHING(R)	((R).bits.ever_matched_something)


/* To be invoked once a real character has been matched.  Every register
   from `lowest_active_reg' through `highest_active_reg' gets both its
   `matched_something' and `ever_matched_something' flags set.  The work
   is skipped while `set_regs_matched_done' is already nonzero; otherwise
   that flag is raised first.  */
#  define SET_REGS_MATCHED()						\
  do {									\
    active_reg_t reg_no_;						\
    if (set_regs_matched_done)						\
      break;								\
    set_regs_matched_done = 1;						\
    for (reg_no_ = lowest_active_reg;					\
	 reg_no_ <= highest_active_reg;					\
	 reg_no_++)							\
      {									\
	EVER_MATCHED_SOMETHING (reg_info[reg_no_]) = 1;			\
	MATCHED_SOMETHING (reg_info[reg_no_]) = 1;			\
      }									\
  } while (0)
# endif /* not DEFINED_ONCE */
1910*75f6d617Schristos 
1911*75f6d617Schristos /* Registers are set to a sentinel when they haven't yet matched.  */
1912*75f6d617Schristos static CHAR_T PREFIX(reg_unset_dummy);
1913*75f6d617Schristos # define REG_UNSET_VALUE (&PREFIX(reg_unset_dummy))
1914*75f6d617Schristos # define REG_UNSET(e) ((e) == REG_UNSET_VALUE)
1915*75f6d617Schristos 
1916*75f6d617Schristos /* Subroutine declarations and macros for regex_compile.  */
1917*75f6d617Schristos static void PREFIX(store_op1) _RE_ARGS ((re_opcode_t op, UCHAR_T *loc, int arg));
1918*75f6d617Schristos static void PREFIX(store_op2) _RE_ARGS ((re_opcode_t op, UCHAR_T *loc,
1919*75f6d617Schristos 				 int arg1, int arg2));
1920*75f6d617Schristos static void PREFIX(insert_op1) _RE_ARGS ((re_opcode_t op, UCHAR_T *loc,
1921*75f6d617Schristos 				  int arg, UCHAR_T *end));
1922*75f6d617Schristos static void PREFIX(insert_op2) _RE_ARGS ((re_opcode_t op, UCHAR_T *loc,
1923*75f6d617Schristos 				  int arg1, int arg2, UCHAR_T *end));
1924*75f6d617Schristos static boolean PREFIX(at_begline_loc_p) _RE_ARGS ((const CHAR_T *pattern,
1925*75f6d617Schristos 					   const CHAR_T *p,
1926*75f6d617Schristos 					   reg_syntax_t syntax));
1927*75f6d617Schristos static boolean PREFIX(at_endline_loc_p) _RE_ARGS ((const CHAR_T *p,
1928*75f6d617Schristos 					   const CHAR_T *pend,
1929*75f6d617Schristos 					   reg_syntax_t syntax));
1930*75f6d617Schristos # ifdef WCHAR
1931*75f6d617Schristos static reg_errcode_t wcs_compile_range _RE_ARGS ((CHAR_T range_start,
1932*75f6d617Schristos 						  const CHAR_T **p_ptr,
1933*75f6d617Schristos 						  const CHAR_T *pend,
1934*75f6d617Schristos 						  char *translate,
1935*75f6d617Schristos 						  reg_syntax_t syntax,
1936*75f6d617Schristos 						  UCHAR_T *b,
1937*75f6d617Schristos 						  CHAR_T *char_set));
1938*75f6d617Schristos static void insert_space _RE_ARGS ((int num, CHAR_T *loc, CHAR_T *end));
1939*75f6d617Schristos # else /* BYTE */
1940*75f6d617Schristos static reg_errcode_t byte_compile_range _RE_ARGS ((unsigned int range_start,
1941*75f6d617Schristos 						   const char **p_ptr,
1942*75f6d617Schristos 						   const char *pend,
1943*75f6d617Schristos 						   char *translate,
1944*75f6d617Schristos 						   reg_syntax_t syntax,
1945*75f6d617Schristos 						   unsigned char *b));
1946*75f6d617Schristos # endif /* WCHAR */
1947*75f6d617Schristos 
/* PATFETCH (C): read the next character of the uncompiled pattern into
   C and advance `p'.  When `p' has already reached `pend' the enclosing
   function returns REG_EEND instead.  The character is made unsigned so
   it can index `translate', and is mapped through `translate' when that
   table is non-null.  */
/* In the wide-character build only characters <= 0xff are translated,
   because a table covering a 4-byte character set such as UCS4 would
   need 4GB.  */
# ifndef PATFETCH
#  ifdef WCHAR
#   define PATFETCH(c)							\
  do {									\
    if (p == pend)							\
      return REG_EEND;							\
    c = (UCHAR_T) *p;							\
    p++;								\
    if (translate != 0 && c <= 0xff)					\
      c = (UCHAR_T) translate[c];					\
  } while (0)
#  else /* BYTE */
#   define PATFETCH(c)							\
  do {									\
    if (p == pend)							\
      return REG_EEND;							\
    c = (unsigned char) *p;						\
    p++;								\
    if (translate != 0)							\
      c = (unsigned char) translate[c];					\
  } while (0)
#  endif /* WCHAR */
# endif
1970*75f6d617Schristos 
/* PATFETCH_RAW (C): like PATFETCH but without translation.  Reads the
   next pattern character into C and advances `p'; at `pend' the
   enclosing function returns REG_EEND.  */
# define PATFETCH_RAW(c)						\
  do {									\
    if (p == pend)							\
      return REG_EEND;							\
    c = (UCHAR_T) *p;							\
    p++;								\
  } while (0)

/* Step `p' back by one pattern character.  */
# define PATUNFETCH p--
1980*75f6d617Schristos 
1981*75f6d617Schristos 
/* TRANSLATE (D) yields D itself when `translate' is null, and
   translate[D] converted to `char' otherwise.  D is cast to unsigned
   char before being used as a subscript, since the data may be plain
   (possibly signed) `char'.  */
/* In the wide-character build only characters <= 0xff are translated,
   because a table covering a 4-byte character set such as UCS4 would
   need 4GB.  */

# ifndef TRANSLATE
#  ifdef WCHAR
#   define TRANSLATE(d)							\
  ((!translate || ((UCHAR_T) (d)) > 0xff)				\
   ? (d)								\
   : (char) translate[(unsigned char) (d)])
#  else /* BYTE */
#   define TRANSLATE(d)							\
  (!translate ? (d) : (char) translate[(unsigned char) (d)])
#  endif /* WCHAR */
# endif
2000*75f6d617Schristos 
2001*75f6d617Schristos 
/* Macros that append to the compiled pattern.  They all rely on the
   caller's `b' (next free slot) and `bufp' variables.  */

/* Initial allocation used when the caller supplied no buffer.  */
# define INIT_BUF_SIZE  (32 * sizeof(UCHAR_T))

/* Keep calling EXTEND_BUFFER until N more elements fit behind `b'.  N
   counts bytes in the byte build and CHAR_T units in the wide build.
   Note that the expansion is a bare `while' statement.  */
# ifdef WCHAR
#  define GET_BUFFER_SPACE(n)						\
    while (bufp->allocated						\
	   < ((unsigned long)b - (unsigned long)COMPILED_BUFFER_VAR	\
	      + (n)*sizeof(CHAR_T)))					\
      EXTEND_BUFFER ()
# else /* BYTE */
#  define GET_BUFFER_SPACE(n)						\
    while (bufp->allocated < (unsigned long) (b - bufp->buffer + (n)))	\
      EXTEND_BUFFER ()
# endif /* WCHAR */

/* Reserve room for one element, then store C and advance `b'.  */
# define BUF_PUSH(c)							\
  do									\
    {									\
      GET_BUFFER_SPACE (1);						\
      *b++ = (UCHAR_T) (c);						\
    }									\
  while (0)


/* Reserve room for two elements, then store C1 followed by C2.  */
# define BUF_PUSH_2(c1, c2)						\
  do									\
    {									\
      GET_BUFFER_SPACE (2);						\
      *b++ = (UCHAR_T) (c1);						\
      *b++ = (UCHAR_T) (c2);						\
    }									\
  while (0)


/* Reserve room for three elements, then store C1, C2 and C3 in order.  */
# define BUF_PUSH_3(c1, c2, c3)						\
  do									\
    {									\
      GET_BUFFER_SPACE (3);						\
      *b++ = (UCHAR_T) (c1);						\
      *b++ = (UCHAR_T) (c2);						\
      *b++ = (UCHAR_T) (c3);						\
    }									\
  while (0)
2044*75f6d617Schristos 
/* Jump emitters.  Each passes its helper the distance from LOC to TO,
   less the 1 + OFFSET_ADDRESS_SIZE elements occupied by the jump
   instruction itself, converted to `int'.  */

/* Store opcode OP at LOC as a jump to TO.  */
# define STORE_JUMP(op, loc, to)					\
  PREFIX(store_op1) (op, loc,						\
		     (int) ((to) - (loc) - 1 - OFFSET_ADDRESS_SIZE))

/* Same, with the extra argument ARG.  */
# define STORE_JUMP2(op, loc, to, arg)					\
  PREFIX(store_op2) (op, loc,						\
		     (int) ((to) - (loc) - 1 - OFFSET_ADDRESS_SIZE),	\
		     arg)

/* Inserting variant of STORE_JUMP; `b' is passed as the buffer end.  */
# define INSERT_JUMP(op, loc, to)					\
  PREFIX(insert_op1) (op, loc,						\
		      (int) ((to) - (loc) - 1 - OFFSET_ADDRESS_SIZE),	\
		      b)

/* Inserting variant of STORE_JUMP2; `b' is passed as the buffer end.  */
# define INSERT_JUMP2(op, loc, to, arg)					\
  PREFIX(insert_op2) (op, loc,						\
		      (int) ((to) - (loc) - 1 - OFFSET_ADDRESS_SIZE),	\
		      arg, b)
2062*75f6d617Schristos 
/* MAX_BUF_SIZE is not an arbitrary limit: the arguments that represent
   offsets into the pattern are two bytes long, so if 2^16 bytes turned
   out to be too small, many things would have to change.  */
/* A compiler whose allocation limit is below 2^16 bytes (as with 16-bit
   MSC) needs the same treatment MSC gets below, i.e. a slightly smaller
   MAX_BUF_SIZE.  Otherwise a request may end up reallocating to 0
   bytes, which will not work.  */
# ifndef DEFINED_ONCE
#  if !defined _MSC_VER || defined WIN32
#   define MAX_BUF_SIZE (1L << 16)
#   define REALLOC(p,s) realloc ((p), (s))
#  else
/* 16-bit versions of Microsoft C limit malloc to approximately 65512
   bytes.  The cast in REALLOC only silences conversion warnings; it is
   not required.  */
#   define MAX_BUF_SIZE  65500L
#   define REALLOC(p,s) realloc ((p), (size_t) (s))
#  endif

/* Helpers for EXTEND_BUFFER, which doubles the allocation (capped at
   MAX_BUF_SIZE) with realloc and then re-aims every pointer that pointed
   into the old block.  MOVE_BUFFER_POINTER (P) shifts P by the caller's
   local `incr'.  ELSE_EXTEND_BUFFER_HIGH_BOUND is empty unless bounded
   pointers are in use, in which case it supplies the `else' arm that
   refreshes the high bounds when the block did not move.  */
#  if !__BOUNDED_POINTERS__
#   define MOVE_BUFFER_POINTER(P) (P) += incr
#   define ELSE_EXTEND_BUFFER_HIGH_BOUND
#  else
#   define SET_HIGH_BOUND(P) (__ptrhigh (P) = __ptrlow (P) + bufp->allocated)
#   define MOVE_BUFFER_POINTER(P) \
  (__ptrlow (P) += incr, SET_HIGH_BOUND (P), __ptrvalue (P) += incr)
#   define ELSE_EXTEND_BUFFER_HIGH_BOUND	\
  else						\
    {						\
      SET_HIGH_BOUND (b);			\
      SET_HIGH_BOUND (begalt);			\
      if (fixup_alt_jump)			\
	SET_HIGH_BOUND (fixup_alt_jump);	\
      if (laststart)				\
	SET_HIGH_BOUND (laststart);		\
      if (pending_exact)			\
	SET_HIGH_BOUND (pending_exact);		\
    }
#  endif
# endif /* not DEFINED_ONCE */
2108*75f6d617Schristos 
/* EXTEND_BUFFER (): enlarge the compiled-pattern buffer.

   `bufp->allocated' is doubled and then capped at MAX_BUF_SIZE.  If the
   buffer was already at the limit, the enclosing function returns
   REG_ESIZE; if the reallocation yields NULL it returns REG_ESPACE.
   When the block moved, `b', `begalt' and the non-null ones among
   `fixup_alt_jump', `laststart' and `pending_exact' are shifted by the
   distance between the old and the new block.

   That distance is held in a `long' local named `incr' (the name is
   used by MOVE_BUFFER_POINTER).  An `int' is too narrow where pointers
   are wider than `int': the truncated distance would send the pointers
   to the wrong addresses.

   NOTE(review): `bufp->buffer' is overwritten before the NULL check, so
   on REG_ESPACE the previous block is no longer reachable through
   BUFP.  */
# ifdef WCHAR
#  define EXTEND_BUFFER()						\
  do {									\
    UCHAR_T *old_buffer = COMPILED_BUFFER_VAR;				\
    int wchar_count;							\
    if (bufp->allocated + sizeof(UCHAR_T) > MAX_BUF_SIZE)		\
      return REG_ESIZE;							\
    bufp->allocated <<= 1;						\
    if (bufp->allocated > MAX_BUF_SIZE)					\
      bufp->allocated = MAX_BUF_SIZE;					\
    /* How many characters the new buffer can have?  */			\
    wchar_count = bufp->allocated / sizeof(UCHAR_T);			\
    if (wchar_count == 0) wchar_count = 1;				\
    /* Truncate the buffer to CHAR_T align.  */			\
    bufp->allocated = wchar_count * sizeof(UCHAR_T);			\
    RETALLOC (COMPILED_BUFFER_VAR, wchar_count, UCHAR_T);		\
    bufp->buffer = (char*)COMPILED_BUFFER_VAR;				\
    if (COMPILED_BUFFER_VAR == NULL)					\
      return REG_ESPACE;						\
    /* If the buffer moved, move all the pointers into it.  */		\
    if (old_buffer != COMPILED_BUFFER_VAR)				\
      {									\
	long incr = COMPILED_BUFFER_VAR - old_buffer;			\
	MOVE_BUFFER_POINTER (b);					\
	MOVE_BUFFER_POINTER (begalt);					\
	if (fixup_alt_jump)						\
	  MOVE_BUFFER_POINTER (fixup_alt_jump);				\
	if (laststart)							\
	  MOVE_BUFFER_POINTER (laststart);				\
	if (pending_exact)						\
	  MOVE_BUFFER_POINTER (pending_exact);				\
      }									\
    ELSE_EXTEND_BUFFER_HIGH_BOUND					\
  } while (0)
# else /* BYTE */
#  define EXTEND_BUFFER()						\
  do {									\
    UCHAR_T *old_buffer = COMPILED_BUFFER_VAR;				\
    if (bufp->allocated == MAX_BUF_SIZE)				\
      return REG_ESIZE;							\
    bufp->allocated <<= 1;						\
    if (bufp->allocated > MAX_BUF_SIZE)					\
      bufp->allocated = MAX_BUF_SIZE;					\
    bufp->buffer = (UCHAR_T *) REALLOC (COMPILED_BUFFER_VAR,		\
						bufp->allocated);	\
    if (COMPILED_BUFFER_VAR == NULL)					\
      return REG_ESPACE;						\
    /* If the buffer moved, move all the pointers into it.  */		\
    if (old_buffer != COMPILED_BUFFER_VAR)				\
      {									\
	long incr = COMPILED_BUFFER_VAR - old_buffer;			\
	MOVE_BUFFER_POINTER (b);					\
	MOVE_BUFFER_POINTER (begalt);					\
	if (fixup_alt_jump)						\
	  MOVE_BUFFER_POINTER (fixup_alt_jump);				\
	if (laststart)							\
	  MOVE_BUFFER_POINTER (laststart);				\
	if (pending_exact)						\
	  MOVE_BUFFER_POINTER (pending_exact);				\
      }									\
    ELSE_EXTEND_BUFFER_HIGH_BOUND					\
  } while (0)
# endif /* WCHAR */
2172*75f6d617Schristos 
# ifndef DEFINED_ONCE
/* The register number argument of {start,stop}_memory occupies one
   byte, so that byte bounds the number of groups that can be reported
   on.  */
#  define MAX_REGNUM 255

/* A pattern may still contain more than `MAX_REGNUM' registers; the
   excess is simply ignored.  */
typedef unsigned regnum_t;


/* Types and macros for the compile stack.  */

/* Offsets may point forwards or backwards, so this type has to cover
   -(MAX_BUF_SIZE - 1) through MAX_BUF_SIZE - 1.  `int' may not be
   enough when sizeof(int) == 2, hence `long'.  */
typedef long pattern_offset_t;

/* One saved entry of the compile stack.  */
typedef struct
{
  pattern_offset_t begalt_offset;
  pattern_offset_t fixup_alt_jump;
  pattern_offset_t inner_group_offset;
  pattern_offset_t laststart_offset;
  regnum_t regnum;
} compile_stack_elt_t;


/* The compile stack itself: `stack' holds `size' entries, of which the
   first `avail' are in use.  */
typedef struct
{
  compile_stack_elt_t *stack;
  unsigned size;
  unsigned avail;			/* Offset of next open position.  */
} compile_stack_type;


#  define INIT_COMPILE_STACK_SIZE 32

/* Both tests refer to the caller's variable named `compile_stack'.  */
#  define COMPILE_STACK_EMPTY  (0 == compile_stack.avail)
#  define COMPILE_STACK_FULL  (compile_stack.size == compile_stack.avail)

/* The next available (not yet used) element.  */
#  define COMPILE_STACK_TOP (compile_stack.stack[compile_stack.avail])

# endif /* not DEFINED_ONCE */
2218*75f6d617Schristos 
/* Set the bit for character C in the bit list that starts at `b'.  C is
   reduced to unsigned char, then selects byte C / BYTEWIDTH and bit
   C % BYTEWIDTH within it.  Both uses of C are parenthesized so that the
   cast applies to the whole argument even when it is an expression such
   as `x - y'.  */
# ifndef DEFINED_ONCE
#  define SET_LIST_BIT(c)                               \
  (b[((unsigned char) (c)) / BYTEWIDTH]               \
   |= 1 << (((unsigned char) (c)) % BYTEWIDTH))
# endif /* not DEFINED_ONCE */
2225*75f6d617Schristos 
/* Read a run of decimal digits from the uncompiled pattern into NUM.
   Characters are fetched with PATFETCH into the caller's `c' until a
   non-digit has been fetched or `p' reaches `pend'.  A negative NUM is
   reset to 0 before the first digit is added, and digits are no longer
   accumulated once NUM exceeds RE_DUP_MAX (they are still consumed).
   The expansion is a bare brace block.  */
# define GET_UNSIGNED_NUMBER(num) \
  {									\
    for (; p != pend; )							\
      {									\
	PATFETCH (c);							\
	if (!(c >= '0' && c <= '9'))					\
	  break;							\
	if (num > RE_DUP_MAX)						\
	  continue;							\
	if (num < 0)							\
	  num = 0;							\
	num = num * 10 + c - '0';					\
      }									\
  }
2242*75f6d617Schristos 
# ifndef DEFINED_ONCE
#  if !defined _LIBC && !WIDE_CHAR_SUPPORT
/* Without wide-character support only the class names listed below are
   recognized.  */
#   define CHAR_CLASS_MAX_LENGTH  6 /* Namely, `xdigit'.  */

#   define IS_CHAR_CLASS(string)					\
   (STREQ (string, "alpha")						\
    || STREQ (string, "upper")						\
    || STREQ (string, "lower")						\
    || STREQ (string, "digit")						\
    || STREQ (string, "alnum")						\
    || STREQ (string, "xdigit")						\
    || STREQ (string, "space")						\
    || STREQ (string, "print")						\
    || STREQ (string, "punct")						\
    || STREQ (string, "graph")						\
    || STREQ (string, "cntrl")						\
    || STREQ (string, "blank"))
#  else
/* The GNU C library provides support for user-defined character classes
   and the functions from ISO C amendment 1.  */
#   ifndef CHARCLASS_NAME_MAX
/* This shouldn't happen, but some implementations might still lack the
   constant.  Use a reasonable default value.  */
#    define CHAR_CLASS_MAX_LENGTH 256
#   else
#    define CHAR_CLASS_MAX_LENGTH CHARCLASS_NAME_MAX
#   endif

#   ifndef _LIBC
#    define IS_CHAR_CLASS(string) wctype (string)
#   else
#    define IS_CHAR_CLASS(string) __wctype (string)
#   endif
#  endif
# endif /* not DEFINED_ONCE */
2272*75f6d617Schristos 
2273*75f6d617Schristos # ifndef MATCH_MAY_ALLOCATE
2274*75f6d617Schristos 
2275*75f6d617Schristos /* If we cannot allocate large objects within re_match_2_internal,
2276*75f6d617Schristos    we make the fail stack and register vectors global.
2277*75f6d617Schristos    The fail stack, we grow to the maximum size when a regexp
2278*75f6d617Schristos    is compiled.
2279*75f6d617Schristos    The register vectors, we adjust in size each time we
2280*75f6d617Schristos    compile a regexp, according to the number of registers it needs.  */
2281*75f6d617Schristos 
2282*75f6d617Schristos static PREFIX(fail_stack_type) fail_stack;
2283*75f6d617Schristos 
2284*75f6d617Schristos /* Size with which the following vectors are currently allocated.
2285*75f6d617Schristos    That is so we can make them bigger as needed,
2286*75f6d617Schristos    but never make them smaller.  */
2287*75f6d617Schristos #  ifdef DEFINED_ONCE
2288*75f6d617Schristos static int regs_allocated_size;
2289*75f6d617Schristos 
2290*75f6d617Schristos static const char **     regstart, **     regend;
2291*75f6d617Schristos static const char ** old_regstart, ** old_regend;
2292*75f6d617Schristos static const char **best_regstart, **best_regend;
2293*75f6d617Schristos static const char **reg_dummy;
2294*75f6d617Schristos #  endif /* DEFINED_ONCE */
2295*75f6d617Schristos 
2296*75f6d617Schristos static PREFIX(register_info_type) *PREFIX(reg_info);
2297*75f6d617Schristos static PREFIX(register_info_type) *PREFIX(reg_info_dummy);
2298*75f6d617Schristos 
2299*75f6d617Schristos /* Make the register vectors big enough for NUM_REGS registers,
2300*75f6d617Schristos    but don't make them smaller.  */
2301*75f6d617Schristos 
2302*75f6d617Schristos static void
2303*75f6d617Schristos PREFIX(regex_grow_registers) (num_regs)
2304*75f6d617Schristos      int num_regs;
2305*75f6d617Schristos {
2306*75f6d617Schristos   if (num_regs > regs_allocated_size)
2307*75f6d617Schristos     {
2308*75f6d617Schristos       RETALLOC_IF (regstart,	 num_regs, const char *);
2309*75f6d617Schristos       RETALLOC_IF (regend,	 num_regs, const char *);
2310*75f6d617Schristos       RETALLOC_IF (old_regstart, num_regs, const char *);
2311*75f6d617Schristos       RETALLOC_IF (old_regend,	 num_regs, const char *);
2312*75f6d617Schristos       RETALLOC_IF (best_regstart, num_regs, const char *);
2313*75f6d617Schristos       RETALLOC_IF (best_regend,	 num_regs, const char *);
2314*75f6d617Schristos       RETALLOC_IF (PREFIX(reg_info), num_regs, PREFIX(register_info_type));
2315*75f6d617Schristos       RETALLOC_IF (reg_dummy,	 num_regs, const char *);
2316*75f6d617Schristos       RETALLOC_IF (PREFIX(reg_info_dummy), num_regs, PREFIX(register_info_type));
2317*75f6d617Schristos 
2318*75f6d617Schristos       regs_allocated_size = num_regs;
2319*75f6d617Schristos     }
2320*75f6d617Schristos }
2321*75f6d617Schristos 
2322*75f6d617Schristos # endif /* not MATCH_MAY_ALLOCATE */
2323*75f6d617Schristos 
2324*75f6d617Schristos # ifndef DEFINED_ONCE
2325*75f6d617Schristos static boolean group_in_compile_stack _RE_ARGS ((compile_stack_type
2326*75f6d617Schristos 						 compile_stack,
2327*75f6d617Schristos 						 regnum_t regnum));
2328*75f6d617Schristos # endif /* not DEFINED_ONCE */
2329*75f6d617Schristos 
2330*75f6d617Schristos /* `regex_compile' compiles PATTERN (of length SIZE) according to SYNTAX.
2331*75f6d617Schristos    Returns one of error codes defined in `regex.h', or zero for success.
2332*75f6d617Schristos 
2333*75f6d617Schristos    Assumes the `allocated' (and perhaps `buffer') and `translate'
2334*75f6d617Schristos    fields are set in BUFP on entry.
2335*75f6d617Schristos 
2336*75f6d617Schristos    If it succeeds, results are put in BUFP (if it returns an error, the
2337*75f6d617Schristos    contents of BUFP are undefined):
2338*75f6d617Schristos      `buffer' is the compiled pattern;
2339*75f6d617Schristos      `syntax' is set to SYNTAX;
2340*75f6d617Schristos      `used' is set to the length of the compiled pattern;
2341*75f6d617Schristos      `fastmap_accurate' is zero;
2342*75f6d617Schristos      `re_nsub' is the number of subexpressions in PATTERN;
2343*75f6d617Schristos      `not_bol' and `not_eol' are zero;
2344*75f6d617Schristos 
2345*75f6d617Schristos    The `fastmap' and `newline_anchor' fields are neither
2346*75f6d617Schristos    examined nor set.  */
2347*75f6d617Schristos 
/* Return VALUE from the enclosing function after freeing the storage
   allocated there: always `compile_stack.stack', and in the
   wide-character build also `pattern', `mbs_offset' and `is_binary'.
   The frees happen before VALUE is evaluated.  */
# ifdef WCHAR
#  define FREE_STACK_RETURN(value)		\
  return (free(pattern),			\
	  free(mbs_offset),			\
	  free(is_binary),			\
	  free (compile_stack.stack),		\
	  (value))
# else
#  define FREE_STACK_RETURN(value)		\
  return (free (compile_stack.stack),		\
	  (value))
# endif /* WCHAR */
2356*75f6d617Schristos 
2357*75f6d617Schristos static reg_errcode_t
2358*75f6d617Schristos PREFIX(regex_compile) (ARG_PREFIX(pattern), ARG_PREFIX(size), syntax, bufp)
2359*75f6d617Schristos      const char *ARG_PREFIX(pattern);
2360*75f6d617Schristos      size_t ARG_PREFIX(size);
2361*75f6d617Schristos      reg_syntax_t syntax;
2362*75f6d617Schristos      struct re_pattern_buffer *bufp;
2363*75f6d617Schristos {
2364*75f6d617Schristos   /* We fetch characters from PATTERN here.  Even though PATTERN is
2365*75f6d617Schristos      `char *' (i.e., signed), we declare these variables as unsigned, so
2366*75f6d617Schristos      they can be reliably used as array indices.  */
2367*75f6d617Schristos   register UCHAR_T c, c1;
2368*75f6d617Schristos 
2369*75f6d617Schristos #ifdef WCHAR
2370*75f6d617Schristos   /* A temporary space to keep wchar_t pattern and compiled pattern.  */
2371*75f6d617Schristos   CHAR_T *pattern, *COMPILED_BUFFER_VAR;
2372*75f6d617Schristos   size_t size;
2373*75f6d617Schristos   /* offset buffer for optimization. See convert_mbs_to_wc.  */
2374*75f6d617Schristos   int *mbs_offset = NULL;
2375*75f6d617Schristos   /* Holds whether each wchar_t is binary data or not.  */
2376*75f6d617Schristos   char *is_binary = NULL;
2377*75f6d617Schristos   /* A flag whether exactn is handling binary data or not.  */
2378*75f6d617Schristos   char is_exactn_bin = FALSE;
2379*75f6d617Schristos #endif /* WCHAR */
2380*75f6d617Schristos 
2381*75f6d617Schristos   /* A random temporary spot in PATTERN.  */
2382*75f6d617Schristos   const CHAR_T *p1;
2383*75f6d617Schristos 
2384*75f6d617Schristos   /* Points to the end of the buffer, where we should append.  */
2385*75f6d617Schristos   register UCHAR_T *b;
2386*75f6d617Schristos 
2387*75f6d617Schristos   /* Keeps track of unclosed groups.  */
2388*75f6d617Schristos   compile_stack_type compile_stack;
2389*75f6d617Schristos 
2390*75f6d617Schristos   /* Points to the current (ending) position in the pattern.  */
2391*75f6d617Schristos #ifdef WCHAR
2392*75f6d617Schristos   const CHAR_T *p;
2393*75f6d617Schristos   const CHAR_T *pend;
2394*75f6d617Schristos #else /* BYTE */
2395*75f6d617Schristos   const CHAR_T *p = pattern;
2396*75f6d617Schristos   const CHAR_T *pend = pattern + size;
2397*75f6d617Schristos #endif /* WCHAR */
2398*75f6d617Schristos 
2399*75f6d617Schristos   /* How to translate the characters in the pattern.  */
2400*75f6d617Schristos   RE_TRANSLATE_TYPE translate = bufp->translate;
2401*75f6d617Schristos 
2402*75f6d617Schristos   /* Address of the count-byte of the most recently inserted `exactn'
2403*75f6d617Schristos      command.  This makes it possible to tell if a new exact-match
2404*75f6d617Schristos      character can be added to that command or if the character requires
2405*75f6d617Schristos      a new `exactn' command.  */
2406*75f6d617Schristos   UCHAR_T *pending_exact = 0;
2407*75f6d617Schristos 
2408*75f6d617Schristos   /* Address of start of the most recently finished expression.
2409*75f6d617Schristos      This tells, e.g., postfix * where to find the start of its
2410*75f6d617Schristos      operand.  Reset at the beginning of groups and alternatives.  */
2411*75f6d617Schristos   UCHAR_T *laststart = 0;
2412*75f6d617Schristos 
2413*75f6d617Schristos   /* Address of beginning of regexp, or inside of last group.  */
2414*75f6d617Schristos   UCHAR_T *begalt;
2415*75f6d617Schristos 
2416*75f6d617Schristos   /* Address of the place where a forward jump should go to the end of
2417*75f6d617Schristos      the containing expression.  Each alternative of an `or' -- except the
2418*75f6d617Schristos      last -- ends with a forward jump of this sort.  */
2419*75f6d617Schristos   UCHAR_T *fixup_alt_jump = 0;
2420*75f6d617Schristos 
2421*75f6d617Schristos   /* Counts open-groups as they are encountered.  Remembered for the
2422*75f6d617Schristos      matching close-group on the compile stack, so the same register
2423*75f6d617Schristos      number is put in the stop_memory as the start_memory.  */
2424*75f6d617Schristos   regnum_t regnum = 0;
2425*75f6d617Schristos 
2426*75f6d617Schristos #ifdef WCHAR
2427*75f6d617Schristos   /* Initialize the wchar_t PATTERN and offset_buffer.  */
2428*75f6d617Schristos   p = pend = pattern = TALLOC(csize + 1, CHAR_T);
2429*75f6d617Schristos   mbs_offset = TALLOC(csize + 1, int);
2430*75f6d617Schristos   is_binary = TALLOC(csize + 1, char);
2431*75f6d617Schristos   if (pattern == NULL || mbs_offset == NULL || is_binary == NULL)
2432*75f6d617Schristos     {
2433*75f6d617Schristos       free(pattern);
2434*75f6d617Schristos       free(mbs_offset);
2435*75f6d617Schristos       free(is_binary);
2436*75f6d617Schristos       return REG_ESPACE;
2437*75f6d617Schristos     }
2438*75f6d617Schristos   pattern[csize] = L'\0';	/* sentinel */
2439*75f6d617Schristos   size = convert_mbs_to_wcs(pattern, cpattern, csize, mbs_offset, is_binary);
2440*75f6d617Schristos   pend = p + size;
2441*75f6d617Schristos   if (size < 0)
2442*75f6d617Schristos     {
2443*75f6d617Schristos       free(pattern);
2444*75f6d617Schristos       free(mbs_offset);
2445*75f6d617Schristos       free(is_binary);
2446*75f6d617Schristos       return REG_BADPAT;
2447*75f6d617Schristos     }
2448*75f6d617Schristos #endif
2449*75f6d617Schristos 
2450*75f6d617Schristos #ifdef DEBUG
2451*75f6d617Schristos   DEBUG_PRINT1 ("\nCompiling pattern: ");
2452*75f6d617Schristos   if (debug)
2453*75f6d617Schristos     {
2454*75f6d617Schristos       unsigned debug_count;
2455*75f6d617Schristos 
2456*75f6d617Schristos       for (debug_count = 0; debug_count < size; debug_count++)
2457*75f6d617Schristos         PUT_CHAR (pattern[debug_count]);
2458*75f6d617Schristos       putchar ('\n');
2459*75f6d617Schristos     }
2460*75f6d617Schristos #endif /* DEBUG */
2461*75f6d617Schristos 
2462*75f6d617Schristos   /* Initialize the compile stack.  */
2463*75f6d617Schristos   compile_stack.stack = TALLOC (INIT_COMPILE_STACK_SIZE, compile_stack_elt_t);
2464*75f6d617Schristos   if (compile_stack.stack == NULL)
2465*75f6d617Schristos     {
2466*75f6d617Schristos #ifdef WCHAR
2467*75f6d617Schristos       free(pattern);
2468*75f6d617Schristos       free(mbs_offset);
2469*75f6d617Schristos       free(is_binary);
2470*75f6d617Schristos #endif
2471*75f6d617Schristos       return REG_ESPACE;
2472*75f6d617Schristos     }
2473*75f6d617Schristos 
2474*75f6d617Schristos   compile_stack.size = INIT_COMPILE_STACK_SIZE;
2475*75f6d617Schristos   compile_stack.avail = 0;
2476*75f6d617Schristos 
2477*75f6d617Schristos   /* Initialize the pattern buffer.  */
2478*75f6d617Schristos   bufp->syntax = syntax;
2479*75f6d617Schristos   bufp->fastmap_accurate = 0;
2480*75f6d617Schristos   bufp->not_bol = bufp->not_eol = 0;
2481*75f6d617Schristos 
2482*75f6d617Schristos   /* Set `used' to zero, so that if we return an error, the pattern
2483*75f6d617Schristos      printer (for debugging) will think there's no pattern.  We reset it
2484*75f6d617Schristos      at the end.  */
2485*75f6d617Schristos   bufp->used = 0;
2486*75f6d617Schristos 
2487*75f6d617Schristos   /* Always count groups, whether or not bufp->no_sub is set.  */
2488*75f6d617Schristos   bufp->re_nsub = 0;
2489*75f6d617Schristos 
2490*75f6d617Schristos #if !defined emacs && !defined SYNTAX_TABLE
2491*75f6d617Schristos   /* Initialize the syntax table.  */
2492*75f6d617Schristos    init_syntax_once ();
2493*75f6d617Schristos #endif
2494*75f6d617Schristos 
2495*75f6d617Schristos   if (bufp->allocated == 0)
2496*75f6d617Schristos     {
2497*75f6d617Schristos       if (bufp->buffer)
2498*75f6d617Schristos 	{ /* If zero allocated, but buffer is non-null, try to realloc
2499*75f6d617Schristos              enough space.  This loses if buffer's address is bogus, but
2500*75f6d617Schristos              that is the user's responsibility.  */
2501*75f6d617Schristos #ifdef WCHAR
2502*75f6d617Schristos 	  /* Free bufp->buffer and allocate an array for wchar_t pattern
2503*75f6d617Schristos 	     buffer.  */
2504*75f6d617Schristos           free(bufp->buffer);
2505*75f6d617Schristos           COMPILED_BUFFER_VAR = TALLOC (INIT_BUF_SIZE/sizeof(UCHAR_T),
2506*75f6d617Schristos 					UCHAR_T);
2507*75f6d617Schristos #else
2508*75f6d617Schristos           RETALLOC (COMPILED_BUFFER_VAR, INIT_BUF_SIZE, UCHAR_T);
2509*75f6d617Schristos #endif /* WCHAR */
2510*75f6d617Schristos         }
2511*75f6d617Schristos       else
2512*75f6d617Schristos         { /* Caller did not allocate a buffer.  Do it for them.  */
2513*75f6d617Schristos           COMPILED_BUFFER_VAR = TALLOC (INIT_BUF_SIZE / sizeof(UCHAR_T),
2514*75f6d617Schristos 					UCHAR_T);
2515*75f6d617Schristos         }
2516*75f6d617Schristos 
2517*75f6d617Schristos       if (!COMPILED_BUFFER_VAR) FREE_STACK_RETURN (REG_ESPACE);
2518*75f6d617Schristos #ifdef WCHAR
2519*75f6d617Schristos       bufp->buffer = (char*)COMPILED_BUFFER_VAR;
2520*75f6d617Schristos #endif /* WCHAR */
2521*75f6d617Schristos       bufp->allocated = INIT_BUF_SIZE;
2522*75f6d617Schristos     }
2523*75f6d617Schristos #ifdef WCHAR
2524*75f6d617Schristos   else
2525*75f6d617Schristos     COMPILED_BUFFER_VAR = (UCHAR_T*) bufp->buffer;
2526*75f6d617Schristos #endif
2527*75f6d617Schristos 
2528*75f6d617Schristos   begalt = b = COMPILED_BUFFER_VAR;
2529*75f6d617Schristos 
2530*75f6d617Schristos   /* Loop through the uncompiled pattern until we're at the end.  */
2531*75f6d617Schristos   while (p != pend)
2532*75f6d617Schristos     {
2533*75f6d617Schristos       PATFETCH (c);
2534*75f6d617Schristos 
2535*75f6d617Schristos       switch (c)
2536*75f6d617Schristos         {
2537*75f6d617Schristos         case '^':
2538*75f6d617Schristos           {
2539*75f6d617Schristos             if (   /* If at start of pattern, it's an operator.  */
2540*75f6d617Schristos                    p == pattern + 1
2541*75f6d617Schristos                    /* If context independent, it's an operator.  */
2542*75f6d617Schristos                 || syntax & RE_CONTEXT_INDEP_ANCHORS
2543*75f6d617Schristos                    /* Otherwise, depends on what's come before.  */
2544*75f6d617Schristos                 || PREFIX(at_begline_loc_p) (pattern, p, syntax))
2545*75f6d617Schristos               BUF_PUSH (begline);
2546*75f6d617Schristos             else
2547*75f6d617Schristos               goto normal_char;
2548*75f6d617Schristos           }
2549*75f6d617Schristos           break;
2550*75f6d617Schristos 
2551*75f6d617Schristos 
2552*75f6d617Schristos         case '$':
2553*75f6d617Schristos           {
2554*75f6d617Schristos             if (   /* If at end of pattern, it's an operator.  */
2555*75f6d617Schristos                    p == pend
2556*75f6d617Schristos                    /* If context independent, it's an operator.  */
2557*75f6d617Schristos                 || syntax & RE_CONTEXT_INDEP_ANCHORS
2558*75f6d617Schristos                    /* Otherwise, depends on what's next.  */
2559*75f6d617Schristos                 || PREFIX(at_endline_loc_p) (p, pend, syntax))
2560*75f6d617Schristos                BUF_PUSH (endline);
2561*75f6d617Schristos              else
2562*75f6d617Schristos                goto normal_char;
2563*75f6d617Schristos            }
2564*75f6d617Schristos            break;
2565*75f6d617Schristos 
2566*75f6d617Schristos 
2567*75f6d617Schristos 	case '+':
2568*75f6d617Schristos         case '?':
2569*75f6d617Schristos           if ((syntax & RE_BK_PLUS_QM)
2570*75f6d617Schristos               || (syntax & RE_LIMITED_OPS))
2571*75f6d617Schristos             goto normal_char;
2572*75f6d617Schristos         handle_plus:
2573*75f6d617Schristos         case '*':
2574*75f6d617Schristos           /* If there is no previous pattern... */
2575*75f6d617Schristos           if (!laststart)
2576*75f6d617Schristos             {
2577*75f6d617Schristos               if (syntax & RE_CONTEXT_INVALID_OPS)
2578*75f6d617Schristos                 FREE_STACK_RETURN (REG_BADRPT);
2579*75f6d617Schristos               else if (!(syntax & RE_CONTEXT_INDEP_OPS))
2580*75f6d617Schristos                 goto normal_char;
2581*75f6d617Schristos             }
2582*75f6d617Schristos 
2583*75f6d617Schristos           {
2584*75f6d617Schristos             /* Are we optimizing this jump?  */
2585*75f6d617Schristos             boolean keep_string_p = false;
2586*75f6d617Schristos 
2587*75f6d617Schristos             /* 1 means zero (many) matches is allowed.  */
2588*75f6d617Schristos             char zero_times_ok = 0, many_times_ok = 0;
2589*75f6d617Schristos 
2590*75f6d617Schristos             /* If there is a sequence of repetition chars, collapse it
2591*75f6d617Schristos                down to just one (the right one).  We can't combine
2592*75f6d617Schristos                interval operators with these because of, e.g., `a{2}*',
2593*75f6d617Schristos                which should only match an even number of `a's.  */
2594*75f6d617Schristos 
2595*75f6d617Schristos             for (;;)
2596*75f6d617Schristos               {
2597*75f6d617Schristos                 zero_times_ok |= c != '+';
2598*75f6d617Schristos                 many_times_ok |= c != '?';
2599*75f6d617Schristos 
2600*75f6d617Schristos                 if (p == pend)
2601*75f6d617Schristos                   break;
2602*75f6d617Schristos 
2603*75f6d617Schristos                 PATFETCH (c);
2604*75f6d617Schristos 
2605*75f6d617Schristos                 if (c == '*'
2606*75f6d617Schristos                     || (!(syntax & RE_BK_PLUS_QM) && (c == '+' || c == '?')))
2607*75f6d617Schristos                   ;
2608*75f6d617Schristos 
2609*75f6d617Schristos                 else if (syntax & RE_BK_PLUS_QM  &&  c == '\\')
2610*75f6d617Schristos                   {
2611*75f6d617Schristos                     if (p == pend) FREE_STACK_RETURN (REG_EESCAPE);
2612*75f6d617Schristos 
2613*75f6d617Schristos                     PATFETCH (c1);
2614*75f6d617Schristos                     if (!(c1 == '+' || c1 == '?'))
2615*75f6d617Schristos                       {
2616*75f6d617Schristos                         PATUNFETCH;
2617*75f6d617Schristos                         PATUNFETCH;
2618*75f6d617Schristos                         break;
2619*75f6d617Schristos                       }
2620*75f6d617Schristos 
2621*75f6d617Schristos                     c = c1;
2622*75f6d617Schristos                   }
2623*75f6d617Schristos                 else
2624*75f6d617Schristos                   {
2625*75f6d617Schristos                     PATUNFETCH;
2626*75f6d617Schristos                     break;
2627*75f6d617Schristos                   }
2628*75f6d617Schristos 
2629*75f6d617Schristos                 /* If we get here, we found another repeat character.  */
2630*75f6d617Schristos                }
2631*75f6d617Schristos 
2632*75f6d617Schristos             /* Star, etc. applied to an empty pattern is equivalent
2633*75f6d617Schristos                to an empty pattern.  */
2634*75f6d617Schristos             if (!laststart)
2635*75f6d617Schristos               break;
2636*75f6d617Schristos 
2637*75f6d617Schristos             /* Now we know whether or not zero matches is allowed
2638*75f6d617Schristos                and also whether or not two or more matches is allowed.  */
2639*75f6d617Schristos             if (many_times_ok)
2640*75f6d617Schristos               { /* More than one repetition is allowed, so put in at the
2641*75f6d617Schristos                    end a backward relative jump from `b' to before the next
2642*75f6d617Schristos                    jump we're going to put in below (which jumps from
2643*75f6d617Schristos                    laststart to after this jump).
2644*75f6d617Schristos 
2645*75f6d617Schristos                    But if we are at the `*' in the exact sequence `.*\n',
2646*75f6d617Schristos                    insert an unconditional jump backwards to the .,
2647*75f6d617Schristos                    instead of the beginning of the loop.  This way we only
2648*75f6d617Schristos                    push a failure point once, instead of every time
2649*75f6d617Schristos                    through the loop.  */
2650*75f6d617Schristos                 assert (p - 1 > pattern);
2651*75f6d617Schristos 
2652*75f6d617Schristos                 /* Allocate the space for the jump.  */
2653*75f6d617Schristos                 GET_BUFFER_SPACE (1 + OFFSET_ADDRESS_SIZE);
2654*75f6d617Schristos 
2655*75f6d617Schristos                 /* We know we are not at the first character of the pattern,
2656*75f6d617Schristos                    because laststart was nonzero.  And we've already
2657*75f6d617Schristos                    incremented `p', by the way, to be the character after
2658*75f6d617Schristos                    the `*'.  Do we have to do something analogous here
2659*75f6d617Schristos                    for null bytes, because of RE_DOT_NOT_NULL?  */
2660*75f6d617Schristos                 if (TRANSLATE (*(p - 2)) == TRANSLATE ('.')
2661*75f6d617Schristos 		    && zero_times_ok
2662*75f6d617Schristos                     && p < pend && TRANSLATE (*p) == TRANSLATE ('\n')
2663*75f6d617Schristos                     && !(syntax & RE_DOT_NEWLINE))
2664*75f6d617Schristos                   { /* We have .*\n.  */
2665*75f6d617Schristos                     STORE_JUMP (jump, b, laststart);
2666*75f6d617Schristos                     keep_string_p = true;
2667*75f6d617Schristos                   }
2668*75f6d617Schristos                 else
2669*75f6d617Schristos                   /* Anything else.  */
2670*75f6d617Schristos                   STORE_JUMP (maybe_pop_jump, b, laststart -
2671*75f6d617Schristos 			      (1 + OFFSET_ADDRESS_SIZE));
2672*75f6d617Schristos 
2673*75f6d617Schristos                 /* We've added more stuff to the buffer.  */
2674*75f6d617Schristos                 b += 1 + OFFSET_ADDRESS_SIZE;
2675*75f6d617Schristos               }
2676*75f6d617Schristos 
2677*75f6d617Schristos             /* On failure, jump from laststart to b + 3, which will be the
2678*75f6d617Schristos                end of the buffer after this jump is inserted.  */
2679*75f6d617Schristos 	    /* ifdef WCHAR, 'b + 1 + OFFSET_ADDRESS_SIZE' instead of
2680*75f6d617Schristos 	       'b + 3'.  */
2681*75f6d617Schristos             GET_BUFFER_SPACE (1 + OFFSET_ADDRESS_SIZE);
2682*75f6d617Schristos             INSERT_JUMP (keep_string_p ? on_failure_keep_string_jump
2683*75f6d617Schristos                                        : on_failure_jump,
2684*75f6d617Schristos                          laststart, b + 1 + OFFSET_ADDRESS_SIZE);
2685*75f6d617Schristos             pending_exact = 0;
2686*75f6d617Schristos             b += 1 + OFFSET_ADDRESS_SIZE;
2687*75f6d617Schristos 
2688*75f6d617Schristos             if (!zero_times_ok)
2689*75f6d617Schristos               {
2690*75f6d617Schristos                 /* At least one repetition is required, so insert a
2691*75f6d617Schristos                    `dummy_failure_jump' before the initial
2692*75f6d617Schristos                    `on_failure_jump' instruction of the loop. This
2693*75f6d617Schristos                    effects a skip over that instruction the first time
2694*75f6d617Schristos                    we hit that loop.  */
2695*75f6d617Schristos                 GET_BUFFER_SPACE (1 + OFFSET_ADDRESS_SIZE);
2696*75f6d617Schristos                 INSERT_JUMP (dummy_failure_jump, laststart, laststart +
2697*75f6d617Schristos 			     2 + 2 * OFFSET_ADDRESS_SIZE);
2698*75f6d617Schristos                 b += 1 + OFFSET_ADDRESS_SIZE;
2699*75f6d617Schristos               }
2700*75f6d617Schristos             }
2701*75f6d617Schristos 	  break;
2702*75f6d617Schristos 
2703*75f6d617Schristos 
2704*75f6d617Schristos 	case '.':
2705*75f6d617Schristos           laststart = b;
2706*75f6d617Schristos           BUF_PUSH (anychar);
2707*75f6d617Schristos           break;
2708*75f6d617Schristos 
2709*75f6d617Schristos 
2710*75f6d617Schristos         case '[':
2711*75f6d617Schristos           {
2712*75f6d617Schristos             boolean had_char_class = false;
2713*75f6d617Schristos #ifdef WCHAR
2714*75f6d617Schristos 	    CHAR_T range_start = 0xffffffff;
2715*75f6d617Schristos #else
2716*75f6d617Schristos 	    unsigned int range_start = 0xffffffff;
2717*75f6d617Schristos #endif
2718*75f6d617Schristos             if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
2719*75f6d617Schristos 
2720*75f6d617Schristos #ifdef WCHAR
2721*75f6d617Schristos 	    /* We assume a charset(_not) structure as a wchar_t array.
2722*75f6d617Schristos 	       charset[0] = (re_opcode_t) charset(_not)
2723*75f6d617Schristos                charset[1] = l (= length of char_classes)
2724*75f6d617Schristos                charset[2] = m (= length of collating_symbols)
2725*75f6d617Schristos                charset[3] = n (= length of equivalence_classes)
2726*75f6d617Schristos 	       charset[4] = o (= length of char_ranges)
2727*75f6d617Schristos 	       charset[5] = p (= length of chars)
2728*75f6d617Schristos 
2729*75f6d617Schristos                charset[6] = char_class (wctype_t)
2730*75f6d617Schristos                charset[6+CHAR_CLASS_SIZE] = char_class (wctype_t)
2731*75f6d617Schristos                          ...
2732*75f6d617Schristos                charset[l+5]  = char_class (wctype_t)
2733*75f6d617Schristos 
2734*75f6d617Schristos                charset[l+6]  = collating_symbol (wchar_t)
2735*75f6d617Schristos                             ...
2736*75f6d617Schristos                charset[l+m+5]  = collating_symbol (wchar_t)
2737*75f6d617Schristos 					ifdef _LIBC we use the index if
2738*75f6d617Schristos 					_NL_COLLATE_SYMB_EXTRAMB instead of
2739*75f6d617Schristos 					wchar_t string.
2740*75f6d617Schristos 
2741*75f6d617Schristos                charset[l+m+6]  = equivalence_classes (wchar_t)
2742*75f6d617Schristos                               ...
2743*75f6d617Schristos                charset[l+m+n+5]  = equivalence_classes (wchar_t)
2744*75f6d617Schristos 					ifdef _LIBC we use the index in
2745*75f6d617Schristos 					_NL_COLLATE_WEIGHT instead of
2746*75f6d617Schristos 					wchar_t string.
2747*75f6d617Schristos 
2748*75f6d617Schristos 	       charset[l+m+n+6] = range_start
2749*75f6d617Schristos 	       charset[l+m+n+7] = range_end
2750*75f6d617Schristos 	                       ...
2751*75f6d617Schristos 	       charset[l+m+n+2o+4] = range_start
2752*75f6d617Schristos 	       charset[l+m+n+2o+5] = range_end
2753*75f6d617Schristos 					ifdef _LIBC we use the value looked up
2754*75f6d617Schristos 					in _NL_COLLATE_COLLSEQ instead of
2755*75f6d617Schristos 					wchar_t character.
2756*75f6d617Schristos 
2757*75f6d617Schristos 	       charset[l+m+n+2o+6] = char
2758*75f6d617Schristos 	                          ...
2759*75f6d617Schristos 	       charset[l+m+n+2o+p+5] = char
2760*75f6d617Schristos 
2761*75f6d617Schristos 	     */
2762*75f6d617Schristos 
2763*75f6d617Schristos 	    /* We need at least 6 spaces: the opcode, the length of
2764*75f6d617Schristos                char_classes, the length of collating_symbols, the length of
2765*75f6d617Schristos                equivalence_classes, the length of char_ranges, the length of
2766*75f6d617Schristos                chars.  */
2767*75f6d617Schristos 	    GET_BUFFER_SPACE (6);
2768*75f6d617Schristos 
2769*75f6d617Schristos 	    /* Save b as laststart.  From here on we use laststart as the
2770*75f6d617Schristos 	       pointer to the first element of the charset.
2771*75f6d617Schristos 	       In other words, laststart[i] indicates charset[i].  */
2772*75f6d617Schristos             laststart = b;
2773*75f6d617Schristos 
2774*75f6d617Schristos             /* We test `*p == '^' twice, instead of using an if
2775*75f6d617Schristos                statement, so we only need one BUF_PUSH.  */
2776*75f6d617Schristos             BUF_PUSH (*p == '^' ? charset_not : charset);
2777*75f6d617Schristos             if (*p == '^')
2778*75f6d617Schristos               p++;
2779*75f6d617Schristos 
2780*75f6d617Schristos             /* Push the length of char_classes, the length of
2781*75f6d617Schristos                collating_symbols, the length of equivalence_classes, the
2782*75f6d617Schristos                length of char_ranges and the length of chars.  */
2783*75f6d617Schristos             BUF_PUSH_3 (0, 0, 0);
2784*75f6d617Schristos             BUF_PUSH_2 (0, 0);
2785*75f6d617Schristos 
2786*75f6d617Schristos             /* Remember the first position in the bracket expression.  */
2787*75f6d617Schristos             p1 = p;
2788*75f6d617Schristos 
2789*75f6d617Schristos             /* charset_not matches newline according to a syntax bit.  */
2790*75f6d617Schristos             if ((re_opcode_t) b[-6] == charset_not
2791*75f6d617Schristos                 && (syntax & RE_HAT_LISTS_NOT_NEWLINE))
2792*75f6d617Schristos 	      {
2793*75f6d617Schristos 		BUF_PUSH('\n');
2794*75f6d617Schristos 		laststart[5]++; /* Update the length of characters  */
2795*75f6d617Schristos 	      }
2796*75f6d617Schristos 
2797*75f6d617Schristos             /* Read in characters and ranges, setting map bits.  */
2798*75f6d617Schristos             for (;;)
2799*75f6d617Schristos               {
2800*75f6d617Schristos                 if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
2801*75f6d617Schristos 
2802*75f6d617Schristos                 PATFETCH (c);
2803*75f6d617Schristos 
2804*75f6d617Schristos                 /* \ might escape characters inside [...] and [^...].  */
2805*75f6d617Schristos                 if ((syntax & RE_BACKSLASH_ESCAPE_IN_LISTS) && c == '\\')
2806*75f6d617Schristos                   {
2807*75f6d617Schristos                     if (p == pend) FREE_STACK_RETURN (REG_EESCAPE);
2808*75f6d617Schristos 
2809*75f6d617Schristos                     PATFETCH (c1);
2810*75f6d617Schristos 		    BUF_PUSH(c1);
2811*75f6d617Schristos 		    laststart[5]++; /* Update the length of chars  */
2812*75f6d617Schristos 		    range_start = c1;
2813*75f6d617Schristos                     continue;
2814*75f6d617Schristos                   }
2815*75f6d617Schristos 
2816*75f6d617Schristos                 /* Could be the end of the bracket expression.  If it's
2817*75f6d617Schristos                    not (i.e., when the bracket expression is `[]' so
2818*75f6d617Schristos                    far), the ']' character bit gets set way below.  */
2819*75f6d617Schristos                 if (c == ']' && p != p1 + 1)
2820*75f6d617Schristos                   break;
2821*75f6d617Schristos 
2822*75f6d617Schristos                 /* Look ahead to see if it's a range when the last thing
2823*75f6d617Schristos                    was a character class.  */
2824*75f6d617Schristos                 if (had_char_class && c == '-' && *p != ']')
2825*75f6d617Schristos                   FREE_STACK_RETURN (REG_ERANGE);
2826*75f6d617Schristos 
2827*75f6d617Schristos                 /* Look ahead to see if it's a range when the last thing
2828*75f6d617Schristos                    was a character: if this is a hyphen not at the
2829*75f6d617Schristos                    beginning or the end of a list, then it's the range
2830*75f6d617Schristos                    operator.  */
2831*75f6d617Schristos                 if (c == '-'
2832*75f6d617Schristos                     && !(p - 2 >= pattern && p[-2] == '[')
2833*75f6d617Schristos                     && !(p - 3 >= pattern && p[-3] == '[' && p[-2] == '^')
2834*75f6d617Schristos                     && *p != ']')
2835*75f6d617Schristos                   {
2836*75f6d617Schristos                     reg_errcode_t ret;
2837*75f6d617Schristos 		    /* Allocate the space for range_start and range_end.  */
2838*75f6d617Schristos 		    GET_BUFFER_SPACE (2);
2839*75f6d617Schristos 		    /* Update the pointer to indicate end of buffer.  */
2840*75f6d617Schristos                     b += 2;
2841*75f6d617Schristos                     ret = wcs_compile_range (range_start, &p, pend, translate,
2842*75f6d617Schristos                                          syntax, b, laststart);
2843*75f6d617Schristos                     if (ret != REG_NOERROR) FREE_STACK_RETURN (ret);
2844*75f6d617Schristos                     range_start = 0xffffffff;
2845*75f6d617Schristos                   }
2846*75f6d617Schristos                 else if (p[0] == '-' && p[1] != ']')
2847*75f6d617Schristos                   { /* This handles ranges made up of characters only.  */
2848*75f6d617Schristos                     reg_errcode_t ret;
2849*75f6d617Schristos 
2850*75f6d617Schristos 		    /* Move past the `-'.  */
2851*75f6d617Schristos                     PATFETCH (c1);
2852*75f6d617Schristos 		    /* Allocate the space for range_start and range_end.  */
2853*75f6d617Schristos 		    GET_BUFFER_SPACE (2);
2854*75f6d617Schristos 		    /* Update the pointer to indicate end of buffer.  */
2855*75f6d617Schristos                     b += 2;
2856*75f6d617Schristos                     ret = wcs_compile_range (c, &p, pend, translate, syntax, b,
2857*75f6d617Schristos                                          laststart);
2858*75f6d617Schristos                     if (ret != REG_NOERROR) FREE_STACK_RETURN (ret);
2859*75f6d617Schristos 		    range_start = 0xffffffff;
2860*75f6d617Schristos                   }
2861*75f6d617Schristos 
2862*75f6d617Schristos                 /* See if we're at the beginning of a possible character
2863*75f6d617Schristos                    class.  */
2864*75f6d617Schristos                 else if (syntax & RE_CHAR_CLASSES && c == '[' && *p == ':')
2865*75f6d617Schristos                   { /* Leave room for the null.  */
2866*75f6d617Schristos                     char str[CHAR_CLASS_MAX_LENGTH + 1];
2867*75f6d617Schristos 
2868*75f6d617Schristos                     PATFETCH (c);
2869*75f6d617Schristos                     c1 = 0;
2870*75f6d617Schristos 
2871*75f6d617Schristos                     /* If pattern is `[[:'.  */
2872*75f6d617Schristos                     if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
2873*75f6d617Schristos 
2874*75f6d617Schristos                     for (;;)
2875*75f6d617Schristos                       {
2876*75f6d617Schristos                         PATFETCH (c);
2877*75f6d617Schristos                         if ((c == ':' && *p == ']') || p == pend)
2878*75f6d617Schristos                           break;
2879*75f6d617Schristos 			if (c1 < CHAR_CLASS_MAX_LENGTH)
2880*75f6d617Schristos 			  str[c1++] = c;
2881*75f6d617Schristos 			else
2882*75f6d617Schristos 			  /* This is in any case an invalid class name.  */
2883*75f6d617Schristos 			  str[0] = '\0';
2884*75f6d617Schristos                       }
2885*75f6d617Schristos                     str[c1] = '\0';
2886*75f6d617Schristos 
2887*75f6d617Schristos                     /* If isn't a word bracketed by `[:' and `:]':
2888*75f6d617Schristos                        undo the ending character, the letters, and leave
2889*75f6d617Schristos                        the leading `:' and `[' (but store them as character).  */
2890*75f6d617Schristos                     if (c == ':' && *p == ']')
2891*75f6d617Schristos                       {
2892*75f6d617Schristos 			wctype_t wt;
2893*75f6d617Schristos 			uintptr_t alignedp;
2894*75f6d617Schristos 
2895*75f6d617Schristos 			/* Query the character class as wctype_t.  */
2896*75f6d617Schristos 			wt = IS_CHAR_CLASS (str);
2897*75f6d617Schristos 			if (wt == 0)
2898*75f6d617Schristos 			  FREE_STACK_RETURN (REG_ECTYPE);
2899*75f6d617Schristos 
2900*75f6d617Schristos                         /* Throw away the ] at the end of the character
2901*75f6d617Schristos                            class.  */
2902*75f6d617Schristos                         PATFETCH (c);
2903*75f6d617Schristos 
2904*75f6d617Schristos                         if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
2905*75f6d617Schristos 
2906*75f6d617Schristos 			/* Allocate the space for character class.  */
2907*75f6d617Schristos                         GET_BUFFER_SPACE(CHAR_CLASS_SIZE);
2908*75f6d617Schristos 			/* Update the pointer to indicate end of buffer.  */
2909*75f6d617Schristos                         b += CHAR_CLASS_SIZE;
2910*75f6d617Schristos 			/* Move the data that follows the character classes
2911*75f6d617Schristos 			    so that it is not overwritten.  */
2912*75f6d617Schristos                         insert_space(CHAR_CLASS_SIZE,
2913*75f6d617Schristos 				     laststart + 6 + laststart[1],
2914*75f6d617Schristos 				     b - 1);
2915*75f6d617Schristos 			alignedp = ((uintptr_t)(laststart + 6 + laststart[1])
2916*75f6d617Schristos 				    + __alignof__(wctype_t) - 1)
2917*75f6d617Schristos 			  	    & ~(uintptr_t)(__alignof__(wctype_t) - 1);
2918*75f6d617Schristos 			/* Store the character class.  */
2919*75f6d617Schristos                         *((wctype_t*)alignedp) = wt;
2920*75f6d617Schristos                         /* Update length of char_classes */
2921*75f6d617Schristos                         laststart[1] += CHAR_CLASS_SIZE;
2922*75f6d617Schristos 
2923*75f6d617Schristos                         had_char_class = true;
2924*75f6d617Schristos                       }
2925*75f6d617Schristos                     else
2926*75f6d617Schristos                       {
2927*75f6d617Schristos                         c1++;
2928*75f6d617Schristos                         while (c1--)
2929*75f6d617Schristos                           PATUNFETCH;
2930*75f6d617Schristos                         BUF_PUSH ('[');
2931*75f6d617Schristos                         BUF_PUSH (':');
2932*75f6d617Schristos                         laststart[5] += 2; /* Update the length of characters  */
2933*75f6d617Schristos 			range_start = ':';
2934*75f6d617Schristos                         had_char_class = false;
2935*75f6d617Schristos                       }
2936*75f6d617Schristos                   }
2937*75f6d617Schristos                 else if (syntax & RE_CHAR_CLASSES && c == '[' && (*p == '='
2938*75f6d617Schristos 							  || *p == '.'))
2939*75f6d617Schristos 		  {
2940*75f6d617Schristos 		    CHAR_T str[128];	/* Should be large enough.  */
2941*75f6d617Schristos 		    CHAR_T delim = *p; /* '=' or '.'  */
2942*75f6d617Schristos # ifdef _LIBC
2943*75f6d617Schristos 		    uint32_t nrules =
2944*75f6d617Schristos 		      _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
2945*75f6d617Schristos # endif
2946*75f6d617Schristos 		    PATFETCH (c);
2947*75f6d617Schristos 		    c1 = 0;
2948*75f6d617Schristos 
2949*75f6d617Schristos 		    /* If pattern is `[[=' or '[[.'.  */
2950*75f6d617Schristos 		    if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
2951*75f6d617Schristos 
2952*75f6d617Schristos 		    for (;;)
2953*75f6d617Schristos 		      {
2954*75f6d617Schristos 			PATFETCH (c);
2955*75f6d617Schristos 			if ((c == delim && *p == ']') || p == pend)
2956*75f6d617Schristos 			  break;
2957*75f6d617Schristos 			if (c1 < sizeof (str) - 1)
2958*75f6d617Schristos 			  str[c1++] = c;
2959*75f6d617Schristos 			else
2960*75f6d617Schristos 			  /* This is in any case an invalid class name.  */
2961*75f6d617Schristos 			  str[0] = '\0';
2962*75f6d617Schristos                       }
2963*75f6d617Schristos 		    str[c1] = '\0';
2964*75f6d617Schristos 
2965*75f6d617Schristos 		    if (c == delim && *p == ']' && str[0] != '\0')
2966*75f6d617Schristos 		      {
2967*75f6d617Schristos                         unsigned int i, offset;
2968*75f6d617Schristos 			/* If we have no collation data we use the default
2969*75f6d617Schristos 			   collation in which each character is in a class
2970*75f6d617Schristos 			   by itself.  It also means that ASCII is the
2971*75f6d617Schristos 			   character set and therefore we cannot have character
2972*75f6d617Schristos 			   with more than one byte in the multibyte
2973*75f6d617Schristos 			   representation.  */
2974*75f6d617Schristos 
2975*75f6d617Schristos                         /* If not defined _LIBC, we push the name and
2976*75f6d617Schristos 			   `\0' for the sake of matching performance.  */
2977*75f6d617Schristos 			int datasize = c1 + 1;
2978*75f6d617Schristos 
2979*75f6d617Schristos # ifdef _LIBC
2980*75f6d617Schristos 			int32_t idx = 0;
2981*75f6d617Schristos 			if (nrules == 0)
2982*75f6d617Schristos # endif
2983*75f6d617Schristos 			  {
2984*75f6d617Schristos 			    if (c1 != 1)
2985*75f6d617Schristos 			      FREE_STACK_RETURN (REG_ECOLLATE);
2986*75f6d617Schristos 			  }
2987*75f6d617Schristos # ifdef _LIBC
2988*75f6d617Schristos 			else
2989*75f6d617Schristos 			  {
2990*75f6d617Schristos 			    const int32_t *table;
2991*75f6d617Schristos 			    const int32_t *weights;
2992*75f6d617Schristos 			    const int32_t *extra;
2993*75f6d617Schristos 			    const int32_t *indirect;
2994*75f6d617Schristos 			    wint_t *cp;
2995*75f6d617Schristos 
2996*75f6d617Schristos 			    /* This #include defines a local function!  */
2997*75f6d617Schristos #  include <locale/weightwc.h>
2998*75f6d617Schristos 
2999*75f6d617Schristos 			    if(delim == '=')
3000*75f6d617Schristos 			      {
3001*75f6d617Schristos 				/* We push the index for equivalence class.  */
3002*75f6d617Schristos 				cp = (wint_t*)str;
3003*75f6d617Schristos 
3004*75f6d617Schristos 				table = (const int32_t *)
3005*75f6d617Schristos 				  _NL_CURRENT (LC_COLLATE,
3006*75f6d617Schristos 					       _NL_COLLATE_TABLEWC);
3007*75f6d617Schristos 				weights = (const int32_t *)
3008*75f6d617Schristos 				  _NL_CURRENT (LC_COLLATE,
3009*75f6d617Schristos 					       _NL_COLLATE_WEIGHTWC);
3010*75f6d617Schristos 				extra = (const int32_t *)
3011*75f6d617Schristos 				  _NL_CURRENT (LC_COLLATE,
3012*75f6d617Schristos 					       _NL_COLLATE_EXTRAWC);
3013*75f6d617Schristos 				indirect = (const int32_t *)
3014*75f6d617Schristos 				  _NL_CURRENT (LC_COLLATE,
3015*75f6d617Schristos 					       _NL_COLLATE_INDIRECTWC);
3016*75f6d617Schristos 
3017*75f6d617Schristos 				idx = findidx ((const wint_t**)&cp);
3018*75f6d617Schristos 				if (idx == 0 || cp < (wint_t*) str + c1)
3019*75f6d617Schristos 				  /* This is no valid character.  */
3020*75f6d617Schristos 				  FREE_STACK_RETURN (REG_ECOLLATE);
3021*75f6d617Schristos 
3022*75f6d617Schristos 				str[0] = (wchar_t)idx;
3023*75f6d617Schristos 			      }
3024*75f6d617Schristos 			    else /* delim == '.' */
3025*75f6d617Schristos 			      {
3026*75f6d617Schristos 				/* We push collation sequence value
3027*75f6d617Schristos 				   for collating symbol.  */
3028*75f6d617Schristos 				int32_t table_size;
3029*75f6d617Schristos 				const int32_t *symb_table;
3030*75f6d617Schristos 				const unsigned char *extra;
3031*75f6d617Schristos 				int32_t idx;
3032*75f6d617Schristos 				int32_t elem;
3033*75f6d617Schristos 				int32_t second;
3034*75f6d617Schristos 				int32_t hash;
3035*75f6d617Schristos 				char char_str[c1];
3036*75f6d617Schristos 
3037*75f6d617Schristos 				/* We have to convert the name to a single-byte
3038*75f6d617Schristos 				   string.  This is possible since the names
3039*75f6d617Schristos 				   consist of ASCII characters and the internal
3040*75f6d617Schristos 				   representation is UCS4.  */
3041*75f6d617Schristos 				for (i = 0; i < c1; ++i)
3042*75f6d617Schristos 				  char_str[i] = str[i];
3043*75f6d617Schristos 
3044*75f6d617Schristos 				table_size =
3045*75f6d617Schristos 				  _NL_CURRENT_WORD (LC_COLLATE,
3046*75f6d617Schristos 						    _NL_COLLATE_SYMB_HASH_SIZEMB);
3047*75f6d617Schristos 				symb_table = (const int32_t *)
3048*75f6d617Schristos 				  _NL_CURRENT (LC_COLLATE,
3049*75f6d617Schristos 					       _NL_COLLATE_SYMB_TABLEMB);
3050*75f6d617Schristos 				extra = (const unsigned char *)
3051*75f6d617Schristos 				  _NL_CURRENT (LC_COLLATE,
3052*75f6d617Schristos 					       _NL_COLLATE_SYMB_EXTRAMB);
3053*75f6d617Schristos 
3054*75f6d617Schristos 				/* Locate the character in the hashing table.  */
3055*75f6d617Schristos 				hash = elem_hash (char_str, c1);
3056*75f6d617Schristos 
3057*75f6d617Schristos 				idx = 0;
3058*75f6d617Schristos 				elem = hash % table_size;
3059*75f6d617Schristos 				second = hash % (table_size - 2);
3060*75f6d617Schristos 				while (symb_table[2 * elem] != 0)
3061*75f6d617Schristos 				  {
3062*75f6d617Schristos 				    /* First compare the hashing value.  */
3063*75f6d617Schristos 				    if (symb_table[2 * elem] == hash
3064*75f6d617Schristos 					&& c1 == extra[symb_table[2 * elem + 1]]
3065*75f6d617Schristos 					&& memcmp (char_str,
3066*75f6d617Schristos 						   &extra[symb_table[2 * elem + 1]
3067*75f6d617Schristos 							 + 1], c1) == 0)
3068*75f6d617Schristos 				      {
3069*75f6d617Schristos 					/* Yep, this is the entry.  */
3070*75f6d617Schristos 					idx = symb_table[2 * elem + 1];
3071*75f6d617Schristos 					idx += 1 + extra[idx];
3072*75f6d617Schristos 					break;
3073*75f6d617Schristos 				      }
3074*75f6d617Schristos 
3075*75f6d617Schristos 				    /* Next entry.  */
3076*75f6d617Schristos 				    elem += second;
3077*75f6d617Schristos 				  }
3078*75f6d617Schristos 
3079*75f6d617Schristos 				if (symb_table[2 * elem] != 0)
3080*75f6d617Schristos 				  {
3081*75f6d617Schristos 				    /* Compute the index of the byte sequence
3082*75f6d617Schristos 				       in the table.  */
3083*75f6d617Schristos 				    idx += 1 + extra[idx];
3084*75f6d617Schristos 				    /* Adjust for the alignment.  */
3085*75f6d617Schristos 				    idx = (idx + 3) & ~3;
3086*75f6d617Schristos 
3087*75f6d617Schristos 				    str[0] = (wchar_t) idx + 4;
3088*75f6d617Schristos 				  }
3089*75f6d617Schristos 				else if (symb_table[2 * elem] == 0 && c1 == 1)
3090*75f6d617Schristos 				  {
3091*75f6d617Schristos 				    /* No valid character.  Match it as a
3092*75f6d617Schristos 				       single byte character.  */
3093*75f6d617Schristos 				    had_char_class = false;
3094*75f6d617Schristos 				    BUF_PUSH(str[0]);
3095*75f6d617Schristos 				    /* Update the length of characters  */
3096*75f6d617Schristos 				    laststart[5]++;
3097*75f6d617Schristos 				    range_start = str[0];
3098*75f6d617Schristos 
3099*75f6d617Schristos 				    /* Throw away the ] at the end of the
3100*75f6d617Schristos 				       collating symbol.  */
3101*75f6d617Schristos 				    PATFETCH (c);
3102*75f6d617Schristos 				    /* exit from the switch block.  */
3103*75f6d617Schristos 				    continue;
3104*75f6d617Schristos 				  }
3105*75f6d617Schristos 				else
3106*75f6d617Schristos 				  FREE_STACK_RETURN (REG_ECOLLATE);
3107*75f6d617Schristos 			      }
3108*75f6d617Schristos 			    datasize = 1;
3109*75f6d617Schristos 			  }
3110*75f6d617Schristos # endif
3111*75f6d617Schristos                         /* Throw away the ] at the end of the equivalence
3112*75f6d617Schristos                            class (or collating symbol).  */
3113*75f6d617Schristos                         PATFETCH (c);
3114*75f6d617Schristos 
3115*75f6d617Schristos 			/* Allocate the space for the equivalence class
3116*75f6d617Schristos 			   (or collating symbol) (and '\0' if needed).  */
3117*75f6d617Schristos                         GET_BUFFER_SPACE(datasize);
3118*75f6d617Schristos 			/* Update the pointer to indicate end of buffer.  */
3119*75f6d617Schristos                         b += datasize;
3120*75f6d617Schristos 
3121*75f6d617Schristos 			if (delim == '=')
3122*75f6d617Schristos 			  { /* equivalence class  */
3123*75f6d617Schristos 			    /* Calculate the offset of char_ranges,
3124*75f6d617Schristos 			       which is next to equivalence_classes.  */
3125*75f6d617Schristos 			    offset = laststart[1] + laststart[2]
3126*75f6d617Schristos 			      + laststart[3] +6;
3127*75f6d617Schristos 			    /* Insert space.  */
3128*75f6d617Schristos 			    insert_space(datasize, laststart + offset, b - 1);
3129*75f6d617Schristos 
3130*75f6d617Schristos 			    /* Write the equivalence_class and \0.  */
3131*75f6d617Schristos 			    for (i = 0 ; i < datasize ; i++)
3132*75f6d617Schristos 			      laststart[offset + i] = str[i];
3133*75f6d617Schristos 
3134*75f6d617Schristos 			    /* Update the length of equivalence_classes.  */
3135*75f6d617Schristos 			    laststart[3] += datasize;
3136*75f6d617Schristos 			    had_char_class = true;
3137*75f6d617Schristos 			  }
3138*75f6d617Schristos 			else /* delim == '.' */
3139*75f6d617Schristos 			  { /* collating symbol  */
3140*75f6d617Schristos 			    /* Calculate the offset of the equivalence_classes,
3141*75f6d617Schristos 			       which is next to collating_symbols.  */
3142*75f6d617Schristos 			    offset = laststart[1] + laststart[2] + 6;
3143*75f6d617Schristos 			    /* Insert space and write the collating_symbol
3144*75f6d617Schristos 			       and \0.  */
3145*75f6d617Schristos 			    insert_space(datasize, laststart + offset, b-1);
3146*75f6d617Schristos 			    for (i = 0 ; i < datasize ; i++)
3147*75f6d617Schristos 			      laststart[offset + i] = str[i];
3148*75f6d617Schristos 
3149*75f6d617Schristos 			    /* In re_match_2_internal if range_start < -1, we
3150*75f6d617Schristos 			       assume -range_start is the offset of the
3151*75f6d617Schristos 			       collating symbol which is specified as
3152*75f6d617Schristos 			       the character of the range start.  So we assign
3153*75f6d617Schristos 			       -(laststart[1] + laststart[2] + 6) to
3154*75f6d617Schristos 			       range_start.  */
3155*75f6d617Schristos 			    range_start = -(laststart[1] + laststart[2] + 6);
3156*75f6d617Schristos 			    /* Update the length of collating_symbol.  */
3157*75f6d617Schristos 			    laststart[2] += datasize;
3158*75f6d617Schristos 			    had_char_class = false;
3159*75f6d617Schristos 			  }
3160*75f6d617Schristos 		      }
3161*75f6d617Schristos                     else
3162*75f6d617Schristos                       {
3163*75f6d617Schristos                         c1++;
3164*75f6d617Schristos                         while (c1--)
3165*75f6d617Schristos                           PATUNFETCH;
3166*75f6d617Schristos                         BUF_PUSH ('[');
3167*75f6d617Schristos                         BUF_PUSH (delim);
3168*75f6d617Schristos                         laststart[5] += 2; /* Update the length of characters  */
3169*75f6d617Schristos 			range_start = delim;
3170*75f6d617Schristos                         had_char_class = false;
3171*75f6d617Schristos                       }
3172*75f6d617Schristos 		  }
3173*75f6d617Schristos                 else
3174*75f6d617Schristos                   {
3175*75f6d617Schristos                     had_char_class = false;
3176*75f6d617Schristos 		    BUF_PUSH(c);
3177*75f6d617Schristos 		    laststart[5]++;  /* Update the length of characters  */
3178*75f6d617Schristos 		    range_start = c;
3179*75f6d617Schristos                   }
3180*75f6d617Schristos 	      }
3181*75f6d617Schristos 
3182*75f6d617Schristos #else /* BYTE */
3183*75f6d617Schristos             /* Ensure that we have enough space to push a charset: the
3184*75f6d617Schristos                opcode, the length count, and the bitset; 34 bytes in all.  */
3185*75f6d617Schristos 	    GET_BUFFER_SPACE (34);
3186*75f6d617Schristos 
3187*75f6d617Schristos             laststart = b;
3188*75f6d617Schristos 
3189*75f6d617Schristos             /* We test `*p == '^' twice, instead of using an if
3190*75f6d617Schristos                statement, so we only need one BUF_PUSH.  */
3191*75f6d617Schristos             BUF_PUSH (*p == '^' ? charset_not : charset);
3192*75f6d617Schristos             if (*p == '^')
3193*75f6d617Schristos               p++;
3194*75f6d617Schristos 
3195*75f6d617Schristos             /* Remember the first position in the bracket expression.  */
3196*75f6d617Schristos             p1 = p;
3197*75f6d617Schristos 
3198*75f6d617Schristos             /* Push the number of bytes in the bitmap.  */
3199*75f6d617Schristos             BUF_PUSH ((1 << BYTEWIDTH) / BYTEWIDTH);
3200*75f6d617Schristos 
3201*75f6d617Schristos             /* Clear the whole map.  */
3202*75f6d617Schristos             bzero (b, (1 << BYTEWIDTH) / BYTEWIDTH);
3203*75f6d617Schristos 
3204*75f6d617Schristos             /* charset_not matches newline according to a syntax bit.  */
3205*75f6d617Schristos             if ((re_opcode_t) b[-2] == charset_not
3206*75f6d617Schristos                 && (syntax & RE_HAT_LISTS_NOT_NEWLINE))
3207*75f6d617Schristos               SET_LIST_BIT ('\n');
3208*75f6d617Schristos 
3209*75f6d617Schristos             /* Read in characters and ranges, setting map bits.  */
3210*75f6d617Schristos             for (;;)
3211*75f6d617Schristos               {
3212*75f6d617Schristos                 if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
3213*75f6d617Schristos 
3214*75f6d617Schristos                 PATFETCH (c);
3215*75f6d617Schristos 
3216*75f6d617Schristos                 /* \ might escape characters inside [...] and [^...].  */
3217*75f6d617Schristos                 if ((syntax & RE_BACKSLASH_ESCAPE_IN_LISTS) && c == '\\')
3218*75f6d617Schristos                   {
3219*75f6d617Schristos                     if (p == pend) FREE_STACK_RETURN (REG_EESCAPE);
3220*75f6d617Schristos 
3221*75f6d617Schristos                     PATFETCH (c1);
3222*75f6d617Schristos                     SET_LIST_BIT (c1);
3223*75f6d617Schristos 		    range_start = c1;
3224*75f6d617Schristos                     continue;
3225*75f6d617Schristos                   }
3226*75f6d617Schristos 
3227*75f6d617Schristos                 /* Could be the end of the bracket expression.  If it's
3228*75f6d617Schristos                    not (i.e., when the bracket expression is `[]' so
3229*75f6d617Schristos                    far), the ']' character bit gets set way below.  */
3230*75f6d617Schristos                 if (c == ']' && p != p1 + 1)
3231*75f6d617Schristos                   break;
3232*75f6d617Schristos 
3233*75f6d617Schristos                 /* Look ahead to see if it's a range when the last thing
3234*75f6d617Schristos                    was a character class.  */
3235*75f6d617Schristos                 if (had_char_class && c == '-' && *p != ']')
3236*75f6d617Schristos                   FREE_STACK_RETURN (REG_ERANGE);
3237*75f6d617Schristos 
3238*75f6d617Schristos                 /* Look ahead to see if it's a range when the last thing
3239*75f6d617Schristos                    was a character: if this is a hyphen not at the
3240*75f6d617Schristos                    beginning or the end of a list, then it's the range
3241*75f6d617Schristos                    operator.  */
3242*75f6d617Schristos                 if (c == '-'
3243*75f6d617Schristos                     && !(p - 2 >= pattern && p[-2] == '[')
3244*75f6d617Schristos                     && !(p - 3 >= pattern && p[-3] == '[' && p[-2] == '^')
3245*75f6d617Schristos                     && *p != ']')
3246*75f6d617Schristos                   {
3247*75f6d617Schristos                     reg_errcode_t ret
3248*75f6d617Schristos                       = byte_compile_range (range_start, &p, pend, translate,
3249*75f6d617Schristos 					    syntax, b);
3250*75f6d617Schristos                     if (ret != REG_NOERROR) FREE_STACK_RETURN (ret);
3251*75f6d617Schristos 		    range_start = 0xffffffff;
3252*75f6d617Schristos                   }
3253*75f6d617Schristos 
3254*75f6d617Schristos                 else if (p[0] == '-' && p[1] != ']')
3255*75f6d617Schristos                   { /* This handles ranges made up of characters only.  */
3256*75f6d617Schristos                     reg_errcode_t ret;
3257*75f6d617Schristos 
3258*75f6d617Schristos 		    /* Move past the `-'.  */
3259*75f6d617Schristos                     PATFETCH (c1);
3260*75f6d617Schristos 
3261*75f6d617Schristos                     ret = byte_compile_range (c, &p, pend, translate, syntax, b);
3262*75f6d617Schristos                     if (ret != REG_NOERROR) FREE_STACK_RETURN (ret);
3263*75f6d617Schristos 		    range_start = 0xffffffff;
3264*75f6d617Schristos                   }
3265*75f6d617Schristos 
3266*75f6d617Schristos                 /* See if we're at the beginning of a possible character
3267*75f6d617Schristos                    class.  */
3268*75f6d617Schristos 
3269*75f6d617Schristos                 else if (syntax & RE_CHAR_CLASSES && c == '[' && *p == ':')
3270*75f6d617Schristos                   { /* Leave room for the null.  */
3271*75f6d617Schristos                     char str[CHAR_CLASS_MAX_LENGTH + 1];
3272*75f6d617Schristos 
3273*75f6d617Schristos                     PATFETCH (c);
3274*75f6d617Schristos                     c1 = 0;
3275*75f6d617Schristos 
3276*75f6d617Schristos                     /* If pattern is `[[:'.  */
3277*75f6d617Schristos                     if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
3278*75f6d617Schristos 
3279*75f6d617Schristos                     for (;;)
3280*75f6d617Schristos                       {
3281*75f6d617Schristos                         PATFETCH (c);
3282*75f6d617Schristos                         if ((c == ':' && *p == ']') || p == pend)
3283*75f6d617Schristos                           break;
3284*75f6d617Schristos 			if (c1 < CHAR_CLASS_MAX_LENGTH)
3285*75f6d617Schristos 			  str[c1++] = c;
3286*75f6d617Schristos 			else
3287*75f6d617Schristos 			  /* This is in any case an invalid class name.  */
3288*75f6d617Schristos 			  str[0] = '\0';
3289*75f6d617Schristos                       }
3290*75f6d617Schristos                     str[c1] = '\0';
3291*75f6d617Schristos 
3292*75f6d617Schristos                     /* If it isn't a word bracketed by `[:' and `:]':
3293*75f6d617Schristos                        undo the ending character, the letters, and leave
3294*75f6d617Schristos                        the leading `:' and `[' (but set bits for them).  */
3295*75f6d617Schristos                     if (c == ':' && *p == ']')
3296*75f6d617Schristos                       {
3297*75f6d617Schristos # if defined _LIBC || WIDE_CHAR_SUPPORT
3298*75f6d617Schristos                         boolean is_lower = STREQ (str, "lower");
3299*75f6d617Schristos                         boolean is_upper = STREQ (str, "upper");
3300*75f6d617Schristos 			wctype_t wt;
3301*75f6d617Schristos                         int ch;
3302*75f6d617Schristos 
3303*75f6d617Schristos 			wt = IS_CHAR_CLASS (str);
3304*75f6d617Schristos 			if (wt == 0)
3305*75f6d617Schristos 			  FREE_STACK_RETURN (REG_ECTYPE);
3306*75f6d617Schristos 
3307*75f6d617Schristos                         /* Throw away the ] at the end of the character
3308*75f6d617Schristos                            class.  */
3309*75f6d617Schristos                         PATFETCH (c);
3310*75f6d617Schristos 
3311*75f6d617Schristos                         if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
3312*75f6d617Schristos 
3313*75f6d617Schristos                         for (ch = 0; ch < 1 << BYTEWIDTH; ++ch)
3314*75f6d617Schristos 			  {
3315*75f6d617Schristos 			    if (iswctype (btowc (ch), wt))
3316*75f6d617Schristos 			      SET_LIST_BIT (ch);
3317*75f6d617Schristos 
3318*75f6d617Schristos 			    if (translate && (is_upper || is_lower)
3319*75f6d617Schristos 				&& (ISUPPER (ch) || ISLOWER (ch)))
3320*75f6d617Schristos 			      SET_LIST_BIT (ch);
3321*75f6d617Schristos 			  }
3322*75f6d617Schristos 
3323*75f6d617Schristos                         had_char_class = true;
3324*75f6d617Schristos # else
3325*75f6d617Schristos                         int ch;
3326*75f6d617Schristos                         boolean is_alnum = STREQ (str, "alnum");
3327*75f6d617Schristos                         boolean is_alpha = STREQ (str, "alpha");
3328*75f6d617Schristos                         boolean is_blank = STREQ (str, "blank");
3329*75f6d617Schristos                         boolean is_cntrl = STREQ (str, "cntrl");
3330*75f6d617Schristos                         boolean is_digit = STREQ (str, "digit");
3331*75f6d617Schristos                         boolean is_graph = STREQ (str, "graph");
3332*75f6d617Schristos                         boolean is_lower = STREQ (str, "lower");
3333*75f6d617Schristos                         boolean is_print = STREQ (str, "print");
3334*75f6d617Schristos                         boolean is_punct = STREQ (str, "punct");
3335*75f6d617Schristos                         boolean is_space = STREQ (str, "space");
3336*75f6d617Schristos                         boolean is_upper = STREQ (str, "upper");
3337*75f6d617Schristos                         boolean is_xdigit = STREQ (str, "xdigit");
3338*75f6d617Schristos 
3339*75f6d617Schristos                         if (!IS_CHAR_CLASS (str))
3340*75f6d617Schristos 			  FREE_STACK_RETURN (REG_ECTYPE);
3341*75f6d617Schristos 
3342*75f6d617Schristos                         /* Throw away the ] at the end of the character
3343*75f6d617Schristos                            class.  */
3344*75f6d617Schristos                         PATFETCH (c);
3345*75f6d617Schristos 
3346*75f6d617Schristos                         if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
3347*75f6d617Schristos 
3348*75f6d617Schristos                         for (ch = 0; ch < 1 << BYTEWIDTH; ch++)
3349*75f6d617Schristos                           {
3350*75f6d617Schristos 			    /* This was split into 3 if's to
3351*75f6d617Schristos 			       avoid an arbitrary limit in some compiler.  */
3352*75f6d617Schristos                             if (   (is_alnum  && ISALNUM (ch))
3353*75f6d617Schristos                                 || (is_alpha  && ISALPHA (ch))
3354*75f6d617Schristos                                 || (is_blank  && ISBLANK (ch))
3355*75f6d617Schristos                                 || (is_cntrl  && ISCNTRL (ch)))
3356*75f6d617Schristos 			      SET_LIST_BIT (ch);
3357*75f6d617Schristos 			    if (   (is_digit  && ISDIGIT (ch))
3358*75f6d617Schristos                                 || (is_graph  && ISGRAPH (ch))
3359*75f6d617Schristos                                 || (is_lower  && ISLOWER (ch))
3360*75f6d617Schristos                                 || (is_print  && ISPRINT (ch)))
3361*75f6d617Schristos 			      SET_LIST_BIT (ch);
3362*75f6d617Schristos 			    if (   (is_punct  && ISPUNCT (ch))
3363*75f6d617Schristos                                 || (is_space  && ISSPACE (ch))
3364*75f6d617Schristos                                 || (is_upper  && ISUPPER (ch))
3365*75f6d617Schristos                                 || (is_xdigit && ISXDIGIT (ch)))
3366*75f6d617Schristos 			      SET_LIST_BIT (ch);
3367*75f6d617Schristos 			    if (   translate && (is_upper || is_lower)
3368*75f6d617Schristos 				&& (ISUPPER (ch) || ISLOWER (ch)))
3369*75f6d617Schristos 			      SET_LIST_BIT (ch);
3370*75f6d617Schristos                           }
3371*75f6d617Schristos                         had_char_class = true;
3372*75f6d617Schristos # endif	/* libc || wctype.h */
3373*75f6d617Schristos                       }
3374*75f6d617Schristos                     else
3375*75f6d617Schristos                       {
3376*75f6d617Schristos                         c1++;
3377*75f6d617Schristos                         while (c1--)
3378*75f6d617Schristos                           PATUNFETCH;
3379*75f6d617Schristos                         SET_LIST_BIT ('[');
3380*75f6d617Schristos                         SET_LIST_BIT (':');
3381*75f6d617Schristos 			range_start = ':';
3382*75f6d617Schristos                         had_char_class = false;
3383*75f6d617Schristos                       }
3384*75f6d617Schristos                   }
3385*75f6d617Schristos                 else if (syntax & RE_CHAR_CLASSES && c == '[' && *p == '=')
3386*75f6d617Schristos 		  {
3387*75f6d617Schristos 		    unsigned char str[MB_LEN_MAX + 1];
3388*75f6d617Schristos # ifdef _LIBC
3389*75f6d617Schristos 		    uint32_t nrules =
3390*75f6d617Schristos 		      _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
3391*75f6d617Schristos # endif
3392*75f6d617Schristos 
3393*75f6d617Schristos 		    PATFETCH (c);
3394*75f6d617Schristos 		    c1 = 0;
3395*75f6d617Schristos 
3396*75f6d617Schristos 		    /* If pattern is `[[='.  */
3397*75f6d617Schristos 		    if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
3398*75f6d617Schristos 
3399*75f6d617Schristos 		    for (;;)
3400*75f6d617Schristos 		      {
3401*75f6d617Schristos 			PATFETCH (c);
3402*75f6d617Schristos 			if ((c == '=' && *p == ']') || p == pend)
3403*75f6d617Schristos 			  break;
3404*75f6d617Schristos 			if (c1 < MB_LEN_MAX)
3405*75f6d617Schristos 			  str[c1++] = c;
3406*75f6d617Schristos 			else
3407*75f6d617Schristos 			  /* This is in any case an invalid class name.  */
3408*75f6d617Schristos 			  str[0] = '\0';
3409*75f6d617Schristos                       }
3410*75f6d617Schristos 		    str[c1] = '\0';
3411*75f6d617Schristos 
3412*75f6d617Schristos 		    if (c == '=' && *p == ']' && str[0] != '\0')
3413*75f6d617Schristos 		      {
3414*75f6d617Schristos 			/* If we have no collation data we use the default
3415*75f6d617Schristos 			   collation in which each character is in a class
3416*75f6d617Schristos 			   by itself.  It also means that ASCII is the
3417*75f6d617Schristos 			   character set and therefore we cannot have character
3418*75f6d617Schristos 			   with more than one byte in the multibyte
3419*75f6d617Schristos 			   representation.  */
3420*75f6d617Schristos # ifdef _LIBC
3421*75f6d617Schristos 			if (nrules == 0)
3422*75f6d617Schristos # endif
3423*75f6d617Schristos 			  {
3424*75f6d617Schristos 			    if (c1 != 1)
3425*75f6d617Schristos 			      FREE_STACK_RETURN (REG_ECOLLATE);
3426*75f6d617Schristos 
3427*75f6d617Schristos 			    /* Throw away the ] at the end of the equivalence
3428*75f6d617Schristos 			       class.  */
3429*75f6d617Schristos 			    PATFETCH (c);
3430*75f6d617Schristos 
3431*75f6d617Schristos 			    /* Set the bit for the character.  */
3432*75f6d617Schristos 			    SET_LIST_BIT (str[0]);
3433*75f6d617Schristos 			  }
3434*75f6d617Schristos # ifdef _LIBC
3435*75f6d617Schristos 			else
3436*75f6d617Schristos 			  {
3437*75f6d617Schristos 			    /* Try to match the byte sequence in `str' against
3438*75f6d617Schristos 			       those known to the collate implementation.
3439*75f6d617Schristos 			       First find out whether the bytes in `str' are
3440*75f6d617Schristos 			       actually from exactly one character.  */
3441*75f6d617Schristos 			    const int32_t *table;
3442*75f6d617Schristos 			    const unsigned char *weights;
3443*75f6d617Schristos 			    const unsigned char *extra;
3444*75f6d617Schristos 			    const int32_t *indirect;
3445*75f6d617Schristos 			    int32_t idx;
3446*75f6d617Schristos 			    const unsigned char *cp = str;
3447*75f6d617Schristos 			    int ch;
3448*75f6d617Schristos 
3449*75f6d617Schristos 			    /* This #include defines a local function!  */
3450*75f6d617Schristos #  include <locale/weight.h>
3451*75f6d617Schristos 
3452*75f6d617Schristos 			    table = (const int32_t *)
3453*75f6d617Schristos 			      _NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEMB);
3454*75f6d617Schristos 			    weights = (const unsigned char *)
3455*75f6d617Schristos 			      _NL_CURRENT (LC_COLLATE, _NL_COLLATE_WEIGHTMB);
3456*75f6d617Schristos 			    extra = (const unsigned char *)
3457*75f6d617Schristos 			      _NL_CURRENT (LC_COLLATE, _NL_COLLATE_EXTRAMB);
3458*75f6d617Schristos 			    indirect = (const int32_t *)
3459*75f6d617Schristos 			      _NL_CURRENT (LC_COLLATE, _NL_COLLATE_INDIRECTMB);
3460*75f6d617Schristos 
3461*75f6d617Schristos 			    idx = findidx (&cp);
3462*75f6d617Schristos 			    if (idx == 0 || cp < str + c1)
3463*75f6d617Schristos 			      /* This is no valid character.  */
3464*75f6d617Schristos 			      FREE_STACK_RETURN (REG_ECOLLATE);
3465*75f6d617Schristos 
3466*75f6d617Schristos 			    /* Throw away the ] at the end of the equivalence
3467*75f6d617Schristos 			       class.  */
3468*75f6d617Schristos 			    PATFETCH (c);
3469*75f6d617Schristos 
3470*75f6d617Schristos 			    /* Now we have to go through the whole table
3471*75f6d617Schristos 			       and find all characters which have the same
3472*75f6d617Schristos 			       first level weight.
3473*75f6d617Schristos 
3474*75f6d617Schristos 			       XXX Note that this is not entirely correct.
3475*75f6d617Schristos 			       we would have to match multibyte sequences
3476*75f6d617Schristos 			       but this is not possible with the current
3477*75f6d617Schristos 			       implementation.  */
3478*75f6d617Schristos 			    for (ch = 1; ch < 256; ++ch)
3479*75f6d617Schristos 			      /* XXX This test would have to be changed if we
3480*75f6d617Schristos 				 would allow matching multibyte sequences.  */
3481*75f6d617Schristos 			      if (table[ch] > 0)
3482*75f6d617Schristos 				{
3483*75f6d617Schristos 				  int32_t idx2 = table[ch];
3484*75f6d617Schristos 				  size_t len = weights[idx2];
3485*75f6d617Schristos 
3486*75f6d617Schristos 				  /* Test whether the lengths match.  */
3487*75f6d617Schristos 				  if (weights[idx] == len)
3488*75f6d617Schristos 				    {
3489*75f6d617Schristos 				      /* They do.  Now compare the bytes of
3490*75f6d617Schristos 					 the weight.  */
3491*75f6d617Schristos 				      size_t cnt = 0;
3492*75f6d617Schristos 
3493*75f6d617Schristos 				      while (cnt < len
3494*75f6d617Schristos 					     && (weights[idx + 1 + cnt]
3495*75f6d617Schristos 						 == weights[idx2 + 1 + cnt]))
3496*75f6d617Schristos 					++cnt;
3497*75f6d617Schristos 
3498*75f6d617Schristos 				      if (cnt == len)
3499*75f6d617Schristos 					/* They match.  Mark the character as
3500*75f6d617Schristos 					   acceptable.  */
3501*75f6d617Schristos 					SET_LIST_BIT (ch);
3502*75f6d617Schristos 				    }
3503*75f6d617Schristos 				}
3504*75f6d617Schristos 			  }
3505*75f6d617Schristos # endif
3506*75f6d617Schristos 			had_char_class = true;
3507*75f6d617Schristos 		      }
3508*75f6d617Schristos                     else
3509*75f6d617Schristos                       {
3510*75f6d617Schristos                         c1++;
3511*75f6d617Schristos                         while (c1--)
3512*75f6d617Schristos                           PATUNFETCH;
3513*75f6d617Schristos                         SET_LIST_BIT ('[');
3514*75f6d617Schristos                         SET_LIST_BIT ('=');
3515*75f6d617Schristos 			range_start = '=';
3516*75f6d617Schristos                         had_char_class = false;
3517*75f6d617Schristos                       }
3518*75f6d617Schristos 		  }
3519*75f6d617Schristos                 else if (syntax & RE_CHAR_CLASSES && c == '[' && *p == '.')
3520*75f6d617Schristos 		  {
3521*75f6d617Schristos 		    unsigned char str[128];	/* Should be large enough.  */
3522*75f6d617Schristos # ifdef _LIBC
3523*75f6d617Schristos 		    uint32_t nrules =
3524*75f6d617Schristos 		      _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
3525*75f6d617Schristos # endif
3526*75f6d617Schristos 
3527*75f6d617Schristos 		    PATFETCH (c);
3528*75f6d617Schristos 		    c1 = 0;
3529*75f6d617Schristos 
3530*75f6d617Schristos 		    /* If pattern is `[[.'.  */
3531*75f6d617Schristos 		    if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
3532*75f6d617Schristos 
3533*75f6d617Schristos 		    for (;;)
3534*75f6d617Schristos 		      {
3535*75f6d617Schristos 			PATFETCH (c);
3536*75f6d617Schristos 			if ((c == '.' && *p == ']') || p == pend)
3537*75f6d617Schristos 			  break;
3538*75f6d617Schristos 			if (c1 < sizeof (str))
3539*75f6d617Schristos 			  str[c1++] = c;
3540*75f6d617Schristos 			else
3541*75f6d617Schristos 			  /* This is in any case an invalid class name.  */
3542*75f6d617Schristos 			  str[0] = '\0';
3543*75f6d617Schristos                       }
3544*75f6d617Schristos 		    str[c1] = '\0';
3545*75f6d617Schristos 
3546*75f6d617Schristos 		    if (c == '.' && *p == ']' && str[0] != '\0')
3547*75f6d617Schristos 		      {
3548*75f6d617Schristos 			/* If we have no collation data we use the default
3549*75f6d617Schristos 			   collation in which each character is the name
3550*75f6d617Schristos 			   for its own class which contains only the one
3551*75f6d617Schristos 			   character.  It also means that ASCII is the
3552*75f6d617Schristos 			   character set and therefore we cannot have character
3553*75f6d617Schristos 			   with more than one byte in the multibyte
3554*75f6d617Schristos 			   representation.  */
3555*75f6d617Schristos # ifdef _LIBC
3556*75f6d617Schristos 			if (nrules == 0)
3557*75f6d617Schristos # endif
3558*75f6d617Schristos 			  {
3559*75f6d617Schristos 			    if (c1 != 1)
3560*75f6d617Schristos 			      FREE_STACK_RETURN (REG_ECOLLATE);
3561*75f6d617Schristos 
3562*75f6d617Schristos 			    /* Throw away the ] at the end of the collating
3563*75f6d617Schristos 			       symbol.  */
3564*75f6d617Schristos 			    PATFETCH (c);
3565*75f6d617Schristos 
3566*75f6d617Schristos 			    /* Set the bit for the character.  */
3567*75f6d617Schristos 			    SET_LIST_BIT (str[0]);
3568*75f6d617Schristos 			    range_start = ((const unsigned char *) str)[0];
3569*75f6d617Schristos 			  }
3570*75f6d617Schristos # ifdef _LIBC
3571*75f6d617Schristos 			else
3572*75f6d617Schristos 			  {
3573*75f6d617Schristos 			    /* Try to match the byte sequence in `str' against
3574*75f6d617Schristos 			       those known to the collate implementation.
3575*75f6d617Schristos 			       First find out whether the bytes in `str' are
3576*75f6d617Schristos 			       actually from exactly one character.  */
3577*75f6d617Schristos 			    int32_t table_size;
3578*75f6d617Schristos 			    const int32_t *symb_table;
3579*75f6d617Schristos 			    const unsigned char *extra;
3580*75f6d617Schristos 			    int32_t idx;
3581*75f6d617Schristos 			    int32_t elem;
3582*75f6d617Schristos 			    int32_t second;
3583*75f6d617Schristos 			    int32_t hash;
3584*75f6d617Schristos 
3585*75f6d617Schristos 			    table_size =
3586*75f6d617Schristos 			      _NL_CURRENT_WORD (LC_COLLATE,
3587*75f6d617Schristos 						_NL_COLLATE_SYMB_HASH_SIZEMB);
3588*75f6d617Schristos 			    symb_table = (const int32_t *)
3589*75f6d617Schristos 			      _NL_CURRENT (LC_COLLATE,
3590*75f6d617Schristos 					   _NL_COLLATE_SYMB_TABLEMB);
3591*75f6d617Schristos 			    extra = (const unsigned char *)
3592*75f6d617Schristos 			      _NL_CURRENT (LC_COLLATE,
3593*75f6d617Schristos 					   _NL_COLLATE_SYMB_EXTRAMB);
3594*75f6d617Schristos 
3595*75f6d617Schristos 			    /* Locate the character in the hashing table.  */
3596*75f6d617Schristos 			    hash = elem_hash (str, c1);
3597*75f6d617Schristos 
3598*75f6d617Schristos 			    idx = 0;
3599*75f6d617Schristos 			    elem = hash % table_size;
3600*75f6d617Schristos 			    second = hash % (table_size - 2);
3601*75f6d617Schristos 			    while (symb_table[2 * elem] != 0)
3602*75f6d617Schristos 			      {
3603*75f6d617Schristos 				/* First compare the hashing value.  */
3604*75f6d617Schristos 				if (symb_table[2 * elem] == hash
3605*75f6d617Schristos 				    && c1 == extra[symb_table[2 * elem + 1]]
3606*75f6d617Schristos 				    && memcmp (str,
3607*75f6d617Schristos 					       &extra[symb_table[2 * elem + 1]
3608*75f6d617Schristos 						     + 1],
3609*75f6d617Schristos 					       c1) == 0)
3610*75f6d617Schristos 				  {
3611*75f6d617Schristos 				    /* Yep, this is the entry.  */
3612*75f6d617Schristos 				    idx = symb_table[2 * elem + 1];
3613*75f6d617Schristos 				    idx += 1 + extra[idx];
3614*75f6d617Schristos 				    break;
3615*75f6d617Schristos 				  }
3616*75f6d617Schristos 
3617*75f6d617Schristos 				/* Next entry.  */
3618*75f6d617Schristos 				elem += second;
3619*75f6d617Schristos 			      }
3620*75f6d617Schristos 
3621*75f6d617Schristos 			    if (symb_table[2 * elem] == 0)
3622*75f6d617Schristos 			      /* This is no valid character.  */
3623*75f6d617Schristos 			      FREE_STACK_RETURN (REG_ECOLLATE);
3624*75f6d617Schristos 
3625*75f6d617Schristos 			    /* Throw away the ] at the end of the collating
3626*75f6d617Schristos 			       symbol.  */
3627*75f6d617Schristos 			    PATFETCH (c);
3628*75f6d617Schristos 
3629*75f6d617Schristos 			    /* Now add the multibyte character(s) we found
3630*75f6d617Schristos 			       to the accept list.
3631*75f6d617Schristos 
3632*75f6d617Schristos 			       XXX Note that this is not entirely correct.
3633*75f6d617Schristos 			       we would have to match multibyte sequences
3634*75f6d617Schristos 			       but this is not possible with the current
3635*75f6d617Schristos 			       implementation.  Also, we have to match
3636*75f6d617Schristos 			       collating symbols, which expand to more than
3637*75f6d617Schristos 			       one byte, as a whole and not allow the
3638*75f6d617Schristos 			       individual bytes.  */
3639*75f6d617Schristos 			    c1 = extra[idx++];
3640*75f6d617Schristos 			    if (c1 == 1)
3641*75f6d617Schristos 			      range_start = extra[idx];
3642*75f6d617Schristos 			    while (c1-- > 0)
3643*75f6d617Schristos 			      {
3644*75f6d617Schristos 				SET_LIST_BIT (extra[idx]);
3645*75f6d617Schristos 				++idx;
3646*75f6d617Schristos 			      }
3647*75f6d617Schristos 			  }
3648*75f6d617Schristos # endif
3649*75f6d617Schristos 			had_char_class = false;
3650*75f6d617Schristos 		      }
3651*75f6d617Schristos                     else
3652*75f6d617Schristos                       {
3653*75f6d617Schristos                         c1++;
3654*75f6d617Schristos                         while (c1--)
3655*75f6d617Schristos                           PATUNFETCH;
3656*75f6d617Schristos                         SET_LIST_BIT ('[');
3657*75f6d617Schristos                         SET_LIST_BIT ('.');
3658*75f6d617Schristos 			range_start = '.';
3659*75f6d617Schristos                         had_char_class = false;
3660*75f6d617Schristos                       }
3661*75f6d617Schristos 		  }
3662*75f6d617Schristos                 else
3663*75f6d617Schristos                   {
3664*75f6d617Schristos                     had_char_class = false;
3665*75f6d617Schristos                     SET_LIST_BIT (c);
3666*75f6d617Schristos 		    range_start = c;
3667*75f6d617Schristos                   }
3668*75f6d617Schristos               }
3669*75f6d617Schristos 
3670*75f6d617Schristos             /* Discard any (non)matching list bytes that are all 0 at the
3671*75f6d617Schristos                end of the map.  Decrease the map-length byte too.  */
3672*75f6d617Schristos             while ((int) b[-1] > 0 && b[b[-1] - 1] == 0)
3673*75f6d617Schristos               b[-1]--;
3674*75f6d617Schristos             b += b[-1];
3675*75f6d617Schristos #endif /* WCHAR */
3676*75f6d617Schristos           }
3677*75f6d617Schristos           break;
3678*75f6d617Schristos 
3679*75f6d617Schristos 
3680*75f6d617Schristos 	case '(':
3681*75f6d617Schristos           if (syntax & RE_NO_BK_PARENS)
3682*75f6d617Schristos             goto handle_open;
3683*75f6d617Schristos           else
3684*75f6d617Schristos             goto normal_char;
3685*75f6d617Schristos 
3686*75f6d617Schristos 
3687*75f6d617Schristos         case ')':
3688*75f6d617Schristos           if (syntax & RE_NO_BK_PARENS)
3689*75f6d617Schristos             goto handle_close;
3690*75f6d617Schristos           else
3691*75f6d617Schristos             goto normal_char;
3692*75f6d617Schristos 
3693*75f6d617Schristos 
3694*75f6d617Schristos         case '\n':
3695*75f6d617Schristos           if (syntax & RE_NEWLINE_ALT)
3696*75f6d617Schristos             goto handle_alt;
3697*75f6d617Schristos           else
3698*75f6d617Schristos             goto normal_char;
3699*75f6d617Schristos 
3700*75f6d617Schristos 
3701*75f6d617Schristos 	case '|':
3702*75f6d617Schristos           if (syntax & RE_NO_BK_VBAR)
3703*75f6d617Schristos             goto handle_alt;
3704*75f6d617Schristos           else
3705*75f6d617Schristos             goto normal_char;
3706*75f6d617Schristos 
3707*75f6d617Schristos 
3708*75f6d617Schristos         case '{':
3709*75f6d617Schristos            if (syntax & RE_INTERVALS && syntax & RE_NO_BK_BRACES)
3710*75f6d617Schristos              goto handle_interval;
3711*75f6d617Schristos            else
3712*75f6d617Schristos              goto normal_char;
3713*75f6d617Schristos 
3714*75f6d617Schristos 
3715*75f6d617Schristos         case '\\':
3716*75f6d617Schristos           if (p == pend) FREE_STACK_RETURN (REG_EESCAPE);
3717*75f6d617Schristos 
3718*75f6d617Schristos           /* Do not translate the character after the \, so that we can
3719*75f6d617Schristos              distinguish, e.g., \B from \b, even if we normally would
3720*75f6d617Schristos              translate, e.g., B to b.  */
3721*75f6d617Schristos           PATFETCH_RAW (c);
3722*75f6d617Schristos 
3723*75f6d617Schristos           switch (c)
3724*75f6d617Schristos             {
3725*75f6d617Schristos             case '(':
3726*75f6d617Schristos               if (syntax & RE_NO_BK_PARENS)
3727*75f6d617Schristos                 goto normal_backslash;
3728*75f6d617Schristos 
3729*75f6d617Schristos             handle_open:
3730*75f6d617Schristos               bufp->re_nsub++;
3731*75f6d617Schristos               regnum++;
3732*75f6d617Schristos 
3733*75f6d617Schristos               if (COMPILE_STACK_FULL)
3734*75f6d617Schristos                 {
3735*75f6d617Schristos                   RETALLOC (compile_stack.stack, compile_stack.size << 1,
3736*75f6d617Schristos                             compile_stack_elt_t);
3737*75f6d617Schristos                   if (compile_stack.stack == NULL) return REG_ESPACE;
3738*75f6d617Schristos 
3739*75f6d617Schristos                   compile_stack.size <<= 1;
3740*75f6d617Schristos                 }
3741*75f6d617Schristos 
3742*75f6d617Schristos               /* These are the values to restore when we hit end of this
3743*75f6d617Schristos                  group.  They are all relative offsets, so that if the
3744*75f6d617Schristos                  whole pattern moves because of realloc, they will still
3745*75f6d617Schristos                  be valid.  */
3746*75f6d617Schristos               COMPILE_STACK_TOP.begalt_offset = begalt - COMPILED_BUFFER_VAR;
3747*75f6d617Schristos               COMPILE_STACK_TOP.fixup_alt_jump
3748*75f6d617Schristos                 = fixup_alt_jump ? fixup_alt_jump - COMPILED_BUFFER_VAR + 1 : 0;
3749*75f6d617Schristos               COMPILE_STACK_TOP.laststart_offset = b - COMPILED_BUFFER_VAR;
3750*75f6d617Schristos               COMPILE_STACK_TOP.regnum = regnum;
3751*75f6d617Schristos 
3752*75f6d617Schristos               /* We will eventually replace the 0 with the number of
3753*75f6d617Schristos                  groups inner to this one.  But do not push a
3754*75f6d617Schristos                  start_memory for groups beyond the last one we can
3755*75f6d617Schristos                  represent in the compiled pattern.  */
3756*75f6d617Schristos               if (regnum <= MAX_REGNUM)
3757*75f6d617Schristos                 {
3758*75f6d617Schristos                   COMPILE_STACK_TOP.inner_group_offset = b
3759*75f6d617Schristos 		    - COMPILED_BUFFER_VAR + 2;
3760*75f6d617Schristos                   BUF_PUSH_3 (start_memory, regnum, 0);
3761*75f6d617Schristos                 }
3762*75f6d617Schristos 
3763*75f6d617Schristos               compile_stack.avail++;
3764*75f6d617Schristos 
3765*75f6d617Schristos               fixup_alt_jump = 0;
3766*75f6d617Schristos               laststart = 0;
3767*75f6d617Schristos               begalt = b;
3768*75f6d617Schristos 	      /* If we've reached MAX_REGNUM groups, then this open
3769*75f6d617Schristos 		 won't actually generate any code, so we'll have to
3770*75f6d617Schristos 		 clear pending_exact explicitly.  */
3771*75f6d617Schristos 	      pending_exact = 0;
3772*75f6d617Schristos               break;
3773*75f6d617Schristos 
3774*75f6d617Schristos 
3775*75f6d617Schristos             case ')':
3776*75f6d617Schristos               if (syntax & RE_NO_BK_PARENS) goto normal_backslash;
3777*75f6d617Schristos 
3778*75f6d617Schristos               if (COMPILE_STACK_EMPTY)
3779*75f6d617Schristos 		{
3780*75f6d617Schristos 		  if (syntax & RE_UNMATCHED_RIGHT_PAREN_ORD)
3781*75f6d617Schristos 		    goto normal_backslash;
3782*75f6d617Schristos 		  else
3783*75f6d617Schristos 		    FREE_STACK_RETURN (REG_ERPAREN);
3784*75f6d617Schristos 		}
3785*75f6d617Schristos 
3786*75f6d617Schristos             handle_close:
3787*75f6d617Schristos               if (fixup_alt_jump)
3788*75f6d617Schristos                 { /* Push a dummy failure point at the end of the
3789*75f6d617Schristos                      alternative for a possible future
3790*75f6d617Schristos                      `pop_failure_jump' to pop.  See comments at
3791*75f6d617Schristos                      `push_dummy_failure' in `re_match_2'.  */
3792*75f6d617Schristos                   BUF_PUSH (push_dummy_failure);
3793*75f6d617Schristos 
3794*75f6d617Schristos                   /* We allocated space for this jump when we assigned
3795*75f6d617Schristos                      to `fixup_alt_jump', in the `handle_alt' case below.  */
3796*75f6d617Schristos                   STORE_JUMP (jump_past_alt, fixup_alt_jump, b - 1);
3797*75f6d617Schristos                 }
3798*75f6d617Schristos 
3799*75f6d617Schristos               /* See similar code for backslashed left paren above.  */
3800*75f6d617Schristos               if (COMPILE_STACK_EMPTY)
3801*75f6d617Schristos 		{
3802*75f6d617Schristos 		  if (syntax & RE_UNMATCHED_RIGHT_PAREN_ORD)
3803*75f6d617Schristos 		    goto normal_char;
3804*75f6d617Schristos 		  else
3805*75f6d617Schristos 		    FREE_STACK_RETURN (REG_ERPAREN);
3806*75f6d617Schristos 		}
3807*75f6d617Schristos 
3808*75f6d617Schristos               /* Since we just checked for an empty stack above, this
3809*75f6d617Schristos                  ``can't happen''.  */
3810*75f6d617Schristos               assert (compile_stack.avail != 0);
3811*75f6d617Schristos               {
3812*75f6d617Schristos                 /* We don't just want to restore into `regnum', because
3813*75f6d617Schristos                    later groups should continue to be numbered higher,
3814*75f6d617Schristos                    as in `(ab)c(de)' -- the second group is #2.  */
3815*75f6d617Schristos                 regnum_t this_group_regnum;
3816*75f6d617Schristos 
3817*75f6d617Schristos                 compile_stack.avail--;
3818*75f6d617Schristos                 begalt = COMPILED_BUFFER_VAR + COMPILE_STACK_TOP.begalt_offset;
3819*75f6d617Schristos                 fixup_alt_jump
3820*75f6d617Schristos                   = COMPILE_STACK_TOP.fixup_alt_jump
3821*75f6d617Schristos                     ? COMPILED_BUFFER_VAR + COMPILE_STACK_TOP.fixup_alt_jump - 1
3822*75f6d617Schristos                     : 0;
3823*75f6d617Schristos                 laststart = COMPILED_BUFFER_VAR + COMPILE_STACK_TOP.laststart_offset;
3824*75f6d617Schristos                 this_group_regnum = COMPILE_STACK_TOP.regnum;
3825*75f6d617Schristos 		/* If we've reached MAX_REGNUM groups, then this close
3826*75f6d617Schristos 		   won't actually generate any code, so we'll have to
3827*75f6d617Schristos 		   clear pending_exact explicitly.  */
3828*75f6d617Schristos 		pending_exact = 0;
3829*75f6d617Schristos 
3830*75f6d617Schristos                 /* We're at the end of the group, so now we know how many
3831*75f6d617Schristos                    groups were inside this one.  */
3832*75f6d617Schristos                 if (this_group_regnum <= MAX_REGNUM)
3833*75f6d617Schristos                   {
3834*75f6d617Schristos 		    UCHAR_T *inner_group_loc
3835*75f6d617Schristos                       = COMPILED_BUFFER_VAR + COMPILE_STACK_TOP.inner_group_offset;
3836*75f6d617Schristos 
3837*75f6d617Schristos                     *inner_group_loc = regnum - this_group_regnum;
3838*75f6d617Schristos                     BUF_PUSH_3 (stop_memory, this_group_regnum,
3839*75f6d617Schristos                                 regnum - this_group_regnum);
3840*75f6d617Schristos                   }
3841*75f6d617Schristos               }
3842*75f6d617Schristos               break;
3843*75f6d617Schristos 
3844*75f6d617Schristos 
3845*75f6d617Schristos             case '|':					/* `\|'.  */
3846*75f6d617Schristos               if (syntax & RE_LIMITED_OPS || syntax & RE_NO_BK_VBAR)
3847*75f6d617Schristos                 goto normal_backslash;
3848*75f6d617Schristos             handle_alt:
3849*75f6d617Schristos               if (syntax & RE_LIMITED_OPS)
3850*75f6d617Schristos                 goto normal_char;
3851*75f6d617Schristos 
3852*75f6d617Schristos               /* Insert before the previous alternative a jump which
3853*75f6d617Schristos                  jumps to this alternative if the former fails.  */
3854*75f6d617Schristos               GET_BUFFER_SPACE (1 + OFFSET_ADDRESS_SIZE);
3855*75f6d617Schristos               INSERT_JUMP (on_failure_jump, begalt,
3856*75f6d617Schristos 			   b + 2 + 2 * OFFSET_ADDRESS_SIZE);
3857*75f6d617Schristos               pending_exact = 0;
3858*75f6d617Schristos               b += 1 + OFFSET_ADDRESS_SIZE;
3859*75f6d617Schristos 
3860*75f6d617Schristos               /* The alternative before this one has a jump after it
3861*75f6d617Schristos                  which gets executed if it gets matched.  Adjust that
3862*75f6d617Schristos                  jump so it will jump to this alternative's analogous
3863*75f6d617Schristos                  jump (put in below, which in turn will jump to the next
3864*75f6d617Schristos                  (if any) alternative's such jump, etc.).  The last such
3865*75f6d617Schristos                  jump jumps to the correct final destination.  A picture:
3866*75f6d617Schristos                           _____ _____
3867*75f6d617Schristos                           |   | |   |
3868*75f6d617Schristos                           |   v |   v
3869*75f6d617Schristos                          a | b   | c
3870*75f6d617Schristos 
3871*75f6d617Schristos                  If we are at `b', then fixup_alt_jump right now points to a
3872*75f6d617Schristos                  three-byte space after `a'.  We'll put in the jump, set
3873*75f6d617Schristos                  fixup_alt_jump to right after `b', and leave behind three
3874*75f6d617Schristos                  bytes which we'll fill in when we get to after `c'.  */
3875*75f6d617Schristos 
3876*75f6d617Schristos               if (fixup_alt_jump)
3877*75f6d617Schristos                 STORE_JUMP (jump_past_alt, fixup_alt_jump, b);
3878*75f6d617Schristos 
3879*75f6d617Schristos               /* Mark and leave space for a jump after this alternative,
3880*75f6d617Schristos                  to be filled in later either by next alternative or
3881*75f6d617Schristos                  when we know we're at the end of a series of alternatives.  */
3882*75f6d617Schristos               fixup_alt_jump = b;
3883*75f6d617Schristos               GET_BUFFER_SPACE (1 + OFFSET_ADDRESS_SIZE);
3884*75f6d617Schristos               b += 1 + OFFSET_ADDRESS_SIZE;
3885*75f6d617Schristos 
3886*75f6d617Schristos               laststart = 0;
3887*75f6d617Schristos               begalt = b;
3888*75f6d617Schristos               break;
3889*75f6d617Schristos 
3890*75f6d617Schristos 
3891*75f6d617Schristos             case '{':
3892*75f6d617Schristos               /* If \{ is a literal.  */
3893*75f6d617Schristos               if (!(syntax & RE_INTERVALS)
3894*75f6d617Schristos                      /* If we're at `\{' and it's not the open-interval
3895*75f6d617Schristos                         operator.  */
3896*75f6d617Schristos 		  || (syntax & RE_NO_BK_BRACES))
3897*75f6d617Schristos                 goto normal_backslash;
3898*75f6d617Schristos 
3899*75f6d617Schristos             handle_interval:
3900*75f6d617Schristos               {
3901*75f6d617Schristos                 /* If got here, then the syntax allows intervals.  */
3902*75f6d617Schristos 
3903*75f6d617Schristos                 /* At least (most) this many matches must be made.  */
3904*75f6d617Schristos                 int lower_bound = -1, upper_bound = -1;
3905*75f6d617Schristos 
3906*75f6d617Schristos 		/* Place in the uncompiled pattern (i.e., just after
3907*75f6d617Schristos 		   the '{') to go back to if the interval is invalid.  */
3908*75f6d617Schristos 		const CHAR_T *beg_interval = p;
3909*75f6d617Schristos 
3910*75f6d617Schristos                 if (p == pend)
3911*75f6d617Schristos 		  goto invalid_interval;
3912*75f6d617Schristos 
3913*75f6d617Schristos                 GET_UNSIGNED_NUMBER (lower_bound);
3914*75f6d617Schristos 
3915*75f6d617Schristos                 if (c == ',')
3916*75f6d617Schristos                   {
3917*75f6d617Schristos                     GET_UNSIGNED_NUMBER (upper_bound);
3918*75f6d617Schristos 		    if (upper_bound < 0)
3919*75f6d617Schristos 		      upper_bound = RE_DUP_MAX;
3920*75f6d617Schristos                   }
3921*75f6d617Schristos                 else
3922*75f6d617Schristos                   /* Interval such as `{1}' => match exactly once. */
3923*75f6d617Schristos                   upper_bound = lower_bound;
3924*75f6d617Schristos 
3925*75f6d617Schristos                 if (! (0 <= lower_bound && lower_bound <= upper_bound))
3926*75f6d617Schristos 		  goto invalid_interval;
3927*75f6d617Schristos 
3928*75f6d617Schristos                 if (!(syntax & RE_NO_BK_BRACES))
3929*75f6d617Schristos                   {
3930*75f6d617Schristos 		    if (c != '\\' || p == pend)
3931*75f6d617Schristos 		      goto invalid_interval;
3932*75f6d617Schristos                     PATFETCH (c);
3933*75f6d617Schristos                   }
3934*75f6d617Schristos 
3935*75f6d617Schristos                 if (c != '}')
3936*75f6d617Schristos 		  goto invalid_interval;
3937*75f6d617Schristos 
3938*75f6d617Schristos                 /* If it's invalid to have no preceding re.  */
3939*75f6d617Schristos                 if (!laststart)
3940*75f6d617Schristos                   {
3941*75f6d617Schristos 		    if (syntax & RE_CONTEXT_INVALID_OPS
3942*75f6d617Schristos 			&& !(syntax & RE_INVALID_INTERVAL_ORD))
3943*75f6d617Schristos                       FREE_STACK_RETURN (REG_BADRPT);
3944*75f6d617Schristos                     else if (syntax & RE_CONTEXT_INDEP_OPS)
3945*75f6d617Schristos                       laststart = b;
3946*75f6d617Schristos                     else
3947*75f6d617Schristos                       goto unfetch_interval;
3948*75f6d617Schristos                   }
3949*75f6d617Schristos 
3950*75f6d617Schristos                 /* We just parsed a valid interval.  */
3951*75f6d617Schristos 
3952*75f6d617Schristos                 if (RE_DUP_MAX < upper_bound)
3953*75f6d617Schristos 		  FREE_STACK_RETURN (REG_BADBR);
3954*75f6d617Schristos 
3955*75f6d617Schristos                 /* If the upper bound is zero, don't want to succeed at
3956*75f6d617Schristos                    all; jump from `laststart' to `b + 3', which will be
3957*75f6d617Schristos 		   the end of the buffer after we insert the jump.  */
3958*75f6d617Schristos 		/* ifdef WCHAR, 'b + 1 + OFFSET_ADDRESS_SIZE'
3959*75f6d617Schristos 		   instead of 'b + 3'.  */
3960*75f6d617Schristos                  if (upper_bound == 0)
3961*75f6d617Schristos                    {
3962*75f6d617Schristos                      GET_BUFFER_SPACE (1 + OFFSET_ADDRESS_SIZE);
3963*75f6d617Schristos                      INSERT_JUMP (jump, laststart, b + 1
3964*75f6d617Schristos 				  + OFFSET_ADDRESS_SIZE);
3965*75f6d617Schristos                      b += 1 + OFFSET_ADDRESS_SIZE;
3966*75f6d617Schristos                    }
3967*75f6d617Schristos 
3968*75f6d617Schristos                  /* Otherwise, we have a nontrivial interval.  When
3969*75f6d617Schristos                     we're all done, the pattern will look like:
3970*75f6d617Schristos                       set_number_at <jump count> <upper bound>
3971*75f6d617Schristos                       set_number_at <succeed_n count> <lower bound>
3972*75f6d617Schristos                       succeed_n <after jump addr> <succeed_n count>
3973*75f6d617Schristos                       <body of loop>
3974*75f6d617Schristos                       jump_n <succeed_n addr> <jump count>
3975*75f6d617Schristos                     (The upper bound and `jump_n' are omitted if
3976*75f6d617Schristos                     `upper_bound' is 1, though.)  */
3977*75f6d617Schristos                  else
3978*75f6d617Schristos                    { /* If the upper bound is > 1, we need to insert
3979*75f6d617Schristos                         more at the end of the loop.  */
3980*75f6d617Schristos                      unsigned nbytes = 2 + 4 * OFFSET_ADDRESS_SIZE +
3981*75f6d617Schristos 		       (upper_bound > 1) * (2 + 4 * OFFSET_ADDRESS_SIZE);
3982*75f6d617Schristos 
3983*75f6d617Schristos                      GET_BUFFER_SPACE (nbytes);
3984*75f6d617Schristos 
3985*75f6d617Schristos                      /* Initialize lower bound of the `succeed_n', even
3986*75f6d617Schristos                         though it will be set during matching by its
3987*75f6d617Schristos                         attendant `set_number_at' (inserted next),
3988*75f6d617Schristos                         because `re_compile_fastmap' needs to know.
3989*75f6d617Schristos                         Jump to the `jump_n' we might insert below.  */
3990*75f6d617Schristos                      INSERT_JUMP2 (succeed_n, laststart,
3991*75f6d617Schristos                                    b + 1 + 2 * OFFSET_ADDRESS_SIZE
3992*75f6d617Schristos 				   + (upper_bound > 1) * (1 + 2 * OFFSET_ADDRESS_SIZE)
3993*75f6d617Schristos 				   , lower_bound);
3994*75f6d617Schristos                      b += 1 + 2 * OFFSET_ADDRESS_SIZE;
3995*75f6d617Schristos 
3996*75f6d617Schristos                      /* Code to initialize the lower bound.  Insert
3997*75f6d617Schristos                         before the `succeed_n'.  The `5' is the last two
3998*75f6d617Schristos                         bytes of this `set_number_at', plus 3 bytes of
3999*75f6d617Schristos                         the following `succeed_n'.  */
4000*75f6d617Schristos 		     /* ifdef WCHAR, The '1+2*OFFSET_ADDRESS_SIZE'
4001*75f6d617Schristos 			is the 'set_number_at', plus '1+OFFSET_ADDRESS_SIZE'
4002*75f6d617Schristos 			of the following `succeed_n'.  */
4003*75f6d617Schristos                      PREFIX(insert_op2) (set_number_at, laststart, 1
4004*75f6d617Schristos 				 + 2 * OFFSET_ADDRESS_SIZE, lower_bound, b);
4005*75f6d617Schristos                      b += 1 + 2 * OFFSET_ADDRESS_SIZE;
4006*75f6d617Schristos 
4007*75f6d617Schristos                      if (upper_bound > 1)
4008*75f6d617Schristos                        { /* More than one repetition is allowed, so
4009*75f6d617Schristos                             append a backward jump to the `succeed_n'
4010*75f6d617Schristos                             that starts this interval.
4011*75f6d617Schristos 
4012*75f6d617Schristos                             When we've reached this during matching,
4013*75f6d617Schristos                             we'll have matched the interval once, so
4014*75f6d617Schristos                             jump back only `upper_bound - 1' times.  */
4015*75f6d617Schristos                          STORE_JUMP2 (jump_n, b, laststart
4016*75f6d617Schristos 				      + 2 * OFFSET_ADDRESS_SIZE + 1,
4017*75f6d617Schristos                                       upper_bound - 1);
4018*75f6d617Schristos                          b += 1 + 2 * OFFSET_ADDRESS_SIZE;
4019*75f6d617Schristos 
4020*75f6d617Schristos                          /* The location we want to set is the second
4021*75f6d617Schristos                             parameter of the `jump_n'; that is `b-2' as
4022*75f6d617Schristos                             an absolute address.  `laststart' will be
4023*75f6d617Schristos                             the `set_number_at' we're about to insert;
4024*75f6d617Schristos                             `laststart+3' the number to set, the source
4025*75f6d617Schristos                             for the relative address.  But we are
4026*75f6d617Schristos                             inserting into the middle of the pattern --
4027*75f6d617Schristos                             so everything is getting moved up by 5.
4028*75f6d617Schristos                             Conclusion: (b - 2) - (laststart + 3) + 5,
4029*75f6d617Schristos                             i.e., b - laststart.
4030*75f6d617Schristos 
4031*75f6d617Schristos                             We insert this at the beginning of the loop
4032*75f6d617Schristos                             so that if we fail during matching, we'll
4033*75f6d617Schristos                             reinitialize the bounds.  */
4034*75f6d617Schristos                          PREFIX(insert_op2) (set_number_at, laststart,
4035*75f6d617Schristos 					     b - laststart,
4036*75f6d617Schristos 					     upper_bound - 1, b);
4037*75f6d617Schristos                          b += 1 + 2 * OFFSET_ADDRESS_SIZE;
4038*75f6d617Schristos                        }
4039*75f6d617Schristos                    }
4040*75f6d617Schristos                 pending_exact = 0;
4041*75f6d617Schristos 		break;
4042*75f6d617Schristos 
4043*75f6d617Schristos 	      invalid_interval:
4044*75f6d617Schristos 		if (!(syntax & RE_INVALID_INTERVAL_ORD))
4045*75f6d617Schristos 		  FREE_STACK_RETURN (p == pend ? REG_EBRACE : REG_BADBR);
4046*75f6d617Schristos 	      unfetch_interval:
4047*75f6d617Schristos 		/* Match the characters as literals.  */
4048*75f6d617Schristos 		p = beg_interval;
4049*75f6d617Schristos 		c = '{';
4050*75f6d617Schristos 		if (syntax & RE_NO_BK_BRACES)
4051*75f6d617Schristos 		  goto normal_char;
4052*75f6d617Schristos 		else
4053*75f6d617Schristos 		  goto normal_backslash;
4054*75f6d617Schristos 	      }
4055*75f6d617Schristos 
4056*75f6d617Schristos #ifdef emacs
4057*75f6d617Schristos             /* There is no way to specify the before_dot and after_dot
4058*75f6d617Schristos                operators.  rms says this is ok.  --karl  */
4059*75f6d617Schristos             case '=':
4060*75f6d617Schristos               BUF_PUSH (at_dot);
4061*75f6d617Schristos               break;
4062*75f6d617Schristos 
4063*75f6d617Schristos             case 's':
4064*75f6d617Schristos               laststart = b;
4065*75f6d617Schristos               PATFETCH (c);
4066*75f6d617Schristos               BUF_PUSH_2 (syntaxspec, syntax_spec_code[c]);
4067*75f6d617Schristos               break;
4068*75f6d617Schristos 
4069*75f6d617Schristos             case 'S':
4070*75f6d617Schristos               laststart = b;
4071*75f6d617Schristos               PATFETCH (c);
4072*75f6d617Schristos               BUF_PUSH_2 (notsyntaxspec, syntax_spec_code[c]);
4073*75f6d617Schristos               break;
4074*75f6d617Schristos #endif /* emacs */
4075*75f6d617Schristos 
4076*75f6d617Schristos 
4077*75f6d617Schristos             case 'w':
4078*75f6d617Schristos 	      if (syntax & RE_NO_GNU_OPS)
4079*75f6d617Schristos 		goto normal_char;
4080*75f6d617Schristos               laststart = b;
4081*75f6d617Schristos               BUF_PUSH (wordchar);
4082*75f6d617Schristos               break;
4083*75f6d617Schristos 
4084*75f6d617Schristos 
4085*75f6d617Schristos             case 'W':
4086*75f6d617Schristos 	      if (syntax & RE_NO_GNU_OPS)
4087*75f6d617Schristos 		goto normal_char;
4088*75f6d617Schristos               laststart = b;
4089*75f6d617Schristos               BUF_PUSH (notwordchar);
4090*75f6d617Schristos               break;
4091*75f6d617Schristos 
4092*75f6d617Schristos 
4093*75f6d617Schristos             case '<':
4094*75f6d617Schristos 	      if (syntax & RE_NO_GNU_OPS)
4095*75f6d617Schristos 		goto normal_char;
4096*75f6d617Schristos               BUF_PUSH (wordbeg);
4097*75f6d617Schristos               break;
4098*75f6d617Schristos 
4099*75f6d617Schristos             case '>':
4100*75f6d617Schristos 	      if (syntax & RE_NO_GNU_OPS)
4101*75f6d617Schristos 		goto normal_char;
4102*75f6d617Schristos               BUF_PUSH (wordend);
4103*75f6d617Schristos               break;
4104*75f6d617Schristos 
4105*75f6d617Schristos             case 'b':
4106*75f6d617Schristos 	      if (syntax & RE_NO_GNU_OPS)
4107*75f6d617Schristos 		goto normal_char;
4108*75f6d617Schristos               BUF_PUSH (wordbound);
4109*75f6d617Schristos               break;
4110*75f6d617Schristos 
4111*75f6d617Schristos             case 'B':
4112*75f6d617Schristos 	      if (syntax & RE_NO_GNU_OPS)
4113*75f6d617Schristos 		goto normal_char;
4114*75f6d617Schristos               BUF_PUSH (notwordbound);
4115*75f6d617Schristos               break;
4116*75f6d617Schristos 
4117*75f6d617Schristos             case '`':
4118*75f6d617Schristos 	      if (syntax & RE_NO_GNU_OPS)
4119*75f6d617Schristos 		goto normal_char;
4120*75f6d617Schristos               BUF_PUSH (begbuf);
4121*75f6d617Schristos               break;
4122*75f6d617Schristos 
4123*75f6d617Schristos             case '\'':
4124*75f6d617Schristos 	      if (syntax & RE_NO_GNU_OPS)
4125*75f6d617Schristos 		goto normal_char;
4126*75f6d617Schristos               BUF_PUSH (endbuf);
4127*75f6d617Schristos               break;
4128*75f6d617Schristos 
4129*75f6d617Schristos             case '1': case '2': case '3': case '4': case '5':
4130*75f6d617Schristos             case '6': case '7': case '8': case '9':
4131*75f6d617Schristos               if (syntax & RE_NO_BK_REFS)
4132*75f6d617Schristos                 goto normal_char;
4133*75f6d617Schristos 
4134*75f6d617Schristos               c1 = c - '0';
4135*75f6d617Schristos 
4136*75f6d617Schristos               if (c1 > regnum)
4137*75f6d617Schristos                 FREE_STACK_RETURN (REG_ESUBREG);
4138*75f6d617Schristos 
4139*75f6d617Schristos               /* Can't back reference to a subexpression if inside of it.  */
4140*75f6d617Schristos               if (group_in_compile_stack (compile_stack, (regnum_t) c1))
4141*75f6d617Schristos                 goto normal_char;
4142*75f6d617Schristos 
4143*75f6d617Schristos               laststart = b;
4144*75f6d617Schristos               BUF_PUSH_2 (duplicate, c1);
4145*75f6d617Schristos               break;
4146*75f6d617Schristos 
4147*75f6d617Schristos 
4148*75f6d617Schristos             case '+':
4149*75f6d617Schristos             case '?':
4150*75f6d617Schristos               if (syntax & RE_BK_PLUS_QM)
4151*75f6d617Schristos                 goto handle_plus;
4152*75f6d617Schristos               else
4153*75f6d617Schristos                 goto normal_backslash;
4154*75f6d617Schristos 
4155*75f6d617Schristos             default:
4156*75f6d617Schristos             normal_backslash:
4157*75f6d617Schristos               /* You might think it would be useful for \ to mean
4158*75f6d617Schristos                  not to translate; but if we don't translate it
4159*75f6d617Schristos                  it will never match anything.  */
4160*75f6d617Schristos               c = TRANSLATE (c);
4161*75f6d617Schristos               goto normal_char;
4162*75f6d617Schristos             }
4163*75f6d617Schristos           break;
4164*75f6d617Schristos 
4165*75f6d617Schristos 
4166*75f6d617Schristos 	default:
4167*75f6d617Schristos         /* Expects the character in `c'.  */
4168*75f6d617Schristos 	normal_char:
4169*75f6d617Schristos 	      /* If no exactn currently being built.  */
4170*75f6d617Schristos           if (!pending_exact
4171*75f6d617Schristos #ifdef WCHAR
4172*75f6d617Schristos 	      /* If last exactn handle binary(or character) and
4173*75f6d617Schristos 		 new exactn handle character(or binary).  */
4174*75f6d617Schristos 	      || is_exactn_bin != is_binary[p - 1 - pattern]
4175*75f6d617Schristos #endif /* WCHAR */
4176*75f6d617Schristos 
4177*75f6d617Schristos               /* If last exactn not at current position.  */
4178*75f6d617Schristos               || pending_exact + *pending_exact + 1 != b
4179*75f6d617Schristos 
4180*75f6d617Schristos               /* We have only one byte following the exactn for the count.  */
4181*75f6d617Schristos 	      || *pending_exact == (1 << BYTEWIDTH) - 1
4182*75f6d617Schristos 
4183*75f6d617Schristos               /* If followed by a repetition operator.  */
4184*75f6d617Schristos               || *p == '*' || *p == '^'
4185*75f6d617Schristos 	      || ((syntax & RE_BK_PLUS_QM)
4186*75f6d617Schristos 		  ? *p == '\\' && (p[1] == '+' || p[1] == '?')
4187*75f6d617Schristos 		  : (*p == '+' || *p == '?'))
4188*75f6d617Schristos 	      || ((syntax & RE_INTERVALS)
4189*75f6d617Schristos                   && ((syntax & RE_NO_BK_BRACES)
4190*75f6d617Schristos 		      ? *p == '{'
4191*75f6d617Schristos                       : (p[0] == '\\' && p[1] == '{'))))
4192*75f6d617Schristos 	    {
4193*75f6d617Schristos 	      /* Start building a new exactn.  */
4194*75f6d617Schristos 
4195*75f6d617Schristos               laststart = b;
4196*75f6d617Schristos 
4197*75f6d617Schristos #ifdef WCHAR
4198*75f6d617Schristos 	      /* Is this exactn binary data or character? */
4199*75f6d617Schristos 	      is_exactn_bin = is_binary[p - 1 - pattern];
4200*75f6d617Schristos 	      if (is_exactn_bin)
4201*75f6d617Schristos 		  BUF_PUSH_2 (exactn_bin, 0);
4202*75f6d617Schristos 	      else
4203*75f6d617Schristos 		  BUF_PUSH_2 (exactn, 0);
4204*75f6d617Schristos #else
4205*75f6d617Schristos 	      BUF_PUSH_2 (exactn, 0);
4206*75f6d617Schristos #endif /* WCHAR */
4207*75f6d617Schristos 	      pending_exact = b - 1;
4208*75f6d617Schristos             }
4209*75f6d617Schristos 
4210*75f6d617Schristos 	  BUF_PUSH (c);
4211*75f6d617Schristos           (*pending_exact)++;
4212*75f6d617Schristos 	  break;
4213*75f6d617Schristos         } /* switch (c) */
4214*75f6d617Schristos     } /* while p != pend */
4215*75f6d617Schristos 
4216*75f6d617Schristos 
4217*75f6d617Schristos   /* Through the pattern now.  */
4218*75f6d617Schristos 
4219*75f6d617Schristos   if (fixup_alt_jump)
4220*75f6d617Schristos     STORE_JUMP (jump_past_alt, fixup_alt_jump, b);
4221*75f6d617Schristos 
4222*75f6d617Schristos   if (!COMPILE_STACK_EMPTY)
4223*75f6d617Schristos     FREE_STACK_RETURN (REG_EPAREN);
4224*75f6d617Schristos 
4225*75f6d617Schristos   /* If we don't want backtracking, force success
4226*75f6d617Schristos      the first time we reach the end of the compiled pattern.  */
4227*75f6d617Schristos   if (syntax & RE_NO_POSIX_BACKTRACKING)
4228*75f6d617Schristos     BUF_PUSH (succeed);
4229*75f6d617Schristos 
4230*75f6d617Schristos #ifdef WCHAR
4231*75f6d617Schristos   free (pattern);
4232*75f6d617Schristos   free (mbs_offset);
4233*75f6d617Schristos   free (is_binary);
4234*75f6d617Schristos #endif
4235*75f6d617Schristos   free (compile_stack.stack);
4236*75f6d617Schristos 
4237*75f6d617Schristos   /* We have succeeded; set the length of the buffer.  */
4238*75f6d617Schristos #ifdef WCHAR
4239*75f6d617Schristos   bufp->used = (uintptr_t) b - (uintptr_t) COMPILED_BUFFER_VAR;
4240*75f6d617Schristos #else
4241*75f6d617Schristos   bufp->used = b - bufp->buffer;
4242*75f6d617Schristos #endif
4243*75f6d617Schristos 
4244*75f6d617Schristos #ifdef DEBUG
4245*75f6d617Schristos   if (debug)
4246*75f6d617Schristos     {
4247*75f6d617Schristos       DEBUG_PRINT1 ("\nCompiled pattern: \n");
4248*75f6d617Schristos       PREFIX(print_compiled_pattern) (bufp);
4249*75f6d617Schristos     }
4250*75f6d617Schristos #endif /* DEBUG */
4251*75f6d617Schristos 
4252*75f6d617Schristos #ifndef MATCH_MAY_ALLOCATE
4253*75f6d617Schristos   /* Initialize the failure stack to the largest possible stack.  This
4254*75f6d617Schristos      isn't necessary unless we're trying to avoid calling alloca in
4255*75f6d617Schristos      the search and match routines.  */
4256*75f6d617Schristos   {
4257*75f6d617Schristos     int num_regs = bufp->re_nsub + 1;
4258*75f6d617Schristos 
4259*75f6d617Schristos     /* Since DOUBLE_FAIL_STACK refuses to double only if the current size
4260*75f6d617Schristos        is strictly greater than re_max_failures, the largest possible stack
4261*75f6d617Schristos        is 2 * re_max_failures failure points.  */
4262*75f6d617Schristos     if (fail_stack.size < (2 * re_max_failures * MAX_FAILURE_ITEMS))
4263*75f6d617Schristos       {
4264*75f6d617Schristos 	fail_stack.size = (2 * re_max_failures * MAX_FAILURE_ITEMS);
4265*75f6d617Schristos 
4266*75f6d617Schristos # ifdef emacs
4267*75f6d617Schristos 	if (! fail_stack.stack)
4268*75f6d617Schristos 	  fail_stack.stack
4269*75f6d617Schristos 	    = (PREFIX(fail_stack_elt_t) *) xmalloc (fail_stack.size
4270*75f6d617Schristos 				    * sizeof (PREFIX(fail_stack_elt_t)));
4271*75f6d617Schristos 	else
4272*75f6d617Schristos 	  fail_stack.stack
4273*75f6d617Schristos 	    = (PREFIX(fail_stack_elt_t) *) xrealloc (fail_stack.stack,
4274*75f6d617Schristos 				     (fail_stack.size
4275*75f6d617Schristos 				      * sizeof (PREFIX(fail_stack_elt_t))));
4276*75f6d617Schristos # else /* not emacs */
4277*75f6d617Schristos 	if (! fail_stack.stack)
4278*75f6d617Schristos 	  fail_stack.stack
4279*75f6d617Schristos 	    = (PREFIX(fail_stack_elt_t) *) malloc (fail_stack.size
4280*75f6d617Schristos 				   * sizeof (PREFIX(fail_stack_elt_t)));
4281*75f6d617Schristos 	else
4282*75f6d617Schristos 	  fail_stack.stack
4283*75f6d617Schristos 	    = (PREFIX(fail_stack_elt_t) *) realloc (fail_stack.stack,
4284*75f6d617Schristos 					    (fail_stack.size
4285*75f6d617Schristos 				     * sizeof (PREFIX(fail_stack_elt_t))));
4286*75f6d617Schristos # endif /* not emacs */
4287*75f6d617Schristos       }
4288*75f6d617Schristos 
4289*75f6d617Schristos    PREFIX(regex_grow_registers) (num_regs);
4290*75f6d617Schristos   }
4291*75f6d617Schristos #endif /* not MATCH_MAY_ALLOCATE */
4292*75f6d617Schristos 
4293*75f6d617Schristos   return REG_NOERROR;
4294*75f6d617Schristos } /* regex_compile */
4295*75f6d617Schristos 
4296*75f6d617Schristos /* Subroutines for `regex_compile'.  */
4297*75f6d617Schristos 
4298*75f6d617Schristos /* Store OP at LOC followed by two-byte integer parameter ARG.  */
4299*75f6d617Schristos /* ifdef WCHAR, integer parameter is 1 wchar_t.  */
4300*75f6d617Schristos 
4301*75f6d617Schristos static void
4302*75f6d617Schristos PREFIX(store_op1) (op, loc, arg)
4303*75f6d617Schristos     re_opcode_t op;
4304*75f6d617Schristos     UCHAR_T *loc;
4305*75f6d617Schristos     int arg;
4306*75f6d617Schristos {
4307*75f6d617Schristos   *loc = (UCHAR_T) op;
4308*75f6d617Schristos   STORE_NUMBER (loc + 1, arg);
4309*75f6d617Schristos }
4310*75f6d617Schristos 
4311*75f6d617Schristos 
4312*75f6d617Schristos /* Like `store_op1', but for two two-byte parameters ARG1 and ARG2.  */
4313*75f6d617Schristos /* ifdef WCHAR, integer parameter is 1 wchar_t.  */
4314*75f6d617Schristos 
4315*75f6d617Schristos static void
4316*75f6d617Schristos PREFIX(store_op2) (op, loc, arg1, arg2)
4317*75f6d617Schristos     re_opcode_t op;
4318*75f6d617Schristos     UCHAR_T *loc;
4319*75f6d617Schristos     int arg1, arg2;
4320*75f6d617Schristos {
4321*75f6d617Schristos   *loc = (UCHAR_T) op;
4322*75f6d617Schristos   STORE_NUMBER (loc + 1, arg1);
4323*75f6d617Schristos   STORE_NUMBER (loc + 1 + OFFSET_ADDRESS_SIZE, arg2);
4324*75f6d617Schristos }
4325*75f6d617Schristos 
4326*75f6d617Schristos 
4327*75f6d617Schristos /* Copy the bytes from LOC to END to open up three bytes of space at LOC
4328*75f6d617Schristos    for OP followed by two-byte integer parameter ARG.  */
4329*75f6d617Schristos /* ifdef WCHAR, integer parameter is 1 wchar_t.  */
4330*75f6d617Schristos 
4331*75f6d617Schristos static void
4332*75f6d617Schristos PREFIX(insert_op1) (op, loc, arg, end)
4333*75f6d617Schristos     re_opcode_t op;
4334*75f6d617Schristos     UCHAR_T *loc;
4335*75f6d617Schristos     int arg;
4336*75f6d617Schristos     UCHAR_T *end;
4337*75f6d617Schristos {
4338*75f6d617Schristos   register UCHAR_T *pfrom = end;
4339*75f6d617Schristos   register UCHAR_T *pto = end + 1 + OFFSET_ADDRESS_SIZE;
4340*75f6d617Schristos 
4341*75f6d617Schristos   while (pfrom != loc)
4342*75f6d617Schristos     *--pto = *--pfrom;
4343*75f6d617Schristos 
4344*75f6d617Schristos   PREFIX(store_op1) (op, loc, arg);
4345*75f6d617Schristos }
4346*75f6d617Schristos 
4347*75f6d617Schristos 
4348*75f6d617Schristos /* Like `insert_op1', but for two two-byte parameters ARG1 and ARG2.  */
4349*75f6d617Schristos /* ifdef WCHAR, integer parameter is 1 wchar_t.  */
4350*75f6d617Schristos 
4351*75f6d617Schristos static void
4352*75f6d617Schristos PREFIX(insert_op2) (op, loc, arg1, arg2, end)
4353*75f6d617Schristos     re_opcode_t op;
4354*75f6d617Schristos     UCHAR_T *loc;
4355*75f6d617Schristos     int arg1, arg2;
4356*75f6d617Schristos     UCHAR_T *end;
4357*75f6d617Schristos {
4358*75f6d617Schristos   register UCHAR_T *pfrom = end;
4359*75f6d617Schristos   register UCHAR_T *pto = end + 1 + 2 * OFFSET_ADDRESS_SIZE;
4360*75f6d617Schristos 
4361*75f6d617Schristos   while (pfrom != loc)
4362*75f6d617Schristos     *--pto = *--pfrom;
4363*75f6d617Schristos 
4364*75f6d617Schristos   PREFIX(store_op2) (op, loc, arg1, arg2);
4365*75f6d617Schristos }
4366*75f6d617Schristos 
4367*75f6d617Schristos 
4368*75f6d617Schristos /* P points to just after a ^ in PATTERN.  Return true if that ^ comes
4369*75f6d617Schristos    after an alternative or a begin-subexpression.  We assume there is at
4370*75f6d617Schristos    least one character before the ^.  */
4371*75f6d617Schristos 
4372*75f6d617Schristos static boolean
4373*75f6d617Schristos PREFIX(at_begline_loc_p) (pattern, p, syntax)
4374*75f6d617Schristos     const CHAR_T *pattern, *p;
4375*75f6d617Schristos     reg_syntax_t syntax;
4376*75f6d617Schristos {
4377*75f6d617Schristos   const CHAR_T *prev = p - 2;
4378*75f6d617Schristos   boolean prev_prev_backslash = prev > pattern && prev[-1] == '\\';
4379*75f6d617Schristos 
4380*75f6d617Schristos   return
4381*75f6d617Schristos        /* After a subexpression?  */
4382*75f6d617Schristos        (*prev == '(' && (syntax & RE_NO_BK_PARENS || prev_prev_backslash))
4383*75f6d617Schristos        /* After an alternative?  */
4384*75f6d617Schristos     || (*prev == '|' && (syntax & RE_NO_BK_VBAR || prev_prev_backslash));
4385*75f6d617Schristos }
4386*75f6d617Schristos 
4387*75f6d617Schristos 
4388*75f6d617Schristos /* The dual of at_begline_loc_p.  This one is for $.  We assume there is
4389*75f6d617Schristos    at least one character after the $, i.e., `P < PEND'.  */
4390*75f6d617Schristos 
4391*75f6d617Schristos static boolean
4392*75f6d617Schristos PREFIX(at_endline_loc_p) (p, pend, syntax)
4393*75f6d617Schristos     const CHAR_T *p, *pend;
4394*75f6d617Schristos     reg_syntax_t syntax;
4395*75f6d617Schristos {
4396*75f6d617Schristos   const CHAR_T *next = p;
4397*75f6d617Schristos   boolean next_backslash = *next == '\\';
4398*75f6d617Schristos   const CHAR_T *next_next = p + 1 < pend ? p + 1 : 0;
4399*75f6d617Schristos 
4400*75f6d617Schristos   return
4401*75f6d617Schristos        /* Before a subexpression?  */
4402*75f6d617Schristos        (syntax & RE_NO_BK_PARENS ? *next == ')'
4403*75f6d617Schristos         : next_backslash && next_next && *next_next == ')')
4404*75f6d617Schristos        /* Before an alternative?  */
4405*75f6d617Schristos     || (syntax & RE_NO_BK_VBAR ? *next == '|'
4406*75f6d617Schristos         : next_backslash && next_next && *next_next == '|');
4407*75f6d617Schristos }
4408*75f6d617Schristos 
4409*75f6d617Schristos #else /* not INSIDE_RECURSION */
4410*75f6d617Schristos 
4411*75f6d617Schristos /* Returns true if REGNUM is in one of COMPILE_STACK's elements and
4412*75f6d617Schristos    false if it's not.  */
4413*75f6d617Schristos 
4414*75f6d617Schristos static boolean
group_in_compile_stack(compile_stack,regnum)4415*75f6d617Schristos group_in_compile_stack (compile_stack, regnum)
4416*75f6d617Schristos     compile_stack_type compile_stack;
4417*75f6d617Schristos     regnum_t regnum;
4418*75f6d617Schristos {
4419*75f6d617Schristos   int this_element;
4420*75f6d617Schristos 
4421*75f6d617Schristos   for (this_element = compile_stack.avail - 1;
4422*75f6d617Schristos        this_element >= 0;
4423*75f6d617Schristos        this_element--)
4424*75f6d617Schristos     if (compile_stack.stack[this_element].regnum == regnum)
4425*75f6d617Schristos       return true;
4426*75f6d617Schristos 
4427*75f6d617Schristos   return false;
4428*75f6d617Schristos }
4429*75f6d617Schristos #endif /* not INSIDE_RECURSION */
4430*75f6d617Schristos 
4431*75f6d617Schristos #ifdef INSIDE_RECURSION
4432*75f6d617Schristos 
4433*75f6d617Schristos #ifdef WCHAR
4434*75f6d617Schristos /* This insert space, which size is "num", into the pattern at "loc".
4435*75f6d617Schristos    "end" must point the end of the allocated buffer.  */
4436*75f6d617Schristos static void
insert_space(num,loc,end)4437*75f6d617Schristos insert_space (num, loc, end)
4438*75f6d617Schristos      int num;
4439*75f6d617Schristos      CHAR_T *loc;
4440*75f6d617Schristos      CHAR_T *end;
4441*75f6d617Schristos {
4442*75f6d617Schristos   register CHAR_T *pto = end;
4443*75f6d617Schristos   register CHAR_T *pfrom = end - num;
4444*75f6d617Schristos 
4445*75f6d617Schristos   while (pfrom >= loc)
4446*75f6d617Schristos     *pto-- = *pfrom--;
4447*75f6d617Schristos }
4448*75f6d617Schristos #endif /* WCHAR */
4449*75f6d617Schristos 
4450*75f6d617Schristos #ifdef WCHAR
4451*75f6d617Schristos static reg_errcode_t
wcs_compile_range(range_start_char,p_ptr,pend,translate,syntax,b,char_set)4452*75f6d617Schristos wcs_compile_range (range_start_char, p_ptr, pend, translate, syntax, b,
4453*75f6d617Schristos 		   char_set)
4454*75f6d617Schristos      CHAR_T range_start_char;
4455*75f6d617Schristos      const CHAR_T **p_ptr, *pend;
4456*75f6d617Schristos      CHAR_T *char_set, *b;
4457*75f6d617Schristos      RE_TRANSLATE_TYPE translate;
4458*75f6d617Schristos      reg_syntax_t syntax;
4459*75f6d617Schristos {
4460*75f6d617Schristos   const CHAR_T *p = *p_ptr;
4461*75f6d617Schristos   CHAR_T range_start, range_end;
4462*75f6d617Schristos   reg_errcode_t ret;
4463*75f6d617Schristos # ifdef _LIBC
4464*75f6d617Schristos   uint32_t nrules;
4465*75f6d617Schristos   uint32_t start_val, end_val;
4466*75f6d617Schristos # endif
4467*75f6d617Schristos   if (p == pend)
4468*75f6d617Schristos     return REG_ERANGE;
4469*75f6d617Schristos 
4470*75f6d617Schristos # ifdef _LIBC
4471*75f6d617Schristos   nrules = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
4472*75f6d617Schristos   if (nrules != 0)
4473*75f6d617Schristos     {
4474*75f6d617Schristos       const char *collseq = (const char *) _NL_CURRENT(LC_COLLATE,
4475*75f6d617Schristos 						       _NL_COLLATE_COLLSEQWC);
4476*75f6d617Schristos       const unsigned char *extra = (const unsigned char *)
4477*75f6d617Schristos 	_NL_CURRENT (LC_COLLATE, _NL_COLLATE_SYMB_EXTRAMB);
4478*75f6d617Schristos 
4479*75f6d617Schristos       if (range_start_char < -1)
4480*75f6d617Schristos 	{
4481*75f6d617Schristos 	  /* range_start is a collating symbol.  */
4482*75f6d617Schristos 	  int32_t *wextra;
4483*75f6d617Schristos 	  /* Retreive the index and get collation sequence value.  */
4484*75f6d617Schristos 	  wextra = (int32_t*)(extra + char_set[-range_start_char]);
4485*75f6d617Schristos 	  start_val = wextra[1 + *wextra];
4486*75f6d617Schristos 	}
4487*75f6d617Schristos       else
4488*75f6d617Schristos 	start_val = collseq_table_lookup(collseq, TRANSLATE(range_start_char));
4489*75f6d617Schristos 
4490*75f6d617Schristos       end_val = collseq_table_lookup (collseq, TRANSLATE (p[0]));
4491*75f6d617Schristos 
4492*75f6d617Schristos       /* Report an error if the range is empty and the syntax prohibits
4493*75f6d617Schristos 	 this.  */
4494*75f6d617Schristos       ret = ((syntax & RE_NO_EMPTY_RANGES)
4495*75f6d617Schristos 	     && (start_val > end_val))? REG_ERANGE : REG_NOERROR;
4496*75f6d617Schristos 
4497*75f6d617Schristos       /* Insert space to the end of the char_ranges.  */
4498*75f6d617Schristos       insert_space(2, b - char_set[5] - 2, b - 1);
4499*75f6d617Schristos       *(b - char_set[5] - 2) = (wchar_t)start_val;
4500*75f6d617Schristos       *(b - char_set[5] - 1) = (wchar_t)end_val;
4501*75f6d617Schristos       char_set[4]++; /* ranges_index */
4502*75f6d617Schristos     }
4503*75f6d617Schristos   else
4504*75f6d617Schristos # endif
4505*75f6d617Schristos     {
4506*75f6d617Schristos       range_start = (range_start_char >= 0)? TRANSLATE (range_start_char):
4507*75f6d617Schristos 	range_start_char;
4508*75f6d617Schristos       range_end = TRANSLATE (p[0]);
4509*75f6d617Schristos       /* Report an error if the range is empty and the syntax prohibits
4510*75f6d617Schristos 	 this.  */
4511*75f6d617Schristos       ret = ((syntax & RE_NO_EMPTY_RANGES)
4512*75f6d617Schristos 	     && (range_start > range_end))? REG_ERANGE : REG_NOERROR;
4513*75f6d617Schristos 
4514*75f6d617Schristos       /* Insert space to the end of the char_ranges.  */
4515*75f6d617Schristos       insert_space(2, b - char_set[5] - 2, b - 1);
4516*75f6d617Schristos       *(b - char_set[5] - 2) = range_start;
4517*75f6d617Schristos       *(b - char_set[5] - 1) = range_end;
4518*75f6d617Schristos       char_set[4]++; /* ranges_index */
4519*75f6d617Schristos     }
4520*75f6d617Schristos   /* Have to increment the pointer into the pattern string, so the
4521*75f6d617Schristos      caller isn't still at the ending character.  */
4522*75f6d617Schristos   (*p_ptr)++;
4523*75f6d617Schristos 
4524*75f6d617Schristos   return ret;
4525*75f6d617Schristos }
4526*75f6d617Schristos #else /* BYTE */
4527*75f6d617Schristos /* Read the ending character of a range (in a bracket expression) from the
4528*75f6d617Schristos    uncompiled pattern *P_PTR (which ends at PEND).  We assume the
4529*75f6d617Schristos    starting character is in `P[-2]'.  (`P[-1]' is the character `-'.)
4530*75f6d617Schristos    Then we set the translation of all bits between the starting and
4531*75f6d617Schristos    ending characters (inclusive) in the compiled pattern B.
4532*75f6d617Schristos 
4533*75f6d617Schristos    Return an error code.
4534*75f6d617Schristos 
4535*75f6d617Schristos    We use these short variable names so we can use the same macros as
4536*75f6d617Schristos    `regex_compile' itself.  */
4537*75f6d617Schristos 
4538*75f6d617Schristos static reg_errcode_t
byte_compile_range(range_start_char,p_ptr,pend,translate,syntax,b)4539*75f6d617Schristos byte_compile_range (range_start_char, p_ptr, pend, translate, syntax, b)
4540*75f6d617Schristos      unsigned int range_start_char;
4541*75f6d617Schristos      const char **p_ptr, *pend;
4542*75f6d617Schristos      RE_TRANSLATE_TYPE translate;
4543*75f6d617Schristos      reg_syntax_t syntax;
4544*75f6d617Schristos      unsigned char *b;
4545*75f6d617Schristos {
4546*75f6d617Schristos   unsigned this_char;
4547*75f6d617Schristos   const char *p = *p_ptr;
4548*75f6d617Schristos   reg_errcode_t ret;
4549*75f6d617Schristos # if _LIBC
4550*75f6d617Schristos   const unsigned char *collseq;
4551*75f6d617Schristos   unsigned int start_colseq;
4552*75f6d617Schristos   unsigned int end_colseq;
4553*75f6d617Schristos # else
4554*75f6d617Schristos   unsigned end_char;
4555*75f6d617Schristos # endif
4556*75f6d617Schristos 
4557*75f6d617Schristos   if (p == pend)
4558*75f6d617Schristos     return REG_ERANGE;
4559*75f6d617Schristos 
4560*75f6d617Schristos   /* Have to increment the pointer into the pattern string, so the
4561*75f6d617Schristos      caller isn't still at the ending character.  */
4562*75f6d617Schristos   (*p_ptr)++;
4563*75f6d617Schristos 
4564*75f6d617Schristos   /* Report an error if the range is empty and the syntax prohibits this.  */
4565*75f6d617Schristos   ret = syntax & RE_NO_EMPTY_RANGES ? REG_ERANGE : REG_NOERROR;
4566*75f6d617Schristos 
4567*75f6d617Schristos # if _LIBC
4568*75f6d617Schristos   collseq = (const unsigned char *) _NL_CURRENT (LC_COLLATE,
4569*75f6d617Schristos 						 _NL_COLLATE_COLLSEQMB);
4570*75f6d617Schristos 
4571*75f6d617Schristos   start_colseq = collseq[(unsigned char) TRANSLATE (range_start_char)];
4572*75f6d617Schristos   end_colseq = collseq[(unsigned char) TRANSLATE (p[0])];
4573*75f6d617Schristos   for (this_char = 0; this_char <= (unsigned char) -1; ++this_char)
4574*75f6d617Schristos     {
4575*75f6d617Schristos       unsigned int this_colseq = collseq[(unsigned char) TRANSLATE (this_char)];
4576*75f6d617Schristos 
4577*75f6d617Schristos       if (start_colseq <= this_colseq && this_colseq <= end_colseq)
4578*75f6d617Schristos 	{
4579*75f6d617Schristos 	  SET_LIST_BIT (TRANSLATE (this_char));
4580*75f6d617Schristos 	  ret = REG_NOERROR;
4581*75f6d617Schristos 	}
4582*75f6d617Schristos     }
4583*75f6d617Schristos # else
4584*75f6d617Schristos   /* Here we see why `this_char' has to be larger than an `unsigned
4585*75f6d617Schristos      char' -- we would otherwise go into an infinite loop, since all
4586*75f6d617Schristos      characters <= 0xff.  */
4587*75f6d617Schristos   range_start_char = TRANSLATE (range_start_char);
4588*75f6d617Schristos   /* TRANSLATE(p[0]) is casted to char (not unsigned char) in TRANSLATE,
4589*75f6d617Schristos      and some compilers cast it to int implicitly, so following for_loop
4590*75f6d617Schristos      may fall to (almost) infinite loop.
4591*75f6d617Schristos      e.g. If translate[p[0]] = 0xff, end_char may equals to 0xffffffff.
4592*75f6d617Schristos      To avoid this, we cast p[0] to unsigned int and truncate it.  */
4593*75f6d617Schristos   end_char = ((unsigned)TRANSLATE(p[0]) & ((1 << BYTEWIDTH) - 1));
4594*75f6d617Schristos 
4595*75f6d617Schristos   for (this_char = range_start_char; this_char <= end_char; ++this_char)
4596*75f6d617Schristos     {
4597*75f6d617Schristos       SET_LIST_BIT (TRANSLATE (this_char));
4598*75f6d617Schristos       ret = REG_NOERROR;
4599*75f6d617Schristos     }
4600*75f6d617Schristos # endif
4601*75f6d617Schristos 
4602*75f6d617Schristos   return ret;
4603*75f6d617Schristos }
4604*75f6d617Schristos #endif /* WCHAR */
4605*75f6d617Schristos 
4606*75f6d617Schristos /* re_compile_fastmap computes a ``fastmap'' for the compiled pattern in
4607*75f6d617Schristos    BUFP.  A fastmap records which of the (1 << BYTEWIDTH) possible
4608*75f6d617Schristos    characters can start a string that matches the pattern.  This fastmap
4609*75f6d617Schristos    is used by re_search to skip quickly over impossible starting points.
4610*75f6d617Schristos 
4611*75f6d617Schristos    The caller must supply the address of a (1 << BYTEWIDTH)-byte data
4612*75f6d617Schristos    area as BUFP->fastmap.
4613*75f6d617Schristos 
4614*75f6d617Schristos    We set the `fastmap', `fastmap_accurate', and `can_be_null' fields in
4615*75f6d617Schristos    the pattern buffer.
4616*75f6d617Schristos 
4617*75f6d617Schristos    Returns 0 if we succeed, -2 if an internal error.   */
4618*75f6d617Schristos 
4619*75f6d617Schristos #ifdef WCHAR
4620*75f6d617Schristos /* local function for re_compile_fastmap.
4621*75f6d617Schristos    truncate wchar_t character to char.  */
4622*75f6d617Schristos static unsigned char truncate_wchar (CHAR_T c);
4623*75f6d617Schristos 
4624*75f6d617Schristos static unsigned char
truncate_wchar(c)4625*75f6d617Schristos truncate_wchar (c)
4626*75f6d617Schristos      CHAR_T c;
4627*75f6d617Schristos {
4628*75f6d617Schristos   unsigned char buf[MB_CUR_MAX];
4629*75f6d617Schristos   mbstate_t state;
4630*75f6d617Schristos   int retval;
4631*75f6d617Schristos   memset (&state, '\0', sizeof (state));
4632*75f6d617Schristos   retval = wcrtomb (buf, c, &state);
4633*75f6d617Schristos   return retval > 0 ? buf[0] : (unsigned char) c;
4634*75f6d617Schristos }
4635*75f6d617Schristos #endif /* WCHAR */
4636*75f6d617Schristos 
/* Fill in BUFP->fastmap, a table of 1 << BYTEWIDTH flags in which entry C
   is set to 1 when character C can begin a match of the compiled pattern
   held in BUFP->buffer, and set BUFP->can_be_null when some path through
   the pattern can match the empty string (or when no useful fastmap can
   be computed, as for back references).

   The pattern is scanned one opcode at a time.  The target of each
   on_failure_jump style opcode is saved on a local failure stack and
   scanned later as a separate path.

   Returns 0 normally, or -2 if pushing an alternative onto the failure
   stack fails.  */
4637*75f6d617Schristos static int
4638*75f6d617Schristos PREFIX(re_compile_fastmap) (bufp)
4639*75f6d617Schristos      struct re_pattern_buffer *bufp;
4640*75f6d617Schristos {
4641*75f6d617Schristos   int j, k;
4642*75f6d617Schristos #ifdef MATCH_MAY_ALLOCATE
4643*75f6d617Schristos   PREFIX(fail_stack_type) fail_stack;
4644*75f6d617Schristos #endif
4645*75f6d617Schristos #ifndef REGEX_MALLOC
4646*75f6d617Schristos   char *destination;
4647*75f6d617Schristos #endif
4648*75f6d617Schristos 
4649*75f6d617Schristos   register char *fastmap = bufp->fastmap;
4650*75f6d617Schristos 
4651*75f6d617Schristos #ifdef WCHAR
4652*75f6d617Schristos   /* We need to cast pattern to (wchar_t*), because we casted this compiled
4653*75f6d617Schristos      pattern to (char*) in regex_compile.  */
4654*75f6d617Schristos   UCHAR_T *pattern = (UCHAR_T*)bufp->buffer;
4655*75f6d617Schristos   register UCHAR_T *pend = (UCHAR_T*) (bufp->buffer + bufp->used);
4656*75f6d617Schristos #else /* BYTE */
4657*75f6d617Schristos   UCHAR_T *pattern = bufp->buffer;
4658*75f6d617Schristos   register UCHAR_T *pend = pattern + bufp->used;
4659*75f6d617Schristos #endif /* WCHAR */
4660*75f6d617Schristos   UCHAR_T *p = pattern;
4661*75f6d617Schristos 
4662*75f6d617Schristos #ifdef REL_ALLOC
4663*75f6d617Schristos   /* This holds the pointer to the failure stack, when
4664*75f6d617Schristos      it is allocated relocatably.  */
4665*75f6d617Schristos   fail_stack_elt_t *failure_stack_ptr;
4666*75f6d617Schristos #endif
4667*75f6d617Schristos 
4668*75f6d617Schristos   /* Assume that each path through the pattern can be null until
4669*75f6d617Schristos      proven otherwise.  We set this false at the bottom of switch
4670*75f6d617Schristos      statement, to which we get only if a particular path doesn't
4671*75f6d617Schristos      match the empty string.  */
4672*75f6d617Schristos   boolean path_can_be_null = true;
4673*75f6d617Schristos 
4674*75f6d617Schristos   /* We aren't doing a `succeed_n' to begin with.  */
4675*75f6d617Schristos   boolean succeed_n_p = false;
4676*75f6d617Schristos 
4677*75f6d617Schristos   assert (fastmap != NULL && p != NULL);
4678*75f6d617Schristos 
4679*75f6d617Schristos   INIT_FAIL_STACK ();
4680*75f6d617Schristos   bzero (fastmap, 1 << BYTEWIDTH);  /* Assume nothing's valid.  */
4681*75f6d617Schristos   bufp->fastmap_accurate = 1;	    /* It will be when we're done.  */
4682*75f6d617Schristos   bufp->can_be_null = 0;
4683*75f6d617Schristos 
  /* Scan the pattern opcode by opcode.  Cases that consume a character
     leave the switch with `break', which ends the current path; cases
     that consume nothing use `continue' to keep following it.  */
4684*75f6d617Schristos   while (1)
4685*75f6d617Schristos     {
4686*75f6d617Schristos       if (p == pend || *p == succeed)
4687*75f6d617Schristos 	{
4688*75f6d617Schristos 	  /* We have reached the (effective) end of pattern.  */
4689*75f6d617Schristos 	  if (!FAIL_STACK_EMPTY ())
4690*75f6d617Schristos 	    {
4691*75f6d617Schristos 	      bufp->can_be_null |= path_can_be_null;
4692*75f6d617Schristos 
4693*75f6d617Schristos 	      /* Reset for next path.  */
4694*75f6d617Schristos 	      path_can_be_null = true;
4695*75f6d617Schristos 
	      /* Resume scanning at the most recently saved alternative.  */
4696*75f6d617Schristos 	      p = fail_stack.stack[--fail_stack.avail].pointer;
4697*75f6d617Schristos 
4698*75f6d617Schristos 	      continue;
4699*75f6d617Schristos 	    }
4700*75f6d617Schristos 	  else
4701*75f6d617Schristos 	    break;
4702*75f6d617Schristos 	}
4703*75f6d617Schristos 
4704*75f6d617Schristos       /* We should never be about to go beyond the end of the pattern.  */
4705*75f6d617Schristos       assert (p < pend);
4706*75f6d617Schristos 
4707*75f6d617Schristos       switch (SWITCH_ENUM_CAST ((re_opcode_t) *p++))
4708*75f6d617Schristos 	{
4709*75f6d617Schristos 
4710*75f6d617Schristos         /* I guess the idea here is to simply not bother with a fastmap
4711*75f6d617Schristos            if a backreference is used, since it's too hard to figure out
4712*75f6d617Schristos            the fastmap for the corresponding group.  Setting
4713*75f6d617Schristos            `can_be_null' stops `re_search_2' from using the fastmap, so
4714*75f6d617Schristos            that is all we do.  */
4715*75f6d617Schristos 	case duplicate:
4716*75f6d617Schristos 	  bufp->can_be_null = 1;
4717*75f6d617Schristos           goto done;
4718*75f6d617Schristos 
4719*75f6d617Schristos 
4720*75f6d617Schristos       /* Following are the cases which match a character.  These end
4721*75f6d617Schristos          with `break'.  */
4722*75f6d617Schristos 
	/* Only p[1] is entered in the map -- presumably the first character
	   of the literal run, with p[0] holding its length; confirm against
	   regex_compile.  */
4723*75f6d617Schristos #ifdef WCHAR
4724*75f6d617Schristos 	case exactn:
4725*75f6d617Schristos           fastmap[truncate_wchar(p[1])] = 1;
4726*75f6d617Schristos 	  break;
4727*75f6d617Schristos #else /* BYTE */
4728*75f6d617Schristos 	case exactn:
4729*75f6d617Schristos           fastmap[p[1]] = 1;
4730*75f6d617Schristos 	  break;
4731*75f6d617Schristos #endif /* WCHAR */
4732*75f6d617Schristos #ifdef MBS_SUPPORT
4733*75f6d617Schristos 	case exactn_bin:
4734*75f6d617Schristos 	  fastmap[p[1]] = 1;
4735*75f6d617Schristos 	  break;
4736*75f6d617Schristos #endif
4737*75f6d617Schristos 
4738*75f6d617Schristos #ifdef WCHAR
4739*75f6d617Schristos         /* It is hard to distinguish fastmap from (multi byte) characters
4740*75f6d617Schristos            which depends on current locale.  */
4741*75f6d617Schristos         case charset:
4742*75f6d617Schristos 	case charset_not:
4743*75f6d617Schristos 	case wordchar:
4744*75f6d617Schristos 	case notwordchar:
4745*75f6d617Schristos           bufp->can_be_null = 1;
4746*75f6d617Schristos           goto done;
4747*75f6d617Schristos #else /* BYTE */
4748*75f6d617Schristos         case charset:
	  /* *p is the bitmap length in bytes; every character whose bit is
	     set in the bitmap that follows it can start a match.  */
4749*75f6d617Schristos           for (j = *p++ * BYTEWIDTH - 1; j >= 0; j--)
4750*75f6d617Schristos 	    if (p[j / BYTEWIDTH] & (1 << (j % BYTEWIDTH)))
4751*75f6d617Schristos               fastmap[j] = 1;
4752*75f6d617Schristos 	  break;
4753*75f6d617Schristos 
4754*75f6d617Schristos 
4755*75f6d617Schristos 	case charset_not:
4756*75f6d617Schristos 	  /* Chars beyond end of map must be allowed.  */
4757*75f6d617Schristos 	  for (j = *p * BYTEWIDTH; j < (1 << BYTEWIDTH); j++)
4758*75f6d617Schristos             fastmap[j] = 1;
4759*75f6d617Schristos 
4760*75f6d617Schristos 	  for (j = *p++ * BYTEWIDTH - 1; j >= 0; j--)
4761*75f6d617Schristos 	    if (!(p[j / BYTEWIDTH] & (1 << (j % BYTEWIDTH))))
4762*75f6d617Schristos               fastmap[j] = 1;
4763*75f6d617Schristos           break;
4764*75f6d617Schristos 
4765*75f6d617Schristos 
4766*75f6d617Schristos 	case wordchar:
4767*75f6d617Schristos 	  for (j = 0; j < (1 << BYTEWIDTH); j++)
4768*75f6d617Schristos 	    if (SYNTAX (j) == Sword)
4769*75f6d617Schristos 	      fastmap[j] = 1;
4770*75f6d617Schristos 	  break;
4771*75f6d617Schristos 
4772*75f6d617Schristos 
4773*75f6d617Schristos 	case notwordchar:
4774*75f6d617Schristos 	  for (j = 0; j < (1 << BYTEWIDTH); j++)
4775*75f6d617Schristos 	    if (SYNTAX (j) != Sword)
4776*75f6d617Schristos 	      fastmap[j] = 1;
4777*75f6d617Schristos 	  break;
4778*75f6d617Schristos #endif /* WCHAR */
4779*75f6d617Schristos 
4780*75f6d617Schristos         case anychar:
4781*75f6d617Schristos 	  {
4782*75f6d617Schristos 	    int fastmap_newline = fastmap['\n'];
4783*75f6d617Schristos 
4784*75f6d617Schristos 	    /* `.' matches anything ...  */
4785*75f6d617Schristos 	    for (j = 0; j < (1 << BYTEWIDTH); j++)
4786*75f6d617Schristos 	      fastmap[j] = 1;
4787*75f6d617Schristos 
4788*75f6d617Schristos 	    /* ... except perhaps newline.  */
4789*75f6d617Schristos 	    if (!(bufp->syntax & RE_DOT_NEWLINE))
4790*75f6d617Schristos 	      fastmap['\n'] = fastmap_newline;
4791*75f6d617Schristos 
4792*75f6d617Schristos 	    /* Return if we have already set `can_be_null'; if we have,
4793*75f6d617Schristos 	       then the fastmap is irrelevant.  Something's wrong here.  */
4794*75f6d617Schristos 	    else if (bufp->can_be_null)
4795*75f6d617Schristos 	      goto done;
4796*75f6d617Schristos 
4797*75f6d617Schristos 	    /* Otherwise, have to check alternative paths.  */
4798*75f6d617Schristos 	    break;
4799*75f6d617Schristos 	  }
4800*75f6d617Schristos 
4801*75f6d617Schristos #ifdef emacs
4802*75f6d617Schristos         case syntaxspec:
4803*75f6d617Schristos 	  k = *p++;
4804*75f6d617Schristos 	  for (j = 0; j < (1 << BYTEWIDTH); j++)
4805*75f6d617Schristos 	    if (SYNTAX (j) == (enum syntaxcode) k)
4806*75f6d617Schristos 	      fastmap[j] = 1;
4807*75f6d617Schristos 	  break;
4808*75f6d617Schristos 
4809*75f6d617Schristos 
4810*75f6d617Schristos 	case notsyntaxspec:
4811*75f6d617Schristos 	  k = *p++;
4812*75f6d617Schristos 	  for (j = 0; j < (1 << BYTEWIDTH); j++)
4813*75f6d617Schristos 	    if (SYNTAX (j) != (enum syntaxcode) k)
4814*75f6d617Schristos 	      fastmap[j] = 1;
4815*75f6d617Schristos 	  break;
4816*75f6d617Schristos 
4817*75f6d617Schristos 
4818*75f6d617Schristos       /* All cases after this match the empty string.  These end with
4819*75f6d617Schristos          `continue'.  */
4820*75f6d617Schristos 
4821*75f6d617Schristos 
4822*75f6d617Schristos 	case before_dot:
4823*75f6d617Schristos 	case at_dot:
4824*75f6d617Schristos 	case after_dot:
4825*75f6d617Schristos           continue;
4826*75f6d617Schristos #endif /* emacs */
4827*75f6d617Schristos 
4828*75f6d617Schristos 
4829*75f6d617Schristos         case no_op:
4830*75f6d617Schristos         case begline:
4831*75f6d617Schristos         case endline:
4832*75f6d617Schristos 	case begbuf:
4833*75f6d617Schristos 	case endbuf:
4834*75f6d617Schristos 	case wordbound:
4835*75f6d617Schristos 	case notwordbound:
4836*75f6d617Schristos 	case wordbeg:
4837*75f6d617Schristos 	case wordend:
4838*75f6d617Schristos         case push_dummy_failure:
4839*75f6d617Schristos           continue;
4840*75f6d617Schristos 
4841*75f6d617Schristos 
4842*75f6d617Schristos 	case jump_n:
4843*75f6d617Schristos         case pop_failure_jump:
4844*75f6d617Schristos 	case maybe_pop_jump:
4845*75f6d617Schristos 	case jump:
4846*75f6d617Schristos         case jump_past_alt:
4847*75f6d617Schristos 	case dummy_failure_jump:
	  /* J receives the displacement stored after the opcode and P is
	     moved by it; forward jumps are simply followed.  */
4848*75f6d617Schristos           EXTRACT_NUMBER_AND_INCR (j, p);
4849*75f6d617Schristos 	  p += j;
4850*75f6d617Schristos 	  if (j > 0)
4851*75f6d617Schristos 	    continue;
4852*75f6d617Schristos 
4853*75f6d617Schristos           /* Jump backward implies we just went through the body of a
4854*75f6d617Schristos              loop and matched nothing.  Opcode jumped to should be
4855*75f6d617Schristos              `on_failure_jump' or `succeed_n'.  Just treat it like an
4856*75f6d617Schristos              ordinary jump.  For a * loop, it has pushed its failure
4857*75f6d617Schristos              point already; if so, discard that as redundant.  */
4858*75f6d617Schristos           if ((re_opcode_t) *p != on_failure_jump
4859*75f6d617Schristos 	      && (re_opcode_t) *p != succeed_n)
4860*75f6d617Schristos 	    continue;
4861*75f6d617Schristos 
4862*75f6d617Schristos           p++;
4863*75f6d617Schristos           EXTRACT_NUMBER_AND_INCR (j, p);
4864*75f6d617Schristos           p += j;
4865*75f6d617Schristos 
4866*75f6d617Schristos           /* If what's on the stack is where we are now, pop it.  */
4867*75f6d617Schristos           if (!FAIL_STACK_EMPTY ()
4868*75f6d617Schristos 	      && fail_stack.stack[fail_stack.avail - 1].pointer == p)
4869*75f6d617Schristos             fail_stack.avail--;
4870*75f6d617Schristos 
4871*75f6d617Schristos           continue;
4872*75f6d617Schristos 
4873*75f6d617Schristos 
4874*75f6d617Schristos         case on_failure_jump:
4875*75f6d617Schristos         case on_failure_keep_string_jump:
4876*75f6d617Schristos 	handle_on_failure_jump:
4877*75f6d617Schristos           EXTRACT_NUMBER_AND_INCR (j, p);
4878*75f6d617Schristos 
4879*75f6d617Schristos           /* For some patterns, e.g., `(a?)?', `p+j' here points to the
4880*75f6d617Schristos              end of the pattern.  We don't want to push such a point,
4881*75f6d617Schristos              since when we restore it above, entering the switch will
4882*75f6d617Schristos              increment `p' past the end of the pattern.  We don't need
4883*75f6d617Schristos              to push such a point since we obviously won't find any more
4884*75f6d617Schristos              fastmap entries beyond `pend'.  Such a pattern can match
4885*75f6d617Schristos              the null string, though.  */
4886*75f6d617Schristos           if (p + j < pend)
4887*75f6d617Schristos             {
4888*75f6d617Schristos               if (!PUSH_PATTERN_OP (p + j, fail_stack))
4889*75f6d617Schristos 		{
4890*75f6d617Schristos 		  RESET_FAIL_STACK ();
4891*75f6d617Schristos 		  return -2;
4892*75f6d617Schristos 		}
4893*75f6d617Schristos             }
4894*75f6d617Schristos           else
4895*75f6d617Schristos             bufp->can_be_null = 1;
4896*75f6d617Schristos 
4897*75f6d617Schristos           if (succeed_n_p)
4898*75f6d617Schristos             {
4899*75f6d617Schristos               EXTRACT_NUMBER_AND_INCR (k, p);	/* Skip the n.  */
4900*75f6d617Schristos               succeed_n_p = false;
4901*75f6d617Schristos 	    }
4902*75f6d617Schristos 
4903*75f6d617Schristos           continue;
4904*75f6d617Schristos 
4905*75f6d617Schristos 
4906*75f6d617Schristos 	case succeed_n:
4907*75f6d617Schristos           /* Get to the number of times to succeed.  */
4908*75f6d617Schristos           p += OFFSET_ADDRESS_SIZE;
4909*75f6d617Schristos 
4910*75f6d617Schristos           /* Increment p past the n for when k != 0.  */
4911*75f6d617Schristos           EXTRACT_NUMBER_AND_INCR (k, p);
4912*75f6d617Schristos           if (k == 0)
4913*75f6d617Schristos 	    {
	      /* Back up to the jump offset so the shared code above can
		 read it; succeed_n_p tells that code to skip the count
		 afterwards.  */
4914*75f6d617Schristos               p -= 2 * OFFSET_ADDRESS_SIZE;
4915*75f6d617Schristos   	      succeed_n_p = true;  /* Spaghetti code alert.  */
4916*75f6d617Schristos               goto handle_on_failure_jump;
4917*75f6d617Schristos             }
4918*75f6d617Schristos           continue;
4919*75f6d617Schristos 
4920*75f6d617Schristos 
4921*75f6d617Schristos 	case set_number_at:
4922*75f6d617Schristos           p += 2 * OFFSET_ADDRESS_SIZE;
4923*75f6d617Schristos           continue;
4924*75f6d617Schristos 
4925*75f6d617Schristos 
4926*75f6d617Schristos 	case start_memory:
4927*75f6d617Schristos         case stop_memory:
	  /* Skip the two pattern cells that follow the opcode.  */
4928*75f6d617Schristos 	  p += 2;
4929*75f6d617Schristos 	  continue;
4930*75f6d617Schristos 
4931*75f6d617Schristos 
4932*75f6d617Schristos 	default:
4933*75f6d617Schristos           abort (); /* We have listed all the cases.  */
4934*75f6d617Schristos         } /* switch *p++ */
4935*75f6d617Schristos 
4936*75f6d617Schristos       /* Getting here means we have found the possible starting
4937*75f6d617Schristos          characters for one path of the pattern -- and that the empty
4938*75f6d617Schristos          string does not match.  We need not follow this path further.
4939*75f6d617Schristos          Instead, look at the next alternative (remembered on the
4940*75f6d617Schristos          stack), or quit if no more.  The test at the top of the loop
4941*75f6d617Schristos          does these things.  */
4942*75f6d617Schristos       path_can_be_null = false;
4943*75f6d617Schristos       p = pend;
4944*75f6d617Schristos     } /* while p */
4945*75f6d617Schristos 
4946*75f6d617Schristos   /* Set `can_be_null' for the last path (also the first path, if the
4947*75f6d617Schristos      pattern is empty).  */
4948*75f6d617Schristos   bufp->can_be_null |= path_can_be_null;
4949*75f6d617Schristos 
4950*75f6d617Schristos  done:
4951*75f6d617Schristos   RESET_FAIL_STACK ();
4952*75f6d617Schristos   return 0;
4953*75f6d617Schristos }
4954*75f6d617Schristos 
4955*75f6d617Schristos #else /* not INSIDE_RECURSION */
4956*75f6d617Schristos 
/* Public entry point for fastmap construction.  The work is handed to
   one of two implementations: when MBS_SUPPORT is compiled in and
   MB_CUR_MAX is not 1, the wcs_ variant is called; in every other case
   the byte_ variant is.  Whatever the chosen routine returns is passed
   back to the caller unchanged.  */
int
re_compile_fastmap (bufp)
     struct re_pattern_buffer *bufp;
{
# ifdef MBS_SUPPORT
  if (MB_CUR_MAX != 1)
    return wcs_re_compile_fastmap (bufp);
# endif

  return byte_re_compile_fastmap (bufp);
} /* re_compile_fastmap */
4968*75f6d617Schristos #ifdef _LIBC
4969*75f6d617Schristos weak_alias (__re_compile_fastmap, re_compile_fastmap)
4970*75f6d617Schristos #endif
4971*75f6d617Schristos 
4972*75f6d617Schristos 
4973*75f6d617Schristos /* Set REGS to hold NUM_REGS registers, storing them in STARTS and
4974*75f6d617Schristos    ENDS.  Subsequent matches using PATTERN_BUFFER and REGS will use
4975*75f6d617Schristos    this memory for recording register information.  STARTS and ENDS
4976*75f6d617Schristos    must be allocated using the malloc library routine, and must each
4977*75f6d617Schristos    be at least NUM_REGS * sizeof (regoff_t) bytes long.
4978*75f6d617Schristos 
4979*75f6d617Schristos    If NUM_REGS == 0, then subsequent matches should allocate their own
4980*75f6d617Schristos    register data.
4981*75f6d617Schristos 
4982*75f6d617Schristos    Unless this function is called, the first search or match using
4983*75f6d617Schristos    PATTERN_BUFFER will allocate its own register data, without
4984*75f6d617Schristos    freeing the old data.  */
4985*75f6d617Schristos 
4986*75f6d617Schristos void
4987*75f6d617Schristos re_set_registers (bufp, regs, num_regs, starts, ends)
4988*75f6d617Schristos     struct re_pattern_buffer *bufp;
4989*75f6d617Schristos     struct re_registers *regs;
4990*75f6d617Schristos     unsigned num_regs;
4991*75f6d617Schristos     regoff_t *starts, *ends;
4992*75f6d617Schristos {
4993*75f6d617Schristos   if (num_regs)
4994*75f6d617Schristos     {
4995*75f6d617Schristos       bufp->regs_allocated = REGS_REALLOCATE;
4996*75f6d617Schristos       regs->num_regs = num_regs;
4997*75f6d617Schristos       regs->start = starts;
4998*75f6d617Schristos       regs->end = ends;
4999*75f6d617Schristos     }
5000*75f6d617Schristos   else
5001*75f6d617Schristos     {
5002*75f6d617Schristos       bufp->regs_allocated = REGS_UNALLOCATED;
5003*75f6d617Schristos       regs->num_regs = 0;
5004*75f6d617Schristos       regs->start = regs->end = (regoff_t *) 0;
5005*75f6d617Schristos     }
5006*75f6d617Schristos }
5007*75f6d617Schristos #ifdef _LIBC
5008*75f6d617Schristos weak_alias (__re_set_registers, re_set_registers)
5009*75f6d617Schristos #endif
5010*75f6d617Schristos 
5011*75f6d617Schristos /* Searching routines.  */
5012*75f6d617Schristos 
5013*75f6d617Schristos /* Like re_search_2, below, but only one string is specified, and
5014*75f6d617Schristos    doesn't let you say where to stop matching.  */
5015*75f6d617Schristos 
int
re_search (bufp, string, size, startpos, range, regs)
     struct re_pattern_buffer *bufp;
     const char *string;
     int size, startpos, range;
     struct re_registers *regs;
{
  /* The whole input is handed to re_search_2 as its second segment; the
     first segment is empty and the stop index is the end of STRING.  */
  const char *empty_first_segment = NULL;
  int stop_at_end = size;

  return re_search_2 (bufp, empty_first_segment, 0, string, size,
		      startpos, range, regs, stop_at_end);
}
5026*75f6d617Schristos #ifdef _LIBC
5027*75f6d617Schristos weak_alias (__re_search, re_search)
5028*75f6d617Schristos #endif
5029*75f6d617Schristos 
5030*75f6d617Schristos 
5031*75f6d617Schristos /* Using the compiled pattern in BUFP->buffer, first tries to match the
5032*75f6d617Schristos    virtual concatenation of STRING1 and STRING2, starting first at index
5033*75f6d617Schristos    STARTPOS, then at STARTPOS + 1, and so on.
5034*75f6d617Schristos 
5035*75f6d617Schristos    STRING1 and STRING2 have length SIZE1 and SIZE2, respectively.
5036*75f6d617Schristos 
5037*75f6d617Schristos    RANGE is how far to scan while trying to match.  RANGE = 0 means try
5038*75f6d617Schristos    only at STARTPOS; in general, the last start tried is STARTPOS +
5039*75f6d617Schristos    RANGE.
5040*75f6d617Schristos 
5041*75f6d617Schristos    In REGS, return the indices of the virtual concatenation of STRING1
5042*75f6d617Schristos    and STRING2 that matched the entire BUFP->buffer and its contained
5043*75f6d617Schristos    subexpressions.
5044*75f6d617Schristos 
5045*75f6d617Schristos    Do not consider matching one past the index STOP in the virtual
5046*75f6d617Schristos    concatenation of STRING1 and STRING2.
5047*75f6d617Schristos 
5048*75f6d617Schristos    We return either the position in the strings at which the match was
5049*75f6d617Schristos    found, -1 if no match, or -2 if error (such as failure
5050*75f6d617Schristos    stack overflow).  */
5051*75f6d617Schristos 
int
re_search_2 (bufp, string1, size1, string2, size2, startpos, range, regs, stop)
     struct re_pattern_buffer *bufp;
     const char *string1, *string2;
     int size1, size2;
     int startpos;
     int range;
     struct re_registers *regs;
     int stop;
{
# ifdef MBS_SUPPORT
  /* With multibyte support compiled in, any locale whose MB_CUR_MAX is
     not 1 is served by the wcs_ implementation.  */
  if (MB_CUR_MAX != 1)
    return wcs_re_search_2 (bufp, string1, size1, string2, size2,
			    startpos, range, regs, stop);
# endif

  /* Everything else goes to the byte_ implementation.  */
  return byte_re_search_2 (bufp, string1, size1, string2, size2,
			   startpos, range, regs, stop);
} /* re_search_2 */
5071*75f6d617Schristos #ifdef _LIBC
5072*75f6d617Schristos weak_alias (__re_search_2, re_search_2)
5073*75f6d617Schristos #endif
5074*75f6d617Schristos 
5075*75f6d617Schristos #endif /* not INSIDE_RECURSION */
5076*75f6d617Schristos 
5077*75f6d617Schristos #ifdef INSIDE_RECURSION
5078*75f6d617Schristos 
/* FREE_VAR (VAR) releases the storage VAR points to, if any, and then
   resets VAR to NULL.  When MATCH_MAY_ALLOCATE is defined the release
   goes through REGEX_FREE; otherwise plain free is used.

   The body is wrapped in do { } while (0) so that the macro expands to
   exactly one statement.  It can therefore be used as the unbraced body
   of an `if' or `else' without the NULL assignment escaping the branch
   or detaching a following `else'.  VAR is evaluated more than once, so
   it must be a side-effect-free lvalue.  */
#ifdef MATCH_MAY_ALLOCATE
# define FREE_VAR(var)							      \
  do {									      \
    if (var)								      \
      REGEX_FREE (var);							      \
    var = NULL;								      \
  } while (0)
#else
# define FREE_VAR(var)							      \
  do {									      \
    if (var)								      \
      free (var);							      \
    var = NULL;								      \
  } while (0)
#endif
5084*75f6d617Schristos 
#ifdef WCHAR
/* Input sizes above this threshold get their conversion buffers from
   TALLOC and release them with free; sizes at or below it use
   REGEX_TALLOC and FREE_VAR.  */
# define MAX_ALLOCA_SIZE	2000

/* Release one (wide string, offset table) pair.  SIZE is the length of
   the original string and selects which release method applies.  */
# define FREE_WCS_PAIR(size, wcs, offsets)				      \
  do {									      \
    if ((size) > MAX_ALLOCA_SIZE)					      \
      {									      \
	free (wcs);							      \
	free (offsets);							      \
      }									      \
    else								      \
      {									      \
	FREE_VAR (wcs);							      \
	FREE_VAR (offsets);						      \
      }									      \
  } while (0)

/* Release the buffers for both input strings.  Relies on the enclosing
   function's locals size1, size2, wcs_string1, wcs_string2, mbs_offset1
   and mbs_offset2.  */
# define FREE_WCS_BUFFERS() \
  do {									      \
    FREE_WCS_PAIR (size1, wcs_string1, mbs_offset1);			      \
    FREE_WCS_PAIR (size2, wcs_string2, mbs_offset2);			      \
  } while (0)

#endif
5113*75f6d617Schristos 
5114*75f6d617Schristos 
5115*75f6d617Schristos static int
5116*75f6d617Schristos PREFIX(re_search_2) (bufp, string1, size1, string2, size2, startpos, range,
5117*75f6d617Schristos 		     regs, stop)
5118*75f6d617Schristos      struct re_pattern_buffer *bufp;
5119*75f6d617Schristos      const char *string1, *string2;
5120*75f6d617Schristos      int size1, size2;
5121*75f6d617Schristos      int startpos;
5122*75f6d617Schristos      int range;
5123*75f6d617Schristos      struct re_registers *regs;
5124*75f6d617Schristos      int stop;
5125*75f6d617Schristos {
5126*75f6d617Schristos   int val;
5127*75f6d617Schristos   register char *fastmap = bufp->fastmap;
5128*75f6d617Schristos   register RE_TRANSLATE_TYPE translate = bufp->translate;
5129*75f6d617Schristos   int total_size = size1 + size2;
5130*75f6d617Schristos   int endpos = startpos + range;
5131*75f6d617Schristos #ifdef WCHAR
5132*75f6d617Schristos   /* We need wchar_t* buffers correspond to cstring1, cstring2.  */
5133*75f6d617Schristos   wchar_t *wcs_string1 = NULL, *wcs_string2 = NULL;
5134*75f6d617Schristos   /* We need the size of wchar_t buffers correspond to csize1, csize2.  */
5135*75f6d617Schristos   int wcs_size1 = 0, wcs_size2 = 0;
5136*75f6d617Schristos   /* Offset buffers for optimization.  See convert_mbs_to_wcs.  */
5137*75f6d617Schristos   int *mbs_offset1 = NULL, *mbs_offset2 = NULL;
5138*75f6d617Schristos   /* They hold whether each wchar_t is binary data or not.  */
5139*75f6d617Schristos   char *is_binary = NULL;
5140*75f6d617Schristos #endif /* WCHAR */
5141*75f6d617Schristos 
5142*75f6d617Schristos   /* Check for out-of-range STARTPOS.  */
5143*75f6d617Schristos   if (startpos < 0 || startpos > total_size)
5144*75f6d617Schristos     return -1;
5145*75f6d617Schristos 
5146*75f6d617Schristos   /* Fix up RANGE if it might eventually take us outside
5147*75f6d617Schristos      the virtual concatenation of STRING1 and STRING2.
5148*75f6d617Schristos      Make sure we won't move STARTPOS below 0 or above TOTAL_SIZE.  */
5149*75f6d617Schristos   if (endpos < 0)
5150*75f6d617Schristos     range = 0 - startpos;
5151*75f6d617Schristos   else if (endpos > total_size)
5152*75f6d617Schristos     range = total_size - startpos;
5153*75f6d617Schristos 
5154*75f6d617Schristos   /* If the search isn't to be a backwards one, don't waste time in a
5155*75f6d617Schristos      search for a pattern that must be anchored.  */
5156*75f6d617Schristos   if (bufp->used > 0 && range > 0
5157*75f6d617Schristos       && ((re_opcode_t) bufp->buffer[0] == begbuf
5158*75f6d617Schristos 	  /* `begline' is like `begbuf' if it cannot match at newlines.  */
5159*75f6d617Schristos 	  || ((re_opcode_t) bufp->buffer[0] == begline
5160*75f6d617Schristos 	      && !bufp->newline_anchor)))
5161*75f6d617Schristos     {
5162*75f6d617Schristos       if (startpos > 0)
5163*75f6d617Schristos 	return -1;
5164*75f6d617Schristos       else
5165*75f6d617Schristos 	range = 1;
5166*75f6d617Schristos     }
5167*75f6d617Schristos 
5168*75f6d617Schristos #ifdef emacs
5169*75f6d617Schristos   /* In a forward search for something that starts with \=.
5170*75f6d617Schristos      don't keep searching past point.  */
5171*75f6d617Schristos   if (bufp->used > 0 && (re_opcode_t) bufp->buffer[0] == at_dot && range > 0)
5172*75f6d617Schristos     {
5173*75f6d617Schristos       range = PT - startpos;
5174*75f6d617Schristos       if (range <= 0)
5175*75f6d617Schristos 	return -1;
5176*75f6d617Schristos     }
5177*75f6d617Schristos #endif /* emacs */
5178*75f6d617Schristos 
5179*75f6d617Schristos   /* Update the fastmap now if not correct already.  */
5180*75f6d617Schristos   if (fastmap && !bufp->fastmap_accurate)
5181*75f6d617Schristos     if (re_compile_fastmap (bufp) == -2)
5182*75f6d617Schristos       return -2;
5183*75f6d617Schristos 
5184*75f6d617Schristos #ifdef WCHAR
5185*75f6d617Schristos   /* Allocate wchar_t array for wcs_string1 and wcs_string2 and
5186*75f6d617Schristos      fill them with converted string.  */
5187*75f6d617Schristos   if (size1 != 0)
5188*75f6d617Schristos     {
5189*75f6d617Schristos       if (size1 > MAX_ALLOCA_SIZE)
5190*75f6d617Schristos 	{
5191*75f6d617Schristos 	  wcs_string1 = TALLOC (size1 + 1, CHAR_T);
5192*75f6d617Schristos 	  mbs_offset1 = TALLOC (size1 + 1, int);
5193*75f6d617Schristos 	  is_binary = TALLOC (size1 + 1, char);
5194*75f6d617Schristos 	}
5195*75f6d617Schristos       else
5196*75f6d617Schristos 	{
5197*75f6d617Schristos 	  wcs_string1 = REGEX_TALLOC (size1 + 1, CHAR_T);
5198*75f6d617Schristos 	  mbs_offset1 = REGEX_TALLOC (size1 + 1, int);
5199*75f6d617Schristos 	  is_binary = REGEX_TALLOC (size1 + 1, char);
5200*75f6d617Schristos 	}
5201*75f6d617Schristos       if (!wcs_string1 || !mbs_offset1 || !is_binary)
5202*75f6d617Schristos 	{
5203*75f6d617Schristos 	  if (size1 > MAX_ALLOCA_SIZE)
5204*75f6d617Schristos 	    {
5205*75f6d617Schristos 	      free (wcs_string1);
5206*75f6d617Schristos 	      free (mbs_offset1);
5207*75f6d617Schristos 	      free (is_binary);
5208*75f6d617Schristos 	    }
5209*75f6d617Schristos 	  else
5210*75f6d617Schristos 	    {
5211*75f6d617Schristos 	      FREE_VAR (wcs_string1);
5212*75f6d617Schristos 	      FREE_VAR (mbs_offset1);
5213*75f6d617Schristos 	      FREE_VAR (is_binary);
5214*75f6d617Schristos 	    }
5215*75f6d617Schristos 	  return -2;
5216*75f6d617Schristos 	}
5217*75f6d617Schristos       wcs_size1 = convert_mbs_to_wcs(wcs_string1, string1, size1,
5218*75f6d617Schristos 				     mbs_offset1, is_binary);
5219*75f6d617Schristos       wcs_string1[wcs_size1] = L'\0'; /* for a sentinel  */
5220*75f6d617Schristos       if (size1 > MAX_ALLOCA_SIZE)
5221*75f6d617Schristos 	free (is_binary);
5222*75f6d617Schristos       else
5223*75f6d617Schristos 	FREE_VAR (is_binary);
5224*75f6d617Schristos     }
5225*75f6d617Schristos   if (size2 != 0)
5226*75f6d617Schristos     {
5227*75f6d617Schristos       if (size2 > MAX_ALLOCA_SIZE)
5228*75f6d617Schristos 	{
5229*75f6d617Schristos 	  wcs_string2 = TALLOC (size2 + 1, CHAR_T);
5230*75f6d617Schristos 	  mbs_offset2 = TALLOC (size2 + 1, int);
5231*75f6d617Schristos 	  is_binary = TALLOC (size2 + 1, char);
5232*75f6d617Schristos 	}
5233*75f6d617Schristos       else
5234*75f6d617Schristos 	{
5235*75f6d617Schristos 	  wcs_string2 = REGEX_TALLOC (size2 + 1, CHAR_T);
5236*75f6d617Schristos 	  mbs_offset2 = REGEX_TALLOC (size2 + 1, int);
5237*75f6d617Schristos 	  is_binary = REGEX_TALLOC (size2 + 1, char);
5238*75f6d617Schristos 	}
5239*75f6d617Schristos       if (!wcs_string2 || !mbs_offset2 || !is_binary)
5240*75f6d617Schristos 	{
5241*75f6d617Schristos 	  FREE_WCS_BUFFERS ();
5242*75f6d617Schristos 	  if (size2 > MAX_ALLOCA_SIZE)
5243*75f6d617Schristos 	    free (is_binary);
5244*75f6d617Schristos 	  else
5245*75f6d617Schristos 	    FREE_VAR (is_binary);
5246*75f6d617Schristos 	  return -2;
5247*75f6d617Schristos 	}
5248*75f6d617Schristos       wcs_size2 = convert_mbs_to_wcs(wcs_string2, string2, size2,
5249*75f6d617Schristos 				     mbs_offset2, is_binary);
5250*75f6d617Schristos       wcs_string2[wcs_size2] = L'\0'; /* for a sentinel  */
5251*75f6d617Schristos       if (size2 > MAX_ALLOCA_SIZE)
5252*75f6d617Schristos 	free (is_binary);
5253*75f6d617Schristos       else
5254*75f6d617Schristos 	FREE_VAR (is_binary);
5255*75f6d617Schristos     }
5256*75f6d617Schristos #endif /* WCHAR */
5257*75f6d617Schristos 
5258*75f6d617Schristos 
5259*75f6d617Schristos   /* Loop through the string, looking for a place to start matching.  */
5260*75f6d617Schristos   for (;;)
5261*75f6d617Schristos     {
5262*75f6d617Schristos       /* If a fastmap is supplied, skip quickly over characters that
5263*75f6d617Schristos          cannot be the start of a match.  If the pattern can match the
5264*75f6d617Schristos          null string, however, we don't need to skip characters; we want
5265*75f6d617Schristos          the first null string.  */
5266*75f6d617Schristos       if (fastmap && startpos < total_size && !bufp->can_be_null)
5267*75f6d617Schristos 	{
5268*75f6d617Schristos 	  if (range > 0)	/* Searching forwards.  */
5269*75f6d617Schristos 	    {
5270*75f6d617Schristos 	      register const char *d;
5271*75f6d617Schristos 	      register int lim = 0;
5272*75f6d617Schristos 	      int irange = range;
5273*75f6d617Schristos 
5274*75f6d617Schristos               if (startpos < size1 && startpos + range >= size1)
5275*75f6d617Schristos                 lim = range - (size1 - startpos);
5276*75f6d617Schristos 
5277*75f6d617Schristos 	      d = (startpos >= size1 ? string2 - size1 : string1) + startpos;
5278*75f6d617Schristos 
5279*75f6d617Schristos               /* Written out as an if-else to avoid testing `translate'
5280*75f6d617Schristos                  inside the loop.  */
5281*75f6d617Schristos 	      if (translate)
5282*75f6d617Schristos                 while (range > lim
5283*75f6d617Schristos                        && !fastmap[(unsigned char)
5284*75f6d617Schristos 				   translate[(unsigned char) *d++]])
5285*75f6d617Schristos                   range--;
5286*75f6d617Schristos 	      else
5287*75f6d617Schristos                 while (range > lim && !fastmap[(unsigned char) *d++])
5288*75f6d617Schristos                   range--;
5289*75f6d617Schristos 
5290*75f6d617Schristos 	      startpos += irange - range;
5291*75f6d617Schristos 	    }
5292*75f6d617Schristos 	  else				/* Searching backwards.  */
5293*75f6d617Schristos 	    {
5294*75f6d617Schristos 	      register CHAR_T c = (size1 == 0 || startpos >= size1
5295*75f6d617Schristos 				      ? string2[startpos - size1]
5296*75f6d617Schristos 				      : string1[startpos]);
5297*75f6d617Schristos 
5298*75f6d617Schristos 	      if (!fastmap[(unsigned char) TRANSLATE (c)])
5299*75f6d617Schristos 		goto advance;
5300*75f6d617Schristos 	    }
5301*75f6d617Schristos 	}
5302*75f6d617Schristos 
5303*75f6d617Schristos       /* If can't match the null string, and that's all we have left, fail.  */
5304*75f6d617Schristos       if (range >= 0 && startpos == total_size && fastmap
5305*75f6d617Schristos           && !bufp->can_be_null)
5306*75f6d617Schristos        {
5307*75f6d617Schristos #ifdef WCHAR
5308*75f6d617Schristos          FREE_WCS_BUFFERS ();
5309*75f6d617Schristos #endif
5310*75f6d617Schristos          return -1;
5311*75f6d617Schristos        }
5312*75f6d617Schristos 
5313*75f6d617Schristos #ifdef WCHAR
5314*75f6d617Schristos       val = wcs_re_match_2_internal (bufp, string1, size1, string2,
5315*75f6d617Schristos 				     size2, startpos, regs, stop,
5316*75f6d617Schristos 				     wcs_string1, wcs_size1,
5317*75f6d617Schristos 				     wcs_string2, wcs_size2,
5318*75f6d617Schristos 				     mbs_offset1, mbs_offset2);
5319*75f6d617Schristos #else /* BYTE */
5320*75f6d617Schristos       val = byte_re_match_2_internal (bufp, string1, size1, string2,
5321*75f6d617Schristos 				      size2, startpos, regs, stop);
5322*75f6d617Schristos #endif /* BYTE */
5323*75f6d617Schristos 
5324*75f6d617Schristos #ifndef REGEX_MALLOC
5325*75f6d617Schristos # ifdef C_ALLOCA
5326*75f6d617Schristos       alloca (0);
5327*75f6d617Schristos # endif
5328*75f6d617Schristos #endif
5329*75f6d617Schristos 
5330*75f6d617Schristos       if (val >= 0)
5331*75f6d617Schristos 	{
5332*75f6d617Schristos #ifdef WCHAR
5333*75f6d617Schristos 	  FREE_WCS_BUFFERS ();
5334*75f6d617Schristos #endif
5335*75f6d617Schristos 	  return startpos;
5336*75f6d617Schristos 	}
5337*75f6d617Schristos 
5338*75f6d617Schristos       if (val == -2)
5339*75f6d617Schristos 	{
5340*75f6d617Schristos #ifdef WCHAR
5341*75f6d617Schristos 	  FREE_WCS_BUFFERS ();
5342*75f6d617Schristos #endif
5343*75f6d617Schristos 	  return -2;
5344*75f6d617Schristos 	}
5345*75f6d617Schristos 
5346*75f6d617Schristos     advance:
5347*75f6d617Schristos       if (!range)
5348*75f6d617Schristos         break;
5349*75f6d617Schristos       else if (range > 0)
5350*75f6d617Schristos         {
5351*75f6d617Schristos           range--;
5352*75f6d617Schristos           startpos++;
5353*75f6d617Schristos         }
5354*75f6d617Schristos       else
5355*75f6d617Schristos         {
5356*75f6d617Schristos           range++;
5357*75f6d617Schristos           startpos--;
5358*75f6d617Schristos         }
5359*75f6d617Schristos     }
5360*75f6d617Schristos #ifdef WCHAR
5361*75f6d617Schristos   FREE_WCS_BUFFERS ();
5362*75f6d617Schristos #endif
5363*75f6d617Schristos   return -1;
5364*75f6d617Schristos }
5365*75f6d617Schristos 
5366*75f6d617Schristos #ifdef WCHAR
5367*75f6d617Schristos /* This converts PTR, a pointer into one of the search wchar_t strings
5368*75f6d617Schristos    `string1' and `string2', into a multibyte string offset from the
5369*75f6d617Schristos    beginning of that string.  We use mbs_offset to optimize.
5370*75f6d617Schristos    See convert_mbs_to_wcs.  */
/* When FIRST_STRING_P (ptr) holds, the result is
   mbs_offset1[ptr - string1]; otherwise it is
   mbs_offset2[ptr - string2] + csize1.  A NULL offset table contributes
   0 in place of the table lookup.  PTR is evaluated more than once, and
   the expansion relies on `string1', `string2', `mbs_offset1',
   `mbs_offset2' and `csize1' being visible where the macro is used.  */
5371*75f6d617Schristos # define POINTER_TO_OFFSET(ptr)						\
5372*75f6d617Schristos   (FIRST_STRING_P (ptr)							\
5373*75f6d617Schristos    ? ((regoff_t)(mbs_offset1 != NULL? mbs_offset1[(ptr)-string1] : 0))	\
5374*75f6d617Schristos    : ((regoff_t)((mbs_offset2 != NULL? mbs_offset2[(ptr)-string2] : 0)	\
5375*75f6d617Schristos 		 + csize1)))
5376*75f6d617Schristos #else /* BYTE */
5377*75f6d617Schristos /* This converts PTR, a pointer into one of the search strings `string1'
5378*75f6d617Schristos    and `string2' into an offset from the beginning of that string.  */
/* When FIRST_STRING_P (ptr) holds, the result is ptr - string1;
   otherwise it is ptr - string2 + size1, i.e. offsets into `string2'
   are biased by the length of `string1'.  PTR is evaluated more than
   once.  */
5379*75f6d617Schristos # define POINTER_TO_OFFSET(ptr)			\
5380*75f6d617Schristos   (FIRST_STRING_P (ptr)				\
5381*75f6d617Schristos    ? ((regoff_t) ((ptr) - string1))		\
5382*75f6d617Schristos    : ((regoff_t) ((ptr) - string2 + size1)))
5383*75f6d617Schristos #endif /* WCHAR */
5384*75f6d617Schristos 
5385*75f6d617Schristos /* Macros for dealing with the split strings in re_match_2.  */
5386*75f6d617Schristos 
/* True while the current segment end `dend' is the end of the part of
   string1 being matched, i.e. `dend' has not yet been switched to
   `end_match_2'.  */
5387*75f6d617Schristos #define MATCHING_IN_FIRST_STRING  (dend == end_match_1)
5388*75f6d617Schristos 
5389*75f6d617Schristos /* Call before fetching a character with *d.  This switches over to
5390*75f6d617Schristos    string2 if necessary.  */
/* Written as a loop rather than a single test: after switching to
   string2, `d' may again equal `dend' (when string2 == end_match_2),
   in which case the next iteration takes the `goto fail' branch.
   Requires `d', `dend', `string2', `end_match_2' and a `fail' label in
   the enclosing function.  */
5391*75f6d617Schristos #define PREFETCH()							\
5392*75f6d617Schristos   while (d == dend)						    	\
5393*75f6d617Schristos     {									\
5394*75f6d617Schristos       /* End of string2 => fail.  */					\
5395*75f6d617Schristos       if (dend == end_match_2) 						\
5396*75f6d617Schristos         goto fail;							\
5397*75f6d617Schristos       /* End of string1 => advance to string2.  */ 			\
5398*75f6d617Schristos       d = string2;						        \
5399*75f6d617Schristos       dend = end_match_2;						\
5400*75f6d617Schristos     }
5401*75f6d617Schristos 
5402*75f6d617Schristos /* Test if at very beginning or at very end of the virtual concatenation
5403*75f6d617Schristos    of `string1' and `string2'.  If only one string, it's `string2'.  */
/* Note that AT_STRINGS_BEG is also true, regardless of D, whenever
   `size2' is zero.  AT_STRINGS_END compares D against `end2' only.  */
5404*75f6d617Schristos #define AT_STRINGS_BEG(d) ((d) == (size1 ? string1 : string2) || !size2)
5405*75f6d617Schristos #define AT_STRINGS_END(d) ((d) == end2)
5406*75f6d617Schristos 
5407*75f6d617Schristos 
5408*75f6d617Schristos /* Test if D points to a character which is word-constituent.  We have
5409*75f6d617Schristos    two special cases to check for: if past the end of string1, look at
5410*75f6d617Schristos    the first character in string2; and if before the beginning of
5411*75f6d617Schristos    string2, look at the last character in string1.  */
/* Concretely: D == end1 reads *string2, D == string2 - 1 reads
   *(end1 - 1), and any other D reads *D.  D is evaluated several
   times, so it must not have side effects.  */
5412*75f6d617Schristos #ifdef WCHAR
5413*75f6d617Schristos /* Use internationalized API instead of SYNTAX.  */
/* A wide character counts as word-constituent when iswalnum accepts it
   or when it is L'_'.  */
5414*75f6d617Schristos # define WORDCHAR_P(d)							\
5415*75f6d617Schristos   (iswalnum ((wint_t)((d) == end1 ? *string2				\
5416*75f6d617Schristos            : (d) == string2 - 1 ? *(end1 - 1) : *(d))) != 0		\
5417*75f6d617Schristos    || ((d) == end1 ? *string2						\
5418*75f6d617Schristos        : (d) == string2 - 1 ? *(end1 - 1) : *(d)) == L'_')
5419*75f6d617Schristos #else /* BYTE */
/* A byte counts as word-constituent when SYNTAX maps it to Sword.  */
5420*75f6d617Schristos # define WORDCHAR_P(d)							\
5421*75f6d617Schristos   (SYNTAX ((d) == end1 ? *string2					\
5422*75f6d617Schristos            : (d) == string2 - 1 ? *(end1 - 1) : *(d))			\
5423*75f6d617Schristos    == Sword)
5424*75f6d617Schristos #endif /* WCHAR */
5425*75f6d617Schristos 
5426*75f6d617Schristos /* Disabled due to a compiler bug -- see comment at case wordbound */
/* The definition below is excluded from compilation by `#if 0'; it is
   retained only as a description of the intended test.  */
5427*75f6d617Schristos #if 0
5428*75f6d617Schristos /* Test if the character before D and the one at D differ with respect
5429*75f6d617Schristos    to being word-constituent.  */
5430*75f6d617Schristos #define AT_WORD_BOUNDARY(d)						\
5431*75f6d617Schristos   (AT_STRINGS_BEG (d) || AT_STRINGS_END (d)				\
5432*75f6d617Schristos    || WORDCHAR_P (d - 1) != WORDCHAR_P (d))
5433*75f6d617Schristos #endif
5434*75f6d617Schristos 
5435*75f6d617Schristos /* Free everything we malloc.  */
/* FREE_VARIABLES has four variants, selected by MATCH_MAY_ALLOCATE and
   WCHAR:
     - MATCH_MAY_ALLOCATE: release the failure stack and every register
       vector (regstart, regend, old_*, best_*, reg_info and the two
       dummy vectors).
     - WCHAR: additionally release string1, string2, mbs_offset1 and
       mbs_offset2, but only when `cant_free_wcs_buf' is zero.
     - Neither: expands to a no-op expression.
   All variants rely on the named variables being visible at the point
   of use.  */
5436*75f6d617Schristos #ifdef MATCH_MAY_ALLOCATE
5437*75f6d617Schristos # ifdef WCHAR
5438*75f6d617Schristos #  define FREE_VARIABLES()						\
5439*75f6d617Schristos   do {									\
5440*75f6d617Schristos     REGEX_FREE_STACK (fail_stack.stack);				\
5441*75f6d617Schristos     FREE_VAR (regstart);						\
5442*75f6d617Schristos     FREE_VAR (regend);							\
5443*75f6d617Schristos     FREE_VAR (old_regstart);						\
5444*75f6d617Schristos     FREE_VAR (old_regend);						\
5445*75f6d617Schristos     FREE_VAR (best_regstart);						\
5446*75f6d617Schristos     FREE_VAR (best_regend);						\
5447*75f6d617Schristos     FREE_VAR (reg_info);						\
5448*75f6d617Schristos     FREE_VAR (reg_dummy);						\
5449*75f6d617Schristos     FREE_VAR (reg_info_dummy);						\
5450*75f6d617Schristos     if (!cant_free_wcs_buf)						\
5451*75f6d617Schristos       {									\
5452*75f6d617Schristos         FREE_VAR (string1);						\
5453*75f6d617Schristos         FREE_VAR (string2);						\
5454*75f6d617Schristos         FREE_VAR (mbs_offset1);						\
5455*75f6d617Schristos         FREE_VAR (mbs_offset2);						\
5456*75f6d617Schristos       }									\
5457*75f6d617Schristos   } while (0)
5458*75f6d617Schristos # else /* BYTE */
5459*75f6d617Schristos #  define FREE_VARIABLES()						\
5460*75f6d617Schristos   do {									\
5461*75f6d617Schristos     REGEX_FREE_STACK (fail_stack.stack);				\
5462*75f6d617Schristos     FREE_VAR (regstart);						\
5463*75f6d617Schristos     FREE_VAR (regend);							\
5464*75f6d617Schristos     FREE_VAR (old_regstart);						\
5465*75f6d617Schristos     FREE_VAR (old_regend);						\
5466*75f6d617Schristos     FREE_VAR (best_regstart);						\
5467*75f6d617Schristos     FREE_VAR (best_regend);						\
5468*75f6d617Schristos     FREE_VAR (reg_info);						\
5469*75f6d617Schristos     FREE_VAR (reg_dummy);						\
5470*75f6d617Schristos     FREE_VAR (reg_info_dummy);						\
5471*75f6d617Schristos   } while (0)
5472*75f6d617Schristos # endif /* WCHAR */
5473*75f6d617Schristos #else
5474*75f6d617Schristos # ifdef WCHAR
5475*75f6d617Schristos #  define FREE_VARIABLES()						\
5476*75f6d617Schristos   do {									\
5477*75f6d617Schristos     if (!cant_free_wcs_buf)						\
5478*75f6d617Schristos       {									\
5479*75f6d617Schristos         FREE_VAR (string1);						\
5480*75f6d617Schristos         FREE_VAR (string2);						\
5481*75f6d617Schristos         FREE_VAR (mbs_offset1);						\
5482*75f6d617Schristos         FREE_VAR (mbs_offset2);						\
5483*75f6d617Schristos       }									\
5484*75f6d617Schristos   } while (0)
5485*75f6d617Schristos # else /* BYTE */
5486*75f6d617Schristos #  define FREE_VARIABLES() ((void)0) /* Do nothing!  But inhibit gcc warning. */
5487*75f6d617Schristos # endif /* WCHAR */
5488*75f6d617Schristos #endif /* not MATCH_MAY_ALLOCATE */
5489*75f6d617Schristos 
5490*75f6d617Schristos /* These values must meet several constraints.  They must not be valid
5491*75f6d617Schristos    register values; since we have a limit of 255 registers (because
5492*75f6d617Schristos    we use only one byte in the pattern for the register number), we can
5493*75f6d617Schristos    use numbers larger than 255.  They must differ by 1, because of
5494*75f6d617Schristos    NUM_FAILURE_ITEMS above.  And the value for the lowest register must
5495*75f6d617Schristos    be larger than the value for the highest register, so we do not try
5496*75f6d617Schristos    to actually save any registers when none are active.  */
/* Sentinel meaning "no register is active": 1 shifted left by
   BYTEWIDTH.  */
5497*75f6d617Schristos #define NO_HIGHEST_ACTIVE_REG (1 << BYTEWIDTH)
/* Companion sentinel, exactly one greater than NO_HIGHEST_ACTIVE_REG so
   that the "lowest" value exceeds the "highest" one.  */
5498*75f6d617Schristos #define NO_LOWEST_ACTIVE_REG (NO_HIGHEST_ACTIVE_REG + 1)
5499*75f6d617Schristos 
5500*75f6d617Schristos #else /* not INSIDE_RECURSION */
5501*75f6d617Schristos /* Matching routines.  */
5502*75f6d617Schristos 
#ifndef emacs   /* Emacs never uses this.  */
/* re_match is the single-string form of re_match_2: the compiled
   pattern in BUFP is matched against STRING (of length SIZE) starting
   at POS.  The internal matcher is called with an empty first string,
   STRING as the second string, and SIZE as the stop position.  REGS is
   passed through unchanged.  The matcher's result is returned as is.  */

int
re_match (bufp, string, size, pos, regs)
     struct re_pattern_buffer *bufp;
     const char *string;
     int size, pos;
     struct re_registers *regs;
{
  int match_len;

# ifdef MBS_SUPPORT
  /* The byte matcher is used only when MB_CUR_MAX is 1; otherwise the
     wide-character matcher runs.  Its wchar_t buffers are passed as
     NULL here.  */
  if (MB_CUR_MAX == 1)
# endif
    match_len = byte_re_match_2_internal (bufp, NULL, 0, string, size,
					  pos, regs, size);
# ifdef MBS_SUPPORT
  else
    match_len = wcs_re_match_2_internal (bufp, NULL, 0, string, size,
					 pos, regs, size,
					 NULL, 0, NULL, 0, NULL, NULL);
# endif

# if !defined REGEX_MALLOC && defined C_ALLOCA
  /* Force an alloca cleanup now that the matcher has returned.  */
  alloca (0);
# endif
  return match_len;
}
# ifdef _LIBC
weak_alias (__re_match, re_match)
# endif
#endif /* not emacs */
5534*75f6d617Schristos 
5535*75f6d617Schristos #endif /* not INSIDE_RECURSION */
5536*75f6d617Schristos 
5537*75f6d617Schristos #ifdef INSIDE_RECURSION
/* Forward declarations of static helpers used by the matcher.  Each
   name is wrapped in PREFIX, so one declaration is produced per
   character-type instantiation of this section.  The three
   *_match_null_string_p helpers take a pattern position (by pointer or
   by reference to the pointer), the pattern end, and the register info
   vector, and return a boolean.  bcmp_translate takes two CHAR_T
   strings, a length, and a translate table, and returns an int.  */
5538*75f6d617Schristos static boolean PREFIX(group_match_null_string_p) _RE_ARGS ((UCHAR_T **p,
5539*75f6d617Schristos 						    UCHAR_T *end,
5540*75f6d617Schristos 					PREFIX(register_info_type) *reg_info));
5541*75f6d617Schristos static boolean PREFIX(alt_match_null_string_p) _RE_ARGS ((UCHAR_T *p,
5542*75f6d617Schristos 						  UCHAR_T *end,
5543*75f6d617Schristos 					PREFIX(register_info_type) *reg_info));
5544*75f6d617Schristos static boolean PREFIX(common_op_match_null_string_p) _RE_ARGS ((UCHAR_T **p,
5545*75f6d617Schristos 							UCHAR_T *end,
5546*75f6d617Schristos 					PREFIX(register_info_type) *reg_info));
5547*75f6d617Schristos static int PREFIX(bcmp_translate) _RE_ARGS ((const CHAR_T *s1, const CHAR_T *s2,
5548*75f6d617Schristos 				     int len, char *translate));
5549*75f6d617Schristos #else /* not INSIDE_RECURSION */
5550*75f6d617Schristos 
/* re_match_2 matches the compiled pattern in BUFP against the (virtual)
   concatenation of STRING1 and STRING2 (of length SIZE1 and SIZE2,
   respectively).  Matching starts at POS and stops at STOP.

   If REGS is non-null, offsets for the substring each group matched may
   be stored in REGS, subject to the `no_sub' field of BUFP.  See the
   documentation for exactly how many groups are filled.
   NOTE(review): this comment historically said registers are stored
   when `no_sub' is nonzero; regex.h describes `no_sub' as suppressing
   subexpression reporting, so the condition is presumably `no_sub'
   being zero -- confirm against the matcher body.

   Returns -1 if there is no match and -2 on an internal error (such as
   the failure stack overflowing).  Otherwise returns the length of the
   matched substring.  */

int
re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop)
     struct re_pattern_buffer *bufp;
     const char *string1, *string2;
     int size1, size2;
     int pos;
     struct re_registers *regs;
     int stop;
{
  int match_len;

# ifdef MBS_SUPPORT
  /* The byte matcher is used only when MB_CUR_MAX is 1; otherwise the
     wide-character matcher runs.  Its wchar_t buffers are passed as
     NULL here.  */
  if (MB_CUR_MAX == 1)
# endif
    match_len = byte_re_match_2_internal (bufp, string1, size1,
					  string2, size2,
					  pos, regs, stop);
# ifdef MBS_SUPPORT
  else
    match_len = wcs_re_match_2_internal (bufp, string1, size1,
					 string2, size2,
					 pos, regs, stop,
					 NULL, 0, NULL, 0, NULL, NULL);
# endif

#if !defined REGEX_MALLOC && defined C_ALLOCA
  /* Force an alloca cleanup now that the matcher has returned.  */
  alloca (0);
#endif
  return match_len;
}
#ifdef _LIBC
weak_alias (__re_match_2, re_match_2)
#endif
5594*75f6d617Schristos 
5595*75f6d617Schristos #endif /* not INSIDE_RECURSION */
5596*75f6d617Schristos 
5597*75f6d617Schristos #ifdef INSIDE_RECURSION
5598*75f6d617Schristos 
5599*75f6d617Schristos #ifdef WCHAR
5600*75f6d617Schristos static int count_mbs_length PARAMS ((int *, int));
5601*75f6d617Schristos 
5602*75f6d617Schristos /* This check the substring (from 0, to length) of the multibyte string,
5603*75f6d617Schristos    to which offset_buffer correspond. And count how many wchar_t_characters
5604*75f6d617Schristos    the substring occupy. We use offset_buffer to optimization.
5605*75f6d617Schristos    See convert_mbs_to_wcs.  */
5606*75f6d617Schristos 
5607*75f6d617Schristos static int
count_mbs_length(offset_buffer,length)5608*75f6d617Schristos count_mbs_length(offset_buffer, length)
5609*75f6d617Schristos      int *offset_buffer;
5610*75f6d617Schristos      int length;
5611*75f6d617Schristos {
5612*75f6d617Schristos   int upper, lower;
5613*75f6d617Schristos 
5614*75f6d617Schristos   /* Check whether the size is valid.  */
5615*75f6d617Schristos   if (length < 0)
5616*75f6d617Schristos     return -1;
5617*75f6d617Schristos 
5618*75f6d617Schristos   if (offset_buffer == NULL)
5619*75f6d617Schristos     return 0;
5620*75f6d617Schristos 
5621*75f6d617Schristos   /* If there are no multibyte character, offset_buffer[i] == i.
5622*75f6d617Schristos    Optmize for this case.  */
5623*75f6d617Schristos   if (offset_buffer[length] == length)
5624*75f6d617Schristos     return length;
5625*75f6d617Schristos 
5626*75f6d617Schristos   /* Set up upper with length. (because for all i, offset_buffer[i] >= i)  */
5627*75f6d617Schristos   upper = length;
5628*75f6d617Schristos   lower = 0;
5629*75f6d617Schristos 
5630*75f6d617Schristos   while (true)
5631*75f6d617Schristos     {
5632*75f6d617Schristos       int middle = (lower + upper) / 2;
5633*75f6d617Schristos       if (middle == lower || middle == upper)
5634*75f6d617Schristos 	break;
5635*75f6d617Schristos       if (offset_buffer[middle] > length)
5636*75f6d617Schristos 	upper = middle;
5637*75f6d617Schristos       else if (offset_buffer[middle] < length)
5638*75f6d617Schristos 	lower = middle;
5639*75f6d617Schristos       else
5640*75f6d617Schristos 	return middle;
5641*75f6d617Schristos     }
5642*75f6d617Schristos 
5643*75f6d617Schristos   return -1;
5644*75f6d617Schristos }
5645*75f6d617Schristos #endif /* WCHAR */
5646*75f6d617Schristos 
5647*75f6d617Schristos /* This is a separate function so that we can force an alloca cleanup
5648*75f6d617Schristos    afterwards.  */
5649*75f6d617Schristos #ifdef WCHAR
5650*75f6d617Schristos static int
wcs_re_match_2_internal(bufp,cstring1,csize1,cstring2,csize2,pos,regs,stop,string1,size1,string2,size2,mbs_offset1,mbs_offset2)5651*75f6d617Schristos wcs_re_match_2_internal (bufp, cstring1, csize1, cstring2, csize2, pos,
5652*75f6d617Schristos 			 regs, stop, string1, size1, string2, size2,
5653*75f6d617Schristos 			 mbs_offset1, mbs_offset2)
5654*75f6d617Schristos      struct re_pattern_buffer *bufp;
5655*75f6d617Schristos      const char *cstring1, *cstring2;
5656*75f6d617Schristos      int csize1, csize2;
5657*75f6d617Schristos      int pos;
5658*75f6d617Schristos      struct re_registers *regs;
5659*75f6d617Schristos      int stop;
5660*75f6d617Schristos      /* string1 == string2 == NULL means string1/2, size1/2 and
5661*75f6d617Schristos 	mbs_offset1/2 need setting up in this function.  */
5662*75f6d617Schristos      /* We need wchar_t* buffers correspond to cstring1, cstring2.  */
5663*75f6d617Schristos      wchar_t *string1, *string2;
5664*75f6d617Schristos      /* We need the size of wchar_t buffers correspond to csize1, csize2.  */
5665*75f6d617Schristos      int size1, size2;
5666*75f6d617Schristos      /* Offset buffers for optimization.  See convert_mbs_to_wcs.  */
5667*75f6d617Schristos      int *mbs_offset1, *mbs_offset2;
5668*75f6d617Schristos #else /* BYTE */
5669*75f6d617Schristos static int
5670*75f6d617Schristos byte_re_match_2_internal (bufp, string1, size1,string2, size2, pos,
5671*75f6d617Schristos 			  regs, stop)
5672*75f6d617Schristos      struct re_pattern_buffer *bufp;
5673*75f6d617Schristos      const char *string1, *string2;
5674*75f6d617Schristos      int size1, size2;
5675*75f6d617Schristos      int pos;
5676*75f6d617Schristos      struct re_registers *regs;
5677*75f6d617Schristos      int stop;
5678*75f6d617Schristos #endif /* BYTE */
5679*75f6d617Schristos {
5680*75f6d617Schristos   /* General temporaries.  */
5681*75f6d617Schristos   int mcnt;
5682*75f6d617Schristos   UCHAR_T *p1;
5683*75f6d617Schristos #ifdef WCHAR
5684*75f6d617Schristos   /* They hold whether each wchar_t is binary data or not.  */
5685*75f6d617Schristos   char *is_binary = NULL;
5686*75f6d617Schristos   /* If true, we can't free string1/2, mbs_offset1/2.  */
5687*75f6d617Schristos   int cant_free_wcs_buf = 1;
5688*75f6d617Schristos #endif /* WCHAR */
5689*75f6d617Schristos 
5690*75f6d617Schristos   /* Just past the end of the corresponding string.  */
5691*75f6d617Schristos   const CHAR_T *end1, *end2;
5692*75f6d617Schristos 
5693*75f6d617Schristos   /* Pointers into string1 and string2, just past the last characters in
5694*75f6d617Schristos      each to consider matching.  */
5695*75f6d617Schristos   const CHAR_T *end_match_1, *end_match_2;
5696*75f6d617Schristos 
5697*75f6d617Schristos   /* Where we are in the data, and the end of the current string.  */
5698*75f6d617Schristos   const CHAR_T *d, *dend;
5699*75f6d617Schristos 
5700*75f6d617Schristos   /* Where we are in the pattern, and the end of the pattern.  */
5701*75f6d617Schristos #ifdef WCHAR
5702*75f6d617Schristos   UCHAR_T *pattern, *p;
5703*75f6d617Schristos   register UCHAR_T *pend;
5704*75f6d617Schristos #else /* BYTE */
5705*75f6d617Schristos   UCHAR_T *p = bufp->buffer;
5706*75f6d617Schristos   register UCHAR_T *pend = p + bufp->used;
5707*75f6d617Schristos #endif /* WCHAR */
5708*75f6d617Schristos 
5709*75f6d617Schristos   /* Mark the opcode just after a start_memory, so we can test for an
5710*75f6d617Schristos      empty subpattern when we get to the stop_memory.  */
5711*75f6d617Schristos   UCHAR_T *just_past_start_mem = 0;
5712*75f6d617Schristos 
5713*75f6d617Schristos   /* We use this to map every character in the string.  */
5714*75f6d617Schristos   RE_TRANSLATE_TYPE translate = bufp->translate;
5715*75f6d617Schristos 
5716*75f6d617Schristos   /* Failure point stack.  Each place that can handle a failure further
5717*75f6d617Schristos      down the line pushes a failure point on this stack.  It consists of
5718*75f6d617Schristos      restart, regend, and reg_info for all registers corresponding to
5719*75f6d617Schristos      the subexpressions we're currently inside, plus the number of such
5720*75f6d617Schristos      registers, and, finally, two char *'s.  The first char * is where
5721*75f6d617Schristos      to resume scanning the pattern; the second one is where to resume
5722*75f6d617Schristos      scanning the strings.  If the latter is zero, the failure point is
5723*75f6d617Schristos      a ``dummy''; if a failure happens and the failure point is a dummy,
5724*75f6d617Schristos      it gets discarded and the next one is tried.  */
5725*75f6d617Schristos #ifdef MATCH_MAY_ALLOCATE /* otherwise, this is global.  */
5726*75f6d617Schristos   PREFIX(fail_stack_type) fail_stack;
5727*75f6d617Schristos #endif
5728*75f6d617Schristos #ifdef DEBUG
5729*75f6d617Schristos   static unsigned failure_id;
5730*75f6d617Schristos   unsigned nfailure_points_pushed = 0, nfailure_points_popped = 0;
5731*75f6d617Schristos #endif
5732*75f6d617Schristos 
5733*75f6d617Schristos #ifdef REL_ALLOC
5734*75f6d617Schristos   /* This holds the pointer to the failure stack, when
5735*75f6d617Schristos      it is allocated relocatably.  */
5736*75f6d617Schristos   fail_stack_elt_t *failure_stack_ptr;
5737*75f6d617Schristos #endif
5738*75f6d617Schristos 
5739*75f6d617Schristos   /* We fill all the registers internally, independent of what we
5740*75f6d617Schristos      return, for use in backreferences.  The number here includes
5741*75f6d617Schristos      an element for register zero.  */
5742*75f6d617Schristos   size_t num_regs = bufp->re_nsub + 1;
5743*75f6d617Schristos 
5744*75f6d617Schristos   /* The currently active registers.  */
5745*75f6d617Schristos   active_reg_t lowest_active_reg = NO_LOWEST_ACTIVE_REG;
5746*75f6d617Schristos   active_reg_t highest_active_reg = NO_HIGHEST_ACTIVE_REG;
5747*75f6d617Schristos 
5748*75f6d617Schristos   /* Information on the contents of registers. These are pointers into
5749*75f6d617Schristos      the input strings; they record just what was matched (on this
5750*75f6d617Schristos      attempt) by a subexpression part of the pattern, that is, the
5751*75f6d617Schristos      regnum-th regstart pointer points to where in the pattern we began
5752*75f6d617Schristos      matching and the regnum-th regend points to right after where we
5753*75f6d617Schristos      stopped matching the regnum-th subexpression.  (The zeroth register
5754*75f6d617Schristos      keeps track of what the whole pattern matches.)  */
5755*75f6d617Schristos #ifdef MATCH_MAY_ALLOCATE /* otherwise, these are global.  */
5756*75f6d617Schristos   const CHAR_T **regstart, **regend;
5757*75f6d617Schristos #endif
5758*75f6d617Schristos 
5759*75f6d617Schristos   /* If a group that's operated upon by a repetition operator fails to
5760*75f6d617Schristos      match anything, then the register for its start will need to be
5761*75f6d617Schristos      restored because it will have been set to wherever in the string we
5762*75f6d617Schristos      are when we last see its open-group operator.  Similarly for a
5763*75f6d617Schristos      register's end.  */
5764*75f6d617Schristos #ifdef MATCH_MAY_ALLOCATE /* otherwise, these are global.  */
5765*75f6d617Schristos   const CHAR_T **old_regstart, **old_regend;
5766*75f6d617Schristos #endif
5767*75f6d617Schristos 
5768*75f6d617Schristos   /* The is_active field of reg_info helps us keep track of which (possibly
5769*75f6d617Schristos      nested) subexpressions we are currently in. The matched_something
5770*75f6d617Schristos      field of reg_info[reg_num] helps us tell whether or not we have
5771*75f6d617Schristos      matched any of the pattern so far this time through the reg_num-th
5772*75f6d617Schristos      subexpression.  These two fields get reset each time through any
5773*75f6d617Schristos      loop their register is in.  */
5774*75f6d617Schristos #ifdef MATCH_MAY_ALLOCATE /* otherwise, this is global.  */
5775*75f6d617Schristos   PREFIX(register_info_type) *reg_info;
5776*75f6d617Schristos #endif
5777*75f6d617Schristos 
5778*75f6d617Schristos   /* The following record the register info as found in the above
5779*75f6d617Schristos      variables when we find a match better than any we've seen before.
5780*75f6d617Schristos      This happens as we backtrack through the failure points, which in
5781*75f6d617Schristos      turn happens only if we have not yet matched the entire string. */
5782*75f6d617Schristos   unsigned best_regs_set = false;
5783*75f6d617Schristos #ifdef MATCH_MAY_ALLOCATE /* otherwise, these are global.  */
5784*75f6d617Schristos   const CHAR_T **best_regstart, **best_regend;
5785*75f6d617Schristos #endif
5786*75f6d617Schristos 
5787*75f6d617Schristos   /* Logically, this is `best_regend[0]'.  But we don't want to have to
5788*75f6d617Schristos      allocate space for that if we're not allocating space for anything
5789*75f6d617Schristos      else (see below).  Also, we never need info about register 0 for
5790*75f6d617Schristos      any of the other register vectors, and it seems rather a kludge to
5791*75f6d617Schristos      treat `best_regend' differently than the rest.  So we keep track of
5792*75f6d617Schristos      the end of the best match so far in a separate variable.  We
5793*75f6d617Schristos      initialize this to NULL so that when we backtrack the first time
5794*75f6d617Schristos      and need to test it, it's not garbage.  */
5795*75f6d617Schristos   const CHAR_T *match_end = NULL;
5796*75f6d617Schristos 
5797*75f6d617Schristos   /* This helps SET_REGS_MATCHED avoid doing redundant work.  */
5798*75f6d617Schristos   int set_regs_matched_done = 0;
5799*75f6d617Schristos 
5800*75f6d617Schristos   /* Used when we pop values we don't care about.  */
5801*75f6d617Schristos #ifdef MATCH_MAY_ALLOCATE /* otherwise, these are global.  */
5802*75f6d617Schristos   const CHAR_T **reg_dummy;
5803*75f6d617Schristos   PREFIX(register_info_type) *reg_info_dummy;
5804*75f6d617Schristos #endif
5805*75f6d617Schristos 
5806*75f6d617Schristos #ifdef DEBUG
5807*75f6d617Schristos   /* Counts the total number of registers pushed.  */
5808*75f6d617Schristos   unsigned num_regs_pushed = 0;
5809*75f6d617Schristos #endif
5810*75f6d617Schristos 
5811*75f6d617Schristos   /* Definitions for state transitions.  More efficiently for gcc.  */
5812*75f6d617Schristos #ifdef __GNUC__
5813*75f6d617Schristos # if defined HAVE_SUBTRACT_LOCAL_LABELS && defined SHARED
5814*75f6d617Schristos #  define NEXT \
5815*75f6d617Schristos       do								      \
5816*75f6d617Schristos 	{								      \
5817*75f6d617Schristos 	  int offset;							      \
5818*75f6d617Schristos 	  const void *__unbounded ptr;					      \
5819*75f6d617Schristos 	  offset = (p == pend						      \
5820*75f6d617Schristos 		    ? 0 : jmptable[SWITCH_ENUM_CAST ((re_opcode_t) *p++)]);   \
5821*75f6d617Schristos 	  ptr = &&end_of_pattern + offset;				      \
5822*75f6d617Schristos 	  goto *ptr;							      \
5823*75f6d617Schristos 	}								      \
5824*75f6d617Schristos       while (0)
5825*75f6d617Schristos #  define REF(x) \
5826*75f6d617Schristos   &&label_##x - &&end_of_pattern
5827*75f6d617Schristos #  define JUMP_TABLE_TYPE const int
5828*75f6d617Schristos # else
5829*75f6d617Schristos #  define NEXT \
5830*75f6d617Schristos       do								      \
5831*75f6d617Schristos 	{								      \
5832*75f6d617Schristos 	  const void *__unbounded ptr;					      \
5833*75f6d617Schristos 	  ptr = (p == pend ? &&end_of_pattern				      \
5834*75f6d617Schristos 		 : jmptable[SWITCH_ENUM_CAST ((re_opcode_t) *p++)]);	      \
5835*75f6d617Schristos 	  goto *ptr;							      \
5836*75f6d617Schristos 	}								      \
5837*75f6d617Schristos       while (0)
5838*75f6d617Schristos #  define REF(x) \
5839*75f6d617Schristos   &&label_##x
5840*75f6d617Schristos #  define JUMP_TABLE_TYPE const void *const
5841*75f6d617Schristos # endif
5842*75f6d617Schristos # define CASE(x) label_##x
5843*75f6d617Schristos   static JUMP_TABLE_TYPE jmptable[] =
5844*75f6d617Schristos     {
5845*75f6d617Schristos     REF (no_op),
5846*75f6d617Schristos     REF (succeed),
5847*75f6d617Schristos     REF (exactn),
5848*75f6d617Schristos # ifdef MBS_SUPPORT
5849*75f6d617Schristos     REF (exactn_bin),
5850*75f6d617Schristos # endif
5851*75f6d617Schristos     REF (anychar),
5852*75f6d617Schristos     REF (charset),
5853*75f6d617Schristos     REF (charset_not),
5854*75f6d617Schristos     REF (start_memory),
5855*75f6d617Schristos     REF (stop_memory),
5856*75f6d617Schristos     REF (duplicate),
5857*75f6d617Schristos     REF (begline),
5858*75f6d617Schristos     REF (endline),
5859*75f6d617Schristos     REF (begbuf),
5860*75f6d617Schristos     REF (endbuf),
5861*75f6d617Schristos     REF (jump),
5862*75f6d617Schristos     REF (jump_past_alt),
5863*75f6d617Schristos     REF (on_failure_jump),
5864*75f6d617Schristos     REF (on_failure_keep_string_jump),
5865*75f6d617Schristos     REF (pop_failure_jump),
5866*75f6d617Schristos     REF (maybe_pop_jump),
5867*75f6d617Schristos     REF (dummy_failure_jump),
5868*75f6d617Schristos     REF (push_dummy_failure),
5869*75f6d617Schristos     REF (succeed_n),
5870*75f6d617Schristos     REF (jump_n),
5871*75f6d617Schristos     REF (set_number_at),
5872*75f6d617Schristos     REF (wordchar),
5873*75f6d617Schristos     REF (notwordchar),
5874*75f6d617Schristos     REF (wordbeg),
5875*75f6d617Schristos     REF (wordend),
5876*75f6d617Schristos     REF (wordbound),
5877*75f6d617Schristos     REF (notwordbound)
5878*75f6d617Schristos # ifdef emacs
5879*75f6d617Schristos     ,REF (before_dot),
5880*75f6d617Schristos     REF (at_dot),
5881*75f6d617Schristos     REF (after_dot),
5882*75f6d617Schristos     REF (syntaxspec),
5883*75f6d617Schristos     REF (notsyntaxspec)
5884*75f6d617Schristos # endif
5885*75f6d617Schristos     };
5886*75f6d617Schristos #else
5887*75f6d617Schristos # define NEXT \
5888*75f6d617Schristos   break
5889*75f6d617Schristos # define CASE(x) \
5890*75f6d617Schristos   case x
5891*75f6d617Schristos #endif
5892*75f6d617Schristos 
5893*75f6d617Schristos   DEBUG_PRINT1 ("\n\nEntering re_match_2.\n");
5894*75f6d617Schristos 
5895*75f6d617Schristos   INIT_FAIL_STACK ();
5896*75f6d617Schristos 
5897*75f6d617Schristos #ifdef MATCH_MAY_ALLOCATE
5898*75f6d617Schristos   /* Do not bother to initialize all the register variables if there are
5899*75f6d617Schristos      no groups in the pattern, as it takes a fair amount of time.  If
5900*75f6d617Schristos      there are groups, we include space for register 0 (the whole
5901*75f6d617Schristos      pattern), even though we never use it, since it simplifies the
5902*75f6d617Schristos      array indexing.  We should fix this.  */
5903*75f6d617Schristos   if (bufp->re_nsub)
5904*75f6d617Schristos     {
5905*75f6d617Schristos       regstart = REGEX_TALLOC (num_regs, const CHAR_T *);
5906*75f6d617Schristos       regend = REGEX_TALLOC (num_regs, const CHAR_T *);
5907*75f6d617Schristos       old_regstart = REGEX_TALLOC (num_regs, const CHAR_T *);
5908*75f6d617Schristos       old_regend = REGEX_TALLOC (num_regs, const CHAR_T *);
5909*75f6d617Schristos       best_regstart = REGEX_TALLOC (num_regs, const CHAR_T *);
5910*75f6d617Schristos       best_regend = REGEX_TALLOC (num_regs, const CHAR_T *);
5911*75f6d617Schristos       reg_info = REGEX_TALLOC (num_regs, PREFIX(register_info_type));
5912*75f6d617Schristos       reg_dummy = REGEX_TALLOC (num_regs, const CHAR_T *);
5913*75f6d617Schristos       reg_info_dummy = REGEX_TALLOC (num_regs, PREFIX(register_info_type));
5914*75f6d617Schristos 
5915*75f6d617Schristos       if (!(regstart && regend && old_regstart && old_regend && reg_info
5916*75f6d617Schristos             && best_regstart && best_regend && reg_dummy && reg_info_dummy))
5917*75f6d617Schristos         {
5918*75f6d617Schristos           FREE_VARIABLES ();
5919*75f6d617Schristos           return -2;
5920*75f6d617Schristos         }
5921*75f6d617Schristos     }
5922*75f6d617Schristos   else
5923*75f6d617Schristos     {
5924*75f6d617Schristos       /* We must initialize all our variables to NULL, so that
5925*75f6d617Schristos          `FREE_VARIABLES' doesn't try to free them.  */
5926*75f6d617Schristos       regstart = regend = old_regstart = old_regend = best_regstart
5927*75f6d617Schristos         = best_regend = reg_dummy = NULL;
5928*75f6d617Schristos       reg_info = reg_info_dummy = (PREFIX(register_info_type) *) NULL;
5929*75f6d617Schristos     }
5930*75f6d617Schristos #endif /* MATCH_MAY_ALLOCATE */
5931*75f6d617Schristos 
5932*75f6d617Schristos   /* The starting position is bogus.  */
5933*75f6d617Schristos #ifdef WCHAR
5934*75f6d617Schristos   if (pos < 0 || pos > csize1 + csize2)
5935*75f6d617Schristos #else /* BYTE */
5936*75f6d617Schristos   if (pos < 0 || pos > size1 + size2)
5937*75f6d617Schristos #endif
5938*75f6d617Schristos     {
5939*75f6d617Schristos       FREE_VARIABLES ();
5940*75f6d617Schristos       return -1;
5941*75f6d617Schristos     }
5942*75f6d617Schristos 
5943*75f6d617Schristos #ifdef WCHAR
5944*75f6d617Schristos   /* Allocate wchar_t arrays for string1 and string2 and fill them with the
5945*75f6d617Schristos      converted strings.  NOTE(review): only string2 is passed to convert_mbs_to_wcs below; confirm string1.  */
5946*75f6d617Schristos   if (string1 == NULL && string2 == NULL)
5947*75f6d617Schristos     {
5948*75f6d617Schristos       /* We need to set up the buffers here.  */
5949*75f6d617Schristos 
5950*75f6d617Schristos       /* We must free wcs buffers in this function.  */
5951*75f6d617Schristos       cant_free_wcs_buf = 0;
5952*75f6d617Schristos 
5953*75f6d617Schristos       if (csize1 != 0)
5954*75f6d617Schristos 	{
5955*75f6d617Schristos 	  string1 = REGEX_TALLOC (csize1 + 1, CHAR_T);
5956*75f6d617Schristos 	  mbs_offset1 = REGEX_TALLOC (csize1 + 1, int);
5957*75f6d617Schristos 	  is_binary = REGEX_TALLOC (csize1 + 1, char);
5958*75f6d617Schristos 	  if (!string1 || !mbs_offset1 || !is_binary)
5959*75f6d617Schristos 	    {
5960*75f6d617Schristos 	      FREE_VAR (string1);
5961*75f6d617Schristos 	      FREE_VAR (mbs_offset1);
5962*75f6d617Schristos 	      FREE_VAR (is_binary);
5963*75f6d617Schristos 	      return -2;
5964*75f6d617Schristos 	    }
5965*75f6d617Schristos 	}
5966*75f6d617Schristos       if (csize2 != 0)
5967*75f6d617Schristos 	{
5968*75f6d617Schristos 	  string2 = REGEX_TALLOC (csize2 + 1, CHAR_T);
5969*75f6d617Schristos 	  mbs_offset2 = REGEX_TALLOC (csize2 + 1, int);
5970*75f6d617Schristos 	  is_binary = REGEX_TALLOC (csize2 + 1, char);
5971*75f6d617Schristos 	  if (!string2 || !mbs_offset2 || !is_binary)
5972*75f6d617Schristos 	    {
5973*75f6d617Schristos 	      FREE_VAR (string1);
5974*75f6d617Schristos 	      FREE_VAR (mbs_offset1);
5975*75f6d617Schristos 	      FREE_VAR (string2);
5976*75f6d617Schristos 	      FREE_VAR (mbs_offset2);
5977*75f6d617Schristos 	      FREE_VAR (is_binary);
5978*75f6d617Schristos 	      return -2;
5979*75f6d617Schristos 	    }
5980*75f6d617Schristos 	  size2 = convert_mbs_to_wcs(string2, cstring2, csize2,
5981*75f6d617Schristos 				     mbs_offset2, is_binary);
5982*75f6d617Schristos 	  string2[size2] = L'\0'; /* for a sentinel  */
5983*75f6d617Schristos 	  FREE_VAR (is_binary);
5984*75f6d617Schristos 	}
5985*75f6d617Schristos     }
5986*75f6d617Schristos 
5987*75f6d617Schristos   /* We need to cast pattern to (wchar_t*), because we cast this compiled
5988*75f6d617Schristos      pattern to (char*) in regex_compile.  */
5989*75f6d617Schristos   p = pattern = (CHAR_T*)bufp->buffer;
5990*75f6d617Schristos   pend = (CHAR_T*)(bufp->buffer + bufp->used);
5991*75f6d617Schristos 
5992*75f6d617Schristos #endif /* WCHAR */
5993*75f6d617Schristos 
5994*75f6d617Schristos   /* Initialize subexpression text positions to -1 to mark ones that no
5995*75f6d617Schristos      start_memory/stop_memory has been seen for. Also initialize the
5996*75f6d617Schristos      register information struct.  */
5997*75f6d617Schristos   for (mcnt = 1; (unsigned) mcnt < num_regs; mcnt++)
5998*75f6d617Schristos     {
5999*75f6d617Schristos       regstart[mcnt] = regend[mcnt]
6000*75f6d617Schristos         = old_regstart[mcnt] = old_regend[mcnt] = REG_UNSET_VALUE;
6001*75f6d617Schristos 
6002*75f6d617Schristos       REG_MATCH_NULL_STRING_P (reg_info[mcnt]) = MATCH_NULL_UNSET_VALUE;
6003*75f6d617Schristos       IS_ACTIVE (reg_info[mcnt]) = 0;
6004*75f6d617Schristos       MATCHED_SOMETHING (reg_info[mcnt]) = 0;
6005*75f6d617Schristos       EVER_MATCHED_SOMETHING (reg_info[mcnt]) = 0;
6006*75f6d617Schristos     }
6007*75f6d617Schristos 
6008*75f6d617Schristos   /* We move `string1' into `string2' if the latter's empty -- but not if
6009*75f6d617Schristos      `string1' is null.  */
6010*75f6d617Schristos   if (size2 == 0 && string1 != NULL)
6011*75f6d617Schristos     {
6012*75f6d617Schristos       string2 = string1;
6013*75f6d617Schristos       size2 = size1;
6014*75f6d617Schristos       string1 = 0;
6015*75f6d617Schristos       size1 = 0;
6016*75f6d617Schristos #ifdef WCHAR
6017*75f6d617Schristos       mbs_offset2 = mbs_offset1;
6018*75f6d617Schristos       csize2 = csize1;
6019*75f6d617Schristos       mbs_offset1 = NULL;
6020*75f6d617Schristos       csize1 = 0;
6021*75f6d617Schristos #endif
6022*75f6d617Schristos     }
6023*75f6d617Schristos   end1 = string1 + size1;
6024*75f6d617Schristos   end2 = string2 + size2;
6025*75f6d617Schristos 
6026*75f6d617Schristos   /* Compute where to stop matching, within the two strings.  */
6027*75f6d617Schristos #ifdef WCHAR
6028*75f6d617Schristos   if (stop <= csize1)
6029*75f6d617Schristos     {
6030*75f6d617Schristos       mcnt = count_mbs_length(mbs_offset1, stop);
6031*75f6d617Schristos       end_match_1 = string1 + mcnt;
6032*75f6d617Schristos       end_match_2 = string2;
6033*75f6d617Schristos     }
6034*75f6d617Schristos   else
6035*75f6d617Schristos     {
6036*75f6d617Schristos       if (stop > csize1 + csize2)
6037*75f6d617Schristos 	stop = csize1 + csize2;
6038*75f6d617Schristos       end_match_1 = end1;
6039*75f6d617Schristos       mcnt = count_mbs_length(mbs_offset2, stop-csize1);
6040*75f6d617Schristos       end_match_2 = string2 + mcnt;
6041*75f6d617Schristos     }
6042*75f6d617Schristos   if (mcnt < 0)
6043*75f6d617Schristos     { /* count_mbs_length returned an error.  */
6044*75f6d617Schristos       FREE_VARIABLES ();
6045*75f6d617Schristos       return -1;
6046*75f6d617Schristos     }
6047*75f6d617Schristos #else
6048*75f6d617Schristos   if (stop <= size1)
6049*75f6d617Schristos     {
6050*75f6d617Schristos       end_match_1 = string1 + stop;
6051*75f6d617Schristos       end_match_2 = string2;
6052*75f6d617Schristos     }
6053*75f6d617Schristos   else
6054*75f6d617Schristos     {
6055*75f6d617Schristos       end_match_1 = end1;
6056*75f6d617Schristos       end_match_2 = string2 + stop - size1;
6057*75f6d617Schristos     }
6058*75f6d617Schristos #endif /* WCHAR */
6059*75f6d617Schristos 
6060*75f6d617Schristos   /* `p' scans through the pattern as `d' scans through the data.
6061*75f6d617Schristos      `dend' is the end of the input string that `d' points within.  `d'
6062*75f6d617Schristos      is advanced into the following input string whenever necessary, but
6063*75f6d617Schristos      this happens before fetching; therefore, at the beginning of the
6064*75f6d617Schristos      loop, `d' can be pointing at the end of a string, but it cannot
6065*75f6d617Schristos      equal `string2'.  */
6066*75f6d617Schristos #ifdef WCHAR
6067*75f6d617Schristos   if (size1 > 0 && pos <= csize1)
6068*75f6d617Schristos     {
6069*75f6d617Schristos       mcnt = count_mbs_length(mbs_offset1, pos);
6070*75f6d617Schristos       d = string1 + mcnt;
6071*75f6d617Schristos       dend = end_match_1;
6072*75f6d617Schristos     }
6073*75f6d617Schristos   else
6074*75f6d617Schristos     {
6075*75f6d617Schristos       mcnt = count_mbs_length(mbs_offset2, pos-csize1);
6076*75f6d617Schristos       d = string2 + mcnt;
6077*75f6d617Schristos       dend = end_match_2;
6078*75f6d617Schristos     }
6079*75f6d617Schristos 
6080*75f6d617Schristos   if (mcnt < 0)
6081*75f6d617Schristos     { /* count_mbs_length returned an error.  */
6082*75f6d617Schristos       FREE_VARIABLES ();
6083*75f6d617Schristos       return -1;
6084*75f6d617Schristos     }
6085*75f6d617Schristos #else
6086*75f6d617Schristos   if (size1 > 0 && pos <= size1)
6087*75f6d617Schristos     {
6088*75f6d617Schristos       d = string1 + pos;
6089*75f6d617Schristos       dend = end_match_1;
6090*75f6d617Schristos     }
6091*75f6d617Schristos   else
6092*75f6d617Schristos     {
6093*75f6d617Schristos       d = string2 + pos - size1;
6094*75f6d617Schristos       dend = end_match_2;
6095*75f6d617Schristos     }
6096*75f6d617Schristos #endif /* WCHAR */
6097*75f6d617Schristos 
6098*75f6d617Schristos   DEBUG_PRINT1 ("The compiled pattern is:\n");
6099*75f6d617Schristos   DEBUG_PRINT_COMPILED_PATTERN (bufp, p, pend);
6100*75f6d617Schristos   DEBUG_PRINT1 ("The string to match is: `");
6101*75f6d617Schristos   DEBUG_PRINT_DOUBLE_STRING (d, string1, size1, string2, size2);
6102*75f6d617Schristos   DEBUG_PRINT1 ("'\n");
6103*75f6d617Schristos 
6104*75f6d617Schristos   /* This loops over pattern commands.  It exits by returning from the
6105*75f6d617Schristos      function if the match is complete, or it drops through if the match
6106*75f6d617Schristos      fails at this starting point in the input data.  */
6107*75f6d617Schristos   for (;;)
6108*75f6d617Schristos     {
6109*75f6d617Schristos #ifdef _LIBC
6110*75f6d617Schristos       DEBUG_PRINT2 ("\n%p: ", p);
6111*75f6d617Schristos #else
6112*75f6d617Schristos       DEBUG_PRINT2 ("\n0x%x: ", p);
6113*75f6d617Schristos #endif
6114*75f6d617Schristos 
6115*75f6d617Schristos #ifdef __GNUC__
6116*75f6d617Schristos       NEXT;
6117*75f6d617Schristos #else
6118*75f6d617Schristos       if (p == pend)
6119*75f6d617Schristos #endif
6120*75f6d617Schristos 	{
6121*75f6d617Schristos #ifdef __GNUC__
6122*75f6d617Schristos 	end_of_pattern:
6123*75f6d617Schristos #endif
6124*75f6d617Schristos 	  /* End of pattern means we might have succeeded.  */
6125*75f6d617Schristos 	  DEBUG_PRINT1 ("end of pattern ... ");
6126*75f6d617Schristos 
6127*75f6d617Schristos 	  /* If we haven't matched the entire string, and we want the
6128*75f6d617Schristos 	     longest match, try backtracking.  */
6129*75f6d617Schristos 	  if (d != end_match_2)
6130*75f6d617Schristos 	    {
6131*75f6d617Schristos 	      /* 1 if this match ends in the same string (string1 or string2)
6132*75f6d617Schristos 		 as the best previous match.  */
6133*75f6d617Schristos 	      boolean same_str_p = (FIRST_STRING_P (match_end)
6134*75f6d617Schristos 				    == MATCHING_IN_FIRST_STRING);
6135*75f6d617Schristos 	      /* 1 if this match is the best seen so far.  */
6136*75f6d617Schristos 	      boolean best_match_p;
6137*75f6d617Schristos 
6138*75f6d617Schristos 	      /* AIX compiler got confused when this was combined
6139*75f6d617Schristos 		 with the previous declaration.  */
6140*75f6d617Schristos 	      if (same_str_p)
6141*75f6d617Schristos 		best_match_p = d > match_end;
6142*75f6d617Schristos 	      else
6143*75f6d617Schristos 		best_match_p = !MATCHING_IN_FIRST_STRING;
6144*75f6d617Schristos 
6145*75f6d617Schristos 	      DEBUG_PRINT1 ("backtracking.\n");
6146*75f6d617Schristos 
6147*75f6d617Schristos 	      if (!FAIL_STACK_EMPTY ())
6148*75f6d617Schristos 		{ /* More failure points to try.  */
6149*75f6d617Schristos 
6150*75f6d617Schristos 		  /* If exceeds best match so far, save it.  */
6151*75f6d617Schristos 		  if (!best_regs_set || best_match_p)
6152*75f6d617Schristos 		    {
6153*75f6d617Schristos 		      best_regs_set = true;
6154*75f6d617Schristos 		      match_end = d;
6155*75f6d617Schristos 
6156*75f6d617Schristos 		      DEBUG_PRINT1 ("\nSAVING match as best so far.\n");
6157*75f6d617Schristos 
6158*75f6d617Schristos 		      for (mcnt = 1; (unsigned) mcnt < num_regs; mcnt++)
6159*75f6d617Schristos 			{
6160*75f6d617Schristos 			  best_regstart[mcnt] = regstart[mcnt];
6161*75f6d617Schristos 			  best_regend[mcnt] = regend[mcnt];
6162*75f6d617Schristos 			}
6163*75f6d617Schristos 		    }
6164*75f6d617Schristos 		  goto fail;
6165*75f6d617Schristos 		}
6166*75f6d617Schristos 
6167*75f6d617Schristos 	      /* If no failure points, don't restore garbage.  And if
6168*75f6d617Schristos 		 last match is real best match, don't restore second
6169*75f6d617Schristos 		 best one. */
6170*75f6d617Schristos 	      else if (best_regs_set && !best_match_p)
6171*75f6d617Schristos 		{
6172*75f6d617Schristos 		restore_best_regs:
6173*75f6d617Schristos 		  /* Restore best match.  It may happen that `dend ==
6174*75f6d617Schristos 		     end_match_1' while the restored d is in string2.
6175*75f6d617Schristos 		     For example, the pattern `x.*y.*z' against the
6176*75f6d617Schristos 		     strings `x-' and `y-z-', if the two strings are
6177*75f6d617Schristos 		     not consecutive in memory.  */
6178*75f6d617Schristos 		  DEBUG_PRINT1 ("Restoring best registers.\n");
6179*75f6d617Schristos 
6180*75f6d617Schristos 		  d = match_end;
6181*75f6d617Schristos 		  dend = ((d >= string1 && d <= end1)
6182*75f6d617Schristos 			  ? end_match_1 : end_match_2);
6183*75f6d617Schristos 
6184*75f6d617Schristos 		  for (mcnt = 1; (unsigned) mcnt < num_regs; mcnt++)
6185*75f6d617Schristos 		    {
6186*75f6d617Schristos 		      regstart[mcnt] = best_regstart[mcnt];
6187*75f6d617Schristos 		      regend[mcnt] = best_regend[mcnt];
6188*75f6d617Schristos 		    }
6189*75f6d617Schristos 		}
6190*75f6d617Schristos 	    } /* d != end_match_2 */
6191*75f6d617Schristos 
6192*75f6d617Schristos 	succeed_label:
6193*75f6d617Schristos 	  DEBUG_PRINT1 ("Accepting match.\n");
6194*75f6d617Schristos 	  /* If caller wants register contents data back, do it.  */
6195*75f6d617Schristos 	  if (regs && !bufp->no_sub)
6196*75f6d617Schristos 	    {
6197*75f6d617Schristos 	      /* Have the register data arrays been allocated?  */
6198*75f6d617Schristos 	      if (bufp->regs_allocated == REGS_UNALLOCATED)
6199*75f6d617Schristos 		{ /* No.  So allocate them with malloc.  We need one
6200*75f6d617Schristos 		     extra element beyond `num_regs' for the `-1' marker
6201*75f6d617Schristos 		     GNU code uses.  */
6202*75f6d617Schristos 		  regs->num_regs = MAX (RE_NREGS, num_regs + 1);
6203*75f6d617Schristos 		  regs->start = TALLOC (regs->num_regs, regoff_t);
6204*75f6d617Schristos 		  regs->end = TALLOC (regs->num_regs, regoff_t);
6205*75f6d617Schristos 		  if (regs->start == NULL || regs->end == NULL)
6206*75f6d617Schristos 		    {
6207*75f6d617Schristos 		      FREE_VARIABLES ();
6208*75f6d617Schristos 		      return -2;
6209*75f6d617Schristos 		    }
6210*75f6d617Schristos 		  bufp->regs_allocated = REGS_REALLOCATE;
6211*75f6d617Schristos 		}
6212*75f6d617Schristos 	      else if (bufp->regs_allocated == REGS_REALLOCATE)
6213*75f6d617Schristos 		{ /* Yes.  If we need more elements than were already
6214*75f6d617Schristos 		     allocated, reallocate them.  If we need fewer, just
6215*75f6d617Schristos 		     leave it alone.  */
6216*75f6d617Schristos 		  if (regs->num_regs < num_regs + 1)
6217*75f6d617Schristos 		    {
6218*75f6d617Schristos 		      regs->num_regs = num_regs + 1;
6219*75f6d617Schristos 		      RETALLOC (regs->start, regs->num_regs, regoff_t);
6220*75f6d617Schristos 		      RETALLOC (regs->end, regs->num_regs, regoff_t);
6221*75f6d617Schristos 		      if (regs->start == NULL || regs->end == NULL)
6222*75f6d617Schristos 			{
6223*75f6d617Schristos 			  FREE_VARIABLES ();
6224*75f6d617Schristos 			  return -2;
6225*75f6d617Schristos 			}
6226*75f6d617Schristos 		    }
6227*75f6d617Schristos 		}
6228*75f6d617Schristos 	      else
6229*75f6d617Schristos 		{
6230*75f6d617Schristos 		  /* These braces fend off a "empty body in an else-statement"
6231*75f6d617Schristos 		     warning under GCC when assert expands to nothing.  */
6232*75f6d617Schristos 		  assert (bufp->regs_allocated == REGS_FIXED);
6233*75f6d617Schristos 		}
6234*75f6d617Schristos 
6235*75f6d617Schristos 	      /* Convert the pointer data in `regstart' and `regend' to
6236*75f6d617Schristos 		 indices.  Register zero has to be set differently,
6237*75f6d617Schristos 		 since we haven't kept track of any info for it.  */
6238*75f6d617Schristos 	      if (regs->num_regs > 0)
6239*75f6d617Schristos 		{
6240*75f6d617Schristos 		  regs->start[0] = pos;
6241*75f6d617Schristos #ifdef WCHAR
6242*75f6d617Schristos 		  if (MATCHING_IN_FIRST_STRING)
6243*75f6d617Schristos 		    regs->end[0] = (mbs_offset1 != NULL ?
6244*75f6d617Schristos 				    mbs_offset1[d-string1] : 0);
6245*75f6d617Schristos 		  else
6246*75f6d617Schristos 		    regs->end[0] = csize1 + (mbs_offset2 != NULL
6247*75f6d617Schristos 					     ? mbs_offset2[d-string2] : 0);
6248*75f6d617Schristos #else
6249*75f6d617Schristos 		  regs->end[0] = (MATCHING_IN_FIRST_STRING
6250*75f6d617Schristos 				  ? ((regoff_t) (d - string1))
6251*75f6d617Schristos 				  : ((regoff_t) (d - string2 + size1)));
6252*75f6d617Schristos #endif /* WCHAR */
6253*75f6d617Schristos 		}
6254*75f6d617Schristos 
6255*75f6d617Schristos 	      /* Go through the first `min (num_regs, regs->num_regs)'
6256*75f6d617Schristos 		 registers, since that is all we initialized.  */
6257*75f6d617Schristos 	      for (mcnt = 1; (unsigned) mcnt < MIN (num_regs, regs->num_regs);
6258*75f6d617Schristos 		   mcnt++)
6259*75f6d617Schristos 		{
6260*75f6d617Schristos 		  if (REG_UNSET (regstart[mcnt]) || REG_UNSET (regend[mcnt]))
6261*75f6d617Schristos 		    regs->start[mcnt] = regs->end[mcnt] = -1;
6262*75f6d617Schristos 		  else
6263*75f6d617Schristos 		    {
6264*75f6d617Schristos 		      regs->start[mcnt]
6265*75f6d617Schristos 			= (regoff_t) POINTER_TO_OFFSET (regstart[mcnt]);
6266*75f6d617Schristos 		      regs->end[mcnt]
6267*75f6d617Schristos 			= (regoff_t) POINTER_TO_OFFSET (regend[mcnt]);
6268*75f6d617Schristos 		    }
6269*75f6d617Schristos 		}
6270*75f6d617Schristos 
6271*75f6d617Schristos 	      /* If the regs structure we return has more elements than
6272*75f6d617Schristos 		 were in the pattern, set the extra elements to -1.  If
6273*75f6d617Schristos 		 we (re)allocated the registers, this is the case,
6274*75f6d617Schristos 		 because we always allocate enough to have at least one
6275*75f6d617Schristos 		 -1 at the end.  */
6276*75f6d617Schristos 	      for (mcnt = num_regs; (unsigned) mcnt < regs->num_regs; mcnt++)
6277*75f6d617Schristos 		regs->start[mcnt] = regs->end[mcnt] = -1;
6278*75f6d617Schristos 	    } /* regs && !bufp->no_sub */
6279*75f6d617Schristos 
6280*75f6d617Schristos 	  DEBUG_PRINT4 ("%u failure points pushed, %u popped (%u remain).\n",
6281*75f6d617Schristos 			nfailure_points_pushed, nfailure_points_popped,
6282*75f6d617Schristos 			nfailure_points_pushed - nfailure_points_popped);
6283*75f6d617Schristos 	  DEBUG_PRINT2 ("%u registers pushed.\n", num_regs_pushed);
6284*75f6d617Schristos 
6285*75f6d617Schristos #ifdef WCHAR
6286*75f6d617Schristos 	  if (MATCHING_IN_FIRST_STRING)
6287*75f6d617Schristos 	    mcnt = mbs_offset1 != NULL ? mbs_offset1[d-string1] : 0;
6288*75f6d617Schristos 	  else
6289*75f6d617Schristos 	    mcnt = (mbs_offset2 != NULL ? mbs_offset2[d-string2] : 0) +
6290*75f6d617Schristos 	      csize1;
6291*75f6d617Schristos 	  mcnt -= pos;
6292*75f6d617Schristos #else
6293*75f6d617Schristos 	  mcnt = d - pos - (MATCHING_IN_FIRST_STRING
6294*75f6d617Schristos 			    ? string1 : string2 - size1);
6295*75f6d617Schristos #endif /* WCHAR */
6296*75f6d617Schristos 
6297*75f6d617Schristos 	  DEBUG_PRINT2 ("Returning %d from re_match_2.\n", mcnt);
6298*75f6d617Schristos 
6299*75f6d617Schristos 	  FREE_VARIABLES ();
6300*75f6d617Schristos 	  return mcnt;
6301*75f6d617Schristos 	}
6302*75f6d617Schristos 
6303*75f6d617Schristos #ifndef __GNUC__
6304*75f6d617Schristos       /* Otherwise match next pattern command.  */
6305*75f6d617Schristos       switch (SWITCH_ENUM_CAST ((re_opcode_t) *p++))
6306*75f6d617Schristos 	{
6307*75f6d617Schristos #endif
6308*75f6d617Schristos         /* Ignore these.  Used to ignore the n of succeed_n's which
6309*75f6d617Schristos            currently have n == 0.  */
6310*75f6d617Schristos         CASE (no_op):
6311*75f6d617Schristos           DEBUG_PRINT1 ("EXECUTING no_op.\n");
6312*75f6d617Schristos           NEXT;
6313*75f6d617Schristos 
6314*75f6d617Schristos 	CASE (succeed):
6315*75f6d617Schristos           DEBUG_PRINT1 ("EXECUTING succeed.\n");
6316*75f6d617Schristos 	  goto succeed_label;
6317*75f6d617Schristos 
6318*75f6d617Schristos         /* Match the next n pattern characters exactly.  The following
6319*75f6d617Schristos            byte in the pattern defines n, and the n bytes after that
6320*75f6d617Schristos            are the characters to match.  */
6321*75f6d617Schristos 	CASE (exactn):
6322*75f6d617Schristos #ifdef MBS_SUPPORT
6323*75f6d617Schristos 	CASE (exactn_bin):
6324*75f6d617Schristos #endif
6325*75f6d617Schristos 	  mcnt = *p++;
6326*75f6d617Schristos           DEBUG_PRINT2 ("EXECUTING exactn %d.\n", mcnt);
6327*75f6d617Schristos 
6328*75f6d617Schristos           /* This is written out as an if-else so we don't waste time
6329*75f6d617Schristos              testing `translate' inside the loop.  */
6330*75f6d617Schristos           if (translate)
6331*75f6d617Schristos 	    {
6332*75f6d617Schristos 	      do
6333*75f6d617Schristos 		{
6334*75f6d617Schristos 		  PREFETCH ();
6335*75f6d617Schristos #ifdef WCHAR
6336*75f6d617Schristos 		  if (*d <= 0xff)
6337*75f6d617Schristos 		    {
6338*75f6d617Schristos 		      if ((UCHAR_T) translate[(unsigned char) *d++]
6339*75f6d617Schristos 			  != (UCHAR_T) *p++)
6340*75f6d617Schristos 			goto fail;
6341*75f6d617Schristos 		    }
6342*75f6d617Schristos 		  else
6343*75f6d617Schristos 		    {
6344*75f6d617Schristos 		      if (*d++ != (CHAR_T) *p++)
6345*75f6d617Schristos 			goto fail;
6346*75f6d617Schristos 		    }
6347*75f6d617Schristos #else
6348*75f6d617Schristos 		  if ((UCHAR_T) translate[(unsigned char) *d++]
6349*75f6d617Schristos 		      != (UCHAR_T) *p++)
6350*75f6d617Schristos                     goto fail;
6351*75f6d617Schristos #endif /* WCHAR */
6352*75f6d617Schristos 		}
6353*75f6d617Schristos 	      while (--mcnt);
6354*75f6d617Schristos 	    }
6355*75f6d617Schristos 	  else
6356*75f6d617Schristos 	    {
6357*75f6d617Schristos 	      do
6358*75f6d617Schristos 		{
6359*75f6d617Schristos 		  PREFETCH ();
6360*75f6d617Schristos 		  if (*d++ != (CHAR_T) *p++) goto fail;
6361*75f6d617Schristos 		}
6362*75f6d617Schristos 	      while (--mcnt);
6363*75f6d617Schristos 	    }
6364*75f6d617Schristos 	  SET_REGS_MATCHED ();
6365*75f6d617Schristos           NEXT;
6366*75f6d617Schristos 
6367*75f6d617Schristos 
6368*75f6d617Schristos         /* Match any character except possibly a newline or a null.  */
6369*75f6d617Schristos 	CASE (anychar):
6370*75f6d617Schristos           DEBUG_PRINT1 ("EXECUTING anychar.\n");
6371*75f6d617Schristos 
6372*75f6d617Schristos           PREFETCH ();
6373*75f6d617Schristos 
6374*75f6d617Schristos           if ((!(bufp->syntax & RE_DOT_NEWLINE) && TRANSLATE (*d) == '\n')
6375*75f6d617Schristos               || (bufp->syntax & RE_DOT_NOT_NULL && TRANSLATE (*d) == '\000'))
6376*75f6d617Schristos 	    goto fail;
6377*75f6d617Schristos 
6378*75f6d617Schristos           SET_REGS_MATCHED ();
6379*75f6d617Schristos           DEBUG_PRINT2 ("  Matched `%ld'.\n", (long int) *d);
6380*75f6d617Schristos           d++;
6381*75f6d617Schristos 	  NEXT;
6382*75f6d617Schristos 
6383*75f6d617Schristos 
6384*75f6d617Schristos 	CASE (charset):
6385*75f6d617Schristos 	CASE (charset_not):
6386*75f6d617Schristos 	  {
6387*75f6d617Schristos 	    register UCHAR_T c;
6388*75f6d617Schristos #ifdef WCHAR
6389*75f6d617Schristos 	    unsigned int i, char_class_length, coll_symbol_length,
6390*75f6d617Schristos               equiv_class_length, ranges_length, chars_length, length;
6391*75f6d617Schristos 	    CHAR_T *workp, *workp2, *charset_top;
6392*75f6d617Schristos #define WORK_BUFFER_SIZE 128
6393*75f6d617Schristos             CHAR_T str_buf[WORK_BUFFER_SIZE];
6394*75f6d617Schristos # ifdef _LIBC
6395*75f6d617Schristos 	    uint32_t nrules;
6396*75f6d617Schristos # endif /* _LIBC */
6397*75f6d617Schristos #endif /* WCHAR */
6398*75f6d617Schristos 	    boolean not = (re_opcode_t) *(p - 1) == charset_not;
6399*75f6d617Schristos 
6400*75f6d617Schristos             DEBUG_PRINT2 ("EXECUTING charset%s.\n", not ? "_not" : "");
6401*75f6d617Schristos 	    PREFETCH ();
6402*75f6d617Schristos 	    c = TRANSLATE (*d); /* The character to match.  */
6403*75f6d617Schristos #ifdef WCHAR
6404*75f6d617Schristos # ifdef _LIBC
6405*75f6d617Schristos 	    nrules = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
6406*75f6d617Schristos # endif /* _LIBC */
6407*75f6d617Schristos 	    charset_top = p - 1;
6408*75f6d617Schristos 	    char_class_length = *p++;
6409*75f6d617Schristos 	    coll_symbol_length = *p++;
6410*75f6d617Schristos 	    equiv_class_length = *p++;
6411*75f6d617Schristos 	    ranges_length = *p++;
6412*75f6d617Schristos 	    chars_length = *p++;
6413*75f6d617Schristos 	    /* p points to charset[6], so the address of the next instruction
6414*75f6d617Schristos 	       (charset[6+l+m+n+2*o+p']) equals p[l+m+n+2*o+p'],
6415*75f6d617Schristos 	       where l=length of char_classes, m=length of collating_symbol,
6416*75f6d617Schristos 	       n=length of equivalence_class, o=length of char_range,
6417*75f6d617Schristos 	       p'=length of character.  */
6418*75f6d617Schristos 	    workp = p;
6419*75f6d617Schristos 	    /* Update p to indicate the next instruction.  */
6420*75f6d617Schristos 	    p += char_class_length + coll_symbol_length+ equiv_class_length +
6421*75f6d617Schristos               2*ranges_length + chars_length;
6422*75f6d617Schristos 
6423*75f6d617Schristos             /* match with char_class?  */
6424*75f6d617Schristos 	    for (i = 0; i < char_class_length ; i += CHAR_CLASS_SIZE)
6425*75f6d617Schristos 	      {
6426*75f6d617Schristos 		wctype_t wctype;
6427*75f6d617Schristos 		uintptr_t alignedp = ((uintptr_t)workp
6428*75f6d617Schristos 				      + __alignof__(wctype_t) - 1)
6429*75f6d617Schristos 		  		      & ~(uintptr_t)(__alignof__(wctype_t) - 1);
6430*75f6d617Schristos 		wctype = *((wctype_t*)alignedp);
6431*75f6d617Schristos 		workp += CHAR_CLASS_SIZE;
6432*75f6d617Schristos 		if (iswctype((wint_t)c, wctype))
6433*75f6d617Schristos 		  goto char_set_matched;
6434*75f6d617Schristos 	      }
6435*75f6d617Schristos 
6436*75f6d617Schristos             /* match with collating_symbol?  */
6437*75f6d617Schristos # ifdef _LIBC
6438*75f6d617Schristos 	    if (nrules != 0)
6439*75f6d617Schristos 	      {
6440*75f6d617Schristos 		const unsigned char *extra = (const unsigned char *)
6441*75f6d617Schristos 		  _NL_CURRENT (LC_COLLATE, _NL_COLLATE_SYMB_EXTRAMB);
6442*75f6d617Schristos 
6443*75f6d617Schristos 		for (workp2 = workp + coll_symbol_length ; workp < workp2 ;
6444*75f6d617Schristos 		     workp++)
6445*75f6d617Schristos 		  {
6446*75f6d617Schristos 		    int32_t *wextra;
6447*75f6d617Schristos 		    wextra = (int32_t*)(extra + *workp++);
6448*75f6d617Schristos 		    for (i = 0; i < *wextra; ++i)
6449*75f6d617Schristos 		      if (TRANSLATE(d[i]) != wextra[1 + i])
6450*75f6d617Schristos 			break;
6451*75f6d617Schristos 
6452*75f6d617Schristos 		    if (i == *wextra)
6453*75f6d617Schristos 		      {
6454*75f6d617Schristos 			/* Update d, however d will be incremented at
6455*75f6d617Schristos 			   char_set_matched:, we decrement d here.  */
6456*75f6d617Schristos 			d += i - 1;
6457*75f6d617Schristos 			goto char_set_matched;
6458*75f6d617Schristos 		      }
6459*75f6d617Schristos 		  }
6460*75f6d617Schristos 	      }
6461*75f6d617Schristos 	    else /* (nrules == 0) */
6462*75f6d617Schristos # endif
6463*75f6d617Schristos 	      /* If we can't look up collation data, we use wcscoll
6464*75f6d617Schristos 		 instead.  */
6465*75f6d617Schristos 	      {
6466*75f6d617Schristos 		for (workp2 = workp + coll_symbol_length ; workp < workp2 ;)
6467*75f6d617Schristos 		  {
6468*75f6d617Schristos 		    const CHAR_T *backup_d = d, *backup_dend = dend;
6469*75f6d617Schristos 		    length = wcslen (workp);
6470*75f6d617Schristos 
6471*75f6d617Schristos 		    /* If wcscoll(the collating symbol, whole string) > 0,
6472*75f6d617Schristos 		       no substring of the string can ever match the
6473*75f6d617Schristos 		       collating symbol.  */
6474*75f6d617Schristos 		    if (wcscoll (workp, d) > 0)
6475*75f6d617Schristos 		      {
6476*75f6d617Schristos 			workp += length + 1;
6477*75f6d617Schristos 			continue;
6478*75f6d617Schristos 		      }
6479*75f6d617Schristos 
6480*75f6d617Schristos 		    /* First, we compare the collating symbol with
6481*75f6d617Schristos 		       the first character of the string.
6482*75f6d617Schristos 		       If it doesn't match, we add the next character to
6483*75f6d617Schristos 		       the compare buffer in turn.  */
6484*75f6d617Schristos 		    for (i = 0 ; i < WORK_BUFFER_SIZE-1 ; i++, d++)
6485*75f6d617Schristos 		      {
6486*75f6d617Schristos 			int match;
6487*75f6d617Schristos 			if (d == dend)
6488*75f6d617Schristos 			  {
6489*75f6d617Schristos 			    if (dend == end_match_2)
6490*75f6d617Schristos 			      break;
6491*75f6d617Schristos 			    d = string2;
6492*75f6d617Schristos 			    dend = end_match_2;
6493*75f6d617Schristos 			  }
6494*75f6d617Schristos 
6495*75f6d617Schristos 			/* add next character to the compare buffer.  */
6496*75f6d617Schristos 			str_buf[i] = TRANSLATE(*d);
6497*75f6d617Schristos 			str_buf[i+1] = '\0';
6498*75f6d617Schristos 
6499*75f6d617Schristos 			match = wcscoll (workp, str_buf);
6500*75f6d617Schristos 			if (match == 0)
6501*75f6d617Schristos 			  goto char_set_matched;
6502*75f6d617Schristos 
6503*75f6d617Schristos 			if (match < 0)
6504*75f6d617Schristos 			  /* (str_buf > workp) indicates (str_buf + X > workp),
6505*75f6d617Schristos 			     because for all X (str_buf + X > str_buf).
6506*75f6d617Schristos 			     So we don't need to continue this loop.  */
6507*75f6d617Schristos 			  break;
6508*75f6d617Schristos 
6509*75f6d617Schristos 			/* Otherwise (str_buf < workp),
6510*75f6d617Schristos 			   (str_buf+next_character) may equal (workp).
6511*75f6d617Schristos 			   So we continue this loop.  */
6512*75f6d617Schristos 		      }
6513*75f6d617Schristos 		    /* not matched */
6514*75f6d617Schristos 		    d = backup_d;
6515*75f6d617Schristos 		    dend = backup_dend;
6516*75f6d617Schristos 		    workp += length + 1;
6517*75f6d617Schristos 		  }
6518*75f6d617Schristos               }
6519*75f6d617Schristos             /* match with equivalence_class?  */
6520*75f6d617Schristos # ifdef _LIBC
6521*75f6d617Schristos 	    if (nrules != 0)
6522*75f6d617Schristos 	      {
6523*75f6d617Schristos                 const CHAR_T *backup_d = d, *backup_dend = dend;
6524*75f6d617Schristos 		/* Try to match the equivalence class against
6525*75f6d617Schristos 		   those known to the collate implementation.  */
6526*75f6d617Schristos 		const int32_t *table;
6527*75f6d617Schristos 		const int32_t *weights;
6528*75f6d617Schristos 		const int32_t *extra;
6529*75f6d617Schristos 		const int32_t *indirect;
6530*75f6d617Schristos 		int32_t idx, idx2;
6531*75f6d617Schristos 		wint_t *cp;
6532*75f6d617Schristos 		size_t len;
6533*75f6d617Schristos 
6534*75f6d617Schristos 		/* This #include defines a local function!  */
6535*75f6d617Schristos #  include <locale/weightwc.h>
6536*75f6d617Schristos 
6537*75f6d617Schristos 		table = (const int32_t *)
6538*75f6d617Schristos 		  _NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEWC);
6539*75f6d617Schristos 		weights = (const wint_t *)
6540*75f6d617Schristos 		  _NL_CURRENT (LC_COLLATE, _NL_COLLATE_WEIGHTWC);
6541*75f6d617Schristos 		extra = (const wint_t *)
6542*75f6d617Schristos 		  _NL_CURRENT (LC_COLLATE, _NL_COLLATE_EXTRAWC);
6543*75f6d617Schristos 		indirect = (const int32_t *)
6544*75f6d617Schristos 		  _NL_CURRENT (LC_COLLATE, _NL_COLLATE_INDIRECTWC);
6545*75f6d617Schristos 
6546*75f6d617Schristos 		/* Write 1 collating element to str_buf, and
6547*75f6d617Schristos 		   get its index.  */
6548*75f6d617Schristos 		idx2 = 0;
6549*75f6d617Schristos 
6550*75f6d617Schristos 		for (i = 0 ; idx2 == 0 && i < WORK_BUFFER_SIZE - 1; i++)
6551*75f6d617Schristos 		  {
6552*75f6d617Schristos 		    cp = (wint_t*)str_buf;
6553*75f6d617Schristos 		    if (d == dend)
6554*75f6d617Schristos 		      {
6555*75f6d617Schristos 			if (dend == end_match_2)
6556*75f6d617Schristos 			  break;
6557*75f6d617Schristos 			d = string2;
6558*75f6d617Schristos 			dend = end_match_2;
6559*75f6d617Schristos 		      }
6560*75f6d617Schristos 		    str_buf[i] = TRANSLATE(*(d+i));
6561*75f6d617Schristos 		    str_buf[i+1] = '\0'; /* sentinel */
6562*75f6d617Schristos 		    idx2 = findidx ((const wint_t**)&cp);
6563*75f6d617Schristos 		  }
6564*75f6d617Schristos 
6565*75f6d617Schristos 		/* Update d.  Because d will be incremented at
6566*75f6d617Schristos 		   char_set_matched:, we decrement d here.  */
6567*75f6d617Schristos 		d = backup_d + ((wchar_t*)cp - (wchar_t*)str_buf - 1);
6568*75f6d617Schristos 		if (d >= dend)
6569*75f6d617Schristos 		  {
6570*75f6d617Schristos 		    if (dend == end_match_2)
6571*75f6d617Schristos 			d = dend;
6572*75f6d617Schristos 		    else
6573*75f6d617Schristos 		      {
6574*75f6d617Schristos 			d = string2;
6575*75f6d617Schristos 			dend = end_match_2;
6576*75f6d617Schristos 		      }
6577*75f6d617Schristos 		  }
6578*75f6d617Schristos 
6579*75f6d617Schristos 		len = weights[idx2];
6580*75f6d617Schristos 
6581*75f6d617Schristos 		for (workp2 = workp + equiv_class_length ; workp < workp2 ;
6582*75f6d617Schristos 		     workp++)
6583*75f6d617Schristos 		  {
6584*75f6d617Schristos 		    idx = (int32_t)*workp;
6585*75f6d617Schristos 		    /* We already checked idx != 0 in regex_compile. */
6586*75f6d617Schristos 
6587*75f6d617Schristos 		    if (idx2 != 0 && len == weights[idx])
6588*75f6d617Schristos 		      {
6589*75f6d617Schristos 			int cnt = 0;
6590*75f6d617Schristos 			while (cnt < len && (weights[idx + 1 + cnt]
6591*75f6d617Schristos 					     == weights[idx2 + 1 + cnt]))
6592*75f6d617Schristos 			  ++cnt;
6593*75f6d617Schristos 
6594*75f6d617Schristos 			if (cnt == len)
6595*75f6d617Schristos 			  goto char_set_matched;
6596*75f6d617Schristos 		      }
6597*75f6d617Schristos 		  }
6598*75f6d617Schristos 		/* not matched */
6599*75f6d617Schristos                 d = backup_d;
6600*75f6d617Schristos                 dend = backup_dend;
6601*75f6d617Schristos 	      }
6602*75f6d617Schristos 	    else /* (nrules == 0) */
6603*75f6d617Schristos # endif
6604*75f6d617Schristos 	      /* If we can't look up collation data, we use wcscoll
6605*75f6d617Schristos 		 instead.  */
6606*75f6d617Schristos 	      {
6607*75f6d617Schristos 		for (workp2 = workp + equiv_class_length ; workp < workp2 ;)
6608*75f6d617Schristos 		  {
6609*75f6d617Schristos 		    const CHAR_T *backup_d = d, *backup_dend = dend;
6610*75f6d617Schristos 		    length = wcslen (workp);
6611*75f6d617Schristos 
6612*75f6d617Schristos 		    /* If wcscoll(the equivalence class, whole string) > 0,
6613*75f6d617Schristos 		       no substring of the string can ever match the
6614*75f6d617Schristos 		       equivalence class.  */
6615*75f6d617Schristos 		    if (wcscoll (workp, d) > 0)
6616*75f6d617Schristos 		      {
6617*75f6d617Schristos 			workp += length + 1;
6618*75f6d617Schristos 			break;
6619*75f6d617Schristos 		      }
6620*75f6d617Schristos 
6621*75f6d617Schristos 		    /* First, we compare the equivalence class with
6622*75f6d617Schristos 		       the first character of the string.
6623*75f6d617Schristos 		       If it doesn't match, we add the next character to
6624*75f6d617Schristos 		       the compare buffer in turn.  */
6625*75f6d617Schristos 		    for (i = 0 ; i < WORK_BUFFER_SIZE - 1 ; i++, d++)
6626*75f6d617Schristos 		      {
6627*75f6d617Schristos 			int match;
6628*75f6d617Schristos 			if (d == dend)
6629*75f6d617Schristos 			  {
6630*75f6d617Schristos 			    if (dend == end_match_2)
6631*75f6d617Schristos 			      break;
6632*75f6d617Schristos 			    d = string2;
6633*75f6d617Schristos 			    dend = end_match_2;
6634*75f6d617Schristos 			  }
6635*75f6d617Schristos 
6636*75f6d617Schristos 			/* add next character to the compare buffer.  */
6637*75f6d617Schristos 			str_buf[i] = TRANSLATE(*d);
6638*75f6d617Schristos 			str_buf[i+1] = '\0';
6639*75f6d617Schristos 
6640*75f6d617Schristos 			match = wcscoll (workp, str_buf);
6641*75f6d617Schristos 
6642*75f6d617Schristos 			if (match == 0)
6643*75f6d617Schristos 			  goto char_set_matched;
6644*75f6d617Schristos 
6645*75f6d617Schristos 			if (match < 0)
6646*75f6d617Schristos 			/* (str_buf > workp) indicates (str_buf + X > workp),
6647*75f6d617Schristos 			   because for all X (str_buf + X > str_buf).
6648*75f6d617Schristos 			   So we don't need to continue this loop.  */
6649*75f6d617Schristos 			  break;
6650*75f6d617Schristos 
6651*75f6d617Schristos 			/* Otherwise (str_buf < workp),
6652*75f6d617Schristos 			   (str_buf+next_character) may equal (workp).
6653*75f6d617Schristos 			   So we continue this loop.  */
6654*75f6d617Schristos 		      }
6655*75f6d617Schristos 		    /* not matched */
6656*75f6d617Schristos 		    d = backup_d;
6657*75f6d617Schristos 		    dend = backup_dend;
6658*75f6d617Schristos 		    workp += length + 1;
6659*75f6d617Schristos 		  }
6660*75f6d617Schristos 	      }
6661*75f6d617Schristos 
6662*75f6d617Schristos             /* match with char_range?  */
6663*75f6d617Schristos # ifdef _LIBC
6664*75f6d617Schristos 	    if (nrules != 0)
6665*75f6d617Schristos 	      {
6666*75f6d617Schristos 		uint32_t collseqval;
6667*75f6d617Schristos 		const char *collseq = (const char *)
6668*75f6d617Schristos 		  _NL_CURRENT(LC_COLLATE, _NL_COLLATE_COLLSEQWC);
6669*75f6d617Schristos 
6670*75f6d617Schristos 		collseqval = collseq_table_lookup (collseq, c);
6671*75f6d617Schristos 
6672*75f6d617Schristos 		for (; workp < p - chars_length ;)
6673*75f6d617Schristos 		  {
6674*75f6d617Schristos 		    uint32_t start_val, end_val;
6675*75f6d617Schristos 
6676*75f6d617Schristos 		    /* We have already computed the collation sequence values
6677*75f6d617Schristos 		       of the characters (or collating symbols).  */
6678*75f6d617Schristos 		    start_val = (uint32_t) *workp++; /* range_start */
6679*75f6d617Schristos 		    end_val = (uint32_t) *workp++; /* range_end */
6680*75f6d617Schristos 
6681*75f6d617Schristos 		    if (start_val <= collseqval && collseqval <= end_val)
6682*75f6d617Schristos 		      goto char_set_matched;
6683*75f6d617Schristos 		  }
6684*75f6d617Schristos 	      }
6685*75f6d617Schristos 	    else
6686*75f6d617Schristos # endif
6687*75f6d617Schristos 	      {
6688*75f6d617Schristos 		/* We set range_start_char at str_buf[0], range_end_char
6689*75f6d617Schristos 		   at str_buf[4], and compared char at str_buf[2].  */
6690*75f6d617Schristos 		str_buf[1] = 0;
6691*75f6d617Schristos 		str_buf[2] = c;
6692*75f6d617Schristos 		str_buf[3] = 0;
6693*75f6d617Schristos 		str_buf[5] = 0;
6694*75f6d617Schristos 		for (; workp < p - chars_length ;)
6695*75f6d617Schristos 		  {
6696*75f6d617Schristos 		    wchar_t *range_start_char, *range_end_char;
6697*75f6d617Schristos 
6698*75f6d617Schristos 		    /* match if (range_start_char <= c <= range_end_char).  */
6699*75f6d617Schristos 
6700*75f6d617Schristos 		    /* If range_start(or end) < 0, we assume -range_start(end)
6701*75f6d617Schristos 		       is the offset of the collating symbol which is specified
6702*75f6d617Schristos 		       as the character of the range start(end).  */
6703*75f6d617Schristos 
6704*75f6d617Schristos 		    /* range_start */
6705*75f6d617Schristos 		    if (*workp < 0)
6706*75f6d617Schristos 		      range_start_char = charset_top - (*workp++);
6707*75f6d617Schristos 		    else
6708*75f6d617Schristos 		      {
6709*75f6d617Schristos 			str_buf[0] = *workp++;
6710*75f6d617Schristos 			range_start_char = str_buf;
6711*75f6d617Schristos 		      }
6712*75f6d617Schristos 
6713*75f6d617Schristos 		    /* range_end */
6714*75f6d617Schristos 		    if (*workp < 0)
6715*75f6d617Schristos 		      range_end_char = charset_top - (*workp++);
6716*75f6d617Schristos 		    else
6717*75f6d617Schristos 		      {
6718*75f6d617Schristos 			str_buf[4] = *workp++;
6719*75f6d617Schristos 			range_end_char = str_buf + 4;
6720*75f6d617Schristos 		      }
6721*75f6d617Schristos 
6722*75f6d617Schristos 		    if (wcscoll (range_start_char, str_buf+2) <= 0
6723*75f6d617Schristos 			&& wcscoll (str_buf+2, range_end_char) <= 0)
6724*75f6d617Schristos 		      goto char_set_matched;
6725*75f6d617Schristos 		  }
6726*75f6d617Schristos 	      }
6727*75f6d617Schristos 
6728*75f6d617Schristos             /* match with char?  */
6729*75f6d617Schristos 	    for (; workp < p ; workp++)
6730*75f6d617Schristos 	      if (c == *workp)
6731*75f6d617Schristos 		goto char_set_matched;
6732*75f6d617Schristos 
6733*75f6d617Schristos 	    not = !not;
6734*75f6d617Schristos 
6735*75f6d617Schristos 	  char_set_matched:
6736*75f6d617Schristos 	    if (not) goto fail;
6737*75f6d617Schristos #else
6738*75f6d617Schristos             /* Cast to `unsigned' instead of `unsigned char' in case the
6739*75f6d617Schristos                bit list is a full 32 bytes long.  */
6740*75f6d617Schristos 	    if (c < (unsigned) (*p * BYTEWIDTH)
6741*75f6d617Schristos 		&& p[1 + c / BYTEWIDTH] & (1 << (c % BYTEWIDTH)))
6742*75f6d617Schristos 	      not = !not;
6743*75f6d617Schristos 
6744*75f6d617Schristos 	    p += 1 + *p;
6745*75f6d617Schristos 
6746*75f6d617Schristos 	    if (!not) goto fail;
6747*75f6d617Schristos #undef WORK_BUFFER_SIZE
6748*75f6d617Schristos #endif /* WCHAR */
6749*75f6d617Schristos 	    SET_REGS_MATCHED ();
6750*75f6d617Schristos             d++;
6751*75f6d617Schristos 	    NEXT;
6752*75f6d617Schristos 	  }
6753*75f6d617Schristos 
6754*75f6d617Schristos 
6755*75f6d617Schristos         /* The beginning of a group is represented by start_memory.
6756*75f6d617Schristos            The arguments are the register number in the next byte, and the
6757*75f6d617Schristos            number of groups inner to this one in the next.  The text
6758*75f6d617Schristos            matched within the group is recorded (in the internal
6759*75f6d617Schristos            registers data structure) under the register number.  */
6760*75f6d617Schristos         CASE (start_memory):
6761*75f6d617Schristos 	  DEBUG_PRINT3 ("EXECUTING start_memory %ld (%ld):\n",
6762*75f6d617Schristos 			(long int) *p, (long int) p[1]);
6763*75f6d617Schristos 
6764*75f6d617Schristos           /* Find out if this group can match the empty string.  */
6765*75f6d617Schristos 	  p1 = p;		/* To send to group_match_null_string_p.  */
6766*75f6d617Schristos 
6767*75f6d617Schristos           if (REG_MATCH_NULL_STRING_P (reg_info[*p]) == MATCH_NULL_UNSET_VALUE)
6768*75f6d617Schristos             REG_MATCH_NULL_STRING_P (reg_info[*p])
6769*75f6d617Schristos               = PREFIX(group_match_null_string_p) (&p1, pend, reg_info);
6770*75f6d617Schristos 
6771*75f6d617Schristos           /* Save the position in the string where we were the last time
6772*75f6d617Schristos              we were at this open-group operator in case the group is
6773*75f6d617Schristos              operated upon by a repetition operator, e.g., with `(a*)*b'
6774*75f6d617Schristos              against `ab'; then we want to ignore where we are now in
6775*75f6d617Schristos              the string in case this attempt to match fails.  */
6776*75f6d617Schristos           old_regstart[*p] = REG_MATCH_NULL_STRING_P (reg_info[*p])
6777*75f6d617Schristos                              ? REG_UNSET (regstart[*p]) ? d : regstart[*p]
6778*75f6d617Schristos                              : regstart[*p];
6779*75f6d617Schristos 	  DEBUG_PRINT2 ("  old_regstart: %d\n",
6780*75f6d617Schristos 			 POINTER_TO_OFFSET (old_regstart[*p]));
6781*75f6d617Schristos 
6782*75f6d617Schristos           regstart[*p] = d;
6783*75f6d617Schristos 	  DEBUG_PRINT2 ("  regstart: %d\n", POINTER_TO_OFFSET (regstart[*p]));
6784*75f6d617Schristos 
6785*75f6d617Schristos           IS_ACTIVE (reg_info[*p]) = 1;
6786*75f6d617Schristos           MATCHED_SOMETHING (reg_info[*p]) = 0;
6787*75f6d617Schristos 
6788*75f6d617Schristos 	  /* Clear this whenever we change the register activity status.  */
6789*75f6d617Schristos 	  set_regs_matched_done = 0;
6790*75f6d617Schristos 
6791*75f6d617Schristos           /* This is the new highest active register.  */
6792*75f6d617Schristos           highest_active_reg = *p;
6793*75f6d617Schristos 
6794*75f6d617Schristos           /* If nothing was active before, this is the new lowest active
6795*75f6d617Schristos              register.  */
6796*75f6d617Schristos           if (lowest_active_reg == NO_LOWEST_ACTIVE_REG)
6797*75f6d617Schristos             lowest_active_reg = *p;
6798*75f6d617Schristos 
6799*75f6d617Schristos           /* Move past the register number and inner group count.  */
6800*75f6d617Schristos           p += 2;
6801*75f6d617Schristos 	  just_past_start_mem = p;
6802*75f6d617Schristos 
6803*75f6d617Schristos           NEXT;
6804*75f6d617Schristos 
6805*75f6d617Schristos 
6806*75f6d617Schristos         /* The stop_memory opcode represents the end of a group.  Its
6807*75f6d617Schristos            arguments are the same as start_memory's: the register
6808*75f6d617Schristos            number, and the number of inner groups.  */
6809*75f6d617Schristos 	CASE (stop_memory):
6810*75f6d617Schristos 	  DEBUG_PRINT3 ("EXECUTING stop_memory %ld (%ld):\n",
6811*75f6d617Schristos 			(long int) *p, (long int) p[1]);
6812*75f6d617Schristos 
6813*75f6d617Schristos           /* We need to save the string position the last time we were at
6814*75f6d617Schristos              this close-group operator in case the group is operated
6815*75f6d617Schristos              upon by a repetition operator, e.g., with `((a*)*(b*)*)*'
6816*75f6d617Schristos              against `aba'; then we want to ignore where we are now in
6817*75f6d617Schristos              the string in case this attempt to match fails.  */
6818*75f6d617Schristos           old_regend[*p] = REG_MATCH_NULL_STRING_P (reg_info[*p])
6819*75f6d617Schristos                            ? REG_UNSET (regend[*p]) ? d : regend[*p]
6820*75f6d617Schristos 			   : regend[*p];
6821*75f6d617Schristos 	  DEBUG_PRINT2 ("      old_regend: %d\n",
6822*75f6d617Schristos 			 POINTER_TO_OFFSET (old_regend[*p]));
6823*75f6d617Schristos 
6824*75f6d617Schristos           regend[*p] = d;
6825*75f6d617Schristos 	  DEBUG_PRINT2 ("      regend: %d\n", POINTER_TO_OFFSET (regend[*p]));
6826*75f6d617Schristos 
6827*75f6d617Schristos           /* This register isn't active anymore.  */
6828*75f6d617Schristos           IS_ACTIVE (reg_info[*p]) = 0;
6829*75f6d617Schristos 
6830*75f6d617Schristos 	  /* Clear this whenever we change the register activity status.  */
6831*75f6d617Schristos 	  set_regs_matched_done = 0;
6832*75f6d617Schristos 
6833*75f6d617Schristos           /* If this was the only register active, nothing is active
6834*75f6d617Schristos              anymore.  */
6835*75f6d617Schristos           if (lowest_active_reg == highest_active_reg)
6836*75f6d617Schristos             {
6837*75f6d617Schristos               lowest_active_reg = NO_LOWEST_ACTIVE_REG;
6838*75f6d617Schristos               highest_active_reg = NO_HIGHEST_ACTIVE_REG;
6839*75f6d617Schristos             }
6840*75f6d617Schristos           else
6841*75f6d617Schristos             { /* We must scan for the new highest active register, since
6842*75f6d617Schristos                  it isn't necessarily one less than now: consider
6843*75f6d617Schristos                  (a(b)c(d(e)f)g).  When group 3 ends, after the f), the
6844*75f6d617Schristos                  new highest active register is 1.  */
6845*75f6d617Schristos               UCHAR_T r = *p - 1;
6846*75f6d617Schristos               while (r > 0 && !IS_ACTIVE (reg_info[r]))
6847*75f6d617Schristos                 r--;
6848*75f6d617Schristos 
6849*75f6d617Schristos               /* If we end up at register zero, that means that we saved
6850*75f6d617Schristos                  the registers as the result of an `on_failure_jump', not
6851*75f6d617Schristos                  a `start_memory', and we jumped past the innermost
6852*75f6d617Schristos                  `stop_memory'.  For example, in ((.)*) we save
6853*75f6d617Schristos                  registers 1 and 2 as a result of the *, but when we pop
6854*75f6d617Schristos                  back to the second ), we are at the stop_memory 1.
6855*75f6d617Schristos                  Thus, nothing is active.  */
6856*75f6d617Schristos 	      if (r == 0)
6857*75f6d617Schristos                 {
6858*75f6d617Schristos                   lowest_active_reg = NO_LOWEST_ACTIVE_REG;
6859*75f6d617Schristos                   highest_active_reg = NO_HIGHEST_ACTIVE_REG;
6860*75f6d617Schristos                 }
6861*75f6d617Schristos               else
6862*75f6d617Schristos                 highest_active_reg = r;
6863*75f6d617Schristos             }
6864*75f6d617Schristos 
6865*75f6d617Schristos           /* If we just failed to match something this time around with a
6866*75f6d617Schristos              group that's operated on by a repetition operator, try to
6867*75f6d617Schristos              force exit from the ``loop'', and restore the register
6868*75f6d617Schristos              information for this group that we had before trying this
6869*75f6d617Schristos              last match.  */
6870*75f6d617Schristos           if ((!MATCHED_SOMETHING (reg_info[*p])
6871*75f6d617Schristos                || just_past_start_mem == p - 1)
6872*75f6d617Schristos 	      && (p + 2) < pend)
6873*75f6d617Schristos             {
6874*75f6d617Schristos               boolean is_a_jump_n = false;
6875*75f6d617Schristos 
6876*75f6d617Schristos               p1 = p + 2;
6877*75f6d617Schristos               mcnt = 0;
6878*75f6d617Schristos               switch ((re_opcode_t) *p1++)
6879*75f6d617Schristos                 {
6880*75f6d617Schristos                   case jump_n:
6881*75f6d617Schristos 		    is_a_jump_n = true;
6882*75f6d617Schristos                   case pop_failure_jump:
6883*75f6d617Schristos 		  case maybe_pop_jump:
6884*75f6d617Schristos 		  case jump:
6885*75f6d617Schristos 		  case dummy_failure_jump:
6886*75f6d617Schristos                     EXTRACT_NUMBER_AND_INCR (mcnt, p1);
6887*75f6d617Schristos 		    if (is_a_jump_n)
6888*75f6d617Schristos 		      p1 += OFFSET_ADDRESS_SIZE;
6889*75f6d617Schristos                     break;
6890*75f6d617Schristos 
6891*75f6d617Schristos                   default:
6892*75f6d617Schristos                     /* do nothing */ ;
6893*75f6d617Schristos                 }
6894*75f6d617Schristos 	      p1 += mcnt;
6895*75f6d617Schristos 
6896*75f6d617Schristos               /* If the next operation is a jump backwards in the pattern
6897*75f6d617Schristos 	         to an on_failure_jump right before the start_memory
6898*75f6d617Schristos                  corresponding to this stop_memory, exit from the loop
6899*75f6d617Schristos                  by forcing a failure after pushing on the stack the
6900*75f6d617Schristos                  on_failure_jump's jump in the pattern, and d.  */
6901*75f6d617Schristos               if (mcnt < 0 && (re_opcode_t) *p1 == on_failure_jump
6902*75f6d617Schristos                   && (re_opcode_t) p1[1+OFFSET_ADDRESS_SIZE] == start_memory
6903*75f6d617Schristos 		  && p1[2+OFFSET_ADDRESS_SIZE] == *p)
6904*75f6d617Schristos 		{
6905*75f6d617Schristos                   /* If this group ever matched anything, then restore
6906*75f6d617Schristos                      what its registers were before trying this last
6907*75f6d617Schristos                      failed match, e.g., with `(a*)*b' against `ab' for
6908*75f6d617Schristos                      regstart[1], and, e.g., with `((a*)*(b*)*)*'
6909*75f6d617Schristos                      against `aba' for regend[3].
6910*75f6d617Schristos 
6911*75f6d617Schristos                      Also restore the registers for inner groups for,
6912*75f6d617Schristos                      e.g., `((a*)(b*))*' against `aba' (register 3 would
6913*75f6d617Schristos                      otherwise get trashed).  */
6914*75f6d617Schristos 
6915*75f6d617Schristos                   if (EVER_MATCHED_SOMETHING (reg_info[*p]))
6916*75f6d617Schristos 		    {
6917*75f6d617Schristos 		      unsigned r;
6918*75f6d617Schristos 
6919*75f6d617Schristos                       EVER_MATCHED_SOMETHING (reg_info[*p]) = 0;
6920*75f6d617Schristos 
6921*75f6d617Schristos 		      /* Restore this and inner groups' (if any) registers.  */
6922*75f6d617Schristos                       for (r = *p; r < (unsigned) *p + (unsigned) *(p + 1);
6923*75f6d617Schristos 			   r++)
6924*75f6d617Schristos                         {
6925*75f6d617Schristos                           regstart[r] = old_regstart[r];
6926*75f6d617Schristos 
6927*75f6d617Schristos                           /* xx why this test?  */
6928*75f6d617Schristos                           if (old_regend[r] >= regstart[r])
6929*75f6d617Schristos                             regend[r] = old_regend[r];
6930*75f6d617Schristos                         }
6931*75f6d617Schristos                     }
6932*75f6d617Schristos 		  p1++;
6933*75f6d617Schristos                   EXTRACT_NUMBER_AND_INCR (mcnt, p1);
6934*75f6d617Schristos                   PUSH_FAILURE_POINT (p1 + mcnt, d, -2);
6935*75f6d617Schristos 
6936*75f6d617Schristos                   goto fail;
6937*75f6d617Schristos                 }
6938*75f6d617Schristos             }
6939*75f6d617Schristos 
6940*75f6d617Schristos           /* Move past the register number and the inner group count.  */
6941*75f6d617Schristos           p += 2;
6942*75f6d617Schristos           NEXT;
6943*75f6d617Schristos 
6944*75f6d617Schristos 
6945*75f6d617Schristos 	/* \<digit> has been turned into a `duplicate' command which is
6946*75f6d617Schristos            followed by the numeric value of <digit> as the register number.  */
6947*75f6d617Schristos         CASE (duplicate):
6948*75f6d617Schristos 	  {
6949*75f6d617Schristos 	    register const CHAR_T *d2, *dend2;
6950*75f6d617Schristos 	    int regno = *p++;   /* Get which register to match against.  */
6951*75f6d617Schristos 	    DEBUG_PRINT2 ("EXECUTING duplicate %d.\n", regno);
6952*75f6d617Schristos 
6953*75f6d617Schristos 	    /* Can't back reference a group which we've never matched.  */
6954*75f6d617Schristos             if (REG_UNSET (regstart[regno]) || REG_UNSET (regend[regno]))
6955*75f6d617Schristos               goto fail;
6956*75f6d617Schristos 
6957*75f6d617Schristos             /* Where in input to try to start matching.  */
6958*75f6d617Schristos             d2 = regstart[regno];
6959*75f6d617Schristos 
6960*75f6d617Schristos             /* Where to stop matching; if both the place to start and
6961*75f6d617Schristos                the place to stop matching are in the same string, then
6962*75f6d617Schristos                set to the place to stop, otherwise, for now have to use
6963*75f6d617Schristos                the end of the first string.  */
6964*75f6d617Schristos 
6965*75f6d617Schristos             dend2 = ((FIRST_STRING_P (regstart[regno])
6966*75f6d617Schristos 		      == FIRST_STRING_P (regend[regno]))
6967*75f6d617Schristos 		     ? regend[regno] : end_match_1);
6968*75f6d617Schristos 	    for (;;)
6969*75f6d617Schristos 	      {
6970*75f6d617Schristos 		/* If necessary, advance to next segment in register
6971*75f6d617Schristos                    contents.  */
6972*75f6d617Schristos 		while (d2 == dend2)
6973*75f6d617Schristos 		  {
6974*75f6d617Schristos 		    if (dend2 == end_match_2) break;
6975*75f6d617Schristos 		    if (dend2 == regend[regno]) break;
6976*75f6d617Schristos 
6977*75f6d617Schristos                     /* End of string1 => advance to string2. */
6978*75f6d617Schristos                     d2 = string2;
6979*75f6d617Schristos                     dend2 = regend[regno];
6980*75f6d617Schristos 		  }
6981*75f6d617Schristos 		/* At end of register contents => success */
6982*75f6d617Schristos 		if (d2 == dend2) break;
6983*75f6d617Schristos 
6984*75f6d617Schristos 		/* If necessary, advance to next segment in data.  */
6985*75f6d617Schristos 		PREFETCH ();
6986*75f6d617Schristos 
6987*75f6d617Schristos 		/* How many characters left in this segment to match.  */
6988*75f6d617Schristos 		mcnt = dend - d;
6989*75f6d617Schristos 
6990*75f6d617Schristos 		/* Want how many consecutive characters we can match in
6991*75f6d617Schristos                    one shot, so, if necessary, adjust the count.  */
6992*75f6d617Schristos                 if (mcnt > dend2 - d2)
6993*75f6d617Schristos 		  mcnt = dend2 - d2;
6994*75f6d617Schristos 
6995*75f6d617Schristos 		/* Compare that many; failure if mismatch, else move
6996*75f6d617Schristos                    past them.  */
6997*75f6d617Schristos 		if (translate
6998*75f6d617Schristos                     ? PREFIX(bcmp_translate) (d, d2, mcnt, translate)
6999*75f6d617Schristos                     : memcmp (d, d2, mcnt*sizeof(UCHAR_T)))
7000*75f6d617Schristos 		  goto fail;
7001*75f6d617Schristos 		d += mcnt, d2 += mcnt;
7002*75f6d617Schristos 
7003*75f6d617Schristos 		/* Do this because we've matched some characters.  */
7004*75f6d617Schristos 		SET_REGS_MATCHED ();
7005*75f6d617Schristos 	      }
7006*75f6d617Schristos 	  }
7007*75f6d617Schristos 	  NEXT;
7008*75f6d617Schristos 
7009*75f6d617Schristos 
7010*75f6d617Schristos         /* begline matches the empty string at the beginning of the string
7011*75f6d617Schristos            (unless `not_bol' is set in `bufp'), and, if
7012*75f6d617Schristos            `newline_anchor' is set, after newlines.  */
7013*75f6d617Schristos 	CASE (begline):
7014*75f6d617Schristos           DEBUG_PRINT1 ("EXECUTING begline.\n");
7015*75f6d617Schristos 
7016*75f6d617Schristos           if (AT_STRINGS_BEG (d))
7017*75f6d617Schristos             {
7018*75f6d617Schristos               if (!bufp->not_bol)
7019*75f6d617Schristos 		{
7020*75f6d617Schristos 		  NEXT;
7021*75f6d617Schristos 		}
7022*75f6d617Schristos             }
7023*75f6d617Schristos           else if (d[-1] == '\n' && bufp->newline_anchor)
7024*75f6d617Schristos             {
7025*75f6d617Schristos               NEXT;
7026*75f6d617Schristos             }
7027*75f6d617Schristos           /* In all other cases, we fail.  */
7028*75f6d617Schristos           goto fail;
7029*75f6d617Schristos 
7030*75f6d617Schristos 
7031*75f6d617Schristos         /* endline is the dual of begline.  */
7032*75f6d617Schristos 	CASE (endline):
7033*75f6d617Schristos           DEBUG_PRINT1 ("EXECUTING endline.\n");
7034*75f6d617Schristos 
7035*75f6d617Schristos           if (AT_STRINGS_END (d))
7036*75f6d617Schristos             {
7037*75f6d617Schristos               if (!bufp->not_eol)
7038*75f6d617Schristos 		{
7039*75f6d617Schristos 		  NEXT;
7040*75f6d617Schristos 		}
7041*75f6d617Schristos             }
7042*75f6d617Schristos 
7043*75f6d617Schristos           /* We have to ``prefetch'' the next character.  */
7044*75f6d617Schristos           else if ((d == end1 ? *string2 : *d) == '\n'
7045*75f6d617Schristos                    && bufp->newline_anchor)
7046*75f6d617Schristos             {
7047*75f6d617Schristos               NEXT;
7048*75f6d617Schristos             }
7049*75f6d617Schristos           goto fail;
7050*75f6d617Schristos 
7051*75f6d617Schristos 
7052*75f6d617Schristos 	/* Match at the very beginning of the data.  */
7053*75f6d617Schristos         CASE (begbuf):
7054*75f6d617Schristos           DEBUG_PRINT1 ("EXECUTING begbuf.\n");
7055*75f6d617Schristos           if (AT_STRINGS_BEG (d))
7056*75f6d617Schristos 	    {
7057*75f6d617Schristos 	      NEXT;
7058*75f6d617Schristos 	    }
7059*75f6d617Schristos           goto fail;
7060*75f6d617Schristos 
7061*75f6d617Schristos 
7062*75f6d617Schristos 	/* Match at the very end of the data.  */
7063*75f6d617Schristos         CASE (endbuf):
7064*75f6d617Schristos           DEBUG_PRINT1 ("EXECUTING endbuf.\n");
7065*75f6d617Schristos 	  if (AT_STRINGS_END (d))
7066*75f6d617Schristos 	    {
7067*75f6d617Schristos 	      NEXT;
7068*75f6d617Schristos 	    }
7069*75f6d617Schristos           goto fail;
7070*75f6d617Schristos 
7071*75f6d617Schristos 
7072*75f6d617Schristos         /* on_failure_keep_string_jump is used to optimize `.*\n'.  It
7073*75f6d617Schristos            pushes NULL as the value for the string on the stack.  Then
7074*75f6d617Schristos            `pop_failure_point' will keep the current value for the
7075*75f6d617Schristos            string, instead of restoring it.  To see why, consider
7076*75f6d617Schristos            matching `foo\nbar' against `.*\n'.  The .* matches the foo;
7077*75f6d617Schristos            then the . fails against the \n.  But the next thing we want
7078*75f6d617Schristos            to do is match the \n against the \n; if we restored the
7079*75f6d617Schristos            string value, we would be back at the foo.
7080*75f6d617Schristos 
7081*75f6d617Schristos            Because this is used only in specific cases, we don't need to
7082*75f6d617Schristos            check all the things that `on_failure_jump' does, to make
7083*75f6d617Schristos            sure the right things get saved on the stack.  Hence we don't
7084*75f6d617Schristos            share its code.  The only reason to push anything on the
7085*75f6d617Schristos            stack at all is that otherwise we would have to change
7086*75f6d617Schristos            `anychar's code to do something besides goto fail in this
7087*75f6d617Schristos            case; that seems worse than this.  */
7088*75f6d617Schristos         CASE (on_failure_keep_string_jump):
7089*75f6d617Schristos           DEBUG_PRINT1 ("EXECUTING on_failure_keep_string_jump");
7090*75f6d617Schristos 
7091*75f6d617Schristos           EXTRACT_NUMBER_AND_INCR (mcnt, p);
7092*75f6d617Schristos #ifdef _LIBC
7093*75f6d617Schristos           DEBUG_PRINT3 (" %d (to %p):\n", mcnt, p + mcnt);
7094*75f6d617Schristos #else
7095*75f6d617Schristos           DEBUG_PRINT3 (" %d (to 0x%x):\n", mcnt, p + mcnt);
7096*75f6d617Schristos #endif
7097*75f6d617Schristos 
7098*75f6d617Schristos           PUSH_FAILURE_POINT (p + mcnt, NULL, -2);
7099*75f6d617Schristos           NEXT;
7100*75f6d617Schristos 
7101*75f6d617Schristos 
7102*75f6d617Schristos 	/* Uses of on_failure_jump:
7103*75f6d617Schristos 
7104*75f6d617Schristos            Each alternative starts with an on_failure_jump that points
7105*75f6d617Schristos            to the beginning of the next alternative.  Each alternative
7106*75f6d617Schristos            except the last ends with a jump that in effect jumps past
7107*75f6d617Schristos            the rest of the alternatives.  (They really jump to the
7108*75f6d617Schristos            ending jump of the following alternative, because tensioning
7109*75f6d617Schristos            these jumps is a hassle.)
7110*75f6d617Schristos 
7111*75f6d617Schristos            Repeats start with an on_failure_jump that points past both
7112*75f6d617Schristos            the repetition text and either the following jump or
7113*75f6d617Schristos            pop_failure_jump back to this on_failure_jump.  */
7114*75f6d617Schristos 	CASE (on_failure_jump):
7115*75f6d617Schristos         on_failure:
7116*75f6d617Schristos           DEBUG_PRINT1 ("EXECUTING on_failure_jump");
7117*75f6d617Schristos 
7118*75f6d617Schristos           EXTRACT_NUMBER_AND_INCR (mcnt, p);
7119*75f6d617Schristos #ifdef _LIBC
7120*75f6d617Schristos           DEBUG_PRINT3 (" %d (to %p)", mcnt, p + mcnt);
7121*75f6d617Schristos #else
7122*75f6d617Schristos           DEBUG_PRINT3 (" %d (to 0x%x)", mcnt, p + mcnt);
7123*75f6d617Schristos #endif
7124*75f6d617Schristos 
7125*75f6d617Schristos           /* If this on_failure_jump comes right before a group (i.e.,
7126*75f6d617Schristos              the original * applied to a group), save the information
7127*75f6d617Schristos              for that group and all inner ones, so that if we fail back
7128*75f6d617Schristos              to this point, the group's information will be correct.
7129*75f6d617Schristos              For example, in \(a*\)*\1, we need the preceding group,
7130*75f6d617Schristos              and in \(zz\(a*\)b*\)\2, we need the inner group.  */
7131*75f6d617Schristos 
7132*75f6d617Schristos           /* We can't use `p' to check ahead because we push
7133*75f6d617Schristos              a failure point to `p + mcnt' after we do this.  */
7134*75f6d617Schristos           p1 = p;
7135*75f6d617Schristos 
7136*75f6d617Schristos           /* We need to skip no_op's before we look for the
7137*75f6d617Schristos              start_memory in case this on_failure_jump is happening as
7138*75f6d617Schristos              the result of a completed succeed_n, as in \(a\)\{1,3\}b\1
7139*75f6d617Schristos              against aba.  */
7140*75f6d617Schristos           while (p1 < pend && (re_opcode_t) *p1 == no_op)
7141*75f6d617Schristos             p1++;
7142*75f6d617Schristos 
7143*75f6d617Schristos           if (p1 < pend && (re_opcode_t) *p1 == start_memory)
7144*75f6d617Schristos             {
7145*75f6d617Schristos               /* We have a new highest active register now.  This will
7146*75f6d617Schristos                  get reset at the start_memory we are about to get to,
7147*75f6d617Schristos                  but we will have saved all the registers relevant to
7148*75f6d617Schristos                  this repetition op, as described above.  */
7149*75f6d617Schristos               highest_active_reg = *(p1 + 1) + *(p1 + 2);
7150*75f6d617Schristos               if (lowest_active_reg == NO_LOWEST_ACTIVE_REG)
7151*75f6d617Schristos                 lowest_active_reg = *(p1 + 1);
7152*75f6d617Schristos             }
7153*75f6d617Schristos 
7154*75f6d617Schristos           DEBUG_PRINT1 (":\n");
7155*75f6d617Schristos           PUSH_FAILURE_POINT (p + mcnt, d, -2);
7156*75f6d617Schristos           NEXT;
7157*75f6d617Schristos 
7158*75f6d617Schristos 
7159*75f6d617Schristos         /* A smart repeat ends with `maybe_pop_jump'.
7160*75f6d617Schristos 	   We change it to either `pop_failure_jump' or `jump'.  */
7161*75f6d617Schristos         CASE (maybe_pop_jump):
7162*75f6d617Schristos           EXTRACT_NUMBER_AND_INCR (mcnt, p);
7163*75f6d617Schristos           DEBUG_PRINT2 ("EXECUTING maybe_pop_jump %d.\n", mcnt);
7164*75f6d617Schristos           {
7165*75f6d617Schristos 	    register UCHAR_T *p2 = p;
7166*75f6d617Schristos 
7167*75f6d617Schristos             /* Compare the beginning of the repeat with what in the
7168*75f6d617Schristos                pattern follows its end. If we can establish that there
7169*75f6d617Schristos                is nothing that they would both match, i.e., nothing that
7170*75f6d617Schristos                would force us to backtrack (as in, e.g., `a*a'),
7171*75f6d617Schristos                then we can change to pop_failure_jump, because we'll
7172*75f6d617Schristos                never have to backtrack.
7173*75f6d617Schristos 
7174*75f6d617Schristos                This is not true in the case of alternatives: in
7175*75f6d617Schristos                `(a|ab)*' we do need to backtrack to the `ab' alternative
7176*75f6d617Schristos                (e.g., if the string was `ab').  But instead of trying to
7177*75f6d617Schristos                detect that here, the alternative has put on a dummy
7178*75f6d617Schristos                failure point which is what we will end up popping.  */
7179*75f6d617Schristos 
7180*75f6d617Schristos 	    /* Skip over open/close-group commands.
7181*75f6d617Schristos 	       If what follows this loop is a ...+ construct,
7182*75f6d617Schristos 	       look at what begins its body, since we will have to
7183*75f6d617Schristos 	       match at least one of that.  */
7184*75f6d617Schristos 	    while (1)
7185*75f6d617Schristos 	      {
7186*75f6d617Schristos 		if (p2 + 2 < pend
7187*75f6d617Schristos 		    && ((re_opcode_t) *p2 == stop_memory
7188*75f6d617Schristos 			|| (re_opcode_t) *p2 == start_memory))
7189*75f6d617Schristos 		  p2 += 3;
7190*75f6d617Schristos 		else if (p2 + 2 + 2 * OFFSET_ADDRESS_SIZE < pend
7191*75f6d617Schristos 			 && (re_opcode_t) *p2 == dummy_failure_jump)
7192*75f6d617Schristos 		  p2 += 2 + 2 * OFFSET_ADDRESS_SIZE;
7193*75f6d617Schristos 		else
7194*75f6d617Schristos 		  break;
7195*75f6d617Schristos 	      }
7196*75f6d617Schristos 
7197*75f6d617Schristos 	    p1 = p + mcnt;
7198*75f6d617Schristos 	    /* p1[0] ... p1[2] are the `on_failure_jump' corresponding
7199*75f6d617Schristos 	       to the `maybe_pop_jump' of this case.  Examine what
7200*75f6d617Schristos 	       follows.  */
7201*75f6d617Schristos 
7202*75f6d617Schristos             /* If we're at the end of the pattern, we can change.  */
7203*75f6d617Schristos             if (p2 == pend)
7204*75f6d617Schristos 	      {
7205*75f6d617Schristos 		/* Consider what happens when matching ":\(.*\)"
7206*75f6d617Schristos 		   against ":/".  I don't really understand this code
7207*75f6d617Schristos 		   yet.  */
7208*75f6d617Schristos   	        p[-(1+OFFSET_ADDRESS_SIZE)] = (UCHAR_T)
7209*75f6d617Schristos 		  pop_failure_jump;
7210*75f6d617Schristos                 DEBUG_PRINT1
7211*75f6d617Schristos                   ("  End of pattern: change to `pop_failure_jump'.\n");
7212*75f6d617Schristos               }
7213*75f6d617Schristos 
7214*75f6d617Schristos             else if ((re_opcode_t) *p2 == exactn
7215*75f6d617Schristos #ifdef MBS_SUPPORT
7216*75f6d617Schristos 		     || (re_opcode_t) *p2 == exactn_bin
7217*75f6d617Schristos #endif
7218*75f6d617Schristos 		     || (bufp->newline_anchor && (re_opcode_t) *p2 == endline))
7219*75f6d617Schristos 	      {
7220*75f6d617Schristos 		register UCHAR_T c
7221*75f6d617Schristos                   = *p2 == (UCHAR_T) endline ? '\n' : p2[2];
7222*75f6d617Schristos 
7223*75f6d617Schristos                 if (((re_opcode_t) p1[1+OFFSET_ADDRESS_SIZE] == exactn
7224*75f6d617Schristos #ifdef MBS_SUPPORT
7225*75f6d617Schristos 		     || (re_opcode_t) p1[1+OFFSET_ADDRESS_SIZE] == exactn_bin
7226*75f6d617Schristos #endif
7227*75f6d617Schristos 		    ) && p1[3+OFFSET_ADDRESS_SIZE] != c)
7228*75f6d617Schristos                   {
7229*75f6d617Schristos   		    p[-(1+OFFSET_ADDRESS_SIZE)] = (UCHAR_T)
7230*75f6d617Schristos 		      pop_failure_jump;
7231*75f6d617Schristos #ifdef WCHAR
7232*75f6d617Schristos 		      DEBUG_PRINT3 ("  %C != %C => pop_failure_jump.\n",
7233*75f6d617Schristos 				    (wint_t) c,
7234*75f6d617Schristos 				    (wint_t) p1[3+OFFSET_ADDRESS_SIZE]);
7235*75f6d617Schristos #else
7236*75f6d617Schristos 		      DEBUG_PRINT3 ("  %c != %c => pop_failure_jump.\n",
7237*75f6d617Schristos 				    (char) c,
7238*75f6d617Schristos 				    (char) p1[3+OFFSET_ADDRESS_SIZE]);
7239*75f6d617Schristos #endif
7240*75f6d617Schristos                   }
7241*75f6d617Schristos 
7242*75f6d617Schristos #ifndef WCHAR
7243*75f6d617Schristos 		else if ((re_opcode_t) p1[3] == charset
7244*75f6d617Schristos 			 || (re_opcode_t) p1[3] == charset_not)
7245*75f6d617Schristos 		  {
7246*75f6d617Schristos 		    int not = (re_opcode_t) p1[3] == charset_not;
7247*75f6d617Schristos 
7248*75f6d617Schristos 		    if (c < (unsigned) (p1[4] * BYTEWIDTH)
7249*75f6d617Schristos 			&& p1[5 + c / BYTEWIDTH] & (1 << (c % BYTEWIDTH)))
7250*75f6d617Schristos 		      not = !not;
7251*75f6d617Schristos 
7252*75f6d617Schristos                     /* `not' is equal to 1 if c would match, which means
7253*75f6d617Schristos                         that we can't change to pop_failure_jump.  */
7254*75f6d617Schristos 		    if (!not)
7255*75f6d617Schristos                       {
7256*75f6d617Schristos   		        p[-3] = (unsigned char) pop_failure_jump;
7257*75f6d617Schristos                         DEBUG_PRINT1 ("  No match => pop_failure_jump.\n");
7258*75f6d617Schristos                       }
7259*75f6d617Schristos 		  }
7260*75f6d617Schristos #endif /* not WCHAR */
7261*75f6d617Schristos 	      }
7262*75f6d617Schristos #ifndef WCHAR
7263*75f6d617Schristos             else if ((re_opcode_t) *p2 == charset)
7264*75f6d617Schristos 	      {
7265*75f6d617Schristos 		/* We win if the first character of the loop is not part
7266*75f6d617Schristos                    of the charset.  */
7267*75f6d617Schristos                 if ((re_opcode_t) p1[3] == exactn
7268*75f6d617Schristos  		    && ! ((int) p2[1] * BYTEWIDTH > (int) p1[5]
7269*75f6d617Schristos  			  && (p2[2 + p1[5] / BYTEWIDTH]
7270*75f6d617Schristos  			      & (1 << (p1[5] % BYTEWIDTH)))))
7271*75f6d617Schristos 		  {
7272*75f6d617Schristos 		    p[-3] = (unsigned char) pop_failure_jump;
7273*75f6d617Schristos 		    DEBUG_PRINT1 ("  No match => pop_failure_jump.\n");
7274*75f6d617Schristos                   }
7275*75f6d617Schristos 
7276*75f6d617Schristos 		else if ((re_opcode_t) p1[3] == charset_not)
7277*75f6d617Schristos 		  {
7278*75f6d617Schristos 		    int idx;
7279*75f6d617Schristos 		    /* We win if the charset_not inside the loop
7280*75f6d617Schristos 		       lists every character listed in the charset after.  */
7281*75f6d617Schristos 		    for (idx = 0; idx < (int) p2[1]; idx++)
7282*75f6d617Schristos 		      if (! (p2[2 + idx] == 0
7283*75f6d617Schristos 			     || (idx < (int) p1[4]
7284*75f6d617Schristos 				 && ((p2[2 + idx] & ~ p1[5 + idx]) == 0))))
7285*75f6d617Schristos 			break;
7286*75f6d617Schristos 
7287*75f6d617Schristos 		    if (idx == p2[1])
7288*75f6d617Schristos                       {
7289*75f6d617Schristos   		        p[-3] = (unsigned char) pop_failure_jump;
7290*75f6d617Schristos                         DEBUG_PRINT1 ("  No match => pop_failure_jump.\n");
7291*75f6d617Schristos                       }
7292*75f6d617Schristos 		  }
7293*75f6d617Schristos 		else if ((re_opcode_t) p1[3] == charset)
7294*75f6d617Schristos 		  {
7295*75f6d617Schristos 		    int idx;
7296*75f6d617Schristos 		    /* We win if the charset inside the loop
7297*75f6d617Schristos 		       has no overlap with the one after the loop.  */
7298*75f6d617Schristos 		    for (idx = 0;
7299*75f6d617Schristos 			 idx < (int) p2[1] && idx < (int) p1[4];
7300*75f6d617Schristos 			 idx++)
7301*75f6d617Schristos 		      if ((p2[2 + idx] & p1[5 + idx]) != 0)
7302*75f6d617Schristos 			break;
7303*75f6d617Schristos 
7304*75f6d617Schristos 		    if (idx == p2[1] || idx == p1[4])
7305*75f6d617Schristos                       {
7306*75f6d617Schristos   		        p[-3] = (unsigned char) pop_failure_jump;
7307*75f6d617Schristos                         DEBUG_PRINT1 ("  No match => pop_failure_jump.\n");
7308*75f6d617Schristos                       }
7309*75f6d617Schristos 		  }
7310*75f6d617Schristos 	      }
7311*75f6d617Schristos #endif /* not WCHAR */
7312*75f6d617Schristos 	  }
7313*75f6d617Schristos 	  p -= OFFSET_ADDRESS_SIZE;	/* Point at relative address again.  */
7314*75f6d617Schristos 	  if ((re_opcode_t) p[-1] != pop_failure_jump)
7315*75f6d617Schristos 	    {
7316*75f6d617Schristos 	      p[-1] = (UCHAR_T) jump;
7317*75f6d617Schristos               DEBUG_PRINT1 ("  Match => jump.\n");
7318*75f6d617Schristos 	      goto unconditional_jump;
7319*75f6d617Schristos 	    }
7320*75f6d617Schristos         /* Note fall through.  */
7321*75f6d617Schristos 
7322*75f6d617Schristos 
7323*75f6d617Schristos 	/* The end of a simple repeat has a pop_failure_jump back to
7324*75f6d617Schristos            its matching on_failure_jump, where the latter will push a
7325*75f6d617Schristos            failure point.  The pop_failure_jump takes off failure
7326*75f6d617Schristos            points put on by this pop_failure_jump's matching
7327*75f6d617Schristos            on_failure_jump; we got through the pattern to here from the
7328*75f6d617Schristos            matching on_failure_jump, so didn't fail.  */
7329*75f6d617Schristos         CASE (pop_failure_jump):
7330*75f6d617Schristos           {
7331*75f6d617Schristos             /* We need to pass separate storage for the lowest and
7332*75f6d617Schristos                highest registers, even though we don't care about the
7333*75f6d617Schristos                actual values.  Otherwise, we will restore only one
7334*75f6d617Schristos                register from the stack, since lowest will == highest in
7335*75f6d617Schristos                `pop_failure_point'.  */
7336*75f6d617Schristos             active_reg_t dummy_low_reg, dummy_high_reg;
7337*75f6d617Schristos             UCHAR_T *pdummy = NULL;
7338*75f6d617Schristos             const CHAR_T *sdummy = NULL;
7339*75f6d617Schristos 
7340*75f6d617Schristos             DEBUG_PRINT1 ("EXECUTING pop_failure_jump.\n");
7341*75f6d617Schristos             POP_FAILURE_POINT (sdummy, pdummy,
7342*75f6d617Schristos                                dummy_low_reg, dummy_high_reg,
7343*75f6d617Schristos                                reg_dummy, reg_dummy, reg_info_dummy);
7344*75f6d617Schristos           }
7345*75f6d617Schristos 	  /* Note fall through.  */
7346*75f6d617Schristos 
7347*75f6d617Schristos 	unconditional_jump:
7348*75f6d617Schristos #ifdef _LIBC
7349*75f6d617Schristos 	  DEBUG_PRINT2 ("\n%p: ", p);
7350*75f6d617Schristos #else
7351*75f6d617Schristos 	  DEBUG_PRINT2 ("\n0x%x: ", p);
7352*75f6d617Schristos #endif
7353*75f6d617Schristos           /* Note fall through.  */
7354*75f6d617Schristos 
7355*75f6d617Schristos         /* Unconditionally jump (without popping any failure points).  */
7356*75f6d617Schristos         CASE (jump):
7357*75f6d617Schristos 	  EXTRACT_NUMBER_AND_INCR (mcnt, p);	/* Get the amount to jump.  */
7358*75f6d617Schristos           DEBUG_PRINT2 ("EXECUTING jump %d ", mcnt);
7359*75f6d617Schristos 	  p += mcnt;				/* Do the jump.  */
7360*75f6d617Schristos #ifdef _LIBC
7361*75f6d617Schristos           DEBUG_PRINT2 ("(to %p).\n", p);
7362*75f6d617Schristos #else
7363*75f6d617Schristos           DEBUG_PRINT2 ("(to 0x%x).\n", p);
7364*75f6d617Schristos #endif
7365*75f6d617Schristos 	  NEXT;
7366*75f6d617Schristos 
7367*75f6d617Schristos 
7368*75f6d617Schristos         /* We need this opcode so we can detect where alternatives end
7369*75f6d617Schristos            in `group_match_null_string_p' et al.  */
7370*75f6d617Schristos         CASE (jump_past_alt):
7371*75f6d617Schristos           DEBUG_PRINT1 ("EXECUTING jump_past_alt.\n");
7372*75f6d617Schristos           goto unconditional_jump;
7373*75f6d617Schristos 
7374*75f6d617Schristos 
7375*75f6d617Schristos         /* Normally, the on_failure_jump pushes a failure point, which
7376*75f6d617Schristos            then gets popped at pop_failure_jump.  We will end up at
7377*75f6d617Schristos            pop_failure_jump, also, and with a pattern of, say, `a+', we
7378*75f6d617Schristos            are skipping over the on_failure_jump, so we have to push
7379*75f6d617Schristos            something meaningless for pop_failure_jump to pop.  */
7380*75f6d617Schristos         CASE (dummy_failure_jump):
7381*75f6d617Schristos           DEBUG_PRINT1 ("EXECUTING dummy_failure_jump.\n");
7382*75f6d617Schristos           /* It doesn't matter what we push for the string here.  What
7383*75f6d617Schristos              the code at `fail' tests is the value for the pattern.  */
7384*75f6d617Schristos           PUSH_FAILURE_POINT (NULL, NULL, -2);
7385*75f6d617Schristos           goto unconditional_jump;
7386*75f6d617Schristos 
7387*75f6d617Schristos 
7388*75f6d617Schristos         /* At the end of an alternative, we need to push a dummy failure
7389*75f6d617Schristos            point in case we are followed by a `pop_failure_jump', because
7390*75f6d617Schristos            we don't want the failure point for the alternative to be
7391*75f6d617Schristos            popped.  For example, matching `(a|ab)*' against `aab'
7392*75f6d617Schristos            requires that we match the `ab' alternative.  */
7393*75f6d617Schristos         CASE (push_dummy_failure):
7394*75f6d617Schristos           DEBUG_PRINT1 ("EXECUTING push_dummy_failure.\n");
7395*75f6d617Schristos           /* See comments just above at `dummy_failure_jump' about the
7396*75f6d617Schristos              two zeroes.  */
7396*75f6d617Schristos              two NULLs.  */
7398*75f6d617Schristos           NEXT;
7399*75f6d617Schristos 
7400*75f6d617Schristos         /* Have to succeed matching what follows at least n times.
7401*75f6d617Schristos            After that, handle like `on_failure_jump'.  */
7402*75f6d617Schristos         CASE (succeed_n):
7403*75f6d617Schristos           EXTRACT_NUMBER (mcnt, p + OFFSET_ADDRESS_SIZE);
7404*75f6d617Schristos           DEBUG_PRINT2 ("EXECUTING succeed_n %d.\n", mcnt);
7405*75f6d617Schristos 
7406*75f6d617Schristos           assert (mcnt >= 0);
7407*75f6d617Schristos           /* Originally, this is how many times we HAVE to succeed.  */
7408*75f6d617Schristos           if (mcnt > 0)
7409*75f6d617Schristos             {
7410*75f6d617Schristos                mcnt--;
7411*75f6d617Schristos 	       p += OFFSET_ADDRESS_SIZE;
7412*75f6d617Schristos                STORE_NUMBER_AND_INCR (p, mcnt);
7413*75f6d617Schristos #ifdef _LIBC
7414*75f6d617Schristos                DEBUG_PRINT3 ("  Setting %p to %d.\n", p - OFFSET_ADDRESS_SIZE
7415*75f6d617Schristos 			     , mcnt);
7416*75f6d617Schristos #else
7417*75f6d617Schristos                DEBUG_PRINT3 ("  Setting 0x%x to %d.\n", p - OFFSET_ADDRESS_SIZE
7418*75f6d617Schristos 			     , mcnt);
7419*75f6d617Schristos #endif
7420*75f6d617Schristos             }
7421*75f6d617Schristos 	  else if (mcnt == 0)
7422*75f6d617Schristos             {
7423*75f6d617Schristos #ifdef _LIBC
7424*75f6d617Schristos               DEBUG_PRINT2 ("  Setting two bytes from %p to no_op.\n",
7425*75f6d617Schristos 			    p + OFFSET_ADDRESS_SIZE);
7426*75f6d617Schristos #else
7427*75f6d617Schristos               DEBUG_PRINT2 ("  Setting two bytes from 0x%x to no_op.\n",
7428*75f6d617Schristos 			    p + OFFSET_ADDRESS_SIZE);
7429*75f6d617Schristos #endif /* _LIBC */
7430*75f6d617Schristos 
7431*75f6d617Schristos #ifdef WCHAR
7432*75f6d617Schristos 	      p[1] = (UCHAR_T) no_op;
7433*75f6d617Schristos #else
7434*75f6d617Schristos 	      p[2] = (UCHAR_T) no_op;
7435*75f6d617Schristos               p[3] = (UCHAR_T) no_op;
7436*75f6d617Schristos #endif /* WCHAR */
7437*75f6d617Schristos               goto on_failure;
7438*75f6d617Schristos             }
7439*75f6d617Schristos           NEXT;
7440*75f6d617Schristos 
7441*75f6d617Schristos         CASE (jump_n):
7442*75f6d617Schristos           EXTRACT_NUMBER (mcnt, p + OFFSET_ADDRESS_SIZE);
7443*75f6d617Schristos           DEBUG_PRINT2 ("EXECUTING jump_n %d.\n", mcnt);
7444*75f6d617Schristos 
7445*75f6d617Schristos           /* Originally, this is how many times we CAN jump.  */
7446*75f6d617Schristos           if (mcnt)
7447*75f6d617Schristos             {
7448*75f6d617Schristos                mcnt--;
7449*75f6d617Schristos                STORE_NUMBER (p + OFFSET_ADDRESS_SIZE, mcnt);
7450*75f6d617Schristos 
7451*75f6d617Schristos #ifdef _LIBC
7452*75f6d617Schristos                DEBUG_PRINT3 ("  Setting %p to %d.\n", p + OFFSET_ADDRESS_SIZE,
7453*75f6d617Schristos 			     mcnt);
7454*75f6d617Schristos #else
7455*75f6d617Schristos                DEBUG_PRINT3 ("  Setting 0x%x to %d.\n", p + OFFSET_ADDRESS_SIZE,
7456*75f6d617Schristos 			     mcnt);
7457*75f6d617Schristos #endif /* _LIBC */
7458*75f6d617Schristos 	       goto unconditional_jump;
7459*75f6d617Schristos             }
7460*75f6d617Schristos           /* If don't have to jump any more, skip over the rest of command.  */
7461*75f6d617Schristos 	  else
7462*75f6d617Schristos 	    p += 2 * OFFSET_ADDRESS_SIZE;
7463*75f6d617Schristos           NEXT;
7464*75f6d617Schristos 
7465*75f6d617Schristos 	CASE (set_number_at):
7466*75f6d617Schristos 	  {
7467*75f6d617Schristos             DEBUG_PRINT1 ("EXECUTING set_number_at.\n");
7468*75f6d617Schristos 
7469*75f6d617Schristos             EXTRACT_NUMBER_AND_INCR (mcnt, p);
7470*75f6d617Schristos             p1 = p + mcnt;
7471*75f6d617Schristos             EXTRACT_NUMBER_AND_INCR (mcnt, p);
7472*75f6d617Schristos #ifdef _LIBC
7473*75f6d617Schristos             DEBUG_PRINT3 ("  Setting %p to %d.\n", p1, mcnt);
7474*75f6d617Schristos #else
7475*75f6d617Schristos             DEBUG_PRINT3 ("  Setting 0x%x to %d.\n", p1, mcnt);
7476*75f6d617Schristos #endif
7477*75f6d617Schristos 	    STORE_NUMBER (p1, mcnt);
7478*75f6d617Schristos             NEXT;
7479*75f6d617Schristos           }
7480*75f6d617Schristos 
7481*75f6d617Schristos #if 0
7482*75f6d617Schristos 	/* The DEC Alpha C compiler 3.x generates incorrect code for the
7483*75f6d617Schristos 	   test  WORDCHAR_P (d - 1) != WORDCHAR_P (d)  in the expansion of
7484*75f6d617Schristos 	   AT_WORD_BOUNDARY, so this code is disabled.  Expanding the
7485*75f6d617Schristos 	   macro and introducing temporary variables works around the bug.  */
7486*75f6d617Schristos 
7487*75f6d617Schristos 	CASE (wordbound):
7488*75f6d617Schristos 	  DEBUG_PRINT1 ("EXECUTING wordbound.\n");
7489*75f6d617Schristos 	  if (AT_WORD_BOUNDARY (d))
7490*75f6d617Schristos 	    {
7491*75f6d617Schristos 	      NEXT;
7492*75f6d617Schristos 	    }
7493*75f6d617Schristos 	  goto fail;
7494*75f6d617Schristos 
7495*75f6d617Schristos 	CASE (notwordbound):
7496*75f6d617Schristos 	  DEBUG_PRINT1 ("EXECUTING notwordbound.\n");
7497*75f6d617Schristos 	  if (AT_WORD_BOUNDARY (d))
7498*75f6d617Schristos 	    goto fail;
7499*75f6d617Schristos 	  NEXT;
7500*75f6d617Schristos #else
7501*75f6d617Schristos 	CASE (wordbound):
7502*75f6d617Schristos 	{
7503*75f6d617Schristos 	  boolean prevchar, thischar;
7504*75f6d617Schristos 
7505*75f6d617Schristos 	  DEBUG_PRINT1 ("EXECUTING wordbound.\n");
7506*75f6d617Schristos 	  if (AT_STRINGS_BEG (d) || AT_STRINGS_END (d))
7507*75f6d617Schristos 	    {
7508*75f6d617Schristos 	      NEXT;
7509*75f6d617Schristos 	    }
7510*75f6d617Schristos 
7511*75f6d617Schristos 	  prevchar = WORDCHAR_P (d - 1);
7512*75f6d617Schristos 	  thischar = WORDCHAR_P (d);
7513*75f6d617Schristos 	  if (prevchar != thischar)
7514*75f6d617Schristos 	    {
7515*75f6d617Schristos 	      NEXT;
7516*75f6d617Schristos 	    }
7517*75f6d617Schristos 	  goto fail;
7518*75f6d617Schristos 	}
7519*75f6d617Schristos 
7520*75f6d617Schristos       CASE (notwordbound):
7521*75f6d617Schristos 	{
7522*75f6d617Schristos 	  boolean prevchar, thischar;
7523*75f6d617Schristos 
7524*75f6d617Schristos 	  DEBUG_PRINT1 ("EXECUTING notwordbound.\n");
7525*75f6d617Schristos 	  if (AT_STRINGS_BEG (d) || AT_STRINGS_END (d))
7526*75f6d617Schristos 	    goto fail;
7527*75f6d617Schristos 
7528*75f6d617Schristos 	  prevchar = WORDCHAR_P (d - 1);
7529*75f6d617Schristos 	  thischar = WORDCHAR_P (d);
7530*75f6d617Schristos 	  if (prevchar != thischar)
7531*75f6d617Schristos 	    goto fail;
7532*75f6d617Schristos 	  NEXT;
7533*75f6d617Schristos 	}
7534*75f6d617Schristos #endif
7535*75f6d617Schristos 
7536*75f6d617Schristos 	CASE (wordbeg):
7537*75f6d617Schristos           DEBUG_PRINT1 ("EXECUTING wordbeg.\n");
7538*75f6d617Schristos 	  if (!AT_STRINGS_END (d) && WORDCHAR_P (d)
7539*75f6d617Schristos 	      && (AT_STRINGS_BEG (d) || !WORDCHAR_P (d - 1)))
7540*75f6d617Schristos 	    {
7541*75f6d617Schristos 	      NEXT;
7542*75f6d617Schristos 	    }
7543*75f6d617Schristos           goto fail;
7544*75f6d617Schristos 
7545*75f6d617Schristos 	CASE (wordend):
7546*75f6d617Schristos           DEBUG_PRINT1 ("EXECUTING wordend.\n");
7547*75f6d617Schristos 	  if (!AT_STRINGS_BEG (d) && WORDCHAR_P (d - 1)
7548*75f6d617Schristos               && (AT_STRINGS_END (d) || !WORDCHAR_P (d)))
7549*75f6d617Schristos 	    {
7550*75f6d617Schristos 	      NEXT;
7551*75f6d617Schristos 	    }
7552*75f6d617Schristos           goto fail;
7553*75f6d617Schristos 
7554*75f6d617Schristos #ifdef emacs
7555*75f6d617Schristos   	CASE (before_dot):
7556*75f6d617Schristos           DEBUG_PRINT1 ("EXECUTING before_dot.\n");
7557*75f6d617Schristos  	  if (PTR_CHAR_POS ((unsigned char *) d) >= point)
7558*75f6d617Schristos   	    goto fail;
7559*75f6d617Schristos   	  NEXT;
7560*75f6d617Schristos 
7561*75f6d617Schristos   	CASE (at_dot):
7562*75f6d617Schristos           DEBUG_PRINT1 ("EXECUTING at_dot.\n");
7563*75f6d617Schristos  	  if (PTR_CHAR_POS ((unsigned char *) d) != point)
7564*75f6d617Schristos   	    goto fail;
7565*75f6d617Schristos   	  NEXT;
7566*75f6d617Schristos 
7567*75f6d617Schristos   	CASE (after_dot):
7568*75f6d617Schristos           DEBUG_PRINT1 ("EXECUTING after_dot.\n");
7569*75f6d617Schristos           if (PTR_CHAR_POS ((unsigned char *) d) <= point)
7570*75f6d617Schristos   	    goto fail;
7571*75f6d617Schristos   	  NEXT;
7572*75f6d617Schristos 
7573*75f6d617Schristos 	CASE (syntaxspec):
7574*75f6d617Schristos           DEBUG_PRINT2 ("EXECUTING syntaxspec %d.\n", mcnt);
7575*75f6d617Schristos 	  mcnt = *p++;
7576*75f6d617Schristos 	  goto matchsyntax;
7577*75f6d617Schristos 
7578*75f6d617Schristos         CASE (wordchar):
7579*75f6d617Schristos           DEBUG_PRINT1 ("EXECUTING Emacs wordchar.\n");
7580*75f6d617Schristos 	  mcnt = (int) Sword;
7581*75f6d617Schristos         matchsyntax:
7582*75f6d617Schristos 	  PREFETCH ();
7583*75f6d617Schristos 	  /* Can't use *d++ here; SYNTAX may be an unsafe macro.  */
7584*75f6d617Schristos 	  d++;
7585*75f6d617Schristos 	  if (SYNTAX (d[-1]) != (enum syntaxcode) mcnt)
7586*75f6d617Schristos 	    goto fail;
7587*75f6d617Schristos           SET_REGS_MATCHED ();
7588*75f6d617Schristos 	  NEXT;
7589*75f6d617Schristos 
7590*75f6d617Schristos 	CASE (notsyntaxspec):
7591*75f6d617Schristos           DEBUG_PRINT2 ("EXECUTING notsyntaxspec %d.\n", mcnt);
7592*75f6d617Schristos 	  mcnt = *p++;
7593*75f6d617Schristos 	  goto matchnotsyntax;
7594*75f6d617Schristos 
7595*75f6d617Schristos         CASE (notwordchar):
7596*75f6d617Schristos           DEBUG_PRINT1 ("EXECUTING Emacs notwordchar.\n");
7597*75f6d617Schristos 	  mcnt = (int) Sword;
7598*75f6d617Schristos         matchnotsyntax:
7599*75f6d617Schristos 	  PREFETCH ();
7600*75f6d617Schristos 	  /* Can't use *d++ here; SYNTAX may be an unsafe macro.  */
7601*75f6d617Schristos 	  d++;
7602*75f6d617Schristos 	  if (SYNTAX (d[-1]) == (enum syntaxcode) mcnt)
7603*75f6d617Schristos 	    goto fail;
7604*75f6d617Schristos 	  SET_REGS_MATCHED ();
7605*75f6d617Schristos           NEXT;
7606*75f6d617Schristos 
7607*75f6d617Schristos #else /* not emacs */
7608*75f6d617Schristos 	CASE (wordchar):
7609*75f6d617Schristos           DEBUG_PRINT1 ("EXECUTING non-Emacs wordchar.\n");
7610*75f6d617Schristos 	  PREFETCH ();
7611*75f6d617Schristos           if (!WORDCHAR_P (d))
7612*75f6d617Schristos             goto fail;
7613*75f6d617Schristos 	  SET_REGS_MATCHED ();
7614*75f6d617Schristos           d++;
7615*75f6d617Schristos 	  NEXT;
7616*75f6d617Schristos 
7617*75f6d617Schristos 	CASE (notwordchar):
7618*75f6d617Schristos           DEBUG_PRINT1 ("EXECUTING non-Emacs notwordchar.\n");
7619*75f6d617Schristos 	  PREFETCH ();
7620*75f6d617Schristos 	  if (WORDCHAR_P (d))
7621*75f6d617Schristos             goto fail;
7622*75f6d617Schristos           SET_REGS_MATCHED ();
7623*75f6d617Schristos           d++;
7624*75f6d617Schristos 	  NEXT;
7625*75f6d617Schristos #endif /* not emacs */
7626*75f6d617Schristos 
7627*75f6d617Schristos #ifndef __GNUC__
7628*75f6d617Schristos         default:
7629*75f6d617Schristos           abort ();
7630*75f6d617Schristos 	}
7631*75f6d617Schristos       continue;  /* Successfully executed one pattern command; keep going.  */
7632*75f6d617Schristos #endif
7633*75f6d617Schristos 
7634*75f6d617Schristos 
7635*75f6d617Schristos     /* We goto here if a matching operation fails. */
7636*75f6d617Schristos     fail:
7637*75f6d617Schristos       if (!FAIL_STACK_EMPTY ())
7638*75f6d617Schristos 	{ /* A restart point is known.  Restore to that state.  */
7639*75f6d617Schristos           DEBUG_PRINT1 ("\nFAIL:\n");
7640*75f6d617Schristos           POP_FAILURE_POINT (d, p,
7641*75f6d617Schristos                              lowest_active_reg, highest_active_reg,
7642*75f6d617Schristos                              regstart, regend, reg_info);
7643*75f6d617Schristos 
7644*75f6d617Schristos           /* If this failure point is a dummy, try the next one.  */
7645*75f6d617Schristos           if (!p)
7646*75f6d617Schristos 	    goto fail;
7647*75f6d617Schristos 
7648*75f6d617Schristos           /* If we failed to the end of the pattern, don't examine *p.  */
7649*75f6d617Schristos 	  assert (p <= pend);
7650*75f6d617Schristos           if (p < pend)
7651*75f6d617Schristos             {
7652*75f6d617Schristos               boolean is_a_jump_n = false;
7653*75f6d617Schristos 
7654*75f6d617Schristos               /* If failed to a backwards jump that's part of a repetition
7655*75f6d617Schristos                  loop, need to pop this failure point and use the next one.  */
7656*75f6d617Schristos               switch ((re_opcode_t) *p)
7657*75f6d617Schristos                 {
7658*75f6d617Schristos                 case jump_n:
7659*75f6d617Schristos                   is_a_jump_n = true;
7660*75f6d617Schristos                 case maybe_pop_jump:
7661*75f6d617Schristos                 case pop_failure_jump:
7662*75f6d617Schristos                 case jump:
7663*75f6d617Schristos                   p1 = p + 1;
7664*75f6d617Schristos                   EXTRACT_NUMBER_AND_INCR (mcnt, p1);
7665*75f6d617Schristos                   p1 += mcnt;
7666*75f6d617Schristos 
7667*75f6d617Schristos                   if ((is_a_jump_n && (re_opcode_t) *p1 == succeed_n)
7668*75f6d617Schristos                       || (!is_a_jump_n
7669*75f6d617Schristos                           && (re_opcode_t) *p1 == on_failure_jump))
7670*75f6d617Schristos                     goto fail;
7671*75f6d617Schristos                   break;
7672*75f6d617Schristos                 default:
7673*75f6d617Schristos                   /* do nothing */ ;
7674*75f6d617Schristos                 }
7675*75f6d617Schristos             }
7676*75f6d617Schristos 
7677*75f6d617Schristos           if (d >= string1 && d <= end1)
7678*75f6d617Schristos 	    dend = end_match_1;
7679*75f6d617Schristos         }
7680*75f6d617Schristos       else
7681*75f6d617Schristos         break;   /* Matching at this starting point really fails.  */
7682*75f6d617Schristos     } /* for (;;) */
7683*75f6d617Schristos 
7684*75f6d617Schristos   if (best_regs_set)
7685*75f6d617Schristos     goto restore_best_regs;
7686*75f6d617Schristos 
7687*75f6d617Schristos   FREE_VARIABLES ();
7688*75f6d617Schristos 
7689*75f6d617Schristos   return -1;         			/* Failure to match.  */
7690*75f6d617Schristos } /* re_match_2 */
7691*75f6d617Schristos 
7692*75f6d617Schristos /* Subroutine definitions for re_match_2.  */
7693*75f6d617Schristos 
7694*75f6d617Schristos 
7695*75f6d617Schristos /* We are passed P pointing to a register number after a start_memory.
7696*75f6d617Schristos 
7697*75f6d617Schristos    Return true if the pattern up to the corresponding stop_memory can
7698*75f6d617Schristos    match the empty string, and false otherwise.
7699*75f6d617Schristos 
7700*75f6d617Schristos    If we find the matching stop_memory, sets P to point to one past its number.
7701*75f6d617Schristos    Otherwise, sets P to an undefined byte less than or equal to END.
7702*75f6d617Schristos 
7703*75f6d617Schristos    We don't handle duplicates properly (yet).  */
7704*75f6d617Schristos 
7705*75f6d617Schristos static boolean
7706*75f6d617Schristos PREFIX(group_match_null_string_p) (p, end, reg_info)
7707*75f6d617Schristos     UCHAR_T **p, *end;
PREFIX(register_info_type)7708*75f6d617Schristos     PREFIX(register_info_type) *reg_info;
7709*75f6d617Schristos {
7710*75f6d617Schristos   int mcnt;
7711*75f6d617Schristos   /* Point to after the args to the start_memory.  */
7712*75f6d617Schristos   UCHAR_T *p1 = *p + 2;
7713*75f6d617Schristos 
7714*75f6d617Schristos   while (p1 < end)
7715*75f6d617Schristos     {
7716*75f6d617Schristos       /* Skip over opcodes that can match nothing, and return true or
7717*75f6d617Schristos 	 false, as appropriate, when we get to one that can't, or to the
7718*75f6d617Schristos          matching stop_memory.  */
7719*75f6d617Schristos 
7720*75f6d617Schristos       switch ((re_opcode_t) *p1)
7721*75f6d617Schristos         {
7722*75f6d617Schristos         /* Could be either a loop or a series of alternatives.  */
7723*75f6d617Schristos         case on_failure_jump:
7724*75f6d617Schristos           p1++;
7725*75f6d617Schristos           EXTRACT_NUMBER_AND_INCR (mcnt, p1);
7726*75f6d617Schristos 
7727*75f6d617Schristos           /* If the next operation is not a jump backwards in the
7728*75f6d617Schristos 	     pattern.  */
7729*75f6d617Schristos 
7730*75f6d617Schristos 	  if (mcnt >= 0)
7731*75f6d617Schristos 	    {
7732*75f6d617Schristos               /* Go through the on_failure_jumps of the alternatives,
7733*75f6d617Schristos                  seeing if any of the alternatives cannot match nothing.
7734*75f6d617Schristos                  The last alternative starts with only a jump,
7735*75f6d617Schristos                  whereas the rest start with on_failure_jump and end
7736*75f6d617Schristos                  with a jump, e.g., here is the pattern for `a|b|c':
7737*75f6d617Schristos 
7738*75f6d617Schristos                  /on_failure_jump/0/6/exactn/1/a/jump_past_alt/0/6
7739*75f6d617Schristos                  /on_failure_jump/0/6/exactn/1/b/jump_past_alt/0/3
7740*75f6d617Schristos                  /exactn/1/c
7741*75f6d617Schristos 
7742*75f6d617Schristos                  So, we have to first go through the first (n-1)
7743*75f6d617Schristos                  alternatives and then deal with the last one separately.  */
7744*75f6d617Schristos 
7745*75f6d617Schristos 
7746*75f6d617Schristos               /* Deal with the first (n-1) alternatives, which start
7747*75f6d617Schristos                  with an on_failure_jump (see above) that jumps to right
7748*75f6d617Schristos                  past a jump_past_alt.  */
7749*75f6d617Schristos 
7750*75f6d617Schristos               while ((re_opcode_t) p1[mcnt-(1+OFFSET_ADDRESS_SIZE)] ==
7751*75f6d617Schristos 		     jump_past_alt)
7752*75f6d617Schristos                 {
7753*75f6d617Schristos                   /* `mcnt' holds how many bytes long the alternative
7754*75f6d617Schristos                      is, including the ending `jump_past_alt' and
7755*75f6d617Schristos                      its number.  */
7756*75f6d617Schristos 
7757*75f6d617Schristos                   if (!PREFIX(alt_match_null_string_p) (p1, p1 + mcnt -
7758*75f6d617Schristos 						(1 + OFFSET_ADDRESS_SIZE),
7759*75f6d617Schristos 						reg_info))
7760*75f6d617Schristos                     return false;
7761*75f6d617Schristos 
7762*75f6d617Schristos                   /* Move to right after this alternative, including the
7763*75f6d617Schristos 		     jump_past_alt.  */
7764*75f6d617Schristos                   p1 += mcnt;
7765*75f6d617Schristos 
7766*75f6d617Schristos                   /* Break if it's the beginning of an n-th alternative
7767*75f6d617Schristos                      that doesn't begin with an on_failure_jump.  */
7768*75f6d617Schristos                   if ((re_opcode_t) *p1 != on_failure_jump)
7769*75f6d617Schristos                     break;
7770*75f6d617Schristos 
7771*75f6d617Schristos 		  /* Still have to check that it's not an n-th
7772*75f6d617Schristos 		     alternative that starts with an on_failure_jump.  */
7773*75f6d617Schristos 		  p1++;
7774*75f6d617Schristos                   EXTRACT_NUMBER_AND_INCR (mcnt, p1);
7775*75f6d617Schristos                   if ((re_opcode_t) p1[mcnt-(1+OFFSET_ADDRESS_SIZE)] !=
7776*75f6d617Schristos 		      jump_past_alt)
7777*75f6d617Schristos                     {
7778*75f6d617Schristos 		      /* Get to the beginning of the n-th alternative.  */
7779*75f6d617Schristos                       p1 -= 1 + OFFSET_ADDRESS_SIZE;
7780*75f6d617Schristos                       break;
7781*75f6d617Schristos                     }
7782*75f6d617Schristos                 }
7783*75f6d617Schristos 
7784*75f6d617Schristos               /* Deal with the last alternative: go back and get number
7785*75f6d617Schristos                  of the `jump_past_alt' just before it.  `mcnt' contains
7786*75f6d617Schristos                  the length of the alternative.  */
7787*75f6d617Schristos               EXTRACT_NUMBER (mcnt, p1 - OFFSET_ADDRESS_SIZE);
7788*75f6d617Schristos 
7789*75f6d617Schristos               if (!PREFIX(alt_match_null_string_p) (p1, p1 + mcnt, reg_info))
7790*75f6d617Schristos                 return false;
7791*75f6d617Schristos 
7792*75f6d617Schristos               p1 += mcnt;	/* Get past the n-th alternative.  */
7793*75f6d617Schristos             } /* if mcnt > 0 */
7794*75f6d617Schristos           break;
7795*75f6d617Schristos 
7796*75f6d617Schristos 
7797*75f6d617Schristos         case stop_memory:
7798*75f6d617Schristos 	  assert (p1[1] == **p);
7799*75f6d617Schristos           *p = p1 + 2;
7800*75f6d617Schristos           return true;
7801*75f6d617Schristos 
7802*75f6d617Schristos 
7803*75f6d617Schristos         default:
7804*75f6d617Schristos           if (!PREFIX(common_op_match_null_string_p) (&p1, end, reg_info))
7805*75f6d617Schristos             return false;
7806*75f6d617Schristos         }
7807*75f6d617Schristos     } /* while p1 < end */
7808*75f6d617Schristos 
7809*75f6d617Schristos   return false;
7810*75f6d617Schristos } /* group_match_null_string_p */
7811*75f6d617Schristos 
7812*75f6d617Schristos 
7813*75f6d617Schristos /* Similar to group_match_null_string_p, but doesn't deal with alternatives:
7814*75f6d617Schristos    It expects P to be the first byte of a single alternative and END one
7815*75f6d617Schristos    byte past the last. The alternative can contain groups.  */
7816*75f6d617Schristos 
7817*75f6d617Schristos static boolean
7818*75f6d617Schristos PREFIX(alt_match_null_string_p) (p, end, reg_info)
7819*75f6d617Schristos     UCHAR_T *p, *end;
PREFIX(register_info_type)7820*75f6d617Schristos     PREFIX(register_info_type) *reg_info;
7821*75f6d617Schristos {
7822*75f6d617Schristos   int mcnt;
7823*75f6d617Schristos   UCHAR_T *p1 = p;
7824*75f6d617Schristos 
7825*75f6d617Schristos   while (p1 < end)
7826*75f6d617Schristos     {
7827*75f6d617Schristos       /* Skip over opcodes that can match nothing, and break when we get
7828*75f6d617Schristos          to one that can't.  */
7829*75f6d617Schristos 
7830*75f6d617Schristos       switch ((re_opcode_t) *p1)
7831*75f6d617Schristos         {
7832*75f6d617Schristos 	/* It's a loop.  */
7833*75f6d617Schristos         case on_failure_jump:
7834*75f6d617Schristos           p1++;
7835*75f6d617Schristos           EXTRACT_NUMBER_AND_INCR (mcnt, p1);
7836*75f6d617Schristos           p1 += mcnt;
7837*75f6d617Schristos           break;
7838*75f6d617Schristos 
7839*75f6d617Schristos 	default:
7840*75f6d617Schristos           if (!PREFIX(common_op_match_null_string_p) (&p1, end, reg_info))
7841*75f6d617Schristos             return false;
7842*75f6d617Schristos         }
7843*75f6d617Schristos     }  /* while p1 < end */
7844*75f6d617Schristos 
7845*75f6d617Schristos   return true;
7846*75f6d617Schristos } /* alt_match_null_string_p */
7847*75f6d617Schristos 
7848*75f6d617Schristos 
7849*75f6d617Schristos /* Deals with the ops common to group_match_null_string_p and
7850*75f6d617Schristos    alt_match_null_string_p.
7851*75f6d617Schristos 
7852*75f6d617Schristos    Sets P to one after the op and its arguments, if any.  */
7853*75f6d617Schristos 
7854*75f6d617Schristos static boolean
7855*75f6d617Schristos PREFIX(common_op_match_null_string_p) (p, end, reg_info)
7856*75f6d617Schristos     UCHAR_T **p, *end;
PREFIX(register_info_type)7857*75f6d617Schristos     PREFIX(register_info_type) *reg_info;
7858*75f6d617Schristos {
7859*75f6d617Schristos   int mcnt;
7860*75f6d617Schristos   boolean ret;
7861*75f6d617Schristos   int reg_no;
7862*75f6d617Schristos   UCHAR_T *p1 = *p;
7863*75f6d617Schristos 
7864*75f6d617Schristos   switch ((re_opcode_t) *p1++)
7865*75f6d617Schristos     {
7866*75f6d617Schristos     case no_op:
7867*75f6d617Schristos     case begline:
7868*75f6d617Schristos     case endline:
7869*75f6d617Schristos     case begbuf:
7870*75f6d617Schristos     case endbuf:
7871*75f6d617Schristos     case wordbeg:
7872*75f6d617Schristos     case wordend:
7873*75f6d617Schristos     case wordbound:
7874*75f6d617Schristos     case notwordbound:
7875*75f6d617Schristos #ifdef emacs
7876*75f6d617Schristos     case before_dot:
7877*75f6d617Schristos     case at_dot:
7878*75f6d617Schristos     case after_dot:
7879*75f6d617Schristos #endif
7880*75f6d617Schristos       break;
7881*75f6d617Schristos 
7882*75f6d617Schristos     case start_memory:
7883*75f6d617Schristos       reg_no = *p1;
7884*75f6d617Schristos       assert (reg_no > 0 && reg_no <= MAX_REGNUM);
7885*75f6d617Schristos       ret = PREFIX(group_match_null_string_p) (&p1, end, reg_info);
7886*75f6d617Schristos 
7887*75f6d617Schristos       /* Have to set this here in case we're checking a group which
7888*75f6d617Schristos          contains a group and a back reference to it.  */
7889*75f6d617Schristos 
7890*75f6d617Schristos       if (REG_MATCH_NULL_STRING_P (reg_info[reg_no]) == MATCH_NULL_UNSET_VALUE)
7891*75f6d617Schristos         REG_MATCH_NULL_STRING_P (reg_info[reg_no]) = ret;
7892*75f6d617Schristos 
7893*75f6d617Schristos       if (!ret)
7894*75f6d617Schristos         return false;
7895*75f6d617Schristos       break;
7896*75f6d617Schristos 
7897*75f6d617Schristos     /* If this is an optimized succeed_n for zero times, make the jump.  */
7898*75f6d617Schristos     case jump:
7899*75f6d617Schristos       EXTRACT_NUMBER_AND_INCR (mcnt, p1);
7900*75f6d617Schristos       if (mcnt >= 0)
7901*75f6d617Schristos         p1 += mcnt;
7902*75f6d617Schristos       else
7903*75f6d617Schristos         return false;
7904*75f6d617Schristos       break;
7905*75f6d617Schristos 
7906*75f6d617Schristos     case succeed_n:
7907*75f6d617Schristos       /* Get to the number of times to succeed.  */
7908*75f6d617Schristos       p1 += OFFSET_ADDRESS_SIZE;
7909*75f6d617Schristos       EXTRACT_NUMBER_AND_INCR (mcnt, p1);
7910*75f6d617Schristos 
7911*75f6d617Schristos       if (mcnt == 0)
7912*75f6d617Schristos         {
7913*75f6d617Schristos           p1 -= 2 * OFFSET_ADDRESS_SIZE;
7914*75f6d617Schristos           EXTRACT_NUMBER_AND_INCR (mcnt, p1);
7915*75f6d617Schristos           p1 += mcnt;
7916*75f6d617Schristos         }
7917*75f6d617Schristos       else
7918*75f6d617Schristos         return false;
7919*75f6d617Schristos       break;
7920*75f6d617Schristos 
7921*75f6d617Schristos     case duplicate:
7922*75f6d617Schristos       if (!REG_MATCH_NULL_STRING_P (reg_info[*p1]))
7923*75f6d617Schristos         return false;
7924*75f6d617Schristos       break;
7925*75f6d617Schristos 
7926*75f6d617Schristos     case set_number_at:
7927*75f6d617Schristos       p1 += 2 * OFFSET_ADDRESS_SIZE;
7928*75f6d617Schristos 
7929*75f6d617Schristos     default:
7930*75f6d617Schristos       /* All other opcodes mean we cannot match the empty string.  */
7931*75f6d617Schristos       return false;
7932*75f6d617Schristos   }
7933*75f6d617Schristos 
7934*75f6d617Schristos   *p = p1;
7935*75f6d617Schristos   return true;
7936*75f6d617Schristos } /* common_op_match_null_string_p */
7937*75f6d617Schristos 
7938*75f6d617Schristos 
7939*75f6d617Schristos /* Return zero if TRANSLATE[S1] and TRANSLATE[S2] are identical for LEN
7940*75f6d617Schristos    bytes; nonzero otherwise.  */
7941*75f6d617Schristos 
7942*75f6d617Schristos static int
7943*75f6d617Schristos PREFIX(bcmp_translate) (s1, s2, len, translate)
7944*75f6d617Schristos      const CHAR_T *s1, *s2;
7945*75f6d617Schristos      register int len;
7946*75f6d617Schristos      RE_TRANSLATE_TYPE translate;
7947*75f6d617Schristos {
7948*75f6d617Schristos   register const UCHAR_T *p1 = (const UCHAR_T *) s1;
7949*75f6d617Schristos   register const UCHAR_T *p2 = (const UCHAR_T *) s2;
7950*75f6d617Schristos   while (len)
7951*75f6d617Schristos     {
7952*75f6d617Schristos #ifdef WCHAR
7953*75f6d617Schristos       if (((*p1<=0xff)?translate[*p1++]:*p1++)
7954*75f6d617Schristos 	  != ((*p2<=0xff)?translate[*p2++]:*p2++))
7955*75f6d617Schristos 	return 1;
7956*75f6d617Schristos #else /* BYTE */
7957*75f6d617Schristos       if (translate[*p1++] != translate[*p2++]) return 1;
7958*75f6d617Schristos #endif /* WCHAR */
7959*75f6d617Schristos       len--;
7960*75f6d617Schristos     }
7961*75f6d617Schristos   return 0;
7962*75f6d617Schristos }
7963*75f6d617Schristos 
7964*75f6d617Schristos 
7965*75f6d617Schristos #else /* not INSIDE_RECURSION */
7966*75f6d617Schristos 
7967*75f6d617Schristos /* Entry points for GNU code.  */
7968*75f6d617Schristos 
7969*75f6d617Schristos /* re_compile_pattern is the GNU regular expression compiler: it
7970*75f6d617Schristos    compiles PATTERN (of length SIZE) and puts the result in BUFP.
7971*75f6d617Schristos    Returns 0 if the pattern was valid, otherwise an error string.
7972*75f6d617Schristos 
7973*75f6d617Schristos    Assumes the `allocated' (and perhaps `buffer') and `translate' fields
7974*75f6d617Schristos    are set in BUFP on entry.
7975*75f6d617Schristos 
7976*75f6d617Schristos    We call regex_compile to do the actual compilation.  */
7977*75f6d617Schristos 
7978*75f6d617Schristos const char *
7979*75f6d617Schristos re_compile_pattern (pattern, length, bufp)
7980*75f6d617Schristos      const char *pattern;
7981*75f6d617Schristos      size_t length;
7982*75f6d617Schristos      struct re_pattern_buffer *bufp;
7983*75f6d617Schristos {
7984*75f6d617Schristos   reg_errcode_t ret;
7985*75f6d617Schristos 
7986*75f6d617Schristos   /* GNU code is written to assume at least RE_NREGS registers will be set
7987*75f6d617Schristos      (and at least one extra will be -1).  */
7988*75f6d617Schristos   bufp->regs_allocated = REGS_UNALLOCATED;
7989*75f6d617Schristos 
7990*75f6d617Schristos   /* And GNU code determines whether or not to get register information
7991*75f6d617Schristos      by passing null for the REGS argument to re_match, etc., not by
7992*75f6d617Schristos      setting no_sub.  */
7993*75f6d617Schristos   bufp->no_sub = 0;
7994*75f6d617Schristos 
7995*75f6d617Schristos   /* Match anchors at newline.  */
7996*75f6d617Schristos   bufp->newline_anchor = 1;
7997*75f6d617Schristos 
7998*75f6d617Schristos # ifdef MBS_SUPPORT
7999*75f6d617Schristos   if (MB_CUR_MAX != 1)
8000*75f6d617Schristos     ret = wcs_regex_compile (pattern, length, re_syntax_options, bufp);
8001*75f6d617Schristos   else
8002*75f6d617Schristos # endif
8003*75f6d617Schristos     ret = byte_regex_compile (pattern, length, re_syntax_options, bufp);
8004*75f6d617Schristos 
8005*75f6d617Schristos   if (!ret)
8006*75f6d617Schristos     return NULL;
8007*75f6d617Schristos   return gettext (re_error_msgid + re_error_msgid_idx[(int) ret]);
8008*75f6d617Schristos }
8009*75f6d617Schristos #ifdef _LIBC
8010*75f6d617Schristos weak_alias (__re_compile_pattern, re_compile_pattern)
8011*75f6d617Schristos #endif
8012*75f6d617Schristos 
8013*75f6d617Schristos /* Entry points compatible with 4.2 BSD regex library.  We don't define
8014*75f6d617Schristos    them unless specifically requested.  */
8015*75f6d617Schristos 
8016*75f6d617Schristos #if defined _REGEX_RE_COMP || defined _LIBC
8017*75f6d617Schristos 
8018*75f6d617Schristos /* BSD has one and only one pattern buffer.  */
8019*75f6d617Schristos static struct re_pattern_buffer re_comp_buf;
8020*75f6d617Schristos 
8021*75f6d617Schristos char *
8022*75f6d617Schristos #ifdef _LIBC
8023*75f6d617Schristos /* Make these definitions weak in libc, so POSIX programs can redefine
8024*75f6d617Schristos    these names if they don't use our functions, and still use
8025*75f6d617Schristos    regcomp/regexec below without link errors.  */
8026*75f6d617Schristos weak_function
8027*75f6d617Schristos #endif
8028*75f6d617Schristos re_comp (s)
8029*75f6d617Schristos     const char *s;
8030*75f6d617Schristos {
8031*75f6d617Schristos   reg_errcode_t ret;
8032*75f6d617Schristos 
8033*75f6d617Schristos   if (!s)
8034*75f6d617Schristos     {
8035*75f6d617Schristos       if (!re_comp_buf.buffer)
8036*75f6d617Schristos 	return gettext ("No previous regular expression");
8037*75f6d617Schristos       return 0;
8038*75f6d617Schristos     }
8039*75f6d617Schristos 
8040*75f6d617Schristos   if (!re_comp_buf.buffer)
8041*75f6d617Schristos     {
8042*75f6d617Schristos       re_comp_buf.buffer = (unsigned char *) malloc (200);
8043*75f6d617Schristos       if (re_comp_buf.buffer == NULL)
8044*75f6d617Schristos         return (char *) gettext (re_error_msgid
8045*75f6d617Schristos 				 + re_error_msgid_idx[(int) REG_ESPACE]);
8046*75f6d617Schristos       re_comp_buf.allocated = 200;
8047*75f6d617Schristos 
8048*75f6d617Schristos       re_comp_buf.fastmap = (char *) malloc (1 << BYTEWIDTH);
8049*75f6d617Schristos       if (re_comp_buf.fastmap == NULL)
8050*75f6d617Schristos 	return (char *) gettext (re_error_msgid
8051*75f6d617Schristos 				 + re_error_msgid_idx[(int) REG_ESPACE]);
8052*75f6d617Schristos     }
8053*75f6d617Schristos 
8054*75f6d617Schristos   /* Since `re_exec' always passes NULL for the `regs' argument, we
8055*75f6d617Schristos      don't need to initialize the pattern buffer fields which affect it.  */
8056*75f6d617Schristos 
8057*75f6d617Schristos   /* Match anchors at newlines.  */
8058*75f6d617Schristos   re_comp_buf.newline_anchor = 1;
8059*75f6d617Schristos 
8060*75f6d617Schristos # ifdef MBS_SUPPORT
8061*75f6d617Schristos   if (MB_CUR_MAX != 1)
8062*75f6d617Schristos     ret = wcs_regex_compile (s, strlen (s), re_syntax_options, &re_comp_buf);
8063*75f6d617Schristos   else
8064*75f6d617Schristos # endif
8065*75f6d617Schristos     ret = byte_regex_compile (s, strlen (s), re_syntax_options, &re_comp_buf);
8066*75f6d617Schristos 
8067*75f6d617Schristos   if (!ret)
8068*75f6d617Schristos     return NULL;
8069*75f6d617Schristos 
8070*75f6d617Schristos   /* Yes, we're discarding `const' here if !HAVE_LIBINTL.  */
8071*75f6d617Schristos   return (char *) gettext (re_error_msgid + re_error_msgid_idx[(int) ret]);
8072*75f6d617Schristos }
8073*75f6d617Schristos 
8074*75f6d617Schristos 
8075*75f6d617Schristos int
8076*75f6d617Schristos #ifdef _LIBC
8077*75f6d617Schristos weak_function
8078*75f6d617Schristos #endif
8079*75f6d617Schristos re_exec (s)
8080*75f6d617Schristos     const char *s;
8081*75f6d617Schristos {
8082*75f6d617Schristos   const int len = strlen (s);
8083*75f6d617Schristos   return
8084*75f6d617Schristos     0 <= re_search (&re_comp_buf, s, len, 0, len, (struct re_registers *) 0);
8085*75f6d617Schristos }
8086*75f6d617Schristos 
8087*75f6d617Schristos #endif /* _REGEX_RE_COMP */
8088*75f6d617Schristos 
8089*75f6d617Schristos /* POSIX.2 functions.  Don't define these for Emacs.  */
8090*75f6d617Schristos 
8091*75f6d617Schristos #ifndef emacs
8092*75f6d617Schristos 
8093*75f6d617Schristos /* regcomp takes a regular expression as a string and compiles it.
8094*75f6d617Schristos 
8095*75f6d617Schristos    PREG is a regex_t *.  We do not expect any fields to be initialized,
8096*75f6d617Schristos    since POSIX says we shouldn't.  Thus, we set
8097*75f6d617Schristos 
8098*75f6d617Schristos      `buffer' to the compiled pattern;
8099*75f6d617Schristos      `used' to the length of the compiled pattern;
8100*75f6d617Schristos      `syntax' to RE_SYNTAX_POSIX_EXTENDED if the
8101*75f6d617Schristos        REG_EXTENDED bit in CFLAGS is set; otherwise, to
8102*75f6d617Schristos        RE_SYNTAX_POSIX_BASIC;
8103*75f6d617Schristos      `newline_anchor' to REG_NEWLINE being set in CFLAGS;
8104*75f6d617Schristos      `fastmap' to an allocated space for the fastmap;
8105*75f6d617Schristos      `fastmap_accurate' to zero;
8106*75f6d617Schristos      `re_nsub' to the number of subexpressions in PATTERN.
8107*75f6d617Schristos 
8108*75f6d617Schristos    PATTERN is the address of the pattern string.
8109*75f6d617Schristos 
8110*75f6d617Schristos    CFLAGS is a series of bits which affect compilation.
8111*75f6d617Schristos 
8112*75f6d617Schristos      If REG_EXTENDED is set, we use POSIX extended syntax; otherwise, we
8113*75f6d617Schristos      use POSIX basic syntax.
8114*75f6d617Schristos 
8115*75f6d617Schristos      If REG_NEWLINE is set, then . and [^...] don't match newline.
8116*75f6d617Schristos      Also, regexec will try a match beginning after every newline.
8117*75f6d617Schristos 
8118*75f6d617Schristos      If REG_ICASE is set, then we considers upper- and lowercase
8119*75f6d617Schristos      versions of letters to be equivalent when matching.
8120*75f6d617Schristos 
8121*75f6d617Schristos      If REG_NOSUB is set, then when PREG is passed to regexec, that
8122*75f6d617Schristos      routine will report only success or failure, and nothing about the
8123*75f6d617Schristos      registers.
8124*75f6d617Schristos 
8125*75f6d617Schristos    It returns 0 if it succeeds, nonzero if it doesn't.  (See regex.h for
8126*75f6d617Schristos    the return codes and their meanings.)  */
8127*75f6d617Schristos 
8128*75f6d617Schristos int
8129*75f6d617Schristos regcomp (preg, pattern, cflags)
8130*75f6d617Schristos     regex_t *preg;
8131*75f6d617Schristos     const char *pattern;
8132*75f6d617Schristos     int cflags;
8133*75f6d617Schristos {
8134*75f6d617Schristos   reg_errcode_t ret;
8135*75f6d617Schristos   reg_syntax_t syntax
8136*75f6d617Schristos     = (cflags & REG_EXTENDED) ?
8137*75f6d617Schristos       RE_SYNTAX_POSIX_EXTENDED : RE_SYNTAX_POSIX_BASIC;
8138*75f6d617Schristos 
8139*75f6d617Schristos   /* regex_compile will allocate the space for the compiled pattern.  */
8140*75f6d617Schristos   preg->buffer = 0;
8141*75f6d617Schristos   preg->allocated = 0;
8142*75f6d617Schristos   preg->used = 0;
8143*75f6d617Schristos 
8144*75f6d617Schristos   /* Try to allocate space for the fastmap.  */
8145*75f6d617Schristos   preg->fastmap = (char *) malloc (1 << BYTEWIDTH);
8146*75f6d617Schristos 
8147*75f6d617Schristos   if (cflags & REG_ICASE)
8148*75f6d617Schristos     {
8149*75f6d617Schristos       unsigned i;
8150*75f6d617Schristos 
8151*75f6d617Schristos       preg->translate
8152*75f6d617Schristos 	= (RE_TRANSLATE_TYPE) malloc (CHAR_SET_SIZE
8153*75f6d617Schristos 				      * sizeof (*(RE_TRANSLATE_TYPE)0));
8154*75f6d617Schristos       if (preg->translate == NULL)
8155*75f6d617Schristos         return (int) REG_ESPACE;
8156*75f6d617Schristos 
8157*75f6d617Schristos       /* Map uppercase characters to corresponding lowercase ones.  */
8158*75f6d617Schristos       for (i = 0; i < CHAR_SET_SIZE; i++)
8159*75f6d617Schristos         preg->translate[i] = ISUPPER (i) ? TOLOWER (i) : i;
8160*75f6d617Schristos     }
8161*75f6d617Schristos   else
8162*75f6d617Schristos     preg->translate = NULL;
8163*75f6d617Schristos 
8164*75f6d617Schristos   /* If REG_NEWLINE is set, newlines are treated differently.  */
8165*75f6d617Schristos   if (cflags & REG_NEWLINE)
8166*75f6d617Schristos     { /* REG_NEWLINE implies neither . nor [^...] match newline.  */
8167*75f6d617Schristos       syntax &= ~RE_DOT_NEWLINE;
8168*75f6d617Schristos       syntax |= RE_HAT_LISTS_NOT_NEWLINE;
8169*75f6d617Schristos       /* It also changes the matching behavior.  */
8170*75f6d617Schristos       preg->newline_anchor = 1;
8171*75f6d617Schristos     }
8172*75f6d617Schristos   else
8173*75f6d617Schristos     preg->newline_anchor = 0;
8174*75f6d617Schristos 
8175*75f6d617Schristos   preg->no_sub = !!(cflags & REG_NOSUB);
8176*75f6d617Schristos 
8177*75f6d617Schristos   /* POSIX says a null character in the pattern terminates it, so we
8178*75f6d617Schristos      can use strlen here in compiling the pattern.  */
8179*75f6d617Schristos # ifdef MBS_SUPPORT
8180*75f6d617Schristos   if (MB_CUR_MAX != 1)
8181*75f6d617Schristos     ret = wcs_regex_compile (pattern, strlen (pattern), syntax, preg);
8182*75f6d617Schristos   else
8183*75f6d617Schristos # endif
8184*75f6d617Schristos     ret = byte_regex_compile (pattern, strlen (pattern), syntax, preg);
8185*75f6d617Schristos 
8186*75f6d617Schristos   /* POSIX doesn't distinguish between an unmatched open-group and an
8187*75f6d617Schristos      unmatched close-group: both are REG_EPAREN.  */
8188*75f6d617Schristos   if (ret == REG_ERPAREN) ret = REG_EPAREN;
8189*75f6d617Schristos 
8190*75f6d617Schristos   if (ret == REG_NOERROR && preg->fastmap)
8191*75f6d617Schristos     {
8192*75f6d617Schristos       /* Compute the fastmap now, since regexec cannot modify the pattern
8193*75f6d617Schristos 	 buffer.  */
8194*75f6d617Schristos       if (re_compile_fastmap (preg) == -2)
8195*75f6d617Schristos 	{
8196*75f6d617Schristos 	  /* Some error occurred while computing the fastmap, just forget
8197*75f6d617Schristos 	     about it.  */
8198*75f6d617Schristos 	  free (preg->fastmap);
8199*75f6d617Schristos 	  preg->fastmap = NULL;
8200*75f6d617Schristos 	}
8201*75f6d617Schristos     }
8202*75f6d617Schristos 
8203*75f6d617Schristos   return (int) ret;
8204*75f6d617Schristos }
8205*75f6d617Schristos #ifdef _LIBC
8206*75f6d617Schristos weak_alias (__regcomp, regcomp)
8207*75f6d617Schristos #endif
8208*75f6d617Schristos 
8209*75f6d617Schristos 
8210*75f6d617Schristos /* regexec searches for a given pattern, specified by PREG, in the
8211*75f6d617Schristos    string STRING.
8212*75f6d617Schristos 
8213*75f6d617Schristos    If NMATCH is zero or REG_NOSUB was set in the cflags argument to
8214*75f6d617Schristos    `regcomp', we ignore PMATCH.  Otherwise, we assume PMATCH has at
8215*75f6d617Schristos    least NMATCH elements, and we set them to the offsets of the
8216*75f6d617Schristos    corresponding matched substrings.
8217*75f6d617Schristos 
8218*75f6d617Schristos    EFLAGS specifies `execution flags' which affect matching: if
8219*75f6d617Schristos    REG_NOTBOL is set, then ^ does not match at the beginning of the
8220*75f6d617Schristos    string; if REG_NOTEOL is set, then $ does not match at the end.
8221*75f6d617Schristos 
8222*75f6d617Schristos    We return 0 if we find a match and REG_NOMATCH if not.  */
8223*75f6d617Schristos 
8224*75f6d617Schristos int
8225*75f6d617Schristos regexec (preg, string, nmatch, pmatch, eflags)
8226*75f6d617Schristos     const regex_t *preg;
8227*75f6d617Schristos     const char *string;
8228*75f6d617Schristos     size_t nmatch;
8229*75f6d617Schristos     regmatch_t pmatch[];
8230*75f6d617Schristos     int eflags;
8231*75f6d617Schristos {
8232*75f6d617Schristos   int ret;
8233*75f6d617Schristos   struct re_registers regs;
8234*75f6d617Schristos   regex_t private_preg;
8235*75f6d617Schristos   int len = strlen (string);
8236*75f6d617Schristos   boolean want_reg_info = !preg->no_sub && nmatch > 0;
8237*75f6d617Schristos 
8238*75f6d617Schristos   private_preg = *preg;
8239*75f6d617Schristos 
8240*75f6d617Schristos   private_preg.not_bol = !!(eflags & REG_NOTBOL);
8241*75f6d617Schristos   private_preg.not_eol = !!(eflags & REG_NOTEOL);
8242*75f6d617Schristos 
8243*75f6d617Schristos   /* The user has told us exactly how many registers to return
8244*75f6d617Schristos      information about, via `nmatch'.  We have to pass that on to the
8245*75f6d617Schristos      matching routines.  */
8246*75f6d617Schristos   private_preg.regs_allocated = REGS_FIXED;
8247*75f6d617Schristos 
8248*75f6d617Schristos   if (want_reg_info)
8249*75f6d617Schristos     {
8250*75f6d617Schristos       regs.num_regs = nmatch;
8251*75f6d617Schristos       regs.start = TALLOC (nmatch * 2, regoff_t);
8252*75f6d617Schristos       if (regs.start == NULL)
8253*75f6d617Schristos         return (int) REG_NOMATCH;
8254*75f6d617Schristos       regs.end = regs.start + nmatch;
8255*75f6d617Schristos     }
8256*75f6d617Schristos 
8257*75f6d617Schristos   /* Perform the searching operation.  */
8258*75f6d617Schristos   ret = re_search (&private_preg, string, len,
8259*75f6d617Schristos                    /* start: */ 0, /* range: */ len,
8260*75f6d617Schristos                    want_reg_info ? &regs : (struct re_registers *) 0);
8261*75f6d617Schristos 
8262*75f6d617Schristos   /* Copy the register information to the POSIX structure.  */
8263*75f6d617Schristos   if (want_reg_info)
8264*75f6d617Schristos     {
8265*75f6d617Schristos       if (ret >= 0)
8266*75f6d617Schristos         {
8267*75f6d617Schristos           unsigned r;
8268*75f6d617Schristos 
8269*75f6d617Schristos           for (r = 0; r < nmatch; r++)
8270*75f6d617Schristos             {
8271*75f6d617Schristos               pmatch[r].rm_so = regs.start[r];
8272*75f6d617Schristos               pmatch[r].rm_eo = regs.end[r];
8273*75f6d617Schristos             }
8274*75f6d617Schristos         }
8275*75f6d617Schristos 
8276*75f6d617Schristos       /* If we needed the temporary register info, free the space now.  */
8277*75f6d617Schristos       free (regs.start);
8278*75f6d617Schristos     }
8279*75f6d617Schristos 
8280*75f6d617Schristos   /* We want zero return to mean success, unlike `re_search'.  */
8281*75f6d617Schristos   return ret >= 0 ? (int) REG_NOERROR : (int) REG_NOMATCH;
8282*75f6d617Schristos }
8283*75f6d617Schristos #ifdef _LIBC
8284*75f6d617Schristos weak_alias (__regexec, regexec)
8285*75f6d617Schristos #endif
8286*75f6d617Schristos 
8287*75f6d617Schristos 
8288*75f6d617Schristos /* Returns a message corresponding to an error code, ERRCODE, returned
8289*75f6d617Schristos    from either regcomp or regexec.   We don't use PREG here.  */
8290*75f6d617Schristos 
8291*75f6d617Schristos size_t
8292*75f6d617Schristos regerror (errcode, preg, errbuf, errbuf_size)
8293*75f6d617Schristos     int errcode;
8294*75f6d617Schristos     const regex_t *preg;
8295*75f6d617Schristos     char *errbuf;
8296*75f6d617Schristos     size_t errbuf_size;
8297*75f6d617Schristos {
8298*75f6d617Schristos   const char *msg;
8299*75f6d617Schristos   size_t msg_size;
8300*75f6d617Schristos 
8301*75f6d617Schristos   if (errcode < 0
8302*75f6d617Schristos       || errcode >= (int) (sizeof (re_error_msgid_idx)
8303*75f6d617Schristos 			   / sizeof (re_error_msgid_idx[0])))
8304*75f6d617Schristos     /* Only error codes returned by the rest of the code should be passed
8305*75f6d617Schristos        to this routine.  If we are given anything else, or if other regex
8306*75f6d617Schristos        code generates an invalid error code, then the program has a bug.
8307*75f6d617Schristos        Dump core so we can fix it.  */
8308*75f6d617Schristos     abort ();
8309*75f6d617Schristos 
8310*75f6d617Schristos   msg = gettext (re_error_msgid + re_error_msgid_idx[errcode]);
8311*75f6d617Schristos 
8312*75f6d617Schristos   msg_size = strlen (msg) + 1; /* Includes the null.  */
8313*75f6d617Schristos 
8314*75f6d617Schristos   if (errbuf_size != 0)
8315*75f6d617Schristos     {
8316*75f6d617Schristos       if (msg_size > errbuf_size)
8317*75f6d617Schristos         {
8318*75f6d617Schristos #if defined HAVE_MEMPCPY || defined _LIBC
8319*75f6d617Schristos 	  *((char *) __mempcpy (errbuf, msg, errbuf_size - 1)) = '\0';
8320*75f6d617Schristos #else
8321*75f6d617Schristos           memcpy (errbuf, msg, errbuf_size - 1);
8322*75f6d617Schristos           errbuf[errbuf_size - 1] = 0;
8323*75f6d617Schristos #endif
8324*75f6d617Schristos         }
8325*75f6d617Schristos       else
8326*75f6d617Schristos         memcpy (errbuf, msg, msg_size);
8327*75f6d617Schristos     }
8328*75f6d617Schristos 
8329*75f6d617Schristos   return msg_size;
8330*75f6d617Schristos }
8331*75f6d617Schristos #ifdef _LIBC
8332*75f6d617Schristos weak_alias (__regerror, regerror)
8333*75f6d617Schristos #endif
8334*75f6d617Schristos 
8335*75f6d617Schristos 
8336*75f6d617Schristos /* Free dynamically allocated space used by PREG.  */
8337*75f6d617Schristos 
8338*75f6d617Schristos void
8339*75f6d617Schristos regfree (preg)
8340*75f6d617Schristos     regex_t *preg;
8341*75f6d617Schristos {
8342*75f6d617Schristos   if (preg->buffer != NULL)
8343*75f6d617Schristos     free (preg->buffer);
8344*75f6d617Schristos   preg->buffer = NULL;
8345*75f6d617Schristos 
8346*75f6d617Schristos   preg->allocated = 0;
8347*75f6d617Schristos   preg->used = 0;
8348*75f6d617Schristos 
8349*75f6d617Schristos   if (preg->fastmap != NULL)
8350*75f6d617Schristos     free (preg->fastmap);
8351*75f6d617Schristos   preg->fastmap = NULL;
8352*75f6d617Schristos   preg->fastmap_accurate = 0;
8353*75f6d617Schristos 
8354*75f6d617Schristos   if (preg->translate != NULL)
8355*75f6d617Schristos     free (preg->translate);
8356*75f6d617Schristos   preg->translate = NULL;
8357*75f6d617Schristos }
8358*75f6d617Schristos #ifdef _LIBC
8359*75f6d617Schristos weak_alias (__regfree, regfree)
8360*75f6d617Schristos #endif
8361*75f6d617Schristos 
8362*75f6d617Schristos #endif /* not emacs  */
8363*75f6d617Schristos 
8364*75f6d617Schristos #endif /* not INSIDE_RECURSION */
8365*75f6d617Schristos 
8366*75f6d617Schristos 
/* Tear down the helper macros defined earlier in this file.
   NOTE(review): presumably this lets a further pass over the file (see
   the INSIDE_RECURSION conditional closed just above) define them
   afresh -- confirm against the top of the file.  */

/* Number encoding/decoding helpers.  */
#undef STORE_NUMBER
#undef STORE_NUMBER_AND_INCR
#undef EXTRACT_NUMBER
#undef EXTRACT_NUMBER_AND_INCR

/* Debug printing helpers.  */
#undef DEBUG_PRINT_COMPILED_PATTERN
#undef DEBUG_PRINT_DOUBLE_STRING

/* Failure-stack helpers.  */
#undef INIT_FAIL_STACK
#undef RESET_FAIL_STACK
#undef DOUBLE_FAIL_STACK
#undef PUSH_PATTERN_OP
#undef PUSH_FAILURE_POINTER
#undef PUSH_FAILURE_INT
#undef PUSH_FAILURE_ELT
#undef POP_FAILURE_POINTER
#undef POP_FAILURE_INT
#undef POP_FAILURE_ELT
#undef DEBUG_PUSH
#undef DEBUG_POP
#undef PUSH_FAILURE_POINT
#undef POP_FAILURE_POINT

#undef REG_UNSET_VALUE
#undef REG_UNSET

/* Pattern fetching helpers.  */
#undef PATFETCH
#undef PATFETCH_RAW
#undef PATUNFETCH
#undef TRANSLATE

/* Compiled-buffer helpers.  */
#undef INIT_BUF_SIZE
#undef GET_BUFFER_SPACE
#undef BUF_PUSH
#undef BUF_PUSH_2
#undef BUF_PUSH_3
#undef STORE_JUMP
#undef STORE_JUMP2
#undef INSERT_JUMP
#undef INSERT_JUMP2
#undef EXTEND_BUFFER
#undef GET_UNSIGNED_NUMBER
#undef FREE_STACK_RETURN

# undef POINTER_TO_OFFSET
/* This line used to read MATCHING_IN_FRST_STRING, a misspelling that
   undefined nothing and left the real macro defined.
   NOTE(review): the #define lies outside this part of the file; confirm
   that MATCHING_IN_FIRST_STRING is the spelling used there.  */
# undef MATCHING_IN_FIRST_STRING
# undef PREFETCH
# undef AT_STRINGS_BEG
# undef AT_STRINGS_END
# undef WORDCHAR_P
# undef FREE_VAR
# undef FREE_VARIABLES
# undef NO_HIGHEST_ACTIVE_REG
# undef NO_LOWEST_ACTIVE_REG

/* Character-type parameterisation.  */
# undef CHAR_T
# undef UCHAR_T
# undef COMPILED_BUFFER_VAR
# undef OFFSET_ADDRESS_SIZE
# undef CHAR_CLASS_SIZE
# undef PREFIX
# undef ARG_PREFIX
# undef PUT_CHAR
# undef BYTE
# undef WCHAR

/* Record that this section has been processed at least once.  */
# define DEFINED_ONCE
8434