xref: /netbsd-src/external/gpl2/diffutils/dist/lib/regex.c (revision 75f6d617e282811cb173c2ccfbf5df0dd71f7045)
1 /*	$NetBSD: regex.c,v 1.1.1.1 2016/01/13 03:15:30 christos Exp $	*/
2 
3 /* Extended regular expression matching and search library,
4    version 0.12.
5    (Implements POSIX draft P1003.2/D11.2, except for some of the
6    internationalization features.)
7    Copyright (C) 1993-1999, 2000, 2001 Free Software Foundation, Inc.
8 
9    This program is free software; you can redistribute it and/or modify
10    it under the terms of the GNU General Public License as published by
11    the Free Software Foundation; either version 2, or (at your option)
12    any later version.
13 
14    This program is distributed in the hope that it will be useful,
15    but WITHOUT ANY WARRANTY; without even the implied warranty of
16    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
17    GNU General Public License for more details.
18 
19    You should have received a copy of the GNU General Public License
20    along with this program; if not, write to the Free Software Foundation,
21    Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.  */
22 
23 /* AIX requires this to be the first thing in the file. */
24 #if defined _AIX && !defined REGEX_MALLOC
25   #pragma alloca
26 #endif
27 
28 #undef	_GNU_SOURCE
29 #define _GNU_SOURCE
30 
31 #ifdef HAVE_CONFIG_H
32 # include <config.h>
33 #endif
34 
35 #ifndef PARAMS
36 # if defined __GNUC__ || (defined __STDC__ && __STDC__)
37 #  define PARAMS(args) args
38 # else
39 #  define PARAMS(args) ()
40 # endif  /* GCC.  */
41 #endif  /* Not PARAMS.  */
42 
43 #ifndef INSIDE_RECURSION
44 
45 # if defined STDC_HEADERS && !defined emacs
46 #  include <stddef.h>
47 # else
48 /* We need this for `regex.h', and perhaps for the Emacs include files.  */
49 #  include <sys/types.h>
50 # endif
51 
52 # define WIDE_CHAR_SUPPORT (HAVE_WCTYPE_H && HAVE_WCHAR_H && HAVE_BTOWC)
53 
54 /* For platforms which support the ISO C Amendment 1 functionality we
55    support user defined character classes.  */
56 # if defined _LIBC || WIDE_CHAR_SUPPORT
57 /* Solaris 2.5 has a bug: <wchar.h> must be included before <wctype.h>.  */
58 #  include <wchar.h>
59 #  include <wctype.h>
60 # endif
61 
62 # ifdef _LIBC
63 /* We have to keep the namespace clean.  */
64 #  define regfree(preg) __regfree (preg)
65 #  define regexec(pr, st, nm, pm, ef) __regexec (pr, st, nm, pm, ef)
66 #  define regcomp(preg, pattern, cflags) __regcomp (preg, pattern, cflags)
67 #  define regerror(errcode, preg, errbuf, errbuf_size) \
68 	__regerror(errcode, preg, errbuf, errbuf_size)
69 #  define re_set_registers(bu, re, nu, st, en) \
70 	__re_set_registers (bu, re, nu, st, en)
71 #  define re_match_2(bufp, string1, size1, string2, size2, pos, regs, stop) \
72 	__re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop)
73 #  define re_match(bufp, string, size, pos, regs) \
74 	__re_match (bufp, string, size, pos, regs)
75 #  define re_search(bufp, string, size, startpos, range, regs) \
76 	__re_search (bufp, string, size, startpos, range, regs)
77 #  define re_compile_pattern(pattern, length, bufp) \
78 	__re_compile_pattern (pattern, length, bufp)
79 #  define re_set_syntax(syntax) __re_set_syntax (syntax)
80 #  define re_search_2(bufp, st1, s1, st2, s2, startpos, range, regs, stop) \
81 	__re_search_2 (bufp, st1, s1, st2, s2, startpos, range, regs, stop)
82 #  define re_compile_fastmap(bufp) __re_compile_fastmap (bufp)
83 
84 #  define btowc __btowc
85 #  define iswctype __iswctype
86 #  define mbrtowc __mbrtowc
87 #  define wcslen __wcslen
88 #  define wcscoll __wcscoll
89 #  define wcrtomb __wcrtomb
90 
91 /* We are also using some library internals.  */
92 #  include <locale/localeinfo.h>
93 #  include <locale/elem-hash.h>
94 #  include <langinfo.h>
95 #  include <locale/coll-lookup.h>
96 # endif
97 
98 /* This is for other GNU distributions with internationalized messages.  */
99 # if (HAVE_LIBINTL_H && ENABLE_NLS) || defined _LIBC
100 #  include <libintl.h>
101 #  ifdef _LIBC
102 #   undef gettext
103 #   define gettext(msgid) __dcgettext ("libc", msgid, LC_MESSAGES)
104 #  endif
105 # else
106 #  define gettext(msgid) (msgid)
107 # endif
108 
109 # ifndef gettext_noop
110 /* This define is so xgettext can find the internationalizable
111    strings.  */
112 #  define gettext_noop(String) String
113 # endif
114 
115 /* Support for bounded pointers.  */
116 # if !defined _LIBC && !defined __BOUNDED_POINTERS__
117 #  define __bounded	/* nothing */
118 #  define __unbounded	/* nothing */
119 #  define __ptrvalue	/* nothing */
120 # endif
121 
122 /* The `emacs' switch turns on certain matching commands
123    that make sense only in Emacs. */
124 # ifdef emacs
125 
126 #  include "lisp.h"
127 #  include "buffer.h"
128 #  include "syntax.h"
129 
130 # else  /* not emacs */
131 
132 /* If we are not linking with Emacs proper,
133    we can't use the relocating allocator
134    even if config.h says that we can.  */
135 #  undef REL_ALLOC
136 
137 #  if defined STDC_HEADERS || defined _LIBC
138 #   include <stdlib.h>
139 #  else
140 char *malloc ();
141 char *realloc ();
142 #  endif
143 
144 /* When used in Emacs's lib-src, we need to get bzero and bcopy somehow.
145    If nothing else has been done, use the method below.  */
146 #  ifdef INHIBIT_STRING_HEADER
147 #   if !(defined HAVE_BZERO && defined HAVE_BCOPY)
148 #    if !defined bzero && !defined bcopy
149 #     undef INHIBIT_STRING_HEADER
150 #    endif
151 #   endif
152 #  endif
153 
154 /* This is the normal way of making sure we have a bcopy and a bzero.
155    This is used in most programs--a few other programs avoid this
156    by defining INHIBIT_STRING_HEADER.  */
157 #  ifndef INHIBIT_STRING_HEADER
158 #   if defined HAVE_STRING_H || defined STDC_HEADERS || defined _LIBC
159 #    include <string.h>
160 #    ifndef bzero
161 #     ifndef _LIBC
162 #      define bzero(s, n)	(memset (s, '\0', n), (s))
163 #     else
164 #      define bzero(s, n)	__bzero (s, n)
165 #     endif
166 #    endif
167 #   else
168 #    include <strings.h>
169 #    ifndef memcmp
170 #     define memcmp(s1, s2, n)	bcmp (s1, s2, n)
171 #    endif
172 #    ifndef memcpy
173 #     define memcpy(d, s, n)	(bcopy (s, d, n), (d))
174 #    endif
175 #   endif
176 #  endif
177 
178 /* Define the syntax stuff for \<, \>, etc.  */
179 
180 /* This must be nonzero for the wordchar and notwordchar pattern
181    commands in re_match_2.  */
182 #  ifndef Sword
183 #   define Sword 1
184 #  endif
185 
186 #  ifdef SWITCH_ENUM_BUG
187 #   define SWITCH_ENUM_CAST(x) ((int)(x))
188 #  else
189 #   define SWITCH_ENUM_CAST(x) (x)
190 #  endif
191 
192 # endif /* not emacs */
193 
194 # if defined _LIBC || HAVE_LIMITS_H
195 #  include <limits.h>
196 # endif
197 
198 # ifndef MB_LEN_MAX
199 #  define MB_LEN_MAX 1
200 # endif
201 
202 /* Get the interface, including the syntax bits.  */
203 # include <regex.h>
204 
205 /* isalpha etc. are used for the character classes.  */
206 # include <ctype.h>
207 
208 /* Jim Meyering writes:
209 
210    "... Some ctype macros are valid only for character codes that
211    isascii says are ASCII (SGI's IRIX-4.0.5 is one such system --when
212    using /bin/cc or gcc but without giving an ansi option).  So, all
213    ctype uses should be through macros like ISPRINT...  If
214    STDC_HEADERS is defined, then autoconf has verified that the ctype
215    macros don't need to be guarded with references to isascii. ...
216    Defining isascii to 1 should let any compiler worth its salt
217    eliminate the && through constant folding."
218    Solaris defines some of these symbols so we must undefine them first.  */
219 
220 # if defined STDC_HEADERS || (!defined isascii && !defined HAVE_ISASCII)
221 #  define IN_CTYPE_DOMAIN(c) 1
222 # else
223 #  define IN_CTYPE_DOMAIN(c) isascii(c)
224 # endif
225 
226 # ifdef isblank
227 #  define ISBLANK(c) (IN_CTYPE_DOMAIN (c) && isblank (c))
228 # else
229 #  define ISBLANK(c) ((c) == ' ' || (c) == '\t')
230 # endif
231 # ifdef isgraph
232 #  define ISGRAPH(c) (IN_CTYPE_DOMAIN (c) && isgraph (c))
233 # else
234 #  define ISGRAPH(c) (IN_CTYPE_DOMAIN (c) && isprint (c) && !isspace (c))
235 # endif
236 
237 # undef ISPRINT
238 # define ISPRINT(c) (IN_CTYPE_DOMAIN (c) && isprint (c))
239 # define ISDIGIT(c) (IN_CTYPE_DOMAIN (c) && isdigit (c))
240 # define ISALNUM(c) (IN_CTYPE_DOMAIN (c) && isalnum (c))
241 # define ISALPHA(c) (IN_CTYPE_DOMAIN (c) && isalpha (c))
242 # define ISCNTRL(c) (IN_CTYPE_DOMAIN (c) && iscntrl (c))
243 # define ISLOWER(c) (IN_CTYPE_DOMAIN (c) && islower (c))
244 # define ISPUNCT(c) (IN_CTYPE_DOMAIN (c) && ispunct (c))
245 # define ISSPACE(c) (IN_CTYPE_DOMAIN (c) && isspace (c))
246 # define ISUPPER(c) (IN_CTYPE_DOMAIN (c) && isupper (c))
247 # define ISXDIGIT(c) (IN_CTYPE_DOMAIN (c) && isxdigit (c))
248 
249 # ifdef _tolower
250 #  define TOLOWER(c) _tolower(c)
251 # else
252 #  define TOLOWER(c) tolower(c)
253 # endif
254 
255 # ifndef NULL
256 #  define NULL (void *)0
257 # endif
258 
259 /* We remove any previous definition of `SIGN_EXTEND_CHAR',
260    since ours (we hope) works properly with all combinations of
261    machines, compilers, `char' and `unsigned char' argument types.
262    (Per Bothner suggested the basic approach.)  */
263 # undef SIGN_EXTEND_CHAR
264 # if __STDC__
265 #  define SIGN_EXTEND_CHAR(c) ((signed char) (c))
266 # else  /* not __STDC__ */
267 /* As in Harbison and Steele.  */
268 #  define SIGN_EXTEND_CHAR(c) ((((unsigned char) (c)) ^ 128) - 128)
269 # endif
270 
271 # ifndef emacs
272 /* How many characters in the character set.  */
273 #  define CHAR_SET_SIZE 256
274 
275 #  ifdef SYNTAX_TABLE
276 
277 extern char *re_syntax_table;
278 
279 #  else /* not SYNTAX_TABLE */
280 
281 static char re_syntax_table[CHAR_SET_SIZE];
282 
283 static void init_syntax_once PARAMS ((void));
284 
285 static void
init_syntax_once()286 init_syntax_once ()
287 {
288    register int c;
289    static int done = 0;
290 
291    if (done)
292      return;
293    bzero (re_syntax_table, sizeof re_syntax_table);
294 
295    for (c = 0; c < CHAR_SET_SIZE; ++c)
296      if (ISALNUM (c))
297 	re_syntax_table[c] = Sword;
298 
299    re_syntax_table['_'] = Sword;
300 
301    done = 1;
302 }
303 
304 #  endif /* not SYNTAX_TABLE */
305 
306 #  define SYNTAX(c) re_syntax_table[(unsigned char) (c)]
307 
308 # endif /* emacs */
309 
310 /* Should we use malloc or alloca?  If REGEX_MALLOC is not defined, we
311    use `alloca' instead of `malloc'.  This is because using malloc in
312    re_search* or re_match* could cause memory leaks when C-g is used in
313    Emacs; also, malloc is slower and causes storage fragmentation.  On
314    the other hand, malloc is more portable, and easier to debug.
315 
316    Because we sometimes use alloca, some routines have to be macros,
317    not functions -- `alloca'-allocated space disappears at the end of the
318    function it is called in.  */
319 
320 # ifdef REGEX_MALLOC
321 
322 #  define REGEX_ALLOCATE malloc
323 #  define REGEX_REALLOCATE(source, osize, nsize) realloc (source, nsize)
324 #  define REGEX_FREE free
325 
326 # else /* not REGEX_MALLOC  */
327 
328 /* Emacs already defines alloca, sometimes.  */
329 #  ifndef alloca
330 
331 /* Make alloca work the best possible way.  */
332 #   ifdef __GNUC__
333 #    define alloca __builtin_alloca
334 #   else /* not __GNUC__ */
335 #    if HAVE_ALLOCA_H
336 #     include <alloca.h>
337 #    endif /* HAVE_ALLOCA_H */
338 #   endif /* not __GNUC__ */
339 
340 #  endif /* not alloca */
341 
342 #  define REGEX_ALLOCATE alloca
343 
344 /* Assumes a `char *destination' variable.  */
345 #  define REGEX_REALLOCATE(source, osize, nsize)			\
346   (destination = (char *) alloca (nsize),				\
347    memcpy (destination, source, osize))
348 
349 /* No need to do anything to free, after alloca.  */
350 #  define REGEX_FREE(arg) ((void)0) /* Do nothing!  But inhibit gcc warning.  */
351 
352 # endif /* not REGEX_MALLOC */
353 
354 /* Define how to allocate the failure stack.  */
355 
356 # if defined REL_ALLOC && defined REGEX_MALLOC
357 
358 #  define REGEX_ALLOCATE_STACK(size)				\
359   r_alloc (&failure_stack_ptr, (size))
360 #  define REGEX_REALLOCATE_STACK(source, osize, nsize)		\
361   r_re_alloc (&failure_stack_ptr, (nsize))
362 #  define REGEX_FREE_STACK(ptr)					\
363   r_alloc_free (&failure_stack_ptr)
364 
365 # else /* not using relocating allocator */
366 
367 #  ifdef REGEX_MALLOC
368 
369 #   define REGEX_ALLOCATE_STACK malloc
370 #   define REGEX_REALLOCATE_STACK(source, osize, nsize) realloc (source, nsize)
371 #   define REGEX_FREE_STACK free
372 
373 #  else /* not REGEX_MALLOC */
374 
375 #   define REGEX_ALLOCATE_STACK alloca
376 
377 #   define REGEX_REALLOCATE_STACK(source, osize, nsize)			\
378    REGEX_REALLOCATE (source, osize, nsize)
379 /* No need to explicitly free anything.  */
380 #   define REGEX_FREE_STACK(arg)
381 
382 #  endif /* not REGEX_MALLOC */
383 # endif /* not using relocating allocator */
384 
385 
386 /* True if `size1' is nonzero and PTR is pointing anywhere inside
387    `string1' or just past its end.  This works if PTR is NULL, which is
388    a good thing.  */
389 # define FIRST_STRING_P(ptr) 					\
390   (size1 && string1 <= (ptr) && (ptr) <= string1 + size1)
391 
392 /* (Re)Allocate N items of type T using malloc, or fail.  */
393 # define TALLOC(n, t) ((t *) malloc ((n) * sizeof (t)))
394 # define RETALLOC(addr, n, t) ((addr) = (t *) realloc (addr, (n) * sizeof (t)))
395 # define RETALLOC_IF(addr, n, t) \
396   if (addr) RETALLOC((addr), (n), t); else (addr) = TALLOC ((n), t)
397 # define REGEX_TALLOC(n, t) ((t *) REGEX_ALLOCATE ((n) * sizeof (t)))
398 
399 # define BYTEWIDTH 8 /* In bits.  */
400 
401 # define STREQ(s1, s2) ((strcmp (s1, s2) == 0))
402 
403 # undef MAX
404 # undef MIN
405 # define MAX(a, b) ((a) > (b) ? (a) : (b))
406 # define MIN(a, b) ((a) < (b) ? (a) : (b))
407 
408 typedef char boolean;
409 # define false 0
410 # define true 1
411 
412 static reg_errcode_t byte_regex_compile _RE_ARGS ((const char *pattern, size_t size,
413                                                    reg_syntax_t syntax,
414                                                    struct re_pattern_buffer *bufp));
415 
416 static int byte_re_match_2_internal PARAMS ((struct re_pattern_buffer *bufp,
417 					     const char *string1, int size1,
418 					     const char *string2, int size2,
419 					     int pos,
420 					     struct re_registers *regs,
421 					     int stop));
422 static int byte_re_search_2 PARAMS ((struct re_pattern_buffer *bufp,
423 				     const char *string1, int size1,
424 				     const char *string2, int size2,
425 				     int startpos, int range,
426 				     struct re_registers *regs, int stop));
427 static int byte_re_compile_fastmap PARAMS ((struct re_pattern_buffer *bufp));
428 
429 #ifdef MBS_SUPPORT
430 static reg_errcode_t wcs_regex_compile _RE_ARGS ((const char *pattern, size_t size,
431                                                    reg_syntax_t syntax,
432                                                    struct re_pattern_buffer *bufp));
433 
434 
435 static int wcs_re_match_2_internal PARAMS ((struct re_pattern_buffer *bufp,
436 					    const char *cstring1, int csize1,
437 					    const char *cstring2, int csize2,
438 					    int pos,
439 					    struct re_registers *regs,
440 					    int stop,
441 					    wchar_t *string1, int size1,
442 					    wchar_t *string2, int size2,
443 					    int *mbs_offset1, int *mbs_offset2));
444 static int wcs_re_search_2 PARAMS ((struct re_pattern_buffer *bufp,
445 				    const char *string1, int size1,
446 				    const char *string2, int size2,
447 				    int startpos, int range,
448 				    struct re_registers *regs, int stop));
449 static int wcs_re_compile_fastmap PARAMS ((struct re_pattern_buffer *bufp));
450 #endif
451 
452 /* These are the command codes that appear in compiled regular
453    expressions.  Some opcodes are followed by argument bytes.  A
454    command code can specify any interpretation whatsoever for its
455    arguments.  Zero bytes may appear in the compiled regular expression.  */
456 
typedef enum
{
  /* Explicitly zero; each later opcode takes the next value.  */
  no_op = 0,

  /* Succeed right away--no more backtracking.  */
  succeed,

  /* Followed by one byte giving n, then by n literal bytes.  */
  exactn,

# ifdef MBS_SUPPORT
  /* Same as exactn, but contains binary data.  */
  exactn_bin,
# endif

  /* Matches any (more or less) character.  */
  anychar,

  /* Matches any one char belonging to the specified set.  The first
     following byte is the number of bitmap bytes; the bitmap bytes
     themselves come next and say which chars are in the set.  Bits in
     each byte are ordered low-bit-first, and a character is in the set
     if its bit is 1.  A character too large to have a bit in the map
     is automatically not in the set.

     ifdef MBS_SUPPORT, the following elements are instead the length of
     the character classes, of the collating symbols, of the equivalence
     classes, of the character ranges, and of the characters.  After
     those come the character class elements, collating symbol elements,
     equivalence class elements, range elements, and character elements.
     See the regex_compile function.  */
  charset,

  /* Same parameters as charset, but match any character that is not
     one of those specified.  */
  charset_not,

  /* Start remembering the text that is matched, for storing in a
     register.  Followed by one byte with the register number, in the
     range 0 to one less than the pattern buffer's re_nsub field, and
     then by one byte with the number of groups inner to this one.
     (This last has to be part of the start_memory only because we need
     it in the on_failure_jump of re_match_2.)  */
  start_memory,

  /* Stop remembering the text that is matched and store it in a memory
     register.  Followed by one byte with the register number, in the
     range 0 to one less than `re_nsub' in the pattern buffer, and one
     byte with the number of inner groups, just like `start_memory'.
     (We need the number of inner groups here because we don't have any
     easy way of finding the corresponding start_memory when we're at a
     stop_memory.)  */
  stop_memory,

  /* Match a duplicate of something remembered.  Followed by one byte
     containing the register number.  */
  duplicate,

  /* Fail unless at beginning of line.  */
  begline,

  /* Fail unless at end of line.  */
  endline,

  /* Succeeds if at beginning of buffer (if emacs) or at beginning of
     string to be matched (if not).  */
  begbuf,

  /* Analogously, for end of buffer/string.  */
  endbuf,

  /* Followed by a two-byte relative address to which to jump.  */
  jump,

  /* Same as jump, but marks the end of an alternative.  */
  jump_past_alt,

  /* Followed by the two-byte relative address of the place to resume
     at in case of failure.
     ifdef MBS_SUPPORT, the size of address is 1.  */
  on_failure_jump,

  /* Like on_failure_jump, but pushes a placeholder instead of the
     current string position when executed.  */
  on_failure_keep_string_jump,

  /* Throw away the latest failure point and then jump to the following
     two-byte relative address.
     ifdef MBS_SUPPORT, the size of address is 1.  */
  pop_failure_jump,

  /* Change to pop_failure_jump if we know we won't have to backtrack
     to match; otherwise change to jump.  This is used to jump back to
     the beginning of a repeat.  If what follows this jump clearly
     won't match what the repeat does, such that we can be sure that
     there is no use backtracking out of repetitions already matched,
     then we change it to a pop_failure_jump.  Followed by a two-byte
     address.
     ifdef MBS_SUPPORT, the size of address is 1.  */
  maybe_pop_jump,

  /* Jump to the following two-byte address, and push a dummy failure
     point.  This failure point will be thrown away if an attempt is
     made to use it for a failure.  A `+' construct makes this before
     the first repeat.  Also used as an intermediary kind of jump when
     compiling an alternative.
     ifdef MBS_SUPPORT, the size of address is 1.  */
  dummy_failure_jump,

  /* Push a dummy failure point and continue.  Used at the end of
     alternatives.  */
  push_dummy_failure,

  /* Followed by a two-byte relative address and a two-byte number n.
     After matching N times, jump to the address upon failure.
     ifdef MBS_SUPPORT, the size of address is 1.  */
  succeed_n,

  /* Followed by a two-byte relative address and a two-byte number n.
     Jump to the address N times, then fail.
     ifdef MBS_SUPPORT, the size of address is 1.  */
  jump_n,

  /* Set the following two-byte relative address to the subsequent
     two-byte number.  The address *includes* the two bytes of number.
     ifdef MBS_SUPPORT, the size of address is 1.  */
  set_number_at,

  /* Matches any word-constituent character.  */
  wordchar,

  /* Matches any char that is not a word-constituent.  */
  notwordchar,

  /* Succeeds if at word beginning.  */
  wordbeg,

  /* Succeeds if at word end.  */
  wordend,

  /* Succeeds if at a word boundary.  */
  wordbound,

  /* Succeeds if not at a word boundary.  */
  notwordbound

# ifdef emacs
  ,
  /* Succeeds if before point.  */
  before_dot,

  /* Succeeds if at point.  */
  at_dot,

  /* Succeeds if after point.  */
  after_dot,

  /* Matches any character whose syntax is specified.  Followed by a
     byte which contains a syntax code, e.g., Sword.  */
  syntaxspec,

  /* Matches any character whose syntax is not that specified.  */
  notsyntaxspec
# endif /* emacs */
} re_opcode_t;
609 #endif /* not INSIDE_RECURSION */
610 
611 
612 #ifdef BYTE
613 # define CHAR_T char
614 # define UCHAR_T unsigned char
615 # define COMPILED_BUFFER_VAR bufp->buffer
616 # define OFFSET_ADDRESS_SIZE 2
617 # define PREFIX(name) byte_##name
618 # define ARG_PREFIX(name) name
619 # define PUT_CHAR(c) putchar (c)
620 #else
621 # ifdef WCHAR
622 #  define CHAR_T wchar_t
623 #  define UCHAR_T wchar_t
624 #  define COMPILED_BUFFER_VAR wc_buffer
625 #  define OFFSET_ADDRESS_SIZE 1 /* the size which STORE_NUMBER macro use */
626 #  define CHAR_CLASS_SIZE ((__alignof__(wctype_t)+sizeof(wctype_t))/sizeof(CHAR_T)+1)
627 #  define PREFIX(name) wcs_##name
628 #  define ARG_PREFIX(name) c##name
/* Should we use wide stream??  */
/* Print the wide character C with printf's "%C" conversion.  There is
   deliberately no trailing semicolon: the caller supplies it, exactly as
   with the BYTE definition of PUT_CHAR, so a use is a single expression
   statement and stays valid in an unbraced `if ... else'.  */
#  define PUT_CHAR(c) printf ("%C", c)
631 #  define TRUE 1
632 #  define FALSE 0
633 # else
634 #  ifdef MBS_SUPPORT
635 #   define WCHAR
636 #   define INSIDE_RECURSION
637 #   include "regex.c"
638 #   undef INSIDE_RECURSION
639 #  endif
640 #  define BYTE
641 #  define INSIDE_RECURSION
642 #  include "regex.c"
643 #  undef INSIDE_RECURSION
644 # endif
645 #endif
646 #include "unlocked-io.h"
647 
648 #ifdef INSIDE_RECURSION
649 /* Common operations on the compiled pattern.  */
650 
651 /* Store NUMBER in two contiguous bytes starting at DESTINATION.  */
652 /* ifdef MBS_SUPPORT, we store NUMBER in 1 element.  */
653 
654 # ifdef WCHAR
655 #  define STORE_NUMBER(destination, number)				\
656   do {									\
657     *(destination) = (UCHAR_T)(number);				\
658   } while (0)
659 # else /* BYTE */
660 #  define STORE_NUMBER(destination, number)				\
661   do {									\
662     (destination)[0] = (number) & 0377;					\
663     (destination)[1] = (number) >> 8;					\
664   } while (0)
665 # endif /* WCHAR */
666 
667 /* Same as STORE_NUMBER, except increment DESTINATION to
668    the byte after where the number is stored.  Therefore, DESTINATION
669    must be an lvalue.  */
670 /* ifdef MBS_SUPPORT, we store NUMBER in 1 element.  */
671 
672 # define STORE_NUMBER_AND_INCR(destination, number)			\
673   do {									\
674     STORE_NUMBER (destination, number);					\
675     (destination) += OFFSET_ADDRESS_SIZE;				\
676   } while (0)
677 
678 /* Put into DESTINATION a number stored in two contiguous bytes starting
679    at SOURCE.  */
680 /* ifdef MBS_SUPPORT, we store NUMBER in 1 element.  */
681 
# ifdef WCHAR
/* The number is the single element at SOURCE.  */
#  define EXTRACT_NUMBER(destination, source)				\
  do {									\
    (destination) = *(source);						\
  } while (0)
# else /* BYTE */
/* SOURCE[0] supplies the low eight bits and SOURCE[1], sign-extended,
   supplies the high part.  The high byte is scaled by multiplication
   rather than with `<< 8' because it is negative whenever its top bit
   is set, and left-shifting a negative value is undefined behavior in
   ISO C.  */
#  define EXTRACT_NUMBER(destination, source)				\
  do {									\
    (destination) = *(source) & 0377;					\
    (destination) += SIGN_EXTEND_CHAR (*((source) + 1)) * (1 << 8);	\
  } while (0)
# endif
694 
695 # ifdef DEBUG
696 static void PREFIX(extract_number) _RE_ARGS ((int *dest, UCHAR_T *source));
697 static void
698 PREFIX(extract_number) (dest, source)
699     int *dest;
700     UCHAR_T *source;
701 {
702 #  ifdef WCHAR
703   *dest = *source;
704 #  else /* BYTE */
705   int temp = SIGN_EXTEND_CHAR (*(source + 1));
706   *dest = *source & 0377;
707   *dest += temp << 8;
708 #  endif
709 }
710 
711 #  ifndef EXTRACT_MACROS /* To debug the macros.  */
712 #   undef EXTRACT_NUMBER
713 #   define EXTRACT_NUMBER(dest, src) PREFIX(extract_number) (&dest, src)
714 #  endif /* not EXTRACT_MACROS */
715 
716 # endif /* DEBUG */
717 
718 /* Same as EXTRACT_NUMBER, except increment SOURCE to after the number.
719    SOURCE must be an lvalue.  */
720 
721 # define EXTRACT_NUMBER_AND_INCR(destination, source)			\
722   do {									\
723     EXTRACT_NUMBER (destination, source);				\
724     (source) += OFFSET_ADDRESS_SIZE; 					\
725   } while (0)
726 
727 # ifdef DEBUG
728 static void PREFIX(extract_number_and_incr) _RE_ARGS ((int *destination,
729 						       UCHAR_T **source));
730 static void
731 PREFIX(extract_number_and_incr) (destination, source)
732     int *destination;
733     UCHAR_T **source;
734 {
735   PREFIX(extract_number) (destination, *source);
736   *source += OFFSET_ADDRESS_SIZE;
737 }
738 
739 #  ifndef EXTRACT_MACROS
740 #   undef EXTRACT_NUMBER_AND_INCR
741 #   define EXTRACT_NUMBER_AND_INCR(dest, src) \
742   PREFIX(extract_number_and_incr) (&dest, &src)
743 #  endif /* not EXTRACT_MACROS */
744 
745 # endif /* DEBUG */
746 
747 
748 
749 /* If DEBUG is defined, Regex prints many voluminous messages about what
750    it is doing (if the variable `debug' is nonzero).  If linked with the
751    main program in `iregex.c', you can enter patterns and strings
752    interactively.  And if linked with the main program in `main.c' and
753    the other test files, you can run the already-written tests.  */
754 
755 # ifdef DEBUG
756 
757 #  ifndef DEFINED_ONCE
758 
759 /* We use standard I/O for debugging.  */
760 #   include <stdio.h>
761 
762 /* It is useful to test things that ``must'' be true when debugging.  */
763 #   include <assert.h>
764 
765 static int debug;
766 
/* DEBUG_STATEMENT expands to its argument unchanged.  Each DEBUG_PRINTn
   passes its N arguments to printf, but only when `debug' is nonzero.
   The printing macros are wrapped in do { } while (0) so that a use
   followed by `;' is exactly one statement: with a bare
   `if (debug) ...' expansion, an `else' written after the macro would
   pair with the hidden `if' instead of with the caller's own.  */
#   define DEBUG_STATEMENT(e) e
#   define DEBUG_PRINT1(x) do { if (debug) printf (x); } while (0)
#   define DEBUG_PRINT2(x1, x2) do { if (debug) printf (x1, x2); } while (0)
#   define DEBUG_PRINT3(x1, x2, x3) \
  do { if (debug) printf (x1, x2, x3); } while (0)
#   define DEBUG_PRINT4(x1, x2, x3, x4) \
  do { if (debug) printf (x1, x2, x3, x4); } while (0)
772 #  endif /* not DEFINED_ONCE */
773 
/* When `debug' is nonzero, dump the compiled pattern between S and E
   (the P argument is not used), or the two-part subject string starting
   at W.  Wrapped in do { } while (0) so that a following `else' cannot
   attach to the `if (debug)' hidden inside the expansion.  */
#  define DEBUG_PRINT_COMPILED_PATTERN(p, s, e) 			\
  do {									\
    if (debug) PREFIX(print_partial_compiled_pattern) (s, e);		\
  } while (0)
#  define DEBUG_PRINT_DOUBLE_STRING(w, s1, sz1, s2, sz2)		\
  do {									\
    if (debug) PREFIX(print_double_string) (w, s1, sz1, s2, sz2);	\
  } while (0)
778 
779 
780 /* Print the fastmap in human-readable form.  */
781 
782 #  ifndef DEFINED_ONCE
783 void
print_fastmap(fastmap)784 print_fastmap (fastmap)
785     char *fastmap;
786 {
787   unsigned was_a_range = 0;
788   unsigned i = 0;
789 
790   while (i < (1 << BYTEWIDTH))
791     {
792       if (fastmap[i++])
793 	{
794 	  was_a_range = 0;
795           putchar (i - 1);
796           while (i < (1 << BYTEWIDTH)  &&  fastmap[i])
797             {
798               was_a_range = 1;
799               i++;
800             }
801 	  if (was_a_range)
802             {
803               printf ("-");
804               putchar (i - 1);
805             }
806         }
807     }
808   putchar ('\n');
809 }
810 #  endif /* not DEFINED_ONCE */
811 
812 
813 /* Print a compiled pattern string in human-readable form, starting at
814    the START pointer into it and ending just before the pointer END.  */
815 
816 void
817 PREFIX(print_partial_compiled_pattern) (start, end)
818     UCHAR_T *start;
819     UCHAR_T *end;
820 {
821   int mcnt, mcnt2;
822   UCHAR_T *p1;
823   UCHAR_T *p = start;
824   UCHAR_T *pend = end;
825 
826   if (start == NULL)
827     {
828       printf ("(null)\n");
829       return;
830     }
831 
832   /* Loop over pattern commands.  */
833   while (p < pend)
834     {
835 #  ifdef _LIBC
836       printf ("%td:\t", p - start);
837 #  else
838       printf ("%ld:\t", (long int) (p - start));
839 #  endif
840 
841       switch ((re_opcode_t) *p++)
842 	{
843         case no_op:
844           printf ("/no_op");
845           break;
846 
847 	case exactn:
848 	  mcnt = *p++;
849           printf ("/exactn/%d", mcnt);
850           do
851 	    {
852               putchar ('/');
853 	      PUT_CHAR (*p++);
854             }
855           while (--mcnt);
856           break;
857 
858 #  ifdef MBS_SUPPORT
859 	case exactn_bin:
860 	  mcnt = *p++;
861 	  printf ("/exactn_bin/%d", mcnt);
862           do
863 	    {
864 	      printf("/%lx", (long int) *p++);
865             }
866           while (--mcnt);
867           break;
868 #  endif /* MBS_SUPPORT */
869 
870 	case start_memory:
871           mcnt = *p++;
872           printf ("/start_memory/%d/%ld", mcnt, (long int) *p++);
873           break;
874 
875 	case stop_memory:
876           mcnt = *p++;
877 	  printf ("/stop_memory/%d/%ld", mcnt, (long int) *p++);
878           break;
879 
880 	case duplicate:
881 	  printf ("/duplicate/%ld", (long int) *p++);
882 	  break;
883 
884 	case anychar:
885 	  printf ("/anychar");
886 	  break;
887 
888 	case charset:
889         case charset_not:
890           {
891 #  ifdef WCHAR
892 	    int i, length;
893 	    wchar_t *workp = p;
894 	    printf ("/charset [%s",
895 	            (re_opcode_t) *(workp - 1) == charset_not ? "^" : "");
896 	    p += 5;
897 	    length = *workp++; /* the length of char_classes */
898 	    for (i=0 ; i<length ; i++)
899 	      printf("[:%lx:]", (long int) *p++);
900 	    length = *workp++; /* the length of collating_symbol */
901 	    for (i=0 ; i<length ;)
902 	      {
903 		printf("[.");
904 		while(*p != 0)
905 		  PUT_CHAR((i++,*p++));
906 		i++,p++;
907 		printf(".]");
908 	      }
909 	    length = *workp++; /* the length of equivalence_class */
910 	    for (i=0 ; i<length ;)
911 	      {
912 		printf("[=");
913 		while(*p != 0)
914 		  PUT_CHAR((i++,*p++));
915 		i++,p++;
916 		printf("=]");
917 	      }
918 	    length = *workp++; /* the length of char_range */
919 	    for (i=0 ; i<length ; i++)
920 	      {
921 		wchar_t range_start = *p++;
922 		wchar_t range_end = *p++;
923 		printf("%C-%C", range_start, range_end);
924 	      }
925 	    length = *workp++; /* the length of char */
926 	    for (i=0 ; i<length ; i++)
927 	      printf("%C", *p++);
928 	    putchar (']');
929 #  else
930             register int c, last = -100;
931 	    register int in_range = 0;
932 
933 	    printf ("/charset [%s",
934 	            (re_opcode_t) *(p - 1) == charset_not ? "^" : "");
935 
936             assert (p + *p < pend);
937 
938             for (c = 0; c < 256; c++)
939 	      if (c / 8 < *p
940 		  && (p[1 + (c/8)] & (1 << (c % 8))))
941 		{
942 		  /* Are we starting a range?  */
943 		  if (last + 1 == c && ! in_range)
944 		    {
945 		      putchar ('-');
946 		      in_range = 1;
947 		    }
948 		  /* Have we broken a range?  */
949 		  else if (last + 1 != c && in_range)
950               {
951 		      putchar (last);
952 		      in_range = 0;
953 		    }
954 
955 		  if (! in_range)
956 		    putchar (c);
957 
958 		  last = c;
959               }
960 
961 	    if (in_range)
962 	      putchar (last);
963 
964 	    putchar (']');
965 
966 	    p += 1 + *p;
967 #  endif /* WCHAR */
968 	  }
969 	  break;
970 
971 	case begline:
972 	  printf ("/begline");
973           break;
974 
975 	case endline:
976           printf ("/endline");
977           break;
978 
979 	case on_failure_jump:
980           PREFIX(extract_number_and_incr) (&mcnt, &p);
981 #  ifdef _LIBC
982   	  printf ("/on_failure_jump to %td", p + mcnt - start);
983 #  else
984   	  printf ("/on_failure_jump to %ld", (long int) (p + mcnt - start));
985 #  endif
986           break;
987 
988 	case on_failure_keep_string_jump:
989           PREFIX(extract_number_and_incr) (&mcnt, &p);
990 #  ifdef _LIBC
991   	  printf ("/on_failure_keep_string_jump to %td", p + mcnt - start);
992 #  else
993   	  printf ("/on_failure_keep_string_jump to %ld",
994 		  (long int) (p + mcnt - start));
995 #  endif
996           break;
997 
998 	case dummy_failure_jump:
999           PREFIX(extract_number_and_incr) (&mcnt, &p);
1000 #  ifdef _LIBC
1001   	  printf ("/dummy_failure_jump to %td", p + mcnt - start);
1002 #  else
1003   	  printf ("/dummy_failure_jump to %ld", (long int) (p + mcnt - start));
1004 #  endif
1005           break;
1006 
1007 	case push_dummy_failure:
1008           printf ("/push_dummy_failure");
1009           break;
1010 
1011         case maybe_pop_jump:
1012           PREFIX(extract_number_and_incr) (&mcnt, &p);
1013 #  ifdef _LIBC
1014   	  printf ("/maybe_pop_jump to %td", p + mcnt - start);
1015 #  else
1016   	  printf ("/maybe_pop_jump to %ld", (long int) (p + mcnt - start));
1017 #  endif
1018 	  break;
1019 
1020         case pop_failure_jump:
1021 	  PREFIX(extract_number_and_incr) (&mcnt, &p);
1022 #  ifdef _LIBC
1023   	  printf ("/pop_failure_jump to %td", p + mcnt - start);
1024 #  else
1025   	  printf ("/pop_failure_jump to %ld", (long int) (p + mcnt - start));
1026 #  endif
1027 	  break;
1028 
1029         case jump_past_alt:
1030 	  PREFIX(extract_number_and_incr) (&mcnt, &p);
1031 #  ifdef _LIBC
1032   	  printf ("/jump_past_alt to %td", p + mcnt - start);
1033 #  else
1034   	  printf ("/jump_past_alt to %ld", (long int) (p + mcnt - start));
1035 #  endif
1036 	  break;
1037 
1038         case jump:
1039 	  PREFIX(extract_number_and_incr) (&mcnt, &p);
1040 #  ifdef _LIBC
1041   	  printf ("/jump to %td", p + mcnt - start);
1042 #  else
1043   	  printf ("/jump to %ld", (long int) (p + mcnt - start));
1044 #  endif
1045 	  break;
1046 
1047         case succeed_n:
1048           PREFIX(extract_number_and_incr) (&mcnt, &p);
1049 	  p1 = p + mcnt;
1050           PREFIX(extract_number_and_incr) (&mcnt2, &p);
1051 #  ifdef _LIBC
1052 	  printf ("/succeed_n to %td, %d times", p1 - start, mcnt2);
1053 #  else
1054 	  printf ("/succeed_n to %ld, %d times",
1055 		  (long int) (p1 - start), mcnt2);
1056 #  endif
1057           break;
1058 
1059         case jump_n:
1060           PREFIX(extract_number_and_incr) (&mcnt, &p);
1061 	  p1 = p + mcnt;
1062           PREFIX(extract_number_and_incr) (&mcnt2, &p);
1063 	  printf ("/jump_n to %d, %d times", p1 - start, mcnt2);
1064           break;
1065 
1066         case set_number_at:
1067           PREFIX(extract_number_and_incr) (&mcnt, &p);
1068 	  p1 = p + mcnt;
1069           PREFIX(extract_number_and_incr) (&mcnt2, &p);
1070 #  ifdef _LIBC
1071 	  printf ("/set_number_at location %td to %d", p1 - start, mcnt2);
1072 #  else
1073 	  printf ("/set_number_at location %ld to %d",
1074 		  (long int) (p1 - start), mcnt2);
1075 #  endif
1076           break;
1077 
1078         case wordbound:
1079 	  printf ("/wordbound");
1080 	  break;
1081 
1082 	case notwordbound:
1083 	  printf ("/notwordbound");
1084           break;
1085 
1086 	case wordbeg:
1087 	  printf ("/wordbeg");
1088 	  break;
1089 
1090 	case wordend:
1091 	  printf ("/wordend");
1092 	  break;
1093 
1094 #  ifdef emacs
1095 	case before_dot:
1096 	  printf ("/before_dot");
1097           break;
1098 
1099 	case at_dot:
1100 	  printf ("/at_dot");
1101           break;
1102 
1103 	case after_dot:
1104 	  printf ("/after_dot");
1105           break;
1106 
1107 	case syntaxspec:
1108           printf ("/syntaxspec");
1109 	  mcnt = *p++;
1110 	  printf ("/%d", mcnt);
1111           break;
1112 
1113 	case notsyntaxspec:
1114           printf ("/notsyntaxspec");
1115 	  mcnt = *p++;
1116 	  printf ("/%d", mcnt);
1117 	  break;
1118 #  endif /* emacs */
1119 
1120 	case wordchar:
1121 	  printf ("/wordchar");
1122           break;
1123 
1124 	case notwordchar:
1125 	  printf ("/notwordchar");
1126           break;
1127 
1128 	case begbuf:
1129 	  printf ("/begbuf");
1130           break;
1131 
1132 	case endbuf:
1133 	  printf ("/endbuf");
1134           break;
1135 
1136         default:
1137           printf ("?%ld", (long int) *(p-1));
1138 	}
1139 
1140       putchar ('\n');
1141     }
1142 
1143 #  ifdef _LIBC
1144   printf ("%td:\tend of pattern.\n", p - start);
1145 #  else
1146   printf ("%ld:\tend of pattern.\n", (long int) (p - start));
1147 #  endif
1148 }
1149 
1150 
1151 void
1152 PREFIX(print_compiled_pattern) (bufp)
1153     struct re_pattern_buffer *bufp;
1154 {
1155   UCHAR_T *buffer = (UCHAR_T*) bufp->buffer;
1156 
1157   PREFIX(print_partial_compiled_pattern) (buffer, buffer
1158 				  + bufp->used / sizeof(UCHAR_T));
1159   printf ("%ld bytes used/%ld bytes allocated.\n",
1160 	  bufp->used, bufp->allocated);
1161 
1162   if (bufp->fastmap_accurate && bufp->fastmap)
1163     {
1164       printf ("fastmap: ");
1165       print_fastmap (bufp->fastmap);
1166     }
1167 
1168 #  ifdef _LIBC
1169   printf ("re_nsub: %Zd\t", bufp->re_nsub);
1170 #  else
1171   printf ("re_nsub: %ld\t", (long int) bufp->re_nsub);
1172 #  endif
1173   printf ("regs_alloc: %d\t", bufp->regs_allocated);
1174   printf ("can_be_null: %d\t", bufp->can_be_null);
1175   printf ("newline_anchor: %d\n", bufp->newline_anchor);
1176   printf ("no_sub: %d\t", bufp->no_sub);
1177   printf ("not_bol: %d\t", bufp->not_bol);
1178   printf ("not_eol: %d\t", bufp->not_eol);
1179   printf ("syntax: %lx\n", bufp->syntax);
1180   /* Perhaps we should print the translate table?  */
1181 }
1182 
1183 
1184 void
1185 PREFIX(print_double_string) (where, string1, size1, string2, size2)
1186     const CHAR_T *where;
1187     const CHAR_T *string1;
1188     const CHAR_T *string2;
1189     int size1;
1190     int size2;
1191 {
1192   int this_char;
1193 
1194   if (where == NULL)
1195     printf ("(null)");
1196   else
1197     {
1198       int cnt;
1199 
1200       if (FIRST_STRING_P (where))
1201         {
1202           for (this_char = where - string1; this_char < size1; this_char++)
1203 	    PUT_CHAR (string1[this_char]);
1204 
1205           where = string2;
1206         }
1207 
1208       cnt = 0;
1209       for (this_char = where - string2; this_char < size2; this_char++)
1210 	{
1211 	  PUT_CHAR (string2[this_char]);
1212 	  if (++cnt > 100)
1213 	    {
1214 	      fputs ("...", stdout);
1215 	      break;
1216 	    }
1217 	}
1218     }
1219 }
1220 
1221 #  ifndef DEFINED_ONCE
/* Write the single character C to stderr.  */
void
printchar (c)
     int c;
{
  fputc (c, stderr);
}
1228 #  endif
1229 
1230 # else /* not DEBUG */
1231 
1232 #  ifndef DEFINED_ONCE
1233 #   undef assert
1234 #   define assert(e)
1235 
1236 #   define DEBUG_STATEMENT(e)
1237 #   define DEBUG_PRINT1(x)
1238 #   define DEBUG_PRINT2(x1, x2)
1239 #   define DEBUG_PRINT3(x1, x2, x3)
1240 #   define DEBUG_PRINT4(x1, x2, x3, x4)
1241 #  endif /* not DEFINED_ONCE */
1242 #  define DEBUG_PRINT_COMPILED_PATTERN(p, s, e)
1243 #  define DEBUG_PRINT_DOUBLE_STRING(w, s1, sz1, s2, sz2)
1244 
1245 # endif /* not DEBUG */
1246 
1247 
1248 
1249 # ifdef WCHAR
1250 /* This  convert a multibyte string to a wide character string.
1251    And write their correspondances to offset_buffer(see below)
1252    and write whether each wchar_t is binary data to is_binary.
1253    This assume invalid multibyte sequences as binary data.
1254    We assume offset_buffer and is_binary is already allocated
1255    enough space.  */
1256 
1257 static size_t convert_mbs_to_wcs (CHAR_T *dest, const unsigned char* src,
1258 				  size_t len, int *offset_buffer,
1259 				  char *is_binary);
1260 static size_t
convert_mbs_to_wcs(dest,src,len,offset_buffer,is_binary)1261 convert_mbs_to_wcs (dest, src, len, offset_buffer, is_binary)
1262      CHAR_T *dest;
1263      const unsigned char* src;
1264      size_t len; /* the length of multibyte string.  */
1265 
1266      /* It hold correspondances between src(char string) and
1267 	dest(wchar_t string) for optimization.
1268 	e.g. src  = "xxxyzz"
1269              dest = {'X', 'Y', 'Z'}
1270 	      (each "xxx", "y" and "zz" represent one multibyte character
1271 	       corresponding to 'X', 'Y' and 'Z'.)
1272 	  offset_buffer = {0, 0+3("xxx"), 0+3+1("y"), 0+3+1+2("zz")}
1273 	  	        = {0, 3, 4, 6}
1274      */
1275      int *offset_buffer;
1276      char *is_binary;
1277 {
1278   wchar_t *pdest = dest;
1279   const unsigned char *psrc = src;
1280   size_t wc_count = 0;
1281 
1282   mbstate_t mbs;
1283   int i, consumed;
1284   size_t mb_remain = len;
1285   size_t mb_count = 0;
1286 
1287   /* Initialize the conversion state.  */
1288   memset (&mbs, 0, sizeof (mbstate_t));
1289 
1290   offset_buffer[0] = 0;
1291   for( ; mb_remain > 0 ; ++wc_count, ++pdest, mb_remain -= consumed,
1292 	 psrc += consumed)
1293     {
1294       consumed = mbrtowc (pdest, psrc, mb_remain, &mbs);
1295 
1296       if (consumed <= 0)
1297 	/* failed to convert. maybe src contains binary data.
1298 	   So we consume 1 byte manualy.  */
1299 	{
1300 	  *pdest = *psrc;
1301 	  consumed = 1;
1302 	  is_binary[wc_count] = TRUE;
1303 	}
1304       else
1305 	is_binary[wc_count] = FALSE;
1306       /* In sjis encoding, we use yen sign as escape character in
1307 	 place of reverse solidus. So we convert 0x5c(yen sign in
1308 	 sjis) to not 0xa5(yen sign in UCS2) but 0x5c(reverse
1309 	 solidus in UCS2).  */
1310       if (consumed == 1 && (int) *psrc == 0x5c && (int) *pdest == 0xa5)
1311 	*pdest = (wchar_t) *psrc;
1312 
1313       offset_buffer[wc_count + 1] = mb_count += consumed;
1314     }
1315 
1316   /* Fill remain of the buffer with sentinel.  */
1317   for (i = wc_count + 1 ; i <= len ; i++)
1318     offset_buffer[i] = mb_count + 1;
1319 
1320   return wc_count;
1321 }
1322 
1323 # endif /* WCHAR */
1324 
1325 #else /* not INSIDE_RECURSION */
1326 
1327 /* Set by `re_set_syntax' to the current regexp syntax to recognize.  Can
1328    also be assigned to arbitrarily: each pattern buffer stores its own
1329    syntax, so it can be changed between regex compilations.  */
1330 /* This has no initializer because initialized variables in Emacs
1331    become read-only after dumping.  */
1332 reg_syntax_t re_syntax_options;
1333 
1334 
1335 /* Specify the precise syntax of regexps for compilation.  This provides
1336    for compatibility for various utilities which historically have
1337    different, incompatible syntaxes.
1338 
1339    The argument SYNTAX is a bit mask comprised of the various bits
1340    defined in regex.h.  We return the old syntax.  */
1341 
1342 reg_syntax_t
re_set_syntax(syntax)1343 re_set_syntax (syntax)
1344     reg_syntax_t syntax;
1345 {
1346   reg_syntax_t ret = re_syntax_options;
1347 
1348   re_syntax_options = syntax;
1349 # ifdef DEBUG
1350   if (syntax & RE_DEBUG)
1351     debug = 1;
1352   else if (debug) /* was on but now is not */
1353     debug = 0;
1354 # endif /* DEBUG */
1355   return ret;
1356 }
1357 # ifdef _LIBC
1358 weak_alias (__re_set_syntax, re_set_syntax)
1359 # endif
1360 
1361 /* This table gives an error message for each of the error codes listed
1362    in regex.h.  Obviously the order here has to be same as there.
1363    POSIX doesn't require that we do anything for REG_NOERROR,
1364    but why not be nice?  */
1365 
1366 static const char re_error_msgid[] =
1367   {
1368 # define REG_NOERROR_IDX	0
1369     gettext_noop ("Success")	/* REG_NOERROR */
1370     "\0"
1371 # define REG_NOMATCH_IDX (REG_NOERROR_IDX + sizeof "Success")
1372     gettext_noop ("No match")	/* REG_NOMATCH */
1373     "\0"
1374 # define REG_BADPAT_IDX	(REG_NOMATCH_IDX + sizeof "No match")
1375     gettext_noop ("Invalid regular expression") /* REG_BADPAT */
1376     "\0"
1377 # define REG_ECOLLATE_IDX (REG_BADPAT_IDX + sizeof "Invalid regular expression")
1378     gettext_noop ("Invalid collation character") /* REG_ECOLLATE */
1379     "\0"
1380 # define REG_ECTYPE_IDX	(REG_ECOLLATE_IDX + sizeof "Invalid collation character")
1381     gettext_noop ("Invalid character class name") /* REG_ECTYPE */
1382     "\0"
1383 # define REG_EESCAPE_IDX	(REG_ECTYPE_IDX + sizeof "Invalid character class name")
1384     gettext_noop ("Trailing backslash") /* REG_EESCAPE */
1385     "\0"
1386 # define REG_ESUBREG_IDX	(REG_EESCAPE_IDX + sizeof "Trailing backslash")
1387     gettext_noop ("Invalid back reference") /* REG_ESUBREG */
1388     "\0"
1389 # define REG_EBRACK_IDX	(REG_ESUBREG_IDX + sizeof "Invalid back reference")
1390     gettext_noop ("Unmatched [ or [^")	/* REG_EBRACK */
1391     "\0"
1392 # define REG_EPAREN_IDX	(REG_EBRACK_IDX + sizeof "Unmatched [ or [^")
1393     gettext_noop ("Unmatched ( or \\(") /* REG_EPAREN */
1394     "\0"
1395 # define REG_EBRACE_IDX	(REG_EPAREN_IDX + sizeof "Unmatched ( or \\(")
1396     gettext_noop ("Unmatched \\{") /* REG_EBRACE */
1397     "\0"
1398 # define REG_BADBR_IDX	(REG_EBRACE_IDX + sizeof "Unmatched \\{")
1399     gettext_noop ("Invalid content of \\{\\}") /* REG_BADBR */
1400     "\0"
1401 # define REG_ERANGE_IDX	(REG_BADBR_IDX + sizeof "Invalid content of \\{\\}")
1402     gettext_noop ("Invalid range end")	/* REG_ERANGE */
1403     "\0"
1404 # define REG_ESPACE_IDX	(REG_ERANGE_IDX + sizeof "Invalid range end")
1405     gettext_noop ("Memory exhausted") /* REG_ESPACE */
1406     "\0"
1407 # define REG_BADRPT_IDX	(REG_ESPACE_IDX + sizeof "Memory exhausted")
1408     gettext_noop ("Invalid preceding regular expression") /* REG_BADRPT */
1409     "\0"
1410 # define REG_EEND_IDX	(REG_BADRPT_IDX + sizeof "Invalid preceding regular expression")
1411     gettext_noop ("Premature end of regular expression") /* REG_EEND */
1412     "\0"
1413 # define REG_ESIZE_IDX	(REG_EEND_IDX + sizeof "Premature end of regular expression")
1414     gettext_noop ("Regular expression too big") /* REG_ESIZE */
1415     "\0"
1416 # define REG_ERPAREN_IDX	(REG_ESIZE_IDX + sizeof "Regular expression too big")
1417     gettext_noop ("Unmatched ) or \\)") /* REG_ERPAREN */
1418   };
1419 
1420 static const size_t re_error_msgid_idx[] =
1421   {
1422     REG_NOERROR_IDX,
1423     REG_NOMATCH_IDX,
1424     REG_BADPAT_IDX,
1425     REG_ECOLLATE_IDX,
1426     REG_ECTYPE_IDX,
1427     REG_EESCAPE_IDX,
1428     REG_ESUBREG_IDX,
1429     REG_EBRACK_IDX,
1430     REG_EPAREN_IDX,
1431     REG_EBRACE_IDX,
1432     REG_BADBR_IDX,
1433     REG_ERANGE_IDX,
1434     REG_ESPACE_IDX,
1435     REG_BADRPT_IDX,
1436     REG_EEND_IDX,
1437     REG_ESIZE_IDX,
1438     REG_ERPAREN_IDX
1439   };
1440 
1441 #endif /* INSIDE_RECURSION */
1442 
#ifndef DEFINED_ONCE
/* Avoiding alloca during matching, to placate r_alloc.  */

/* MATCH_MAY_ALLOCATE is defined unless the searching and matching
   functions must be kept from calling alloca.  On some systems alloca
   is implemented in terms of malloc; with the relocating allocator
   routines in use, malloc could then cause a relocation which might
   (if the strings being searched are in the ralloc heap) shift the
   data out from underneath the regexp routines.

   A second reason to avoid allocation: Emacs processes input from X
   in a signal handler, and processing X input may call malloc.  If
   input arrives while a matching routine is calling malloc, then
   we're scrod.  Emacs cannot simply block input while calling the
   matching routines, because interrupts would then go unnoticed.  So
   Emacs blocks input around all regexp calls except the matching
   calls, which it leaves unprotected, in the faith that they will not
   malloc.  */

/* Normally, this is fine.  */
# define MATCH_MAY_ALLOCATE

/* With GNU C we are not REALLY using the C alloca, no matter what
   config.h may say, so take no precautions for it.  */
# if defined __GNUC__
#  undef C_ALLOCA
# endif

/* The match routines may not allocate if (1) they would do it with
   malloc and (2) it's not safe for them to use malloc.
   If REL_ALLOC is defined, matching would not use malloc for the
   failure stack but would still use it for the register vectors, so
   REL_ALLOC should not affect this.  */
# if defined emacs && (defined C_ALLOCA || defined REGEX_MALLOC)
#  undef MATCH_MAY_ALLOCATE
# endif
#endif /* not DEFINED_ONCE */
1481 
1482 #ifdef INSIDE_RECURSION
1483 /* Failure stack declarations and macros; both re_compile_fastmap and
1484    re_match_2 use a failure stack.  These have to be macros because of
1485    REGEX_ALLOCATE_STACK.  */
1486 
1487 
1488 /* Number of failure points for which to initially allocate space
1489    when matching.  If this number is exceeded, we allocate more
1490    space, so it is not a hard limit.  */
1491 # ifndef INIT_FAILURE_ALLOC
1492 #  define INIT_FAILURE_ALLOC 5
1493 # endif
1494 
1495 /* Roughly the maximum number of failure points on the stack.  Would be
1496    exactly that if always used MAX_FAILURE_ITEMS items each time we failed.
1497    This is a variable only so users of regex can assign to it; we never
1498    change it ourselves.  */
1499 
1500 # ifdef INT_IS_16BIT
1501 
1502 #  ifndef DEFINED_ONCE
1503 #   if defined MATCH_MAY_ALLOCATE
1504 /* 4400 was enough to cause a crash on Alpha OSF/1,
1505    whose default stack limit is 2mb.  */
1506 long int re_max_failures = 4000;
1507 #   else
1508 long int re_max_failures = 2000;
1509 #   endif
1510 #  endif
1511 
PREFIX(fail_stack_elt)1512 union PREFIX(fail_stack_elt)
1513 {
1514   UCHAR_T *pointer;
1515   long int integer;
1516 };
1517 
1518 typedef union PREFIX(fail_stack_elt) PREFIX(fail_stack_elt_t);
1519 
1520 typedef struct
1521 {
1522   PREFIX(fail_stack_elt_t) *stack;
1523   unsigned long int size;
1524   unsigned long int avail;		/* Offset of next open position.  */
1525 } PREFIX(fail_stack_type);
1526 
1527 # else /* not INT_IS_16BIT */
1528 
1529 #  ifndef DEFINED_ONCE
1530 #   if defined MATCH_MAY_ALLOCATE
1531 /* 4400 was enough to cause a crash on Alpha OSF/1,
1532    whose default stack limit is 2mb.  */
1533 int re_max_failures = 4000;
1534 #   else
1535 int re_max_failures = 2000;
1536 #   endif
1537 #  endif
1538 
PREFIX(fail_stack_elt)1539 union PREFIX(fail_stack_elt)
1540 {
1541   UCHAR_T *pointer;
1542   int integer;
1543 };
1544 
1545 typedef union PREFIX(fail_stack_elt) PREFIX(fail_stack_elt_t);
1546 
1547 typedef struct
1548 {
1549   PREFIX(fail_stack_elt_t) *stack;
1550   unsigned size;
1551   unsigned avail;			/* Offset of next open position.  */
1552 } PREFIX(fail_stack_type);
1553 
1554 # endif /* INT_IS_16BIT */
1555 
# ifndef DEFINED_ONCE
/* Predicates on the failure stack.  They use the variable
   `fail_stack' (or the pointer `fail_stack_ptr') of the caller.  */
#  define FAIL_STACK_EMPTY()     (!fail_stack.avail)
#  define FAIL_STACK_PTR_EMPTY() (!fail_stack_ptr->avail)
#  define FAIL_STACK_FULL()      (fail_stack.size == fail_stack.avail)
# endif
1561 
1562 
/* Macros that initialize and release the failure stack held in the
   variable `fail_stack'.  When the match routines may allocate,
   INIT_FAIL_STACK does `return -2' in the enclosing function if the
   allocation fails; otherwise it only empties the stack and
   RESET_FAIL_STACK does nothing.  */

# ifndef MATCH_MAY_ALLOCATE
#  define INIT_FAIL_STACK()						\
  do									\
    {									\
      fail_stack.avail = 0;						\
    }									\
  while (0)

#  define RESET_FAIL_STACK()
# else
#  define INIT_FAIL_STACK()						\
  do									\
    {									\
      fail_stack.stack = (PREFIX(fail_stack_elt_t) *)			\
	REGEX_ALLOCATE_STACK (INIT_FAILURE_ALLOC			\
			      * sizeof (PREFIX(fail_stack_elt_t)));	\
									\
      if (!fail_stack.stack)						\
	return -2;							\
									\
      fail_stack.size = INIT_FAILURE_ALLOC;				\
      fail_stack.avail = 0;						\
    }									\
  while (0)

#  define RESET_FAIL_STACK()  REGEX_FREE_STACK (fail_stack.stack)
# endif
1588 
1589 
/* Double the size of FAIL_STACK, up to approximately `re_max_failures'
   items.

   The value is 1 on success, and 0 if the stack was already too large
   or if no memory could be obtained for the larger stack.

   REGEX_REALLOCATE_STACK requires `destination' be declared.

   NOTE(review): when the reallocation fails, the `stack' member has
   already been overwritten with the null result.  */

# define DOUBLE_FAIL_STACK(fail_stack)					\
  ((fail_stack).size <= (unsigned) (re_max_failures * MAX_FAILURE_ITEMS) \
   && ((fail_stack).stack = (PREFIX(fail_stack_elt_t) *)		\
	 REGEX_REALLOCATE_STACK ((fail_stack).stack,			\
	   (fail_stack).size * sizeof (PREFIX(fail_stack_elt_t)),	\
	   ((fail_stack).size << 1) * sizeof (PREFIX(fail_stack_elt_t))), \
       (fail_stack).stack != NULL)					\
   && ((fail_stack).size <<= 1, 1))
1609 
1610 
/* Push the pointer POINTER on FAIL_STACK, doubling the stack first
   when it is full.
   The value is 1 if the pointer was pushed, and 0 if the stack was
   full and could not be doubled.  */
# define PUSH_PATTERN_OP(POINTER, FAIL_STACK)				\
  ((!FAIL_STACK_FULL () || DOUBLE_FAIL_STACK (FAIL_STACK))		\
   && ((FAIL_STACK).stack[(FAIL_STACK).avail++].pointer = POINTER,	\
       1))
1620 
/* The three primitive pushes.  Each one stores ITEM in the next open
   slot of the variable `fail_stack' and advances `avail'; none of them
   checks for room.  They should probably only be used from within
   `PUSH_FAILURE_POINT'.  */

/* Push ITEM as a pointer value.  */
# define PUSH_FAILURE_POINTER(item)					\
  fail_stack.stack[fail_stack.avail++].pointer = (UCHAR_T *) (item)

/* Push ITEM as an integer value.  */
# define PUSH_FAILURE_INT(item)						\
  fail_stack.stack[fail_stack.avail++].integer = (item)

/* Push ITEM as a whole fail_stack_elt_t value.  */
# define PUSH_FAILURE_ELT(item)						\
  fail_stack.stack[fail_stack.avail++] =  (item)
1638 
/* The three primitive pops, mirroring the three PUSH... operations.
   Each one steps `avail' back and yields the slot it then designates.
   All assume that `fail_stack' is nonempty.  */
# define POP_FAILURE_POINTER()						\
  fail_stack.stack[--fail_stack.avail].pointer
# define POP_FAILURE_INT()						\
  fail_stack.stack[--fail_stack.avail].integer
# define POP_FAILURE_ELT()						\
  fail_stack.stack[--fail_stack.avail]

/* Failure point ids are pushed and popped only when debugging;
   otherwise both operations expand to nothing.  */
# ifndef DEBUG
#  define DEBUG_PUSH(item)
#  define DEBUG_POP(item_addr)
# else
#  define DEBUG_PUSH PUSH_FAILURE_INT
#  define DEBUG_POP(item_addr) *(item_addr) = POP_FAILURE_INT ()
# endif
1653 
1654 
/* Push on the failure stack the state we will need if we ever fail
   back to this point.  The items are pushed in this order: for every
   register from lowest_active_reg to highest_active_reg its start
   pointer, its end pointer and its info word; then lowest_active_reg,
   highest_active_reg, PATTERN_PLACE and STRING_PLACE; and, when
   debugging, the failure point id.

   Requires the variables fail_stack, regstart, regend, reg_info, and
   num_regs_pushed be declared.  DOUBLE_FAIL_STACK requires
   `destination' be declared, which is done here.

   Does `return FAILURE_CODE' if the stack cannot be grown.  */

# define PUSH_FAILURE_POINT(pattern_place, string_place, failure_code)	\
  do									\
    {									\
      /* Wanted by DOUBLE_FAIL_STACK.  */				\
      char *destination;						\
      /* Register counter.  It is deliberately not an int: there is	\
	 no guarantee that an int is wide enough for the values it	\
	 has to hold.  */						\
      active_reg_t this_reg;						\
									\
      DEBUG_STATEMENT (failure_id++);					\
      DEBUG_STATEMENT (nfailure_points_pushed++);			\
      DEBUG_PRINT2 ("\nPUSH_FAILURE_POINT #%u:\n", failure_id);		\
      DEBUG_PRINT2 ("  Before push, next avail: %d\n",			\
		    (fail_stack).avail);				\
      DEBUG_PRINT2 ("                     size: %d\n",			\
		    (fail_stack).size);					\
									\
      DEBUG_PRINT2 ("  slots needed: %ld\n", NUM_FAILURE_ITEMS);	\
      DEBUG_PRINT2 ("     available: %d\n", REMAINING_AVAIL_SLOTS);	\
									\
      /* Grow the stack until all that is pushed below fits.  */	\
      while (REMAINING_AVAIL_SLOTS < NUM_FAILURE_ITEMS)			\
	{								\
	  if (!DOUBLE_FAIL_STACK (fail_stack))				\
	    return failure_code;					\
									\
	  DEBUG_PRINT2 ("\n  Doubled stack; size now: %d\n",		\
			(fail_stack).size);				\
	  DEBUG_PRINT2 ("  slots available: %d\n",			\
			REMAINING_AVAIL_SLOTS);				\
	}								\
									\
      /* The registers come first.  */					\
      DEBUG_PRINT1 ("\n");						\
									\
      for (this_reg = lowest_active_reg;				\
	   this_reg <= highest_active_reg;				\
	   this_reg++)							\
	{								\
	  DEBUG_PRINT2 ("  Pushing reg: %lu\n", this_reg);		\
	  DEBUG_STATEMENT (num_regs_pushed++);				\
									\
	  DEBUG_PRINT2 ("    start: %p\n", regstart[this_reg]);		\
	  PUSH_FAILURE_POINTER (regstart[this_reg]);			\
									\
	  DEBUG_PRINT2 ("    end: %p\n", regend[this_reg]);		\
	  PUSH_FAILURE_POINTER (regend[this_reg]);			\
									\
	  DEBUG_PRINT2 ("    info: %p\n      ",				\
			reg_info[this_reg].word.pointer);		\
	  DEBUG_PRINT2 (" match_null=%d",				\
			REG_MATCH_NULL_STRING_P (reg_info[this_reg]));	\
	  DEBUG_PRINT2 (" active=%d",					\
			IS_ACTIVE (reg_info[this_reg]));		\
	  DEBUG_PRINT2 (" matched_something=%d",			\
			MATCHED_SOMETHING (reg_info[this_reg]));	\
	  DEBUG_PRINT2 (" ever_matched=%d",				\
			EVER_MATCHED_SOMETHING (reg_info[this_reg]));	\
	  DEBUG_PRINT1 ("\n");						\
	  PUSH_FAILURE_ELT (reg_info[this_reg].word);			\
	}								\
									\
      /* Then the range of active registers.  */			\
      DEBUG_PRINT2 ("  Pushing  low active reg: %ld\n",			\
		    lowest_active_reg);					\
      PUSH_FAILURE_INT (lowest_active_reg);				\
									\
      DEBUG_PRINT2 ("  Pushing high active reg: %ld\n",			\
		    highest_active_reg);				\
      PUSH_FAILURE_INT (highest_active_reg);				\
									\
      /* Then where to resume in the pattern and in the string.  */	\
      DEBUG_PRINT2 ("  Pushing pattern %p:\n", pattern_place);		\
      DEBUG_PRINT_COMPILED_PATTERN (bufp, pattern_place, pend);		\
      PUSH_FAILURE_POINTER (pattern_place);				\
									\
      DEBUG_PRINT2 ("  Pushing string %p: `", string_place);		\
      DEBUG_PRINT_DOUBLE_STRING (string_place, string1, size1,		\
				 string2, size2);			\
      DEBUG_PRINT1 ("'\n");						\
      PUSH_FAILURE_POINTER (string_place);				\
									\
      /* The id is pushed only when debugging.  */			\
      DEBUG_PRINT2 ("  Pushing failure id: %u\n", failure_id);		\
      DEBUG_PUSH (failure_id);						\
    }									\
  while (0)
1742 
# ifndef DEFINED_ONCE
/* The number of items pushed and popped on the stack for each
   register.  */
#  define NUM_REG_ITEMS  3

/* The number of items pushed besides the registers.  The failure
   point id is pushed only when debugging, hence the extra item.  */
#  ifndef DEBUG
#   define NUM_NONREG_ITEMS 4
#  else
#   define NUM_NONREG_ITEMS 5 /* Includes failure point id.  */
#  endif

/* We push at most this many items on the stack.  */
/* We used to use (num_regs - 1), which is the number of registers
   this regexp will save; but that was changed to 5
   to avoid stack overflow for a regexp with lots of parens.  */
#  define MAX_FAILURE_ITEMS (5 * NUM_REG_ITEMS + NUM_NONREG_ITEMS)

/* We actually push this many items: NUM_REG_ITEMS for each register
   from lowest_active_reg to highest_active_reg, plus the items that
   are not tied to a register.  */
#  define NUM_FAILURE_ITEMS						\
  (((highest_active_reg - lowest_active_reg + 1) * NUM_REG_ITEMS)	\
   + NUM_NONREG_ITEMS)

/* How many items can still be added to the stack without overflowing
   it.  */
#  define REMAINING_AVAIL_SLOTS ((fail_stack).size - (fail_stack).avail)
# endif /* not DEFINED_ONCE */
1771 
1772 
/* Pops what PUSH_FAILURE_POINT pushes, in the reverse order: the
   failure point id (when debugging), the string position, the pattern
   position, the highest and the lowest active register, and then the
   info word, end pointer and start pointer of every register from
   HIGH_REG down to LOW_REG.

   We restore into the parameters, all of which should be lvalues:
     STR -- the saved data position.  It is left alone when the saved
	    position is NULL.
     PAT -- the saved pattern position.
     LOW_REG, HIGH_REG -- the lowest and highest active registers.
     REGSTART, REGEND -- arrays of string positions.
     REG_INFO -- array of information about each subexpression.

   Also assumes the variables `fail_stack' and `set_regs_matched_done'
   and (if debugging), `bufp', `pend', `string1', `size1', `string2',
   and `size2'.  */
# define POP_FAILURE_POINT(str, pat, low_reg, high_reg, regstart, regend, reg_info)\
{									\
  DEBUG_STATEMENT (unsigned failure_id;)				\
  active_reg_t this_reg;						\
  const UCHAR_T *string_temp;						\
									\
  assert (!FAIL_STACK_EMPTY ());					\
									\
  /* Remove failure points and point to how many regs pushed.  */	\
  DEBUG_PRINT1 ("POP_FAILURE_POINT:\n");				\
  DEBUG_PRINT2 ("  Before pop, next avail: %d\n", fail_stack.avail);	\
  DEBUG_PRINT2 ("                    size: %d\n", fail_stack.size);	\
									\
  assert (fail_stack.avail >= NUM_NONREG_ITEMS);			\
									\
  DEBUG_POP (&failure_id);						\
  DEBUG_PRINT2 ("  Popping failure id: %u\n", failure_id);		\
									\
  /* If the saved string location is NULL, it came from an		\
     on_failure_keep_string_jump opcode, and we want to throw away the	\
     saved NULL, thus retaining our current position in the string.  */	\
  string_temp = POP_FAILURE_POINTER ();					\
  if (string_temp != NULL)						\
    str = (const CHAR_T *) string_temp;					\
									\
  DEBUG_PRINT2 ("  Popping string %p: `", str);				\
  DEBUG_PRINT_DOUBLE_STRING (str, string1, size1, string2, size2);	\
  DEBUG_PRINT1 ("'\n");							\
									\
  pat = (UCHAR_T *) POP_FAILURE_POINTER ();				\
  DEBUG_PRINT2 ("  Popping pattern %p:\n", pat);			\
  DEBUG_PRINT_COMPILED_PATTERN (bufp, pat, pend);			\
									\
  /* Restore register info.  */						\
  high_reg = (active_reg_t) POP_FAILURE_INT ();				\
  DEBUG_PRINT2 ("  Popping high active reg: %ld\n", high_reg);		\
									\
  low_reg = (active_reg_t) POP_FAILURE_INT ();				\
  DEBUG_PRINT2 ("  Popping  low active reg: %ld\n", low_reg);		\
									\
  /* The registers were pushed from the lowest to the highest, so	\
     they come back from the highest to the lowest.  */			\
  for (this_reg = high_reg; this_reg >= low_reg; this_reg--)		\
    {									\
      DEBUG_PRINT2 ("    Popping reg: %ld\n", this_reg);		\
									\
      reg_info[this_reg].word = POP_FAILURE_ELT ();			\
      DEBUG_PRINT2 ("      info: %p\n",					\
		    reg_info[this_reg].word.pointer);			\
									\
      regend[this_reg] = (const CHAR_T *) POP_FAILURE_POINTER ();	\
      DEBUG_PRINT2 ("      end: %p\n", regend[this_reg]);		\
									\
      regstart[this_reg] = (const CHAR_T *) POP_FAILURE_POINTER ();	\
      DEBUG_PRINT2 ("      start: %p\n", regstart[this_reg]);		\
    }									\
									\
  set_regs_matched_done = 0;						\
  DEBUG_STATEMENT (nfailure_points_popped++);				\
} /* POP_FAILURE_POINT */
1853 
1854 /* Structure for per-register (a.k.a. per-group) information.
1855    Other register information, such as the
1856    starting and ending positions (which are addresses), and the list of
1857    inner groups (which is a bits list) are maintained in separate
1858    variables.
1859 
1860    We are making a (strictly speaking) nonportable assumption here: that
1861    the compiler will pack our bit fields into something that fits into
1862    the type of `word', i.e., is something that fits into one item on the
1863    failure stack.  */
1864 
1865 
1866 /* Declarations and macros for re_match_2.  */
1867 
1868 typedef union
1869 {
1870   PREFIX(fail_stack_elt_t) word;	/* Whole-element view; POP_FAILURE_POINT restores it.  */
1871   struct
1872   {
1873       /* This field is one if this group can match the empty string,
1874          zero if not.  If not yet determined,  `MATCH_NULL_UNSET_VALUE'.  */
1875 # define MATCH_NULL_UNSET_VALUE 3
1876     unsigned match_null_string_p : 2;
1877     unsigned is_active : 1;
1878     unsigned matched_something : 1;	/* Set to 1 by SET_REGS_MATCHED.  */
1879     unsigned ever_matched_something : 1;	/* Likewise set by SET_REGS_MATCHED.  */
1880   } bits;
1881 } PREFIX(register_info_type);
1882 
1883 # ifndef DEFINED_ONCE
1884 #  define REG_MATCH_NULL_STRING_P(R)  ((R).bits.match_null_string_p)
1885 #  define IS_ACTIVE(R)  ((R).bits.is_active)
1886 #  define MATCHED_SOMETHING(R)  ((R).bits.matched_something)
1887 #  define EVER_MATCHED_SOMETHING(R)  ((R).bits.ever_matched_something)
1888 
1889 
1890 /* Call this when we have matched a real character; it sets the `matched'
1891    and `ever matched' flags of every register from lowest_active_reg to
1892    highest_active_reg.  A no-op once `set_regs_matched_done' is nonzero.  */
1893 #  define SET_REGS_MATCHED()						\
1894   do									\
1895     {									\
1896       if (!set_regs_matched_done)					\
1897 	{								\
1898 	  active_reg_t r;						\
1899 	  set_regs_matched_done = 1;					\
1900 	  for (r = lowest_active_reg; r <= highest_active_reg; r++)	\
1901 	    {								\
1902 	      MATCHED_SOMETHING (reg_info[r])				\
1903 		= EVER_MATCHED_SOMETHING (reg_info[r])			\
1904 		= 1;							\
1905 	    }								\
1906 	}								\
1907     }									\
1908   while (0)
1909 # endif /* not DEFINED_ONCE */
1910 
1911 /* Registers that haven't yet matched hold a sentinel: the address of this dummy object.  */
1912 static CHAR_T PREFIX(reg_unset_dummy);
1913 # define REG_UNSET_VALUE (&PREFIX(reg_unset_dummy))
1914 # define REG_UNSET(e) ((e) == REG_UNSET_VALUE)
1915 
1916 /* Subroutine declarations and macros for regex_compile.  */
1917 static void PREFIX(store_op1) _RE_ARGS ((re_opcode_t op, UCHAR_T *loc, int arg));
1918 static void PREFIX(store_op2) _RE_ARGS ((re_opcode_t op, UCHAR_T *loc,
1919 				 int arg1, int arg2));
1920 static void PREFIX(insert_op1) _RE_ARGS ((re_opcode_t op, UCHAR_T *loc,
1921 				  int arg, UCHAR_T *end));
1922 static void PREFIX(insert_op2) _RE_ARGS ((re_opcode_t op, UCHAR_T *loc,
1923 				  int arg1, int arg2, UCHAR_T *end));
1924 static boolean PREFIX(at_begline_loc_p) _RE_ARGS ((const CHAR_T *pattern,
1925 					   const CHAR_T *p,
1926 					   reg_syntax_t syntax));
1927 static boolean PREFIX(at_endline_loc_p) _RE_ARGS ((const CHAR_T *p,
1928 					   const CHAR_T *pend,
1929 					   reg_syntax_t syntax));
1930 # ifdef WCHAR
1931 static reg_errcode_t wcs_compile_range _RE_ARGS ((CHAR_T range_start,
1932 						  const CHAR_T **p_ptr,
1933 						  const CHAR_T *pend,
1934 						  char *translate,
1935 						  reg_syntax_t syntax,
1936 						  UCHAR_T *b,
1937 						  CHAR_T *char_set));
1938 static void insert_space _RE_ARGS ((int num, CHAR_T *loc, CHAR_T *end));
1939 # else /* BYTE */
1940 static reg_errcode_t byte_compile_range _RE_ARGS ((unsigned int range_start,
1941 						   const char **p_ptr,
1942 						   const char *pend,
1943 						   char *translate,
1944 						   reg_syntax_t syntax,
1945 						   unsigned char *b));
1946 # endif /* WCHAR */
1947 
1948 /* Fetch the next character in the uncompiled pattern---translating it
1949    if necessary---into C, advancing `p'.  The character is cast to an
1950    unsigned type so that it can be used as an array index (in, e.g.,
1951    `translate').  At `pend' this returns REG_EEND from the caller.  */
1952 /* In the WCHAR build, we translate only if character <= 0xff,
1953    because it is impossible to allocate a 4GB array for encodings
1954    which have a 4-byte character set like UCS4.  */
1955 # ifndef PATFETCH
1956 #  ifdef WCHAR
1957 #   define PATFETCH(c)							\
1958   do {if (p == pend) return REG_EEND;					\
1959     c = (UCHAR_T) *p++;							\
1960     if (translate && (c <= 0xff)) c = (UCHAR_T) translate[c];		\
1961   } while (0)
1962 #  else /* BYTE */
1963 #   define PATFETCH(c)							\
1964   do {if (p == pend) return REG_EEND;					\
1965     c = (unsigned char) *p++;						\
1966     if (translate) c = (unsigned char) translate[c];			\
1967   } while (0)
1968 #  endif /* WCHAR */
1969 # endif
1970 
1971 /* Fetch the next character in the uncompiled pattern into C, with no
1972    translation.  At `pend' this returns REG_EEND from the caller.  */
1973 # define PATFETCH_RAW(c)						\
1974   do {if (p == pend) return REG_EEND;					\
1975     c = (UCHAR_T) *p++; 	       					\
1976   } while (0)
1977 
1978 /* Go backwards one character in the pattern (only `p' moves; no fetched value is restored).  */
1979 # define PATUNFETCH p--
1980 
1981 
1982 /* If `translate' is non-null, return translate[D], else just D.  We
1983    cast the subscript to translate because some data is declared as
1984    `char *', to avoid warnings when a string constant is passed.  But
1985    when we use a character as a subscript we must make it unsigned.  */
1986 /* In the WCHAR build, we translate only if character <= 0xff,
1987    because it is impossible to allocate a 4GB array for encodings
1988    which have a 4-byte character set like UCS4.  */
1989 
1990 # ifndef TRANSLATE
1991 #  ifdef WCHAR
1992 #   define TRANSLATE(d) \
1993   ((translate && ((UCHAR_T) (d)) <= 0xff) \
1994    ? (char) translate[(unsigned char) (d)] : (d))
1995 # else /* BYTE */
1996 #   define TRANSLATE(d) \
1997   (translate ? (char) translate[(unsigned char) (d)] : (d))
1998 #  endif /* WCHAR */
1999 # endif
2000 
2001 
2002 /* Macros for outputting the compiled pattern into `buffer'.  */
2003 
2004 /* If the buffer isn't allocated when it comes in, use this size (in bytes: room for 32 UCHAR_T).  */
2005 # define INIT_BUF_SIZE  (32 * sizeof(UCHAR_T))
2006 
2007 /* Make sure we have room for at least N more elements (bytes if BYTE, CHAR_T units if WCHAR).  */
2008 # ifdef WCHAR
2009 #  define GET_BUFFER_SPACE(n)						\
2010     while (((unsigned long)b - (unsigned long)COMPILED_BUFFER_VAR	\
2011             + (n)*sizeof(CHAR_T)) > bufp->allocated)			\
2012       EXTEND_BUFFER ()
2013 # else /* BYTE */
2014 #  define GET_BUFFER_SPACE(n)						\
2015     while ((unsigned long) (b - bufp->buffer + (n)) > bufp->allocated)	\
2016       EXTEND_BUFFER ()
2017 # endif /* WCHAR */
2018 
2019 /* Make sure we have one more element of buffer space and then append C at `b'.  */
2020 # define BUF_PUSH(c)							\
2021   do {									\
2022     GET_BUFFER_SPACE (1);						\
2023     *b++ = (UCHAR_T) (c);						\
2024   } while (0)
2025 
2026 
2027 /* Ensure we have two more elements of buffer space and then append C1 and C2.  */
2028 # define BUF_PUSH_2(c1, c2)						\
2029   do {									\
2030     GET_BUFFER_SPACE (2);						\
2031     *b++ = (UCHAR_T) (c1);						\
2032     *b++ = (UCHAR_T) (c2);						\
2033   } while (0)
2034 
2035 
2036 /* As with BUF_PUSH_2, except for three elements.  */
2037 # define BUF_PUSH_3(c1, c2, c3)						\
2038   do {									\
2039     GET_BUFFER_SPACE (3);						\
2040     *b++ = (UCHAR_T) (c1);						\
2041     *b++ = (UCHAR_T) (c2);						\
2042     *b++ = (UCHAR_T) (c3);						\
2043   } while (0)
2044 
2045 /* Store a jump with opcode OP at LOC to location TO.  We store a relative
2046    address offset by the 1 + OFFSET_ADDRESS_SIZE elements the jump occupies.  */
2047 # define STORE_JUMP(op, loc, to) \
2048  PREFIX(store_op1) (op, loc, (int) ((to) - (loc) - (1 + OFFSET_ADDRESS_SIZE)))
2049 
2050 /* Likewise, for a two-argument jump; ARG is passed through as the second argument.  */
2051 # define STORE_JUMP2(op, loc, to, arg) \
2052   PREFIX(store_op2) (op, loc, (int) ((to) - (loc) - (1 + OFFSET_ADDRESS_SIZE)), arg)
2053 
2054 /* Like `STORE_JUMP', but for inserting.  Assume `b' is the buffer end.  */
2055 # define INSERT_JUMP(op, loc, to) \
2056   PREFIX(insert_op1) (op, loc, (int) ((to) - (loc) - (1 + OFFSET_ADDRESS_SIZE)), b)
2057 
2058 /* Like `STORE_JUMP2', but for inserting.  Assume `b' is the buffer end.  */
2059 # define INSERT_JUMP2(op, loc, to, arg) \
2060   PREFIX(insert_op2) (op, loc, (int) ((to) - (loc) - (1 + OFFSET_ADDRESS_SIZE)),\
2061 	      arg, b)
2062 
2063 /* This is not an arbitrary limit: the arguments which represent offsets
2064    into the pattern are two bytes long.  So if 2^16 bytes turns out to
2065    be too small, many things would have to change.  */
2066 /* Any other compiler which, like MSC, has an allocation limit below 2^16
2067    bytes will have to use an approach similar to what was done below for
2068    MSC and drop MAX_BUF_SIZE a bit.  Otherwise you may end up
2069    reallocating to 0 bytes.  Such a thing is not going to work too well.
2070    You have been warned!!  */
2071 # ifndef DEFINED_ONCE
2072 #  if defined _MSC_VER  && !defined WIN32
2073 /* Microsoft C 16-bit versions limit malloc to approx 65512 bytes.
2074    The REALLOC define eliminates a flurry of conversion warnings,
2075    but is not required. */
2076 #   define MAX_BUF_SIZE  65500L
2077 #   define REALLOC(p,s) realloc ((p), (size_t) (s))
2078 #  else
2079 #   define MAX_BUF_SIZE (1L << 16)
2080 #   define REALLOC(p,s) realloc ((p), (s))
2081 #  endif
2082 
2083 /* Extend the buffer to twice its current size (capped at MAX_BUF_SIZE)
2084    via realloc and reset the pointers that pointed into the old block to
2085    point to the correct places in the new one.  If the buffer cannot grow
2086    any further, return REG_ESIZE from the enclosing function.  */
2087 #  if __BOUNDED_POINTERS__
2088 #   define SET_HIGH_BOUND(P) (__ptrhigh (P) = __ptrlow (P) + bufp->allocated)
2089 #   define MOVE_BUFFER_POINTER(P) \
2090   (__ptrlow (P) += incr, SET_HIGH_BOUND (P), __ptrvalue (P) += incr)
2091 #   define ELSE_EXTEND_BUFFER_HIGH_BOUND	\
2092   else						\
2093     {						\
2094       SET_HIGH_BOUND (b);			\
2095       SET_HIGH_BOUND (begalt);			\
2096       if (fixup_alt_jump)			\
2097 	SET_HIGH_BOUND (fixup_alt_jump);	\
2098       if (laststart)				\
2099 	SET_HIGH_BOUND (laststart);		\
2100       if (pending_exact)			\
2101 	SET_HIGH_BOUND (pending_exact);		\
2102     }
2103 #  else
2104 #   define MOVE_BUFFER_POINTER(P) (P) += incr /* `incr' is a local declared by EXTEND_BUFFER.  */
2105 #   define ELSE_EXTEND_BUFFER_HIGH_BOUND
2106 #  endif
2107 # endif /* not DEFINED_ONCE */
2108 
2109 # ifdef WCHAR
2110 #  define EXTEND_BUFFER() /* Double the buffer, capped at MAX_BUF_SIZE.  */ \
2111   do {									\
2112     UCHAR_T *old_buffer = COMPILED_BUFFER_VAR;				\
2113     int wchar_count;							\
2114     if (bufp->allocated + sizeof(UCHAR_T) > MAX_BUF_SIZE)		\
2115       return REG_ESIZE;							\
2116     bufp->allocated <<= 1;						\
2117     if (bufp->allocated > MAX_BUF_SIZE)					\
2118       bufp->allocated = MAX_BUF_SIZE;					\
2119     /* How many characters the new buffer can have?  */			\
2120     wchar_count = bufp->allocated / sizeof(UCHAR_T);			\
2121     if (wchar_count == 0) wchar_count = 1;				\
2122     /* Truncate the buffer to CHAR_T align.  */			\
2123     bufp->allocated = wchar_count * sizeof(UCHAR_T);			\
2124     RETALLOC (COMPILED_BUFFER_VAR, wchar_count, UCHAR_T);		\
2125     bufp->buffer = (char*)COMPILED_BUFFER_VAR;				\
2126     if (COMPILED_BUFFER_VAR == NULL) /* NOTE(review): old block is not freed on this path.  */ \
2127       return REG_ESPACE;						\
2128     /* If the buffer moved, rebase every pointer into it (long delta: an int can truncate on LP64).  */ \
2129     if (old_buffer != COMPILED_BUFFER_VAR)				\
2130       {									\
2131 	long incr = COMPILED_BUFFER_VAR - old_buffer;			\
2132 	MOVE_BUFFER_POINTER (b);					\
2133 	MOVE_BUFFER_POINTER (begalt);					\
2134 	if (fixup_alt_jump)						\
2135 	  MOVE_BUFFER_POINTER (fixup_alt_jump);				\
2136 	if (laststart)							\
2137 	  MOVE_BUFFER_POINTER (laststart);				\
2138 	if (pending_exact)						\
2139 	  MOVE_BUFFER_POINTER (pending_exact);				\
2140       }									\
2141     ELSE_EXTEND_BUFFER_HIGH_BOUND					\
2142   } while (0)
2143 # else /* BYTE */
2144 #  define EXTEND_BUFFER() /* Double the buffer, capped at MAX_BUF_SIZE.  */ \
2145   do {									\
2146     UCHAR_T *old_buffer = COMPILED_BUFFER_VAR;				\
2147     if (bufp->allocated == MAX_BUF_SIZE)				\
2148       return REG_ESIZE;							\
2149     bufp->allocated <<= 1;						\
2150     if (bufp->allocated > MAX_BUF_SIZE)					\
2151       bufp->allocated = MAX_BUF_SIZE;					\
2152     bufp->buffer = (UCHAR_T *) REALLOC (COMPILED_BUFFER_VAR,		\
2153 						bufp->allocated);	\
2154     if (COMPILED_BUFFER_VAR == NULL) /* NOTE(review): old block is not freed on this path.  */ \
2155       return REG_ESPACE;						\
2156     /* If the buffer moved, rebase every pointer into it (long delta: an int can truncate on LP64).  */ \
2157     if (old_buffer != COMPILED_BUFFER_VAR)				\
2158       {									\
2159 	long incr = COMPILED_BUFFER_VAR - old_buffer;			\
2160 	MOVE_BUFFER_POINTER (b);					\
2161 	MOVE_BUFFER_POINTER (begalt);					\
2162 	if (fixup_alt_jump)						\
2163 	  MOVE_BUFFER_POINTER (fixup_alt_jump);				\
2164 	if (laststart)							\
2165 	  MOVE_BUFFER_POINTER (laststart);				\
2166 	if (pending_exact)						\
2167 	  MOVE_BUFFER_POINTER (pending_exact);				\
2168       }									\
2169     ELSE_EXTEND_BUFFER_HIGH_BOUND					\
2170   } while (0)
2171 # endif /* WCHAR */
2172 
2173 # ifndef DEFINED_ONCE
2174 /* Since we have one byte reserved for the register number argument to
2175    {start,stop}_memory, the maximum number of groups we can report
2176    things about is what fits in that byte.  */
2177 #  define MAX_REGNUM 255
2178 
2179 /* But patterns can have more than `MAX_REGNUM' registers.  We just
2180    ignore the excess.  */
2181 typedef unsigned regnum_t;
2182 
2183 
2184 /* Macros for the compile stack.  */
2185 
2186 /* Since offsets can go either forwards or backwards, this type needs to
2187    be able to hold values from -(MAX_BUF_SIZE - 1) to MAX_BUF_SIZE - 1.  */
2188 /* int may be not enough when sizeof(int) == 2.  */
2189 typedef long pattern_offset_t;
2190 
2191 typedef struct
2192 {
2193   pattern_offset_t begalt_offset;
2194   pattern_offset_t fixup_alt_jump;
2195   pattern_offset_t inner_group_offset;
2196   pattern_offset_t laststart_offset;
2197   regnum_t regnum;
2198 } compile_stack_elt_t;
2199 
2200 
2201 typedef struct
2202 {
2203   compile_stack_elt_t *stack;		/* Allocated array of elements.  */
2204   unsigned size;			/* Number of elements allocated.  */
2205   unsigned avail;			/* Offset of next open position.  */
2206 } compile_stack_type;
2207 
2208 
2209 #  define INIT_COMPILE_STACK_SIZE 32
2210 
2211 #  define COMPILE_STACK_EMPTY  (compile_stack.avail == 0)
2212 #  define COMPILE_STACK_FULL  (compile_stack.avail == compile_stack.size)
2213 
2214 /* The next available element.  */
2215 #  define COMPILE_STACK_TOP (compile_stack.stack[compile_stack.avail])
2216 
2217 # endif /* not DEFINED_ONCE */
2218 
2219 /* Set the bit for character C in the bit list at `b'.  C is evaluated twice.  */
2220 # ifndef DEFINED_ONCE
2221 #  define SET_LIST_BIT(c)                               \
2222   (b[((unsigned char) (c)) / BYTEWIDTH]               \
2223    |= 1 << (((unsigned char) (c)) % BYTEWIDTH))
2224 # endif /* not DEFINED_ONCE */
2225 
2226 /* Get the next unsigned number in the uncompiled pattern, accumulating its digits into NUM.  */
2227 # define GET_UNSIGNED_NUMBER(num) \
2228   {									\
2229     while (p != pend)							\
2230       {									\
2231 	PATFETCH (c);							\
2232 	if (c < '0' || c > '9')						\
2233 	  break; /* The non-digit stays in `c', already consumed.  */	\
2234 	if (num <= RE_DUP_MAX) /* Past the limit, digits are read but ignored.  */ \
2235 	  {								\
2236 	    if (num < 0) /* First digit: start from zero.  */		\
2237 	      num = 0;							\
2238 	    num = num * 10 + c - '0';					\
2239 	  }								\
2240       }									\
2241   }
2242 
2243 # ifndef DEFINED_ONCE
2244 #  if defined _LIBC || WIDE_CHAR_SUPPORT
2245 /* The GNU C library provides support for user-defined character classes
2246    and the functions from ISO C amendment 1.  */
2247 #   ifdef CHARCLASS_NAME_MAX
2248 #    define CHAR_CLASS_MAX_LENGTH CHARCLASS_NAME_MAX
2249 #   else
2250 /* This shouldn't happen but some implementations might still have this
2251    problem.  Use a reasonable default value.  */
2252 #    define CHAR_CLASS_MAX_LENGTH 256
2253 #   endif
2254 
2255 #   ifdef _LIBC
2256 #    define IS_CHAR_CLASS(string) __wctype (string)
2257 #   else
2258 #    define IS_CHAR_CLASS(string) wctype (string)
2259 #   endif
2260 #  else
2261 #   define CHAR_CLASS_MAX_LENGTH  6 /* Namely, `xdigit'.  */
2262 
2263 #   define IS_CHAR_CLASS(string)					\
2264    (STREQ (string, "alpha") || STREQ (string, "upper")			\
2265     || STREQ (string, "lower") || STREQ (string, "digit")		\
2266     || STREQ (string, "alnum") || STREQ (string, "xdigit")		\
2267     || STREQ (string, "space") || STREQ (string, "print")		\
2268     || STREQ (string, "punct") || STREQ (string, "graph")		\
2269     || STREQ (string, "cntrl") || STREQ (string, "blank"))
2270 #  endif
2271 # endif /* not DEFINED_ONCE */
2272 
2273 # ifndef MATCH_MAY_ALLOCATE
2274 
2275 /* If we cannot allocate large objects within re_match_2_internal,
2276    we make the fail stack and register vectors global.
2277    The fail stack, we grow to the maximum size when a regexp
2278    is compiled.
2279    The register vectors, we adjust in size each time we
2280    compile a regexp, according to the number of registers it needs.  */
2281 
2282 static PREFIX(fail_stack_type) fail_stack;
2283 
2284 /* Size with which the following vectors are currently allocated.
2285    That is so we can make them bigger as needed,
2286    but never make them smaller.  */
2287 #  ifdef DEFINED_ONCE
2288 static int regs_allocated_size;
2289 
2290 static const char **     regstart, **     regend;
2291 static const char ** old_regstart, ** old_regend;
2292 static const char **best_regstart, **best_regend;
2293 static const char **reg_dummy;
2294 #  endif /* DEFINED_ONCE */
2295 
2296 static PREFIX(register_info_type) *PREFIX(reg_info);
2297 static PREFIX(register_info_type) *PREFIX(reg_info_dummy);
2298 
2299 /* Grow every global register vector so that it can hold NUM_REGS entries.
2300    Vectors are only ever enlarged; a request that fits changes nothing.  */
2301 
2302 static void
2303 PREFIX(regex_grow_registers) (num_regs)
2304      int num_regs;
2305 {
2306   if (num_regs <= regs_allocated_size)
2307     return;
2308 
2309   RETALLOC_IF (regstart,	 num_regs, const char *);
2310   RETALLOC_IF (regend,	 num_regs, const char *);
2311   RETALLOC_IF (old_regstart, num_regs, const char *);
2312   RETALLOC_IF (old_regend,	 num_regs, const char *);
2313   RETALLOC_IF (best_regstart, num_regs, const char *);
2314   RETALLOC_IF (best_regend,	 num_regs, const char *);
2315   RETALLOC_IF (PREFIX(reg_info), num_regs, PREFIX(register_info_type));
2316   RETALLOC_IF (reg_dummy,	 num_regs, const char *);
2317   RETALLOC_IF (PREFIX(reg_info_dummy), num_regs, PREFIX(register_info_type));
2318 
2319   regs_allocated_size = num_regs;
2320 }
2321 
2322 # endif /* not MATCH_MAY_ALLOCATE */
2323 
2324 # ifndef DEFINED_ONCE
2325 static boolean group_in_compile_stack _RE_ARGS ((compile_stack_type
2326 						 compile_stack,
2327 						 regnum_t regnum));
2328 # endif /* not DEFINED_ONCE */
2329 
2330 /* `regex_compile' compiles PATTERN (of length SIZE) according to SYNTAX.
2331    Returns one of error codes defined in `regex.h', or zero for success.
2332 
2333    Assumes the `allocated' (and perhaps `buffer') and `translate'
2334    fields are set in BUFP on entry.
2335 
2336    If it succeeds, results are put in BUFP (if it returns an error, the
2337    contents of BUFP are undefined):
2338      `buffer' is the compiled pattern;
2339      `syntax' is set to SYNTAX;
2340      `used' is set to the length of the compiled pattern;
2341      `fastmap_accurate' is zero;
2342      `re_nsub' is the number of subexpressions in PATTERN;
2343      `not_bol' and `not_eol' are zero;
2344 
2345    The `fastmap' and `newline_anchor' fields are neither
2346    examined nor set.  */
2347 
2348 /* Return VALUE, first freeing the compile stack (plus the WCHAR temporaries, if WCHAR).  */
2349 # ifdef WCHAR
2350 #  define FREE_STACK_RETURN(value)		\
2351   return (free(pattern), free(mbs_offset), free(is_binary), free (compile_stack.stack), value)
2352 # else
2353 #  define FREE_STACK_RETURN(value)		\
2354   return (free (compile_stack.stack), value)
2355 # endif /* WCHAR */
2356 
2357 static reg_errcode_t
2358 PREFIX(regex_compile) (ARG_PREFIX(pattern), ARG_PREFIX(size), syntax, bufp)
2359      const char *ARG_PREFIX(pattern);
2360      size_t ARG_PREFIX(size);
2361      reg_syntax_t syntax;
2362      struct re_pattern_buffer *bufp;
2363 {
2364   /* We fetch characters from PATTERN here.  Even though PATTERN is
2365      `char *' (i.e., signed), we declare these variables as unsigned, so
2366      they can be reliably used as array indices.  */
2367   register UCHAR_T c, c1;
2368 
2369 #ifdef WCHAR
2370   /* A temporary space to keep wchar_t pattern and compiled pattern.  */
2371   CHAR_T *pattern, *COMPILED_BUFFER_VAR;
2372   size_t size;
2373   /* offset buffer for optimization. See convert_mbs_to_wc.  */
2374   int *mbs_offset = NULL;
2375   /* It hold whether each wchar_t is binary data or not.  */
2376   char *is_binary = NULL;
2377   /* A flag whether exactn is handling binary data or not.  */
2378   char is_exactn_bin = FALSE;
2379 #endif /* WCHAR */
2380 
2381   /* A random temporary spot in PATTERN.  */
2382   const CHAR_T *p1;
2383 
2384   /* Points to the end of the buffer, where we should append.  */
2385   register UCHAR_T *b;
2386 
2387   /* Keeps track of unclosed groups.  */
2388   compile_stack_type compile_stack;
2389 
2390   /* Points to the current (ending) position in the pattern.  */
2391 #ifdef WCHAR
2392   const CHAR_T *p;
2393   const CHAR_T *pend;
2394 #else /* BYTE */
2395   const CHAR_T *p = pattern;
2396   const CHAR_T *pend = pattern + size;
2397 #endif /* WCHAR */
2398 
2399   /* How to translate the characters in the pattern.  */
2400   RE_TRANSLATE_TYPE translate = bufp->translate;
2401 
2402   /* Address of the count-byte of the most recently inserted `exactn'
2403      command.  This makes it possible to tell if a new exact-match
2404      character can be added to that command or if the character requires
2405      a new `exactn' command.  */
2406   UCHAR_T *pending_exact = 0;
2407 
2408   /* Address of start of the most recently finished expression.
2409      This tells, e.g., postfix * where to find the start of its
2410      operand.  Reset at the beginning of groups and alternatives.  */
2411   UCHAR_T *laststart = 0;
2412 
2413   /* Address of beginning of regexp, or inside of last group.  */
2414   UCHAR_T *begalt;
2415 
2416   /* Address of the place where a forward jump should go to the end of
2417      the containing expression.  Each alternative of an `or' -- except the
2418      last -- ends with a forward jump of this sort.  */
2419   UCHAR_T *fixup_alt_jump = 0;
2420 
2421   /* Counts open-groups as they are encountered.  Remembered for the
2422      matching close-group on the compile stack, so the same register
2423      number is put in the stop_memory as the start_memory.  */
2424   regnum_t regnum = 0;
2425 
2426 #ifdef WCHAR
2427   /* Initialize the wchar_t PATTERN and offset_buffer.  */
2428   p = pend = pattern = TALLOC(csize + 1, CHAR_T);
2429   mbs_offset = TALLOC(csize + 1, int);
2430   is_binary = TALLOC(csize + 1, char);
2431   if (pattern == NULL || mbs_offset == NULL || is_binary == NULL)
2432     {
2433       free(pattern);
2434       free(mbs_offset);
2435       free(is_binary);
2436       return REG_ESPACE;
2437     }
2438   pattern[csize] = L'\0';	/* sentinel */
2439   size = convert_mbs_to_wcs(pattern, cpattern, csize, mbs_offset, is_binary);
2440   pend = p + size;
2441   if (size < 0)
2442     {
2443       free(pattern);
2444       free(mbs_offset);
2445       free(is_binary);
2446       return REG_BADPAT;
2447     }
2448 #endif
2449 
2450 #ifdef DEBUG
2451   DEBUG_PRINT1 ("\nCompiling pattern: ");
2452   if (debug)
2453     {
2454       unsigned debug_count;
2455 
2456       for (debug_count = 0; debug_count < size; debug_count++)
2457         PUT_CHAR (pattern[debug_count]);
2458       putchar ('\n');
2459     }
2460 #endif /* DEBUG */
2461 
2462   /* Initialize the compile stack.  */
2463   compile_stack.stack = TALLOC (INIT_COMPILE_STACK_SIZE, compile_stack_elt_t);
2464   if (compile_stack.stack == NULL)
2465     {
2466 #ifdef WCHAR
2467       free(pattern);
2468       free(mbs_offset);
2469       free(is_binary);
2470 #endif
2471       return REG_ESPACE;
2472     }
2473 
2474   compile_stack.size = INIT_COMPILE_STACK_SIZE;
2475   compile_stack.avail = 0;
2476 
2477   /* Initialize the pattern buffer.  */
2478   bufp->syntax = syntax;
2479   bufp->fastmap_accurate = 0;
2480   bufp->not_bol = bufp->not_eol = 0;
2481 
2482   /* Set `used' to zero, so that if we return an error, the pattern
2483      printer (for debugging) will think there's no pattern.  We reset it
2484      at the end.  */
2485   bufp->used = 0;
2486 
2487   /* Always count groups, whether or not bufp->no_sub is set.  */
2488   bufp->re_nsub = 0;
2489 
2490 #if !defined emacs && !defined SYNTAX_TABLE
2491   /* Initialize the syntax table.  */
2492    init_syntax_once ();
2493 #endif
2494 
2495   if (bufp->allocated == 0)
2496     {
2497       if (bufp->buffer)
2498 	{ /* If zero allocated, but buffer is non-null, try to realloc
2499              enough space.  This loses if buffer's address is bogus, but
2500              that is the user's responsibility.  */
2501 #ifdef WCHAR
2502 	  /* Free bufp->buffer and allocate an array for wchar_t pattern
2503 	     buffer.  */
2504           free(bufp->buffer);
2505           COMPILED_BUFFER_VAR = TALLOC (INIT_BUF_SIZE/sizeof(UCHAR_T),
2506 					UCHAR_T);
2507 #else
2508           RETALLOC (COMPILED_BUFFER_VAR, INIT_BUF_SIZE, UCHAR_T);
2509 #endif /* WCHAR */
2510         }
2511       else
2512         { /* Caller did not allocate a buffer.  Do it for them.  */
2513           COMPILED_BUFFER_VAR = TALLOC (INIT_BUF_SIZE / sizeof(UCHAR_T),
2514 					UCHAR_T);
2515         }
2516 
2517       if (!COMPILED_BUFFER_VAR) FREE_STACK_RETURN (REG_ESPACE);
2518 #ifdef WCHAR
2519       bufp->buffer = (char*)COMPILED_BUFFER_VAR;
2520 #endif /* WCHAR */
2521       bufp->allocated = INIT_BUF_SIZE;
2522     }
2523 #ifdef WCHAR
2524   else
2525     COMPILED_BUFFER_VAR = (UCHAR_T*) bufp->buffer;
2526 #endif
2527 
2528   begalt = b = COMPILED_BUFFER_VAR;
2529 
2530   /* Loop through the uncompiled pattern until we're at the end.  */
2531   while (p != pend)
2532     {
2533       PATFETCH (c);
2534 
2535       switch (c)
2536         {
2537         case '^':
2538           {
2539             if (   /* If at start of pattern, it's an operator.  */
2540                    p == pattern + 1
2541                    /* If context independent, it's an operator.  */
2542                 || syntax & RE_CONTEXT_INDEP_ANCHORS
2543                    /* Otherwise, depends on what's come before.  */
2544                 || PREFIX(at_begline_loc_p) (pattern, p, syntax))
2545               BUF_PUSH (begline);
2546             else
2547               goto normal_char;
2548           }
2549           break;
2550 
2551 
2552         case '$':
2553           {
2554             if (   /* If at end of pattern, it's an operator.  */
2555                    p == pend
2556                    /* If context independent, it's an operator.  */
2557                 || syntax & RE_CONTEXT_INDEP_ANCHORS
2558                    /* Otherwise, depends on what's next.  */
2559                 || PREFIX(at_endline_loc_p) (p, pend, syntax))
2560                BUF_PUSH (endline);
2561              else
2562                goto normal_char;
2563            }
2564            break;
2565 
2566 
2567 	case '+':
2568         case '?':
2569           if ((syntax & RE_BK_PLUS_QM)
2570               || (syntax & RE_LIMITED_OPS))
2571             goto normal_char;
2572         handle_plus:
2573         case '*':
2574           /* If there is no previous pattern... */
2575           if (!laststart)
2576             {
2577               if (syntax & RE_CONTEXT_INVALID_OPS)
2578                 FREE_STACK_RETURN (REG_BADRPT);
2579               else if (!(syntax & RE_CONTEXT_INDEP_OPS))
2580                 goto normal_char;
2581             }
2582 
2583           {
2584             /* Are we optimizing this jump?  */
2585             boolean keep_string_p = false;
2586 
2587             /* 1 means zero (many) matches is allowed.  */
2588             char zero_times_ok = 0, many_times_ok = 0;
2589 
2590             /* If there is a sequence of repetition chars, collapse it
2591                down to just one (the right one).  We can't combine
2592                interval operators with these because of, e.g., `a{2}*',
2593                which should only match an even number of `a's.  */
2594 
2595             for (;;)
2596               {
2597                 zero_times_ok |= c != '+';
2598                 many_times_ok |= c != '?';
2599 
2600                 if (p == pend)
2601                   break;
2602 
2603                 PATFETCH (c);
2604 
2605                 if (c == '*'
2606                     || (!(syntax & RE_BK_PLUS_QM) && (c == '+' || c == '?')))
2607                   ;
2608 
2609                 else if (syntax & RE_BK_PLUS_QM  &&  c == '\\')
2610                   {
2611                     if (p == pend) FREE_STACK_RETURN (REG_EESCAPE);
2612 
2613                     PATFETCH (c1);
2614                     if (!(c1 == '+' || c1 == '?'))
2615                       {
2616                         PATUNFETCH;
2617                         PATUNFETCH;
2618                         break;
2619                       }
2620 
2621                     c = c1;
2622                   }
2623                 else
2624                   {
2625                     PATUNFETCH;
2626                     break;
2627                   }
2628 
2629                 /* If we get here, we found another repeat character.  */
2630                }
2631 
2632             /* Star, etc. applied to an empty pattern is equivalent
2633                to an empty pattern.  */
2634             if (!laststart)
2635               break;
2636 
2637             /* Now we know whether or not zero matches is allowed
2638                and also whether or not two or more matches is allowed.  */
2639             if (many_times_ok)
2640               { /* More than one repetition is allowed, so put in at the
2641                    end a backward relative jump from `b' to before the next
2642                    jump we're going to put in below (which jumps from
2643                    laststart to after this jump).
2644 
2645                    But if we are at the `*' in the exact sequence `.*\n',
2646                    insert an unconditional jump backwards to the .,
2647                    instead of the beginning of the loop.  This way we only
2648                    push a failure point once, instead of every time
2649                    through the loop.  */
2650                 assert (p - 1 > pattern);
2651 
2652                 /* Allocate the space for the jump.  */
2653                 GET_BUFFER_SPACE (1 + OFFSET_ADDRESS_SIZE);
2654 
2655                 /* We know we are not at the first character of the pattern,
2656                    because laststart was nonzero.  And we've already
2657                    incremented `p', by the way, to be the character after
2658                    the `*'.  Do we have to do something analogous here
2659                    for null bytes, because of RE_DOT_NOT_NULL?  */
2660                 if (TRANSLATE (*(p - 2)) == TRANSLATE ('.')
2661 		    && zero_times_ok
2662                     && p < pend && TRANSLATE (*p) == TRANSLATE ('\n')
2663                     && !(syntax & RE_DOT_NEWLINE))
2664                   { /* We have .*\n.  */
2665                     STORE_JUMP (jump, b, laststart);
2666                     keep_string_p = true;
2667                   }
2668                 else
2669                   /* Anything else.  */
2670                   STORE_JUMP (maybe_pop_jump, b, laststart -
2671 			      (1 + OFFSET_ADDRESS_SIZE));
2672 
2673                 /* We've added more stuff to the buffer.  */
2674                 b += 1 + OFFSET_ADDRESS_SIZE;
2675               }
2676 
2677             /* On failure, jump from laststart to b + 3, which will be the
2678                end of the buffer after this jump is inserted.  */
2679 	    /* ifdef WCHAR, 'b + 1 + OFFSET_ADDRESS_SIZE' instead of
2680 	       'b + 3'.  */
2681             GET_BUFFER_SPACE (1 + OFFSET_ADDRESS_SIZE);
2682             INSERT_JUMP (keep_string_p ? on_failure_keep_string_jump
2683                                        : on_failure_jump,
2684                          laststart, b + 1 + OFFSET_ADDRESS_SIZE);
2685             pending_exact = 0;
2686             b += 1 + OFFSET_ADDRESS_SIZE;
2687 
2688             if (!zero_times_ok)
2689               {
2690                 /* At least one repetition is required, so insert a
2691                    `dummy_failure_jump' before the initial
2692                    `on_failure_jump' instruction of the loop. This
2693                    effects a skip over that instruction the first time
2694                    we hit that loop.  */
2695                 GET_BUFFER_SPACE (1 + OFFSET_ADDRESS_SIZE);
2696                 INSERT_JUMP (dummy_failure_jump, laststart, laststart +
2697 			     2 + 2 * OFFSET_ADDRESS_SIZE);
2698                 b += 1 + OFFSET_ADDRESS_SIZE;
2699               }
2700             }
2701 	  break;
2702 
2703 
2704 	case '.':
2705           laststart = b;
2706           BUF_PUSH (anychar);
2707           break;
2708 
2709 
2710         case '[':
2711           {
2712             boolean had_char_class = false;
2713 #ifdef WCHAR
2714 	    CHAR_T range_start = 0xffffffff;
2715 #else
2716 	    unsigned int range_start = 0xffffffff;
2717 #endif
2718             if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
2719 
2720 #ifdef WCHAR
2721 	    /* We assume a charset(_not) structure as a wchar_t array.
2722 	       charset[0] = (re_opcode_t) charset(_not)
2723                charset[1] = l (= length of char_classes)
2724                charset[2] = m (= length of collating_symbols)
2725                charset[3] = n (= length of equivalence_classes)
2726 	       charset[4] = o (= length of char_ranges)
2727 	       charset[5] = p (= length of chars)
2728 
2729                charset[6] = char_class (wctype_t)
2730                charset[6+CHAR_CLASS_SIZE] = char_class (wctype_t)
2731                          ...
2732                charset[l+5]  = char_class (wctype_t)
2733 
2734                charset[l+6]  = collating_symbol (wchar_t)
2735                             ...
2736                charset[l+m+5]  = collating_symbol (wchar_t)
2737 					ifdef _LIBC we use the index in
2738 					_NL_COLLATE_SYMB_EXTRAMB instead of
2739 					wchar_t string.
2740 
2741                charset[l+m+6]  = equivalence_classes (wchar_t)
2742                               ...
2743                charset[l+m+n+5]  = equivalence_classes (wchar_t)
2744 					ifdef _LIBC we use the index in
2745 					_NL_COLLATE_WEIGHT instead of
2746 					wchar_t string.
2747 
2748 	       charset[l+m+n+6] = range_start
2749 	       charset[l+m+n+7] = range_end
2750 	                       ...
2751 	       charset[l+m+n+2o+4] = range_start
2752 	       charset[l+m+n+2o+5] = range_end
2753 					ifdef _LIBC we use the value looked up
2754 					in _NL_COLLATE_COLLSEQ instead of
2755 					wchar_t character.
2756 
2757 	       charset[l+m+n+2o+6] = char
2758 	                          ...
2759 	       charset[l+m+n+2o+p+5] = char
2760 
2761 	     */
2762 
2763 	    /* We need at least 6 spaces: the opcode, the length of
2764                char_classes, the length of collating_symbols, the length of
2765                equivalence_classes, the length of char_ranges, the length of
2766                chars.  */
2767 	    GET_BUFFER_SPACE (6);
2768 
2769 	    /* Save b as laststart.  We also use laststart as the pointer
2770 	       to the first element of the charset here.
2771 	       In other words, laststart[i] indicates charset[i].  */
2772             laststart = b;
2773 
2774             /* We test `*p == '^' twice, instead of using an if
2775                statement, so we only need one BUF_PUSH.  */
2776             BUF_PUSH (*p == '^' ? charset_not : charset);
2777             if (*p == '^')
2778               p++;
2779 
2780             /* Push the length of char_classes, the length of
2781                collating_symbols, the length of equivalence_classes, the
2782                length of char_ranges and the length of chars.  */
2783             BUF_PUSH_3 (0, 0, 0);
2784             BUF_PUSH_2 (0, 0);
2785 
2786             /* Remember the first position in the bracket expression.  */
2787             p1 = p;
2788 
2789             /* charset_not matches newline according to a syntax bit.  */
2790             if ((re_opcode_t) b[-6] == charset_not
2791                 && (syntax & RE_HAT_LISTS_NOT_NEWLINE))
2792 	      {
2793 		BUF_PUSH('\n');
2794 		laststart[5]++; /* Update the length of characters  */
2795 	      }
2796 
2797             /* Read in characters and ranges, setting map bits.  */
2798             for (;;)
2799               {
2800                 if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
2801 
2802                 PATFETCH (c);
2803 
2804                 /* \ might escape characters inside [...] and [^...].  */
2805                 if ((syntax & RE_BACKSLASH_ESCAPE_IN_LISTS) && c == '\\')
2806                   {
2807                     if (p == pend) FREE_STACK_RETURN (REG_EESCAPE);
2808 
2809                     PATFETCH (c1);
2810 		    BUF_PUSH(c1);
2811 		    laststart[5]++; /* Update the length of chars  */
2812 		    range_start = c1;
2813                     continue;
2814                   }
2815 
2816                 /* Could be the end of the bracket expression.  If it's
2817                    not (i.e., when the bracket expression is `[]' so
2818                    far), the ']' character bit gets set way below.  */
2819                 if (c == ']' && p != p1 + 1)
2820                   break;
2821 
2822                 /* Look ahead to see if it's a range when the last thing
2823                    was a character class.  */
2824                 if (had_char_class && c == '-' && *p != ']')
2825                   FREE_STACK_RETURN (REG_ERANGE);
2826 
2827                 /* Look ahead to see if it's a range when the last thing
2828                    was a character: if this is a hyphen not at the
2829                    beginning or the end of a list, then it's the range
2830                    operator.  */
2831                 if (c == '-'
2832                     && !(p - 2 >= pattern && p[-2] == '[')
2833                     && !(p - 3 >= pattern && p[-3] == '[' && p[-2] == '^')
2834                     && *p != ']')
2835                   {
2836                     reg_errcode_t ret;
2837 		    /* Allocate the space for range_start and range_end.  */
2838 		    GET_BUFFER_SPACE (2);
2839 		    /* Update the pointer to indicate end of buffer.  */
2840                     b += 2;
2841                     ret = wcs_compile_range (range_start, &p, pend, translate,
2842                                          syntax, b, laststart);
2843                     if (ret != REG_NOERROR) FREE_STACK_RETURN (ret);
2844                     range_start = 0xffffffff;
2845                   }
2846                 else if (p[0] == '-' && p[1] != ']')
2847                   { /* This handles ranges made up of characters only.  */
2848                     reg_errcode_t ret;
2849 
2850 		    /* Move past the `-'.  */
2851                     PATFETCH (c1);
2852 		    /* Allocate the space for range_start and range_end.  */
2853 		    GET_BUFFER_SPACE (2);
2854 		    /* Update the pointer to indicate end of buffer.  */
2855                     b += 2;
2856                     ret = wcs_compile_range (c, &p, pend, translate, syntax, b,
2857                                          laststart);
2858                     if (ret != REG_NOERROR) FREE_STACK_RETURN (ret);
2859 		    range_start = 0xffffffff;
2860                   }
2861 
2862                 /* See if we're at the beginning of a possible character
2863                    class.  */
2864                 else if (syntax & RE_CHAR_CLASSES && c == '[' && *p == ':')
2865                   { /* Leave room for the null.  */
2866                     char str[CHAR_CLASS_MAX_LENGTH + 1];
2867 
2868                     PATFETCH (c);
2869                     c1 = 0;
2870 
2871                     /* If pattern is `[[:'.  */
2872                     if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
2873 
2874                     for (;;)
2875                       {
2876                         PATFETCH (c);
2877                         if ((c == ':' && *p == ']') || p == pend)
2878                           break;
2879 			if (c1 < CHAR_CLASS_MAX_LENGTH)
2880 			  str[c1++] = c;
2881 			else
2882 			  /* This is in any case an invalid class name.  */
2883 			  str[0] = '\0';
2884                       }
2885                     str[c1] = '\0';
2886 
2887                     /* If it isn't a word bracketed by `[:' and `:]':
2888                        undo the ending character, the letters, and leave
2889                        the leading `:' and `[' (but store them as character).  */
2890                     if (c == ':' && *p == ']')
2891                       {
2892 			wctype_t wt;
2893 			uintptr_t alignedp;
2894 
2895 			/* Query the character class as wctype_t.  */
2896 			wt = IS_CHAR_CLASS (str);
2897 			if (wt == 0)
2898 			  FREE_STACK_RETURN (REG_ECTYPE);
2899 
2900                         /* Throw away the ] at the end of the character
2901                            class.  */
2902                         PATFETCH (c);
2903 
2904                         if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
2905 
2906 			/* Allocate the space for character class.  */
2907                         GET_BUFFER_SPACE(CHAR_CLASS_SIZE);
2908 			/* Update the pointer to indicate end of buffer.  */
2909                         b += CHAR_CLASS_SIZE;
2910 			/* Move data which follow character classes
2911 			    not to violate the data.  */
2912                         insert_space(CHAR_CLASS_SIZE,
2913 				     laststart + 6 + laststart[1],
2914 				     b - 1);
2915 			alignedp = ((uintptr_t)(laststart + 6 + laststart[1])
2916 				    + __alignof__(wctype_t) - 1)
2917 			  	    & ~(uintptr_t)(__alignof__(wctype_t) - 1);
2918 			/* Store the character class.  */
2919                         *((wctype_t*)alignedp) = wt;
2920                         /* Update length of char_classes */
2921                         laststart[1] += CHAR_CLASS_SIZE;
2922 
2923                         had_char_class = true;
2924                       }
2925                     else
2926                       {
2927                         c1++;
2928                         while (c1--)
2929                           PATUNFETCH;
2930                         BUF_PUSH ('[');
2931                         BUF_PUSH (':');
2932                         laststart[5] += 2; /* Update the length of characters  */
2933 			range_start = ':';
2934                         had_char_class = false;
2935                       }
2936                   }
2937                 else if (syntax & RE_CHAR_CLASSES && c == '[' && (*p == '='
2938 							  || *p == '.'))
2939 		  {
2940 		    CHAR_T str[128];	/* Should be large enough.  */
2941 		    CHAR_T delim = *p; /* '=' or '.'  */
2942 # ifdef _LIBC
2943 		    uint32_t nrules =
2944 		      _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
2945 # endif
2946 		    PATFETCH (c);
2947 		    c1 = 0;
2948 
2949 		    /* If pattern is `[[=' or '[[.'.  */
2950 		    if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
2951 
2952 		    for (;;)
2953 		      {
2954 			PATFETCH (c);
2955 			if ((c == delim && *p == ']') || p == pend)
2956 			  break;
2957 			if (c1 < sizeof (str) - 1)
2958 			  str[c1++] = c;
2959 			else
2960 			  /* This is in any case an invalid class name.  */
2961 			  str[0] = '\0';
2962                       }
2963 		    str[c1] = '\0';
2964 
2965 		    if (c == delim && *p == ']' && str[0] != '\0')
2966 		      {
2967                         unsigned int i, offset;
2968 			/* If we have no collation data we use the default
2969 			   collation in which each character is in a class
2970 			   by itself.  It also means that ASCII is the
2971 			   character set and therefore we cannot have character
2972 			   with more than one byte in the multibyte
2973 			   representation.  */
2974 
2975                         /* If not defined _LIBC, we push the name and
2976 			   `\0' for the sake of matching performance.  */
2977 			int datasize = c1 + 1;
2978 
2979 # ifdef _LIBC
2980 			int32_t idx = 0;
2981 			if (nrules == 0)
2982 # endif
2983 			  {
2984 			    if (c1 != 1)
2985 			      FREE_STACK_RETURN (REG_ECOLLATE);
2986 			  }
2987 # ifdef _LIBC
2988 			else
2989 			  {
2990 			    const int32_t *table;
2991 			    const int32_t *weights;
2992 			    const int32_t *extra;
2993 			    const int32_t *indirect;
2994 			    wint_t *cp;
2995 
2996 			    /* This #include defines a local function!  */
2997 #  include <locale/weightwc.h>
2998 
2999 			    if(delim == '=')
3000 			      {
3001 				/* We push the index for equivalence class.  */
3002 				cp = (wint_t*)str;
3003 
3004 				table = (const int32_t *)
3005 				  _NL_CURRENT (LC_COLLATE,
3006 					       _NL_COLLATE_TABLEWC);
3007 				weights = (const int32_t *)
3008 				  _NL_CURRENT (LC_COLLATE,
3009 					       _NL_COLLATE_WEIGHTWC);
3010 				extra = (const int32_t *)
3011 				  _NL_CURRENT (LC_COLLATE,
3012 					       _NL_COLLATE_EXTRAWC);
3013 				indirect = (const int32_t *)
3014 				  _NL_CURRENT (LC_COLLATE,
3015 					       _NL_COLLATE_INDIRECTWC);
3016 
3017 				idx = findidx ((const wint_t**)&cp);
3018 				if (idx == 0 || cp < (wint_t*) str + c1)
3019 				  /* This is no valid character.  */
3020 				  FREE_STACK_RETURN (REG_ECOLLATE);
3021 
3022 				str[0] = (wchar_t)idx;
3023 			      }
3024 			    else /* delim == '.' */
3025 			      {
3026 				/* We push collation sequence value
3027 				   for collating symbol.  */
3028 				int32_t table_size;
3029 				const int32_t *symb_table;
3030 				const unsigned char *extra;
3031 				int32_t idx;
3032 				int32_t elem;
3033 				int32_t second;
3034 				int32_t hash;
3035 				char char_str[c1];
3036 
3037 				/* We have to convert the name to a single-byte
3038 				   string.  This is possible since the names
3039 				   consist of ASCII characters and the internal
3040 				   representation is UCS4.  */
3041 				for (i = 0; i < c1; ++i)
3042 				  char_str[i] = str[i];
3043 
3044 				table_size =
3045 				  _NL_CURRENT_WORD (LC_COLLATE,
3046 						    _NL_COLLATE_SYMB_HASH_SIZEMB);
3047 				symb_table = (const int32_t *)
3048 				  _NL_CURRENT (LC_COLLATE,
3049 					       _NL_COLLATE_SYMB_TABLEMB);
3050 				extra = (const unsigned char *)
3051 				  _NL_CURRENT (LC_COLLATE,
3052 					       _NL_COLLATE_SYMB_EXTRAMB);
3053 
3054 				/* Locate the character in the hashing table.  */
3055 				hash = elem_hash (char_str, c1);
3056 
3057 				idx = 0;
3058 				elem = hash % table_size;
3059 				second = hash % (table_size - 2);
3060 				while (symb_table[2 * elem] != 0)
3061 				  {
3062 				    /* First compare the hashing value.  */
3063 				    if (symb_table[2 * elem] == hash
3064 					&& c1 == extra[symb_table[2 * elem + 1]]
3065 					&& memcmp (char_str,
3066 						   &extra[symb_table[2 * elem + 1]
3067 							 + 1], c1) == 0)
3068 				      {
3069 					/* Yep, this is the entry.  */
3070 					idx = symb_table[2 * elem + 1];
3071 					idx += 1 + extra[idx];
3072 					break;
3073 				      }
3074 
3075 				    /* Next entry.  */
3076 				    elem += second;
3077 				  }
3078 
3079 				if (symb_table[2 * elem] != 0)
3080 				  {
3081 				    /* Compute the index of the byte sequence
3082 				       in the table.  */
3083 				    idx += 1 + extra[idx];
3084 				    /* Adjust for the alignment.  */
3085 				    idx = (idx + 3) & ~3;
3086 
3087 				    str[0] = (wchar_t) idx + 4;
3088 				  }
3089 				else if (symb_table[2 * elem] == 0 && c1 == 1)
3090 				  {
3091 				    /* No valid character.  Match it as a
3092 				       single byte character.  */
3093 				    had_char_class = false;
3094 				    BUF_PUSH(str[0]);
3095 				    /* Update the length of characters  */
3096 				    laststart[5]++;
3097 				    range_start = str[0];
3098 
3099 				    /* Throw away the ] at the end of the
3100 				       collating symbol.  */
3101 				    PATFETCH (c);
3102 				    /* Go on with the rest of the bracket expression.  */
3103 				    continue;
3104 				  }
3105 				else
3106 				  FREE_STACK_RETURN (REG_ECOLLATE);
3107 			      }
3108 			    datasize = 1;
3109 			  }
3110 # endif
3111                         /* Throw away the ] at the end of the equivalence
3112                            class (or collating symbol).  */
3113                         PATFETCH (c);
3114 
3115 			/* Allocate the space for the equivalence class
3116 			   (or collating symbol) (and '\0' if needed).  */
3117                         GET_BUFFER_SPACE(datasize);
3118 			/* Update the pointer to indicate end of buffer.  */
3119                         b += datasize;
3120 
3121 			if (delim == '=')
3122 			  { /* equivalence class  */
3123 			    /* Calculate the offset of char_ranges,
3124 			       which is next to equivalence_classes.  */
3125 			    offset = laststart[1] + laststart[2]
3126 			      + laststart[3] +6;
3127 			    /* Insert space.  */
3128 			    insert_space(datasize, laststart + offset, b - 1);
3129 
3130 			    /* Write the equivalence_class and \0.  */
3131 			    for (i = 0 ; i < datasize ; i++)
3132 			      laststart[offset + i] = str[i];
3133 
3134 			    /* Update the length of equivalence_classes.  */
3135 			    laststart[3] += datasize;
3136 			    had_char_class = true;
3137 			  }
3138 			else /* delim == '.' */
3139 			  { /* collating symbol  */
3140 			    /* Calculate the offset of the equivalence_classes,
3141 			       which is next to collating_symbols.  */
3142 			    offset = laststart[1] + laststart[2] + 6;
3143 			    /* Insert space and write the collating_symbol
3144 			       and \0.  */
3145 			    insert_space(datasize, laststart + offset, b-1);
3146 			    for (i = 0 ; i < datasize ; i++)
3147 			      laststart[offset + i] = str[i];
3148 
3149 			    /* In re_match_2_internal if range_start < -1, we
3150 			       assume -range_start is the offset of the
3151 			       collating symbol which is specified as
3152 			       the character of the range start.  So we assign
3153 			       -(laststart[1] + laststart[2] + 6) to
3154 			       range_start.  */
3155 			    range_start = -(laststart[1] + laststart[2] + 6);
3156 			    /* Update the length of collating_symbol.  */
3157 			    laststart[2] += datasize;
3158 			    had_char_class = false;
3159 			  }
3160 		      }
3161                     else
3162                       {
3163                         c1++;
3164                         while (c1--)
3165                           PATUNFETCH;
3166                         BUF_PUSH ('[');
3167                         BUF_PUSH (delim);
3168                         laststart[5] += 2; /* Update the length of characters  */
3169 			range_start = delim;
3170                         had_char_class = false;
3171                       }
3172 		  }
3173                 else
3174                   {
3175                     had_char_class = false;
3176 		    BUF_PUSH(c);
3177 		    laststart[5]++;  /* Update the length of characters  */
3178 		    range_start = c;
3179                   }
3180 	      }
3181 
3182 #else /* BYTE */
3183             /* Ensure that we have enough space to push a charset: the
3184                opcode, the length count, and the bitset; 34 bytes in all.  */
3185 	    GET_BUFFER_SPACE (34);
3186 
3187             laststart = b;
3188 
3189             /* We test `*p == '^' twice, instead of using an if
3190                statement, so we only need one BUF_PUSH.  */
3191             BUF_PUSH (*p == '^' ? charset_not : charset);
3192             if (*p == '^')
3193               p++;
3194 
3195             /* Remember the first position in the bracket expression.  */
3196             p1 = p;
3197 
3198             /* Push the number of bytes in the bitmap.  */
3199             BUF_PUSH ((1 << BYTEWIDTH) / BYTEWIDTH);
3200 
3201             /* Clear the whole map.  */
3202             bzero (b, (1 << BYTEWIDTH) / BYTEWIDTH);
3203 
3204             /* charset_not matches newline according to a syntax bit.  */
3205             if ((re_opcode_t) b[-2] == charset_not
3206                 && (syntax & RE_HAT_LISTS_NOT_NEWLINE))
3207               SET_LIST_BIT ('\n');
3208 
3209             /* Read in characters and ranges, setting map bits.  */
3210             for (;;)
3211               {
3212                 if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
3213 
3214                 PATFETCH (c);
3215 
3216                 /* \ might escape characters inside [...] and [^...].  */
3217                 if ((syntax & RE_BACKSLASH_ESCAPE_IN_LISTS) && c == '\\')
3218                   {
3219                     if (p == pend) FREE_STACK_RETURN (REG_EESCAPE);
3220 
3221                     PATFETCH (c1);
3222                     SET_LIST_BIT (c1);
3223 		    range_start = c1;
3224                     continue;
3225                   }
3226 
3227                 /* Could be the end of the bracket expression.  If it's
3228                    not (i.e., when the bracket expression is `[]' so
3229                    far), the ']' character bit gets set way below.  */
3230                 if (c == ']' && p != p1 + 1)
3231                   break;
3232 
3233                 /* Look ahead to see if it's a range when the last thing
3234                    was a character class.  */
3235                 if (had_char_class && c == '-' && *p != ']')
3236                   FREE_STACK_RETURN (REG_ERANGE);
3237 
3238                 /* Look ahead to see if it's a range when the last thing
3239                    was a character: if this is a hyphen not at the
3240                    beginning or the end of a list, then it's the range
3241                    operator.  */
3242                 if (c == '-'
3243                     && !(p - 2 >= pattern && p[-2] == '[')
3244                     && !(p - 3 >= pattern && p[-3] == '[' && p[-2] == '^')
3245                     && *p != ']')
3246                   {
3247                     reg_errcode_t ret
3248                       = byte_compile_range (range_start, &p, pend, translate,
3249 					    syntax, b);
3250                     if (ret != REG_NOERROR) FREE_STACK_RETURN (ret);
3251 		    range_start = 0xffffffff;
3252                   }
3253 
3254                 else if (p[0] == '-' && p[1] != ']')
3255                   { /* This handles ranges made up of characters only.  */
3256                     reg_errcode_t ret;
3257 
3258 		    /* Move past the `-'.  */
3259                     PATFETCH (c1);
3260 
3261                     ret = byte_compile_range (c, &p, pend, translate, syntax, b);
3262                     if (ret != REG_NOERROR) FREE_STACK_RETURN (ret);
3263 		    range_start = 0xffffffff;
3264                   }
3265 
3266                 /* See if we're at the beginning of a possible character
3267                    class.  */
3268 
3269                 else if (syntax & RE_CHAR_CLASSES && c == '[' && *p == ':')
3270                   { /* Leave room for the null.  */
3271                     char str[CHAR_CLASS_MAX_LENGTH + 1];
3272 
3273                     PATFETCH (c);
3274                     c1 = 0;
3275 
3276                     /* If pattern is `[[:'.  */
3277                     if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
3278 
3279                     for (;;)
3280                       {
3281                         PATFETCH (c);
3282                         if ((c == ':' && *p == ']') || p == pend)
3283                           break;
3284 			if (c1 < CHAR_CLASS_MAX_LENGTH)
3285 			  str[c1++] = c;
3286 			else
3287 			  /* This is in any case an invalid class name.  */
3288 			  str[0] = '\0';
3289                       }
3290                     str[c1] = '\0';
3291 
3292                     /* If it isn't a word bracketed by `[:' and `:]':
3293                        undo the ending character, the letters, and leave
3294                        the leading `:' and `[' (but set bits for them).  */
3295                     if (c == ':' && *p == ']')
3296                       {
3297 # if defined _LIBC || WIDE_CHAR_SUPPORT
3298                         boolean is_lower = STREQ (str, "lower");
3299                         boolean is_upper = STREQ (str, "upper");
3300 			wctype_t wt;
3301                         int ch;
3302 
3303 			wt = IS_CHAR_CLASS (str);
3304 			if (wt == 0)
3305 			  FREE_STACK_RETURN (REG_ECTYPE);
3306 
3307                         /* Throw away the ] at the end of the character
3308                            class.  */
3309                         PATFETCH (c);
3310 
3311                         if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
3312 
3313                         for (ch = 0; ch < 1 << BYTEWIDTH; ++ch)
3314 			  {
3315 			    if (iswctype (btowc (ch), wt))
3316 			      SET_LIST_BIT (ch);
3317 
3318 			    if (translate && (is_upper || is_lower)
3319 				&& (ISUPPER (ch) || ISLOWER (ch)))
3320 			      SET_LIST_BIT (ch);
3321 			  }
3322 
3323                         had_char_class = true;
3324 # else
3325                         int ch;
3326                         boolean is_alnum = STREQ (str, "alnum");
3327                         boolean is_alpha = STREQ (str, "alpha");
3328                         boolean is_blank = STREQ (str, "blank");
3329                         boolean is_cntrl = STREQ (str, "cntrl");
3330                         boolean is_digit = STREQ (str, "digit");
3331                         boolean is_graph = STREQ (str, "graph");
3332                         boolean is_lower = STREQ (str, "lower");
3333                         boolean is_print = STREQ (str, "print");
3334                         boolean is_punct = STREQ (str, "punct");
3335                         boolean is_space = STREQ (str, "space");
3336                         boolean is_upper = STREQ (str, "upper");
3337                         boolean is_xdigit = STREQ (str, "xdigit");
3338 
3339                         if (!IS_CHAR_CLASS (str))
3340 			  FREE_STACK_RETURN (REG_ECTYPE);
3341 
3342                         /* Throw away the ] at the end of the character
3343                            class.  */
3344                         PATFETCH (c);
3345 
3346                         if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
3347 
3348                         for (ch = 0; ch < 1 << BYTEWIDTH; ch++)
3349                           {
3350 			    /* This was split into 3 if's to
3351 			       avoid an arbitrary limit in some compiler.  */
3352                             if (   (is_alnum  && ISALNUM (ch))
3353                                 || (is_alpha  && ISALPHA (ch))
3354                                 || (is_blank  && ISBLANK (ch))
3355                                 || (is_cntrl  && ISCNTRL (ch)))
3356 			      SET_LIST_BIT (ch);
3357 			    if (   (is_digit  && ISDIGIT (ch))
3358                                 || (is_graph  && ISGRAPH (ch))
3359                                 || (is_lower  && ISLOWER (ch))
3360                                 || (is_print  && ISPRINT (ch)))
3361 			      SET_LIST_BIT (ch);
3362 			    if (   (is_punct  && ISPUNCT (ch))
3363                                 || (is_space  && ISSPACE (ch))
3364                                 || (is_upper  && ISUPPER (ch))
3365                                 || (is_xdigit && ISXDIGIT (ch)))
3366 			      SET_LIST_BIT (ch);
3367 			    if (   translate && (is_upper || is_lower)
3368 				&& (ISUPPER (ch) || ISLOWER (ch)))
3369 			      SET_LIST_BIT (ch);
3370                           }
3371                         had_char_class = true;
3372 # endif	/* libc || wctype.h */
3373                       }
3374                     else
3375                       {
3376                         c1++;
3377                         while (c1--)
3378                           PATUNFETCH;
3379                         SET_LIST_BIT ('[');
3380                         SET_LIST_BIT (':');
3381 			range_start = ':';
3382                         had_char_class = false;
3383                       }
3384                   }
3385                 else if (syntax & RE_CHAR_CLASSES && c == '[' && *p == '=')
3386 		  {
3387 		    unsigned char str[MB_LEN_MAX + 1];
3388 # ifdef _LIBC
3389 		    uint32_t nrules =
3390 		      _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
3391 # endif
3392 
3393 		    PATFETCH (c);
3394 		    c1 = 0;
3395 
3396 		    /* If pattern is `[[='.  */
3397 		    if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
3398 
3399 		    for (;;)
3400 		      {
3401 			PATFETCH (c);
3402 			if ((c == '=' && *p == ']') || p == pend)
3403 			  break;
3404 			if (c1 < MB_LEN_MAX)
3405 			  str[c1++] = c;
3406 			else
3407 			  /* This is in any case an invalid class name.  */
3408 			  str[0] = '\0';
3409                       }
3410 		    str[c1] = '\0';
3411 
3412 		    if (c == '=' && *p == ']' && str[0] != '\0')
3413 		      {
3414 			/* If we have no collation data we use the default
3415 			   collation in which each character is in a class
3416 			   by itself.  It also means that ASCII is the
3417 			   character set and therefore we cannot have character
3418 			   with more than one byte in the multibyte
3419 			   representation.  */
3420 # ifdef _LIBC
3421 			if (nrules == 0)
3422 # endif
3423 			  {
3424 			    if (c1 != 1)
3425 			      FREE_STACK_RETURN (REG_ECOLLATE);
3426 
3427 			    /* Throw away the ] at the end of the equivalence
3428 			       class.  */
3429 			    PATFETCH (c);
3430 
3431 			    /* Set the bit for the character.  */
3432 			    SET_LIST_BIT (str[0]);
3433 			  }
3434 # ifdef _LIBC
3435 			else
3436 			  {
3437 			    /* Try to match the byte sequence in `str' against
3438 			       those known to the collate implementation.
3439 			       First find out whether the bytes in `str' are
3440 			       actually from exactly one character.  */
3441 			    const int32_t *table;
3442 			    const unsigned char *weights;
3443 			    const unsigned char *extra;
3444 			    const int32_t *indirect;
3445 			    int32_t idx;
3446 			    const unsigned char *cp = str;
3447 			    int ch;
3448 
3449 			    /* This #include defines a local function!  */
3450 #  include <locale/weight.h>
3451 
3452 			    table = (const int32_t *)
3453 			      _NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEMB);
3454 			    weights = (const unsigned char *)
3455 			      _NL_CURRENT (LC_COLLATE, _NL_COLLATE_WEIGHTMB);
3456 			    extra = (const unsigned char *)
3457 			      _NL_CURRENT (LC_COLLATE, _NL_COLLATE_EXTRAMB);
3458 			    indirect = (const int32_t *)
3459 			      _NL_CURRENT (LC_COLLATE, _NL_COLLATE_INDIRECTMB);
3460 
3461 			    idx = findidx (&cp);
3462 			    if (idx == 0 || cp < str + c1)
3463 			      /* This is no valid character.  */
3464 			      FREE_STACK_RETURN (REG_ECOLLATE);
3465 
3466 			    /* Throw away the ] at the end of the equivalence
3467 			       class.  */
3468 			    PATFETCH (c);
3469 
			    /* Now we have to go through the whole table
3471 			       and find all characters which have the same
3472 			       first level weight.
3473 
3474 			       XXX Note that this is not entirely correct.
3475 			       we would have to match multibyte sequences
3476 			       but this is not possible with the current
3477 			       implementation.  */
3478 			    for (ch = 1; ch < 256; ++ch)
3479 			      /* XXX This test would have to be changed if we
3480 				 would allow matching multibyte sequences.  */
3481 			      if (table[ch] > 0)
3482 				{
3483 				  int32_t idx2 = table[ch];
3484 				  size_t len = weights[idx2];
3485 
				  /* Test whether the lengths match.  */
3487 				  if (weights[idx] == len)
3488 				    {
				      /* They do.  Now compare the bytes of
					 the weight.  */
3491 				      size_t cnt = 0;
3492 
3493 				      while (cnt < len
3494 					     && (weights[idx + 1 + cnt]
3495 						 == weights[idx2 + 1 + cnt]))
3496 					++cnt;
3497 
3498 				      if (cnt == len)
3499 					/* They match.  Mark the character as
3500 					   acceptable.  */
3501 					SET_LIST_BIT (ch);
3502 				    }
3503 				}
3504 			  }
3505 # endif
3506 			had_char_class = true;
3507 		      }
3508                     else
3509                       {
3510                         c1++;
3511                         while (c1--)
3512                           PATUNFETCH;
3513                         SET_LIST_BIT ('[');
3514                         SET_LIST_BIT ('=');
3515 			range_start = '=';
3516                         had_char_class = false;
3517                       }
3518 		  }
3519                 else if (syntax & RE_CHAR_CLASSES && c == '[' && *p == '.')
3520 		  {
3521 		    unsigned char str[128];	/* Should be large enough.  */
3522 # ifdef _LIBC
3523 		    uint32_t nrules =
3524 		      _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
3525 # endif
3526 
3527 		    PATFETCH (c);
3528 		    c1 = 0;
3529 
3530 		    /* If pattern is `[[.'.  */
3531 		    if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
3532 
3533 		    for (;;)
3534 		      {
3535 			PATFETCH (c);
3536 			if ((c == '.' && *p == ']') || p == pend)
3537 			  break;
3538 			if (c1 < sizeof (str))
3539 			  str[c1++] = c;
3540 			else
3541 			  /* This is in any case an invalid class name.  */
3542 			  str[0] = '\0';
3543                       }
3544 		    str[c1] = '\0';
3545 
3546 		    if (c == '.' && *p == ']' && str[0] != '\0')
3547 		      {
3548 			/* If we have no collation data we use the default
3549 			   collation in which each character is the name
3550 			   for its own class which contains only the one
3551 			   character.  It also means that ASCII is the
3552 			   character set and therefore we cannot have character
3553 			   with more than one byte in the multibyte
3554 			   representation.  */
3555 # ifdef _LIBC
3556 			if (nrules == 0)
3557 # endif
3558 			  {
3559 			    if (c1 != 1)
3560 			      FREE_STACK_RETURN (REG_ECOLLATE);
3561 
			    /* Throw away the ] at the end of the collating
			       symbol.  */
3564 			    PATFETCH (c);
3565 
3566 			    /* Set the bit for the character.  */
3567 			    SET_LIST_BIT (str[0]);
3568 			    range_start = ((const unsigned char *) str)[0];
3569 			  }
3570 # ifdef _LIBC
3571 			else
3572 			  {
3573 			    /* Try to match the byte sequence in `str' against
3574 			       those known to the collate implementation.
3575 			       First find out whether the bytes in `str' are
3576 			       actually from exactly one character.  */
3577 			    int32_t table_size;
3578 			    const int32_t *symb_table;
3579 			    const unsigned char *extra;
3580 			    int32_t idx;
3581 			    int32_t elem;
3582 			    int32_t second;
3583 			    int32_t hash;
3584 
3585 			    table_size =
3586 			      _NL_CURRENT_WORD (LC_COLLATE,
3587 						_NL_COLLATE_SYMB_HASH_SIZEMB);
3588 			    symb_table = (const int32_t *)
3589 			      _NL_CURRENT (LC_COLLATE,
3590 					   _NL_COLLATE_SYMB_TABLEMB);
3591 			    extra = (const unsigned char *)
3592 			      _NL_CURRENT (LC_COLLATE,
3593 					   _NL_COLLATE_SYMB_EXTRAMB);
3594 
3595 			    /* Locate the character in the hashing table.  */
3596 			    hash = elem_hash (str, c1);
3597 
3598 			    idx = 0;
3599 			    elem = hash % table_size;
3600 			    second = hash % (table_size - 2);
3601 			    while (symb_table[2 * elem] != 0)
3602 			      {
3603 				/* First compare the hashing value.  */
3604 				if (symb_table[2 * elem] == hash
3605 				    && c1 == extra[symb_table[2 * elem + 1]]
3606 				    && memcmp (str,
3607 					       &extra[symb_table[2 * elem + 1]
3608 						     + 1],
3609 					       c1) == 0)
3610 				  {
3611 				    /* Yep, this is the entry.  */
3612 				    idx = symb_table[2 * elem + 1];
3613 				    idx += 1 + extra[idx];
3614 				    break;
3615 				  }
3616 
3617 				/* Next entry.  */
3618 				elem += second;
3619 			      }
3620 
3621 			    if (symb_table[2 * elem] == 0)
3622 			      /* This is no valid character.  */
3623 			      FREE_STACK_RETURN (REG_ECOLLATE);
3624 
			    /* Throw away the ] at the end of the collating
			       symbol.  */
3627 			    PATFETCH (c);
3628 
3629 			    /* Now add the multibyte character(s) we found
3630 			       to the accept list.
3631 
3632 			       XXX Note that this is not entirely correct.
3633 			       we would have to match multibyte sequences
3634 			       but this is not possible with the current
3635 			       implementation.  Also, we have to match
3636 			       collating symbols, which expand to more than
			       one byte, as a whole and not allow the
3638 			       individual bytes.  */
3639 			    c1 = extra[idx++];
3640 			    if (c1 == 1)
3641 			      range_start = extra[idx];
3642 			    while (c1-- > 0)
3643 			      {
3644 				SET_LIST_BIT (extra[idx]);
3645 				++idx;
3646 			      }
3647 			  }
3648 # endif
3649 			had_char_class = false;
3650 		      }
3651                     else
3652                       {
3653                         c1++;
3654                         while (c1--)
3655                           PATUNFETCH;
3656                         SET_LIST_BIT ('[');
3657                         SET_LIST_BIT ('.');
3658 			range_start = '.';
3659                         had_char_class = false;
3660                       }
3661 		  }
3662                 else
3663                   {
3664                     had_char_class = false;
3665                     SET_LIST_BIT (c);
3666 		    range_start = c;
3667                   }
3668               }
3669 
3670             /* Discard any (non)matching list bytes that are all 0 at the
3671                end of the map.  Decrease the map-length byte too.  */
3672             while ((int) b[-1] > 0 && b[b[-1] - 1] == 0)
3673               b[-1]--;
3674             b += b[-1];
3675 #endif /* WCHAR */
3676           }
3677           break;
3678 
3679 
3680 	case '(':
3681           if (syntax & RE_NO_BK_PARENS)
3682             goto handle_open;
3683           else
3684             goto normal_char;
3685 
3686 
3687         case ')':
3688           if (syntax & RE_NO_BK_PARENS)
3689             goto handle_close;
3690           else
3691             goto normal_char;
3692 
3693 
3694         case '\n':
3695           if (syntax & RE_NEWLINE_ALT)
3696             goto handle_alt;
3697           else
3698             goto normal_char;
3699 
3700 
3701 	case '|':
3702           if (syntax & RE_NO_BK_VBAR)
3703             goto handle_alt;
3704           else
3705             goto normal_char;
3706 
3707 
3708         case '{':
3709            if (syntax & RE_INTERVALS && syntax & RE_NO_BK_BRACES)
3710              goto handle_interval;
3711            else
3712              goto normal_char;
3713 
3714 
3715         case '\\':
3716           if (p == pend) FREE_STACK_RETURN (REG_EESCAPE);
3717 
3718           /* Do not translate the character after the \, so that we can
3719              distinguish, e.g., \B from \b, even if we normally would
3720              translate, e.g., B to b.  */
3721           PATFETCH_RAW (c);
3722 
3723           switch (c)
3724             {
3725             case '(':
3726               if (syntax & RE_NO_BK_PARENS)
3727                 goto normal_backslash;
3728 
3729             handle_open:
3730               bufp->re_nsub++;
3731               regnum++;
3732 
3733               if (COMPILE_STACK_FULL)
3734                 {
3735                   RETALLOC (compile_stack.stack, compile_stack.size << 1,
3736                             compile_stack_elt_t);
3737                   if (compile_stack.stack == NULL) return REG_ESPACE;
3738 
3739                   compile_stack.size <<= 1;
3740                 }
3741 
3742               /* These are the values to restore when we hit end of this
3743                  group.  They are all relative offsets, so that if the
3744                  whole pattern moves because of realloc, they will still
3745                  be valid.  */
3746               COMPILE_STACK_TOP.begalt_offset = begalt - COMPILED_BUFFER_VAR;
3747               COMPILE_STACK_TOP.fixup_alt_jump
3748                 = fixup_alt_jump ? fixup_alt_jump - COMPILED_BUFFER_VAR + 1 : 0;
3749               COMPILE_STACK_TOP.laststart_offset = b - COMPILED_BUFFER_VAR;
3750               COMPILE_STACK_TOP.regnum = regnum;
3751 
3752               /* We will eventually replace the 0 with the number of
3753                  groups inner to this one.  But do not push a
3754                  start_memory for groups beyond the last one we can
3755                  represent in the compiled pattern.  */
3756               if (regnum <= MAX_REGNUM)
3757                 {
3758                   COMPILE_STACK_TOP.inner_group_offset = b
3759 		    - COMPILED_BUFFER_VAR + 2;
3760                   BUF_PUSH_3 (start_memory, regnum, 0);
3761                 }
3762 
3763               compile_stack.avail++;
3764 
3765               fixup_alt_jump = 0;
3766               laststart = 0;
3767               begalt = b;
3768 	      /* If we've reached MAX_REGNUM groups, then this open
3769 		 won't actually generate any code, so we'll have to
3770 		 clear pending_exact explicitly.  */
3771 	      pending_exact = 0;
3772               break;
3773 
3774 
3775             case ')':
3776               if (syntax & RE_NO_BK_PARENS) goto normal_backslash;
3777 
3778               if (COMPILE_STACK_EMPTY)
3779 		{
3780 		  if (syntax & RE_UNMATCHED_RIGHT_PAREN_ORD)
3781 		    goto normal_backslash;
3782 		  else
3783 		    FREE_STACK_RETURN (REG_ERPAREN);
3784 		}
3785 
3786             handle_close:
3787               if (fixup_alt_jump)
3788                 { /* Push a dummy failure point at the end of the
3789                      alternative for a possible future
3790                      `pop_failure_jump' to pop.  See comments at
3791                      `push_dummy_failure' in `re_match_2'.  */
3792                   BUF_PUSH (push_dummy_failure);
3793 
3794                   /* We allocated space for this jump when we assigned
3795                      to `fixup_alt_jump', in the `handle_alt' case below.  */
3796                   STORE_JUMP (jump_past_alt, fixup_alt_jump, b - 1);
3797                 }
3798 
              /* See similar code for backslashed right paren above.  */
3800               if (COMPILE_STACK_EMPTY)
3801 		{
3802 		  if (syntax & RE_UNMATCHED_RIGHT_PAREN_ORD)
3803 		    goto normal_char;
3804 		  else
3805 		    FREE_STACK_RETURN (REG_ERPAREN);
3806 		}
3807 
3808               /* Since we just checked for an empty stack above, this
3809                  ``can't happen''.  */
3810               assert (compile_stack.avail != 0);
3811               {
3812                 /* We don't just want to restore into `regnum', because
3813                    later groups should continue to be numbered higher,
3814                    as in `(ab)c(de)' -- the second group is #2.  */
3815                 regnum_t this_group_regnum;
3816 
3817                 compile_stack.avail--;
3818                 begalt = COMPILED_BUFFER_VAR + COMPILE_STACK_TOP.begalt_offset;
3819                 fixup_alt_jump
3820                   = COMPILE_STACK_TOP.fixup_alt_jump
3821                     ? COMPILED_BUFFER_VAR + COMPILE_STACK_TOP.fixup_alt_jump - 1
3822                     : 0;
3823                 laststart = COMPILED_BUFFER_VAR + COMPILE_STACK_TOP.laststart_offset;
3824                 this_group_regnum = COMPILE_STACK_TOP.regnum;
		/* If we've reached MAX_REGNUM groups, then this close
3826 		   won't actually generate any code, so we'll have to
3827 		   clear pending_exact explicitly.  */
3828 		pending_exact = 0;
3829 
3830                 /* We're at the end of the group, so now we know how many
3831                    groups were inside this one.  */
3832                 if (this_group_regnum <= MAX_REGNUM)
3833                   {
3834 		    UCHAR_T *inner_group_loc
3835                       = COMPILED_BUFFER_VAR + COMPILE_STACK_TOP.inner_group_offset;
3836 
3837                     *inner_group_loc = regnum - this_group_regnum;
3838                     BUF_PUSH_3 (stop_memory, this_group_regnum,
3839                                 regnum - this_group_regnum);
3840                   }
3841               }
3842               break;
3843 
3844 
3845             case '|':					/* `\|'.  */
3846               if (syntax & RE_LIMITED_OPS || syntax & RE_NO_BK_VBAR)
3847                 goto normal_backslash;
3848             handle_alt:
3849               if (syntax & RE_LIMITED_OPS)
3850                 goto normal_char;
3851 
3852               /* Insert before the previous alternative a jump which
3853                  jumps to this alternative if the former fails.  */
3854               GET_BUFFER_SPACE (1 + OFFSET_ADDRESS_SIZE);
3855               INSERT_JUMP (on_failure_jump, begalt,
3856 			   b + 2 + 2 * OFFSET_ADDRESS_SIZE);
3857               pending_exact = 0;
3858               b += 1 + OFFSET_ADDRESS_SIZE;
3859 
3860               /* The alternative before this one has a jump after it
3861                  which gets executed if it gets matched.  Adjust that
3862                  jump so it will jump to this alternative's analogous
3863                  jump (put in below, which in turn will jump to the next
3864                  (if any) alternative's such jump, etc.).  The last such
3865                  jump jumps to the correct final destination.  A picture:
3866                           _____ _____
3867                           |   | |   |
3868                           |   v |   v
3869                          a | b   | c
3870 
3871                  If we are at `b', then fixup_alt_jump right now points to a
3872                  three-byte space after `a'.  We'll put in the jump, set
3873                  fixup_alt_jump to right after `b', and leave behind three
3874                  bytes which we'll fill in when we get to after `c'.  */
3875 
3876               if (fixup_alt_jump)
3877                 STORE_JUMP (jump_past_alt, fixup_alt_jump, b);
3878 
3879               /* Mark and leave space for a jump after this alternative,
3880                  to be filled in later either by next alternative or
                 when we know we're at the end of a series of alternatives.  */
3882               fixup_alt_jump = b;
3883               GET_BUFFER_SPACE (1 + OFFSET_ADDRESS_SIZE);
3884               b += 1 + OFFSET_ADDRESS_SIZE;
3885 
3886               laststart = 0;
3887               begalt = b;
3888               break;
3889 
3890 
3891             case '{':
3892               /* If \{ is a literal.  */
3893               if (!(syntax & RE_INTERVALS)
3894                      /* If we're at `\{' and it's not the open-interval
3895                         operator.  */
3896 		  || (syntax & RE_NO_BK_BRACES))
3897                 goto normal_backslash;
3898 
3899             handle_interval:
3900               {
3901                 /* If got here, then the syntax allows intervals.  */
3902 
3903                 /* At least (most) this many matches must be made.  */
3904                 int lower_bound = -1, upper_bound = -1;
3905 
3906 		/* Place in the uncompiled pattern (i.e., just after
3907 		   the '{') to go back to if the interval is invalid.  */
3908 		const CHAR_T *beg_interval = p;
3909 
3910                 if (p == pend)
3911 		  goto invalid_interval;
3912 
3913                 GET_UNSIGNED_NUMBER (lower_bound);
3914 
3915                 if (c == ',')
3916                   {
3917                     GET_UNSIGNED_NUMBER (upper_bound);
3918 		    if (upper_bound < 0)
3919 		      upper_bound = RE_DUP_MAX;
3920                   }
3921                 else
3922                   /* Interval such as `{1}' => match exactly once. */
3923                   upper_bound = lower_bound;
3924 
3925                 if (! (0 <= lower_bound && lower_bound <= upper_bound))
3926 		  goto invalid_interval;
3927 
3928                 if (!(syntax & RE_NO_BK_BRACES))
3929                   {
3930 		    if (c != '\\' || p == pend)
3931 		      goto invalid_interval;
3932                     PATFETCH (c);
3933                   }
3934 
3935                 if (c != '}')
3936 		  goto invalid_interval;
3937 
3938                 /* If it's invalid to have no preceding re.  */
3939                 if (!laststart)
3940                   {
3941 		    if (syntax & RE_CONTEXT_INVALID_OPS
3942 			&& !(syntax & RE_INVALID_INTERVAL_ORD))
3943                       FREE_STACK_RETURN (REG_BADRPT);
3944                     else if (syntax & RE_CONTEXT_INDEP_OPS)
3945                       laststart = b;
3946                     else
3947                       goto unfetch_interval;
3948                   }
3949 
3950                 /* We just parsed a valid interval.  */
3951 
3952                 if (RE_DUP_MAX < upper_bound)
3953 		  FREE_STACK_RETURN (REG_BADBR);
3954 
3955                 /* If the upper bound is zero, don't want to succeed at
3956                    all; jump from `laststart' to `b + 3', which will be
3957 		   the end of the buffer after we insert the jump.  */
3958 		/* ifdef WCHAR, 'b + 1 + OFFSET_ADDRESS_SIZE'
3959 		   instead of 'b + 3'.  */
3960                  if (upper_bound == 0)
3961                    {
3962                      GET_BUFFER_SPACE (1 + OFFSET_ADDRESS_SIZE);
3963                      INSERT_JUMP (jump, laststart, b + 1
3964 				  + OFFSET_ADDRESS_SIZE);
3965                      b += 1 + OFFSET_ADDRESS_SIZE;
3966                    }
3967 
3968                  /* Otherwise, we have a nontrivial interval.  When
3969                     we're all done, the pattern will look like:
3970                       set_number_at <jump count> <upper bound>
3971                       set_number_at <succeed_n count> <lower bound>
3972                       succeed_n <after jump addr> <succeed_n count>
3973                       <body of loop>
3974                       jump_n <succeed_n addr> <jump count>
3975                     (The upper bound and `jump_n' are omitted if
3976                     `upper_bound' is 1, though.)  */
3977                  else
3978                    { /* If the upper bound is > 1, we need to insert
3979                         more at the end of the loop.  */
3980                      unsigned nbytes = 2 + 4 * OFFSET_ADDRESS_SIZE +
3981 		       (upper_bound > 1) * (2 + 4 * OFFSET_ADDRESS_SIZE);
3982 
3983                      GET_BUFFER_SPACE (nbytes);
3984 
3985                      /* Initialize lower bound of the `succeed_n', even
3986                         though it will be set during matching by its
3987                         attendant `set_number_at' (inserted next),
3988                         because `re_compile_fastmap' needs to know.
3989                         Jump to the `jump_n' we might insert below.  */
3990                      INSERT_JUMP2 (succeed_n, laststart,
3991                                    b + 1 + 2 * OFFSET_ADDRESS_SIZE
3992 				   + (upper_bound > 1) * (1 + 2 * OFFSET_ADDRESS_SIZE)
3993 				   , lower_bound);
3994                      b += 1 + 2 * OFFSET_ADDRESS_SIZE;
3995 
3996                      /* Code to initialize the lower bound.  Insert
3997                         before the `succeed_n'.  The `5' is the last two
3998                         bytes of this `set_number_at', plus 3 bytes of
3999                         the following `succeed_n'.  */
4000 		     /* ifdef WCHAR, The '1+2*OFFSET_ADDRESS_SIZE'
4001 			is the 'set_number_at', plus '1+OFFSET_ADDRESS_SIZE'
4002 			of the following `succeed_n'.  */
4003                      PREFIX(insert_op2) (set_number_at, laststart, 1
4004 				 + 2 * OFFSET_ADDRESS_SIZE, lower_bound, b);
4005                      b += 1 + 2 * OFFSET_ADDRESS_SIZE;
4006 
4007                      if (upper_bound > 1)
4008                        { /* More than one repetition is allowed, so
4009                             append a backward jump to the `succeed_n'
4010                             that starts this interval.
4011 
4012                             When we've reached this during matching,
4013                             we'll have matched the interval once, so
4014                             jump back only `upper_bound - 1' times.  */
4015                          STORE_JUMP2 (jump_n, b, laststart
4016 				      + 2 * OFFSET_ADDRESS_SIZE + 1,
4017                                       upper_bound - 1);
4018                          b += 1 + 2 * OFFSET_ADDRESS_SIZE;
4019 
4020                          /* The location we want to set is the second
4021                             parameter of the `jump_n'; that is `b-2' as
4022                             an absolute address.  `laststart' will be
4023                             the `set_number_at' we're about to insert;
4024                             `laststart+3' the number to set, the source
4025                             for the relative address.  But we are
4026                             inserting into the middle of the pattern --
4027                             so everything is getting moved up by 5.
4028                             Conclusion: (b - 2) - (laststart + 3) + 5,
4029                             i.e., b - laststart.
4030 
4031                             We insert this at the beginning of the loop
4032                             so that if we fail during matching, we'll
4033                             reinitialize the bounds.  */
4034                          PREFIX(insert_op2) (set_number_at, laststart,
4035 					     b - laststart,
4036 					     upper_bound - 1, b);
4037                          b += 1 + 2 * OFFSET_ADDRESS_SIZE;
4038                        }
4039                    }
4040                 pending_exact = 0;
4041 		break;
4042 
4043 	      invalid_interval:
4044 		if (!(syntax & RE_INVALID_INTERVAL_ORD))
4045 		  FREE_STACK_RETURN (p == pend ? REG_EBRACE : REG_BADBR);
4046 	      unfetch_interval:
4047 		/* Match the characters as literals.  */
4048 		p = beg_interval;
4049 		c = '{';
4050 		if (syntax & RE_NO_BK_BRACES)
4051 		  goto normal_char;
4052 		else
4053 		  goto normal_backslash;
4054 	      }
4055 
4056 #ifdef emacs
4057             /* There is no way to specify the before_dot and after_dot
4058                operators.  rms says this is ok.  --karl  */
4059             case '=':
4060               BUF_PUSH (at_dot);
4061               break;
4062 
4063             case 's':
4064               laststart = b;
4065               PATFETCH (c);
4066               BUF_PUSH_2 (syntaxspec, syntax_spec_code[c]);
4067               break;
4068 
4069             case 'S':
4070               laststart = b;
4071               PATFETCH (c);
4072               BUF_PUSH_2 (notsyntaxspec, syntax_spec_code[c]);
4073               break;
4074 #endif /* emacs */
4075 
4076 
4077             case 'w':
4078 	      if (syntax & RE_NO_GNU_OPS)
4079 		goto normal_char;
4080               laststart = b;
4081               BUF_PUSH (wordchar);
4082               break;
4083 
4084 
4085             case 'W':
4086 	      if (syntax & RE_NO_GNU_OPS)
4087 		goto normal_char;
4088               laststart = b;
4089               BUF_PUSH (notwordchar);
4090               break;
4091 
4092 
4093             case '<':
4094 	      if (syntax & RE_NO_GNU_OPS)
4095 		goto normal_char;
4096               BUF_PUSH (wordbeg);
4097               break;
4098 
4099             case '>':
4100 	      if (syntax & RE_NO_GNU_OPS)
4101 		goto normal_char;
4102               BUF_PUSH (wordend);
4103               break;
4104 
4105             case 'b':
4106 	      if (syntax & RE_NO_GNU_OPS)
4107 		goto normal_char;
4108               BUF_PUSH (wordbound);
4109               break;
4110 
4111             case 'B':
4112 	      if (syntax & RE_NO_GNU_OPS)
4113 		goto normal_char;
4114               BUF_PUSH (notwordbound);
4115               break;
4116 
4117             case '`':
4118 	      if (syntax & RE_NO_GNU_OPS)
4119 		goto normal_char;
4120               BUF_PUSH (begbuf);
4121               break;
4122 
4123             case '\'':
4124 	      if (syntax & RE_NO_GNU_OPS)
4125 		goto normal_char;
4126               BUF_PUSH (endbuf);
4127               break;
4128 
4129             case '1': case '2': case '3': case '4': case '5':
4130             case '6': case '7': case '8': case '9':
4131               if (syntax & RE_NO_BK_REFS)
4132                 goto normal_char;
4133 
4134               c1 = c - '0';
4135 
4136               if (c1 > regnum)
4137                 FREE_STACK_RETURN (REG_ESUBREG);
4138 
4139               /* Can't back reference to a subexpression if inside of it.  */
4140               if (group_in_compile_stack (compile_stack, (regnum_t) c1))
4141                 goto normal_char;
4142 
4143               laststart = b;
4144               BUF_PUSH_2 (duplicate, c1);
4145               break;
4146 
4147 
4148             case '+':
4149             case '?':
4150               if (syntax & RE_BK_PLUS_QM)
4151                 goto handle_plus;
4152               else
4153                 goto normal_backslash;
4154 
4155             default:
4156             normal_backslash:
4157               /* You might think it would be useful for \ to mean
4158                  not to translate; but if we don't translate it
4159                  it will never match anything.  */
4160               c = TRANSLATE (c);
4161               goto normal_char;
4162             }
4163           break;
4164 
4165 
4166 	default:
4167         /* Expects the character in `c'.  */
4168 	normal_char:
4169 	      /* If no exactn currently being built.  */
4170           if (!pending_exact
4171 #ifdef WCHAR
4172 	      /* If last exactn handle binary(or character) and
4173 		 new exactn handle character(or binary).  */
4174 	      || is_exactn_bin != is_binary[p - 1 - pattern]
4175 #endif /* WCHAR */
4176 
4177               /* If last exactn not at current position.  */
4178               || pending_exact + *pending_exact + 1 != b
4179 
4180               /* We have only one byte following the exactn for the count.  */
4181 	      || *pending_exact == (1 << BYTEWIDTH) - 1
4182 
4183               /* If followed by a repetition operator.  */
4184               || *p == '*' || *p == '^'
4185 	      || ((syntax & RE_BK_PLUS_QM)
4186 		  ? *p == '\\' && (p[1] == '+' || p[1] == '?')
4187 		  : (*p == '+' || *p == '?'))
4188 	      || ((syntax & RE_INTERVALS)
4189                   && ((syntax & RE_NO_BK_BRACES)
4190 		      ? *p == '{'
4191                       : (p[0] == '\\' && p[1] == '{'))))
4192 	    {
4193 	      /* Start building a new exactn.  */
4194 
4195               laststart = b;
4196 
4197 #ifdef WCHAR
4198 	      /* Is this exactn binary data or character? */
4199 	      is_exactn_bin = is_binary[p - 1 - pattern];
4200 	      if (is_exactn_bin)
4201 		  BUF_PUSH_2 (exactn_bin, 0);
4202 	      else
4203 		  BUF_PUSH_2 (exactn, 0);
4204 #else
4205 	      BUF_PUSH_2 (exactn, 0);
4206 #endif /* WCHAR */
4207 	      pending_exact = b - 1;
4208             }
4209 
4210 	  BUF_PUSH (c);
4211           (*pending_exact)++;
4212 	  break;
4213         } /* switch (c) */
4214     } /* while p != pend */
4215 
4216 
4217   /* Through the pattern now.  */
4218 
4219   if (fixup_alt_jump)
4220     STORE_JUMP (jump_past_alt, fixup_alt_jump, b);
4221 
4222   if (!COMPILE_STACK_EMPTY)
4223     FREE_STACK_RETURN (REG_EPAREN);
4224 
4225   /* If we don't want backtracking, force success
4226      the first time we reach the end of the compiled pattern.  */
4227   if (syntax & RE_NO_POSIX_BACKTRACKING)
4228     BUF_PUSH (succeed);
4229 
4230 #ifdef WCHAR
4231   free (pattern);
4232   free (mbs_offset);
4233   free (is_binary);
4234 #endif
4235   free (compile_stack.stack);
4236 
4237   /* We have succeeded; set the length of the buffer.  */
4238 #ifdef WCHAR
4239   bufp->used = (uintptr_t) b - (uintptr_t) COMPILED_BUFFER_VAR;
4240 #else
4241   bufp->used = b - bufp->buffer;
4242 #endif
4243 
4244 #ifdef DEBUG
4245   if (debug)
4246     {
4247       DEBUG_PRINT1 ("\nCompiled pattern: \n");
4248       PREFIX(print_compiled_pattern) (bufp);
4249     }
4250 #endif /* DEBUG */
4251 
4252 #ifndef MATCH_MAY_ALLOCATE
4253   /* Initialize the failure stack to the largest possible stack.  This
4254      isn't necessary unless we're trying to avoid calling alloca in
4255      the search and match routines.  */
4256   {
4257     int num_regs = bufp->re_nsub + 1;
4258 
4259     /* Since DOUBLE_FAIL_STACK refuses to double only if the current size
4260        is strictly greater than re_max_failures, the largest possible stack
4261        is 2 * re_max_failures failure points.  */
4262     if (fail_stack.size < (2 * re_max_failures * MAX_FAILURE_ITEMS))
4263       {
4264 	fail_stack.size = (2 * re_max_failures * MAX_FAILURE_ITEMS);
4265 
4266 # ifdef emacs
4267 	if (! fail_stack.stack)
4268 	  fail_stack.stack
4269 	    = (PREFIX(fail_stack_elt_t) *) xmalloc (fail_stack.size
4270 				    * sizeof (PREFIX(fail_stack_elt_t)));
4271 	else
4272 	  fail_stack.stack
4273 	    = (PREFIX(fail_stack_elt_t) *) xrealloc (fail_stack.stack,
4274 				     (fail_stack.size
4275 				      * sizeof (PREFIX(fail_stack_elt_t))));
4276 # else /* not emacs */
4277 	if (! fail_stack.stack)
4278 	  fail_stack.stack
4279 	    = (PREFIX(fail_stack_elt_t) *) malloc (fail_stack.size
4280 				   * sizeof (PREFIX(fail_stack_elt_t)));
4281 	else
4282 	  fail_stack.stack
4283 	    = (PREFIX(fail_stack_elt_t) *) realloc (fail_stack.stack,
4284 					    (fail_stack.size
4285 				     * sizeof (PREFIX(fail_stack_elt_t))));
4286 # endif /* not emacs */
4287       }
4288 
4289    PREFIX(regex_grow_registers) (num_regs);
4290   }
4291 #endif /* not MATCH_MAY_ALLOCATE */
4292 
4293   return REG_NOERROR;
4294 } /* regex_compile */
4295 
4296 /* Subroutines for `regex_compile'.  */
4297 
4298 /* Store OP at LOC followed by two-byte integer parameter ARG.  */
4299 /* ifdef WCHAR, integer parameter is 1 wchar_t.  */
4300 
4301 static void
4302 PREFIX(store_op1) (op, loc, arg)
4303     re_opcode_t op;
4304     UCHAR_T *loc;
4305     int arg;
4306 {
4307   *loc = (UCHAR_T) op;
4308   STORE_NUMBER (loc + 1, arg);
4309 }
4310 
4311 
4312 /* Like `store_op1', but for two two-byte parameters ARG1 and ARG2.  */
4313 /* ifdef WCHAR, integer parameter is 1 wchar_t.  */
4314 
4315 static void
4316 PREFIX(store_op2) (op, loc, arg1, arg2)
4317     re_opcode_t op;
4318     UCHAR_T *loc;
4319     int arg1, arg2;
4320 {
4321   *loc = (UCHAR_T) op;
4322   STORE_NUMBER (loc + 1, arg1);
4323   STORE_NUMBER (loc + 1 + OFFSET_ADDRESS_SIZE, arg2);
4324 }
4325 
4326 
4327 /* Copy the bytes from LOC to END to open up three bytes of space at LOC
4328    for OP followed by two-byte integer parameter ARG.  */
4329 /* ifdef WCHAR, integer parameter is 1 wchar_t.  */
4330 
4331 static void
4332 PREFIX(insert_op1) (op, loc, arg, end)
4333     re_opcode_t op;
4334     UCHAR_T *loc;
4335     int arg;
4336     UCHAR_T *end;
4337 {
4338   register UCHAR_T *pfrom = end;
4339   register UCHAR_T *pto = end + 1 + OFFSET_ADDRESS_SIZE;
4340 
4341   while (pfrom != loc)
4342     *--pto = *--pfrom;
4343 
4344   PREFIX(store_op1) (op, loc, arg);
4345 }
4346 
4347 
4348 /* Like `insert_op1', but for two two-byte parameters ARG1 and ARG2.  */
4349 /* ifdef WCHAR, integer parameter is 1 wchar_t.  */
4350 
4351 static void
4352 PREFIX(insert_op2) (op, loc, arg1, arg2, end)
4353     re_opcode_t op;
4354     UCHAR_T *loc;
4355     int arg1, arg2;
4356     UCHAR_T *end;
4357 {
4358   register UCHAR_T *pfrom = end;
4359   register UCHAR_T *pto = end + 1 + 2 * OFFSET_ADDRESS_SIZE;
4360 
4361   while (pfrom != loc)
4362     *--pto = *--pfrom;
4363 
4364   PREFIX(store_op2) (op, loc, arg1, arg2);
4365 }
4366 
4367 
4368 /* P points to just after a ^ in PATTERN.  Return true if that ^ comes
4369    after an alternative or a begin-subexpression.  We assume there is at
4370    least one character before the ^.  */
4371 
4372 static boolean
4373 PREFIX(at_begline_loc_p) (pattern, p, syntax)
4374     const CHAR_T *pattern, *p;
4375     reg_syntax_t syntax;
4376 {
4377   const CHAR_T *prev = p - 2;
4378   boolean prev_prev_backslash = prev > pattern && prev[-1] == '\\';
4379 
4380   return
4381        /* After a subexpression?  */
4382        (*prev == '(' && (syntax & RE_NO_BK_PARENS || prev_prev_backslash))
4383        /* After an alternative?  */
4384     || (*prev == '|' && (syntax & RE_NO_BK_VBAR || prev_prev_backslash));
4385 }
4386 
4387 
4388 /* The dual of at_begline_loc_p.  This one is for $.  We assume there is
4389    at least one character after the $, i.e., `P < PEND'.  */
4390 
4391 static boolean
4392 PREFIX(at_endline_loc_p) (p, pend, syntax)
4393     const CHAR_T *p, *pend;
4394     reg_syntax_t syntax;
4395 {
4396   const CHAR_T *next = p;
4397   boolean next_backslash = *next == '\\';
4398   const CHAR_T *next_next = p + 1 < pend ? p + 1 : 0;
4399 
4400   return
4401        /* Before a subexpression?  */
4402        (syntax & RE_NO_BK_PARENS ? *next == ')'
4403         : next_backslash && next_next && *next_next == ')')
4404        /* Before an alternative?  */
4405     || (syntax & RE_NO_BK_VBAR ? *next == '|'
4406         : next_backslash && next_next && *next_next == '|');
4407 }
4408 
4409 #else /* not INSIDE_RECURSION */
4410 
4411 /* Returns true if REGNUM is in one of COMPILE_STACK's elements and
4412    false if it's not.  */
4413 
4414 static boolean
group_in_compile_stack(compile_stack,regnum)4415 group_in_compile_stack (compile_stack, regnum)
4416     compile_stack_type compile_stack;
4417     regnum_t regnum;
4418 {
4419   int this_element;
4420 
4421   for (this_element = compile_stack.avail - 1;
4422        this_element >= 0;
4423        this_element--)
4424     if (compile_stack.stack[this_element].regnum == regnum)
4425       return true;
4426 
4427   return false;
4428 }
4429 #endif /* not INSIDE_RECURSION */
4430 
4431 #ifdef INSIDE_RECURSION
4432 
4433 #ifdef WCHAR
4434 /* This insert space, which size is "num", into the pattern at "loc".
4435    "end" must point the end of the allocated buffer.  */
4436 static void
insert_space(num,loc,end)4437 insert_space (num, loc, end)
4438      int num;
4439      CHAR_T *loc;
4440      CHAR_T *end;
4441 {
4442   register CHAR_T *pto = end;
4443   register CHAR_T *pfrom = end - num;
4444 
4445   while (pfrom >= loc)
4446     *pto-- = *pfrom--;
4447 }
4448 #endif /* WCHAR */
4449 
4450 #ifdef WCHAR
4451 static reg_errcode_t
wcs_compile_range(range_start_char,p_ptr,pend,translate,syntax,b,char_set)4452 wcs_compile_range (range_start_char, p_ptr, pend, translate, syntax, b,
4453 		   char_set)
4454      CHAR_T range_start_char;
4455      const CHAR_T **p_ptr, *pend;
4456      CHAR_T *char_set, *b;
4457      RE_TRANSLATE_TYPE translate;
4458      reg_syntax_t syntax;
4459 {
4460   const CHAR_T *p = *p_ptr;
4461   CHAR_T range_start, range_end;
4462   reg_errcode_t ret;
4463 # ifdef _LIBC
4464   uint32_t nrules;
4465   uint32_t start_val, end_val;
4466 # endif
4467   if (p == pend)
4468     return REG_ERANGE;
4469 
4470 # ifdef _LIBC
4471   nrules = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
4472   if (nrules != 0)
4473     {
4474       const char *collseq = (const char *) _NL_CURRENT(LC_COLLATE,
4475 						       _NL_COLLATE_COLLSEQWC);
4476       const unsigned char *extra = (const unsigned char *)
4477 	_NL_CURRENT (LC_COLLATE, _NL_COLLATE_SYMB_EXTRAMB);
4478 
4479       if (range_start_char < -1)
4480 	{
4481 	  /* range_start is a collating symbol.  */
4482 	  int32_t *wextra;
4483 	  /* Retreive the index and get collation sequence value.  */
4484 	  wextra = (int32_t*)(extra + char_set[-range_start_char]);
4485 	  start_val = wextra[1 + *wextra];
4486 	}
4487       else
4488 	start_val = collseq_table_lookup(collseq, TRANSLATE(range_start_char));
4489 
4490       end_val = collseq_table_lookup (collseq, TRANSLATE (p[0]));
4491 
4492       /* Report an error if the range is empty and the syntax prohibits
4493 	 this.  */
4494       ret = ((syntax & RE_NO_EMPTY_RANGES)
4495 	     && (start_val > end_val))? REG_ERANGE : REG_NOERROR;
4496 
4497       /* Insert space to the end of the char_ranges.  */
4498       insert_space(2, b - char_set[5] - 2, b - 1);
4499       *(b - char_set[5] - 2) = (wchar_t)start_val;
4500       *(b - char_set[5] - 1) = (wchar_t)end_val;
4501       char_set[4]++; /* ranges_index */
4502     }
4503   else
4504 # endif
4505     {
4506       range_start = (range_start_char >= 0)? TRANSLATE (range_start_char):
4507 	range_start_char;
4508       range_end = TRANSLATE (p[0]);
4509       /* Report an error if the range is empty and the syntax prohibits
4510 	 this.  */
4511       ret = ((syntax & RE_NO_EMPTY_RANGES)
4512 	     && (range_start > range_end))? REG_ERANGE : REG_NOERROR;
4513 
4514       /* Insert space to the end of the char_ranges.  */
4515       insert_space(2, b - char_set[5] - 2, b - 1);
4516       *(b - char_set[5] - 2) = range_start;
4517       *(b - char_set[5] - 1) = range_end;
4518       char_set[4]++; /* ranges_index */
4519     }
4520   /* Have to increment the pointer into the pattern string, so the
4521      caller isn't still at the ending character.  */
4522   (*p_ptr)++;
4523 
4524   return ret;
4525 }
4526 #else /* BYTE */
4527 /* Read the ending character of a range (in a bracket expression) from the
4528    uncompiled pattern *P_PTR (which ends at PEND).  We assume the
4529    starting character is in `P[-2]'.  (`P[-1]' is the character `-'.)
4530    Then we set the translation of all bits between the starting and
4531    ending characters (inclusive) in the compiled pattern B.
4532 
4533    Return an error code.
4534 
4535    We use these short variable names so we can use the same macros as
4536    `regex_compile' itself.  */
4537 
4538 static reg_errcode_t
byte_compile_range(range_start_char,p_ptr,pend,translate,syntax,b)4539 byte_compile_range (range_start_char, p_ptr, pend, translate, syntax, b)
4540      unsigned int range_start_char;
4541      const char **p_ptr, *pend;
4542      RE_TRANSLATE_TYPE translate;
4543      reg_syntax_t syntax;
4544      unsigned char *b;
4545 {
4546   unsigned this_char;
4547   const char *p = *p_ptr;
4548   reg_errcode_t ret;
4549 # if _LIBC
4550   const unsigned char *collseq;
4551   unsigned int start_colseq;
4552   unsigned int end_colseq;
4553 # else
4554   unsigned end_char;
4555 # endif
4556 
4557   if (p == pend)
4558     return REG_ERANGE;
4559 
4560   /* Have to increment the pointer into the pattern string, so the
4561      caller isn't still at the ending character.  */
4562   (*p_ptr)++;
4563 
4564   /* Report an error if the range is empty and the syntax prohibits this.  */
4565   ret = syntax & RE_NO_EMPTY_RANGES ? REG_ERANGE : REG_NOERROR;
4566 
4567 # if _LIBC
4568   collseq = (const unsigned char *) _NL_CURRENT (LC_COLLATE,
4569 						 _NL_COLLATE_COLLSEQMB);
4570 
4571   start_colseq = collseq[(unsigned char) TRANSLATE (range_start_char)];
4572   end_colseq = collseq[(unsigned char) TRANSLATE (p[0])];
4573   for (this_char = 0; this_char <= (unsigned char) -1; ++this_char)
4574     {
4575       unsigned int this_colseq = collseq[(unsigned char) TRANSLATE (this_char)];
4576 
4577       if (start_colseq <= this_colseq && this_colseq <= end_colseq)
4578 	{
4579 	  SET_LIST_BIT (TRANSLATE (this_char));
4580 	  ret = REG_NOERROR;
4581 	}
4582     }
4583 # else
4584   /* Here we see why `this_char' has to be larger than an `unsigned
4585      char' -- we would otherwise go into an infinite loop, since all
4586      characters <= 0xff.  */
4587   range_start_char = TRANSLATE (range_start_char);
4588   /* TRANSLATE(p[0]) is casted to char (not unsigned char) in TRANSLATE,
4589      and some compilers cast it to int implicitly, so following for_loop
4590      may fall to (almost) infinite loop.
4591      e.g. If translate[p[0]] = 0xff, end_char may equals to 0xffffffff.
4592      To avoid this, we cast p[0] to unsigned int and truncate it.  */
4593   end_char = ((unsigned)TRANSLATE(p[0]) & ((1 << BYTEWIDTH) - 1));
4594 
4595   for (this_char = range_start_char; this_char <= end_char; ++this_char)
4596     {
4597       SET_LIST_BIT (TRANSLATE (this_char));
4598       ret = REG_NOERROR;
4599     }
4600 # endif
4601 
4602   return ret;
4603 }
4604 #endif /* WCHAR */
4605 
4606 /* re_compile_fastmap computes a ``fastmap'' for the compiled pattern in
4607    BUFP.  A fastmap records which of the (1 << BYTEWIDTH) possible
4608    characters can start a string that matches the pattern.  This fastmap
4609    is used by re_search to skip quickly over impossible starting points.
4610 
4611    The caller must supply the address of a (1 << BYTEWIDTH)-byte data
4612    area as BUFP->fastmap.
4613 
4614    We set the `fastmap', `fastmap_accurate', and `can_be_null' fields in
4615    the pattern buffer.
4616 
4617    Returns 0 if we succeed, -2 if an internal error.   */
4618 
4619 #ifdef WCHAR
4620 /* local function for re_compile_fastmap.
4621    truncate wchar_t character to char.  */
4622 static unsigned char truncate_wchar (CHAR_T c);
4623 
4624 static unsigned char
truncate_wchar(c)4625 truncate_wchar (c)
4626      CHAR_T c;
4627 {
4628   unsigned char buf[MB_CUR_MAX];
4629   mbstate_t state;
4630   int retval;
4631   memset (&state, '\0', sizeof (state));
4632   retval = wcrtomb (buf, c, &state);
4633   return retval > 0 ? buf[0] : (unsigned char) c;
4634 }
4635 #endif /* WCHAR */
4636 
/* Build the fastmap for the compiled pattern in BUFP (byte or wide
   variant, depending on PREFIX).

   The routine walks the compiled opcodes starting at BUFP->buffer.  For
   each path through the pattern it marks in BUFP->fastmap the bytes
   with which a match along that path can begin, and then abandons the
   path.  Alternative paths announced by `on_failure_jump' (and by
   `succeed_n' with a zero count) are remembered on the failure stack
   and explored once the current path is finished.

   On entry the fastmap is cleared, BUFP->fastmap_accurate is set to 1
   and BUFP->can_be_null to 0.  BUFP->can_be_null ends up nonzero if
   some path can match the empty string, and is also forced to 1 for
   opcodes the routine does not analyse (`duplicate', and the set and
   word opcodes in the WCHAR build).

   Returns 0 on success and -2 if pushing onto the failure stack
   fails.  */
4637 static int
4638 PREFIX(re_compile_fastmap) (bufp)
4639      struct re_pattern_buffer *bufp;
4640 {
  /* Scratch integers: J receives jump offsets and serves as loop index,
     K receives the count of `succeed_n' (and the syntax code under
     emacs).  */
4641   int j, k;
4642 #ifdef MATCH_MAY_ALLOCATE
4643   PREFIX(fail_stack_type) fail_stack;
4644 #endif
4645 #ifndef REGEX_MALLOC
  /* Not referenced directly in this function; presumably needed by the
     alloca-based stack macros -- TODO confirm.  */
4646   char *destination;
4647 #endif
4648 
4649   register char *fastmap = bufp->fastmap;
4650 
4651 #ifdef WCHAR
4652   /* We need to cast pattern to (wchar_t*), because we casted this compiled
4653      pattern to (char*) in regex_compile.  */
4654   UCHAR_T *pattern = (UCHAR_T*)bufp->buffer;
4655   register UCHAR_T *pend = (UCHAR_T*) (bufp->buffer + bufp->used);
4656 #else /* BYTE */
4657   UCHAR_T *pattern = bufp->buffer;
4658   register UCHAR_T *pend = pattern + bufp->used;
4659 #endif /* WCHAR */
4660   UCHAR_T *p = pattern;
4661 
4662 #ifdef REL_ALLOC
4663   /* This holds the pointer to the failure stack, when
4664      it is allocated relocatably.  */
4665   fail_stack_elt_t *failure_stack_ptr;
4666 #endif
4667 
4668   /* Assume that each path through the pattern can be null until
4669      proven otherwise.  We set this false at the bottom of switch
4670      statement, to which we get only if a particular path doesn't
4671      match the empty string.  */
4672   boolean path_can_be_null = true;
4673 
4674   /* We aren't doing a `succeed_n' to begin with.  */
4675   boolean succeed_n_p = false;
4676 
4677   assert (fastmap != NULL && p != NULL);
4678 
4679   INIT_FAIL_STACK ();
4680   bzero (fastmap, 1 << BYTEWIDTH);  /* Assume nothing's valid.  */
4681   bufp->fastmap_accurate = 1;	    /* It will be when we're done.  */
4682   bufp->can_be_null = 0;
4683 
4684   while (1)
4685     {
4686       if (p == pend || *p == succeed)
4687 	{
4688 	  /* We have reached the (effective) end of pattern.  */
4689 	  if (!FAIL_STACK_EMPTY ())
4690 	    {
4691 	      bufp->can_be_null |= path_can_be_null;
4692 
4693 	      /* Reset for next path.  */
4694 	      path_can_be_null = true;
4695 
	      /* Resume at the most recently saved alternative.  */
4696 	      p = fail_stack.stack[--fail_stack.avail].pointer;
4697 
4698 	      continue;
4699 	    }
4700 	  else
4701 	    break;
4702 	}
4703 
4704       /* We should never be about to go beyond the end of the pattern.  */
4705       assert (p < pend);
4706 
4707       switch (SWITCH_ENUM_CAST ((re_opcode_t) *p++))
4708 	{
4709 
4710         /* I guess the idea here is to simply not bother with a fastmap
4711            if a backreference is used, since it's too hard to figure out
4712            the fastmap for the corresponding group.  Setting
4713            `can_be_null' stops `re_search_2' from using the fastmap, so
4714            that is all we do.  */
4715 	case duplicate:
4716 	  bufp->can_be_null = 1;
4717           goto done;
4718 
4719 
4720       /* Following are the cases which match a character.  These end
4721          with `break'.  */
4722 
	/* For the literal opcodes only p[1] is marked; presumably p[0]
	   is the length of the literal and p[1] its first character --
	   TODO confirm against the opcode encoding.  */
4723 #ifdef WCHAR
4724 	case exactn:
4725           fastmap[truncate_wchar(p[1])] = 1;
4726 	  break;
4727 #else /* BYTE */
4728 	case exactn:
4729           fastmap[p[1]] = 1;
4730 	  break;
4731 #endif /* WCHAR */
4732 #ifdef MBS_SUPPORT
4733 	case exactn_bin:
4734 	  fastmap[p[1]] = 1;
4735 	  break;
4736 #endif
4737 
4738 #ifdef WCHAR
4739         /* It is hard to distinguish fastmap from (multi byte) characters
4740            which depends on current locale.  */
4741         case charset:
4742 	case charset_not:
4743 	case wordchar:
4744 	case notwordchar:
4745           bufp->can_be_null = 1;
4746           goto done;
4747 #else /* BYTE */
	/* *p is the number of bitmap cells that follow; bit J of the
	   bitmap set means character J is marked.  */
4748         case charset:
4749           for (j = *p++ * BYTEWIDTH - 1; j >= 0; j--)
4750 	    if (p[j / BYTEWIDTH] & (1 << (j % BYTEWIDTH)))
4751               fastmap[j] = 1;
4752 	  break;
4753 
4754 
4755 	case charset_not:
4756 	  /* Chars beyond end of map must be allowed.  */
4757 	  for (j = *p * BYTEWIDTH; j < (1 << BYTEWIDTH); j++)
4758             fastmap[j] = 1;
4759 
	  /* Within the map, mark every character whose bit is clear.  */
4760 	  for (j = *p++ * BYTEWIDTH - 1; j >= 0; j--)
4761 	    if (!(p[j / BYTEWIDTH] & (1 << (j % BYTEWIDTH))))
4762               fastmap[j] = 1;
4763           break;
4764 
4765 
4766 	case wordchar:
4767 	  for (j = 0; j < (1 << BYTEWIDTH); j++)
4768 	    if (SYNTAX (j) == Sword)
4769 	      fastmap[j] = 1;
4770 	  break;
4771 
4772 
4773 	case notwordchar:
4774 	  for (j = 0; j < (1 << BYTEWIDTH); j++)
4775 	    if (SYNTAX (j) != Sword)
4776 	      fastmap[j] = 1;
4777 	  break;
4778 #endif /* WCHAR */
4779 
4780         case anychar:
4781 	  {
	    /* Remember the newline entry so it can be restored below.  */
4782 	    int fastmap_newline = fastmap['\n'];
4783 
4784 	    /* `.' matches anything ...  */
4785 	    for (j = 0; j < (1 << BYTEWIDTH); j++)
4786 	      fastmap[j] = 1;
4787 
4788 	    /* ... except perhaps newline.  */
4789 	    if (!(bufp->syntax & RE_DOT_NEWLINE))
4790 	      fastmap['\n'] = fastmap_newline;
4791 
4792 	    /* Return if we have already set `can_be_null'; if we have,
4793 	       then the fastmap is irrelevant.  Something's wrong here.  */
4794 	    else if (bufp->can_be_null)
4795 	      goto done;
4796 
4797 	    /* Otherwise, have to check alternative paths.  */
4798 	    break;
4799 	  }
4800 
4801 #ifdef emacs
4802         case syntaxspec:
4803 	  k = *p++;
4804 	  for (j = 0; j < (1 << BYTEWIDTH); j++)
4805 	    if (SYNTAX (j) == (enum syntaxcode) k)
4806 	      fastmap[j] = 1;
4807 	  break;
4808 
4809 
4810 	case notsyntaxspec:
4811 	  k = *p++;
4812 	  for (j = 0; j < (1 << BYTEWIDTH); j++)
4813 	    if (SYNTAX (j) != (enum syntaxcode) k)
4814 	      fastmap[j] = 1;
4815 	  break;
4816 
4817 
4818       /* All cases after this match the empty string.  These end with
4819          `continue'.  */
4820 
4821 
4822 	case before_dot:
4823 	case at_dot:
4824 	case after_dot:
4825           continue;
4826 #endif /* emacs */
4827 
4828 
	/* Opcodes without operands that consume no input: just move on
	   to the next opcode.  */
4829         case no_op:
4830         case begline:
4831         case endline:
4832 	case begbuf:
4833 	case endbuf:
4834 	case wordbound:
4835 	case notwordbound:
4836 	case wordbeg:
4837 	case wordend:
4838         case push_dummy_failure:
4839           continue;
4840 
4841 
4842 	case jump_n:
4843         case pop_failure_jump:
4844 	case maybe_pop_jump:
4845 	case jump:
4846         case jump_past_alt:
4847 	case dummy_failure_jump:
4848           EXTRACT_NUMBER_AND_INCR (j, p);
4849 	  p += j;
4850 	  if (j > 0)
4851 	    continue;
4852 
4853           /* Jump backward implies we just went through the body of a
4854              loop and matched nothing.  Opcode jumped to should be
4855              `on_failure_jump' or `succeed_n'.  Just treat it like an
4856              ordinary jump.  For a * loop, it has pushed its failure
4857              point already; if so, discard that as redundant.  */
4858           if ((re_opcode_t) *p != on_failure_jump
4859 	      && (re_opcode_t) *p != succeed_n)
4860 	    continue;
4861 
4862           p++;
4863           EXTRACT_NUMBER_AND_INCR (j, p);
4864           p += j;
4865 
4866           /* If what's on the stack is where we are now, pop it.  */
4867           if (!FAIL_STACK_EMPTY ()
4868 	      && fail_stack.stack[fail_stack.avail - 1].pointer == p)
4869             fail_stack.avail--;
4870 
4871           continue;
4872 
4873 
4874         case on_failure_jump:
4875         case on_failure_keep_string_jump:
4876 	handle_on_failure_jump:
4877           EXTRACT_NUMBER_AND_INCR (j, p);
4878 
4879           /* For some patterns, e.g., `(a?)?', `p+j' here points to the
4880              end of the pattern.  We don't want to push such a point,
4881              since when we restore it above, entering the switch will
4882              increment `p' past the end of the pattern.  We don't need
4883              to push such a point since we obviously won't find any more
4884              fastmap entries beyond `pend'.  Such a pattern can match
4885              the null string, though.  */
4886           if (p + j < pend)
4887             {
4888               if (!PUSH_PATTERN_OP (p + j, fail_stack))
4889 		{
4890 		  RESET_FAIL_STACK ();
4891 		  return -2;
4892 		}
4893             }
4894           else
4895             bufp->can_be_null = 1;
4896 
	  /* When we arrived here from `succeed_n', the count operand
	     still follows the jump offset and must be stepped over.  */
4897           if (succeed_n_p)
4898             {
4899               EXTRACT_NUMBER_AND_INCR (k, p);	/* Skip the n.  */
4900               succeed_n_p = false;
4901 	    }
4902 
4903           continue;
4904 
4905 
4906 	case succeed_n:
4907           /* Get to the number of times to succeed.  */
4908           p += OFFSET_ADDRESS_SIZE;
4909 
4910           /* Increment p past the n for when k != 0.  */
4911           EXTRACT_NUMBER_AND_INCR (k, p);
4912           if (k == 0)
4913 	    {
	      /* Back up over both operands so that the shared code can
		 read the jump offset again.  */
4914               p -= 2 * OFFSET_ADDRESS_SIZE;
4915   	      succeed_n_p = true;  /* Spaghetti code alert.  */
4916               goto handle_on_failure_jump;
4917             }
4918           continue;
4919 
4920 
	/* Skip the two number operands of `set_number_at'.  */
4921 	case set_number_at:
4922           p += 2 * OFFSET_ADDRESS_SIZE;
4923           continue;
4924 
4925 
	/* Skip the two operand cells that follow these opcodes.  */
4926 	case start_memory:
4927         case stop_memory:
4928 	  p += 2;
4929 	  continue;
4930 
4931 
4932 	default:
4933           abort (); /* We have listed all the cases.  */
4934         } /* switch *p++ */
4935 
4936       /* Getting here means we have found the possible starting
4937          characters for one path of the pattern -- and that the empty
4938          string does not match.  We need not follow this path further.
4939          Instead, look at the next alternative (remembered on the
4940          stack), or quit if no more.  The test at the top of the loop
4941          does these things.  */
4942       path_can_be_null = false;
4943       p = pend;
4944     } /* while p */
4945 
4946   /* Set `can_be_null' for the last path (also the first path, if the
4947      pattern is empty).  */
4948   bufp->can_be_null |= path_can_be_null;
4949 
4950  done:
4951   RESET_FAIL_STACK ();
4952   return 0;
4953 }
4954 
4955 #else /* not INSIDE_RECURSION */
4956 
/* Public entry point: build the fastmap for BUFP with the wide-character
   implementation when multibyte support is compiled in and the current
   locale is multibyte (MB_CUR_MAX != 1), and with the byte
   implementation otherwise.  Returns whatever the chosen implementation
   returns.  */
int
re_compile_fastmap (bufp)
     struct re_pattern_buffer *bufp;
{
# ifdef MBS_SUPPORT
  if (MB_CUR_MAX != 1)
    return wcs_re_compile_fastmap(bufp);
# endif

  return byte_re_compile_fastmap(bufp);
} /* re_compile_fastmap */
4968 #ifdef _LIBC
4969 weak_alias (__re_compile_fastmap, re_compile_fastmap)
4970 #endif
4971 
4972 
4973 /* Set REGS to hold NUM_REGS registers, storing them in STARTS and
4974    ENDS.  Subsequent matches using PATTERN_BUFFER and REGS will use
4975    this memory for recording register information.  STARTS and ENDS
4976    must be allocated using the malloc library routine, and must each
4977    be at least NUM_REGS * sizeof (regoff_t) bytes long.
4978 
4979    If NUM_REGS == 0, then subsequent matches should allocate their own
4980    register data.
4981 
4982    Unless this function is called, the first search or match using
4983    PATTERN_BUFFER will allocate its own register data, without
4984    freeing the old data.  */
4985 
4986 void
4987 re_set_registers (bufp, regs, num_regs, starts, ends)
4988     struct re_pattern_buffer *bufp;
4989     struct re_registers *regs;
4990     unsigned num_regs;
4991     regoff_t *starts, *ends;
4992 {
4993   if (num_regs)
4994     {
4995       bufp->regs_allocated = REGS_REALLOCATE;
4996       regs->num_regs = num_regs;
4997       regs->start = starts;
4998       regs->end = ends;
4999     }
5000   else
5001     {
5002       bufp->regs_allocated = REGS_UNALLOCATED;
5003       regs->num_regs = 0;
5004       regs->start = regs->end = (regoff_t *) 0;
5005     }
5006 }
5007 #ifdef _LIBC
5008 weak_alias (__re_set_registers, re_set_registers)
5009 #endif
5010 
5011 /* Searching routines.  */
5012 
/* Single-string front end to re_search_2: search STRING (SIZE bytes)
   starting at STARTPOS over RANGE positions.  No first string is
   supplied, and matching may extend to the very end of STRING (the stop
   index is SIZE).  Returns what re_search_2 returns.  */

int
re_search (bufp, string, size, startpos, range, regs)
     struct re_pattern_buffer *bufp;
     const char *string;
     int size;
     int startpos;
     int range;
     struct re_registers *regs;
{
  int stop = size;

  return re_search_2 (bufp, NULL, 0, string, size, startpos, range,
		      regs, stop);
}
5026 #ifdef _LIBC
5027 weak_alias (__re_search, re_search)
5028 #endif
5029 
5030 
5031 /* Using the compiled pattern in BUFP->buffer, first tries to match the
5032    virtual concatenation of STRING1 and STRING2, starting first at index
5033    STARTPOS, then at STARTPOS + 1, and so on.
5034 
5035    STRING1 and STRING2 have length SIZE1 and SIZE2, respectively.
5036 
5037    RANGE is how far to scan while trying to match.  RANGE = 0 means try
5038    only at STARTPOS; in general, the last start tried is STARTPOS +
5039    RANGE.
5040 
5041    In REGS, return the indices of the virtual concatenation of STRING1
5042    and STRING2 that matched the entire BUFP->buffer and its contained
5043    subexpressions.
5044 
5045    Do not consider matching one past the index STOP in the virtual
5046    concatenation of STRING1 and STRING2.
5047 
5048    We return either the position in the strings at which the match was
5049    found, -1 if no match, or -2 if error (such as failure
5050    stack overflow).  */
5051 
int
re_search_2 (bufp, string1, size1, string2, size2, startpos, range, regs, stop)
     struct re_pattern_buffer *bufp;
     const char *string1;
     const char *string2;
     int size1;
     int size2;
     int startpos;
     int range;
     struct re_registers *regs;
     int stop;
{
# ifdef MBS_SUPPORT
  /* In a multibyte locale the wide-character searcher does the work.  */
  if (MB_CUR_MAX != 1)
    return wcs_re_search_2 (bufp, string1, size1, string2, size2, startpos,
			    range, regs, stop);
# endif

  return byte_re_search_2 (bufp, string1, size1, string2, size2, startpos,
			   range, regs, stop);
} /* re_search_2 */
5071 #ifdef _LIBC
5072 weak_alias (__re_search_2, re_search_2)
5073 #endif
5074 
5075 #endif /* not INSIDE_RECURSION */
5076 
5077 #ifdef INSIDE_RECURSION
5078 
/* FREE_VAR (var): release VAR if it is non-null and reset it to NULL.
   With MATCH_MAY_ALLOCATE the storage is released through REGEX_FREE,
   otherwise through free.  The expansion is wrapped in `do ... while (0)'
   so that it behaves as a single statement, e.g. as the body of an
   `if' that has an `else'.  */
#ifdef MATCH_MAY_ALLOCATE
# define FREE_VAR(var)							      \
  do { if (var) REGEX_FREE (var); (var) = NULL; } while (0)
#else
# define FREE_VAR(var)							      \
  do { if (var) free (var); (var) = NULL; } while (0)
#endif

#ifdef WCHAR
/* Buffers for strings longer than this are obtained with TALLOC and
   released with free; shorter ones use REGEX_TALLOC and FREE_VAR.  */
# define MAX_ALLOCA_SIZE	2000

/* Release the wide-character copies of both strings and their offset
   tables, choosing the release method from SIZE1 and SIZE2 in the same
   way the allocation was chosen.  Expects size1, size2, wcs_string1,
   wcs_string2, mbs_offset1 and mbs_offset2 to be in scope.  */
# define FREE_WCS_BUFFERS() \
  do {									      \
    if (size1 > MAX_ALLOCA_SIZE)					      \
      {									      \
	free (wcs_string1);						      \
	free (mbs_offset1);						      \
      }									      \
    else								      \
      {									      \
	FREE_VAR (wcs_string1);						      \
	FREE_VAR (mbs_offset1);						      \
      }									      \
    if (size2 > MAX_ALLOCA_SIZE) 					      \
      {									      \
	free (wcs_string2);						      \
	free (mbs_offset2);						      \
      }									      \
    else								      \
      {									      \
	FREE_VAR (wcs_string2);						      \
	FREE_VAR (mbs_offset2);						      \
      }									      \
  } while (0)

#endif
5113 
5114 
5115 static int
5116 PREFIX(re_search_2) (bufp, string1, size1, string2, size2, startpos, range,
5117 		     regs, stop)
5118      struct re_pattern_buffer *bufp;
5119      const char *string1, *string2;
5120      int size1, size2;
5121      int startpos;
5122      int range;
5123      struct re_registers *regs;
5124      int stop;
5125 {
5126   int val;
5127   register char *fastmap = bufp->fastmap;
5128   register RE_TRANSLATE_TYPE translate = bufp->translate;
5129   int total_size = size1 + size2;
5130   int endpos = startpos + range;
5131 #ifdef WCHAR
5132   /* We need wchar_t* buffers correspond to cstring1, cstring2.  */
5133   wchar_t *wcs_string1 = NULL, *wcs_string2 = NULL;
5134   /* We need the size of wchar_t buffers correspond to csize1, csize2.  */
5135   int wcs_size1 = 0, wcs_size2 = 0;
5136   /* offset buffer for optimizatoin. See convert_mbs_to_wc.  */
5137   int *mbs_offset1 = NULL, *mbs_offset2 = NULL;
5138   /* They hold whether each wchar_t is binary data or not.  */
5139   char *is_binary = NULL;
5140 #endif /* WCHAR */
5141 
5142   /* Check for out-of-range STARTPOS.  */
5143   if (startpos < 0 || startpos > total_size)
5144     return -1;
5145 
5146   /* Fix up RANGE if it might eventually take us outside
5147      the virtual concatenation of STRING1 and STRING2.
5148      Make sure we won't move STARTPOS below 0 or above TOTAL_SIZE.  */
5149   if (endpos < 0)
5150     range = 0 - startpos;
5151   else if (endpos > total_size)
5152     range = total_size - startpos;
5153 
5154   /* If the search isn't to be a backwards one, don't waste time in a
5155      search for a pattern that must be anchored.  */
5156   if (bufp->used > 0 && range > 0
5157       && ((re_opcode_t) bufp->buffer[0] == begbuf
5158 	  /* `begline' is like `begbuf' if it cannot match at newlines.  */
5159 	  || ((re_opcode_t) bufp->buffer[0] == begline
5160 	      && !bufp->newline_anchor)))
5161     {
5162       if (startpos > 0)
5163 	return -1;
5164       else
5165 	range = 1;
5166     }
5167 
5168 #ifdef emacs
5169   /* In a forward search for something that starts with \=.
5170      don't keep searching past point.  */
5171   if (bufp->used > 0 && (re_opcode_t) bufp->buffer[0] == at_dot && range > 0)
5172     {
5173       range = PT - startpos;
5174       if (range <= 0)
5175 	return -1;
5176     }
5177 #endif /* emacs */
5178 
5179   /* Update the fastmap now if not correct already.  */
5180   if (fastmap && !bufp->fastmap_accurate)
5181     if (re_compile_fastmap (bufp) == -2)
5182       return -2;
5183 
5184 #ifdef WCHAR
5185   /* Allocate wchar_t array for wcs_string1 and wcs_string2 and
5186      fill them with converted string.  */
5187   if (size1 != 0)
5188     {
5189       if (size1 > MAX_ALLOCA_SIZE)
5190 	{
5191 	  wcs_string1 = TALLOC (size1 + 1, CHAR_T);
5192 	  mbs_offset1 = TALLOC (size1 + 1, int);
5193 	  is_binary = TALLOC (size1 + 1, char);
5194 	}
5195       else
5196 	{
5197 	  wcs_string1 = REGEX_TALLOC (size1 + 1, CHAR_T);
5198 	  mbs_offset1 = REGEX_TALLOC (size1 + 1, int);
5199 	  is_binary = REGEX_TALLOC (size1 + 1, char);
5200 	}
5201       if (!wcs_string1 || !mbs_offset1 || !is_binary)
5202 	{
5203 	  if (size1 > MAX_ALLOCA_SIZE)
5204 	    {
5205 	      free (wcs_string1);
5206 	      free (mbs_offset1);
5207 	      free (is_binary);
5208 	    }
5209 	  else
5210 	    {
5211 	      FREE_VAR (wcs_string1);
5212 	      FREE_VAR (mbs_offset1);
5213 	      FREE_VAR (is_binary);
5214 	    }
5215 	  return -2;
5216 	}
5217       wcs_size1 = convert_mbs_to_wcs(wcs_string1, string1, size1,
5218 				     mbs_offset1, is_binary);
5219       wcs_string1[wcs_size1] = L'\0'; /* for a sentinel  */
5220       if (size1 > MAX_ALLOCA_SIZE)
5221 	free (is_binary);
5222       else
5223 	FREE_VAR (is_binary);
5224     }
5225   if (size2 != 0)
5226     {
5227       if (size2 > MAX_ALLOCA_SIZE)
5228 	{
5229 	  wcs_string2 = TALLOC (size2 + 1, CHAR_T);
5230 	  mbs_offset2 = TALLOC (size2 + 1, int);
5231 	  is_binary = TALLOC (size2 + 1, char);
5232 	}
5233       else
5234 	{
5235 	  wcs_string2 = REGEX_TALLOC (size2 + 1, CHAR_T);
5236 	  mbs_offset2 = REGEX_TALLOC (size2 + 1, int);
5237 	  is_binary = REGEX_TALLOC (size2 + 1, char);
5238 	}
5239       if (!wcs_string2 || !mbs_offset2 || !is_binary)
5240 	{
5241 	  FREE_WCS_BUFFERS ();
5242 	  if (size2 > MAX_ALLOCA_SIZE)
5243 	    free (is_binary);
5244 	  else
5245 	    FREE_VAR (is_binary);
5246 	  return -2;
5247 	}
5248       wcs_size2 = convert_mbs_to_wcs(wcs_string2, string2, size2,
5249 				     mbs_offset2, is_binary);
5250       wcs_string2[wcs_size2] = L'\0'; /* for a sentinel  */
5251       if (size2 > MAX_ALLOCA_SIZE)
5252 	free (is_binary);
5253       else
5254 	FREE_VAR (is_binary);
5255     }
5256 #endif /* WCHAR */
5257 
5258 
5259   /* Loop through the string, looking for a place to start matching.  */
5260   for (;;)
5261     {
5262       /* If a fastmap is supplied, skip quickly over characters that
5263          cannot be the start of a match.  If the pattern can match the
5264          null string, however, we don't need to skip characters; we want
5265          the first null string.  */
5266       if (fastmap && startpos < total_size && !bufp->can_be_null)
5267 	{
5268 	  if (range > 0)	/* Searching forwards.  */
5269 	    {
5270 	      register const char *d;
5271 	      register int lim = 0;
5272 	      int irange = range;
5273 
5274               if (startpos < size1 && startpos + range >= size1)
5275                 lim = range - (size1 - startpos);
5276 
5277 	      d = (startpos >= size1 ? string2 - size1 : string1) + startpos;
5278 
5279               /* Written out as an if-else to avoid testing `translate'
5280                  inside the loop.  */
5281 	      if (translate)
5282                 while (range > lim
5283                        && !fastmap[(unsigned char)
5284 				   translate[(unsigned char) *d++]])
5285                   range--;
5286 	      else
5287                 while (range > lim && !fastmap[(unsigned char) *d++])
5288                   range--;
5289 
5290 	      startpos += irange - range;
5291 	    }
5292 	  else				/* Searching backwards.  */
5293 	    {
5294 	      register CHAR_T c = (size1 == 0 || startpos >= size1
5295 				      ? string2[startpos - size1]
5296 				      : string1[startpos]);
5297 
5298 	      if (!fastmap[(unsigned char) TRANSLATE (c)])
5299 		goto advance;
5300 	    }
5301 	}
5302 
5303       /* If can't match the null string, and that's all we have left, fail.  */
5304       if (range >= 0 && startpos == total_size && fastmap
5305           && !bufp->can_be_null)
5306        {
5307 #ifdef WCHAR
5308          FREE_WCS_BUFFERS ();
5309 #endif
5310          return -1;
5311        }
5312 
5313 #ifdef WCHAR
5314       val = wcs_re_match_2_internal (bufp, string1, size1, string2,
5315 				     size2, startpos, regs, stop,
5316 				     wcs_string1, wcs_size1,
5317 				     wcs_string2, wcs_size2,
5318 				     mbs_offset1, mbs_offset2);
5319 #else /* BYTE */
5320       val = byte_re_match_2_internal (bufp, string1, size1, string2,
5321 				      size2, startpos, regs, stop);
5322 #endif /* BYTE */
5323 
5324 #ifndef REGEX_MALLOC
5325 # ifdef C_ALLOCA
5326       alloca (0);
5327 # endif
5328 #endif
5329 
5330       if (val >= 0)
5331 	{
5332 #ifdef WCHAR
5333 	  FREE_WCS_BUFFERS ();
5334 #endif
5335 	  return startpos;
5336 	}
5337 
5338       if (val == -2)
5339 	{
5340 #ifdef WCHAR
5341 	  FREE_WCS_BUFFERS ();
5342 #endif
5343 	  return -2;
5344 	}
5345 
5346     advance:
5347       if (!range)
5348         break;
5349       else if (range > 0)
5350         {
5351           range--;
5352           startpos++;
5353         }
5354       else
5355         {
5356           range++;
5357           startpos--;
5358         }
5359     }
5360 #ifdef WCHAR
5361   FREE_WCS_BUFFERS ();
5362 #endif
5363   return -1;
5364 }
5365 
#ifdef WCHAR
/* This converts PTR, a pointer into one of the search wchar_t strings
   `string1' and `string2', into a multibyte string offset.  We use
   mbs_offset to optimize.  See convert_mbs_to_wcs.

   The wide-character index of PTR within its string is looked up in
   the matching offset table (mbs_offset1 or mbs_offset2).  For a
   pointer into `string2' the byte size of the first string, csize1,
   is added to the looked-up value.  If the relevant table is NULL the
   looked-up part is taken to be 0.

   The expansion refers to string1, string2, mbs_offset1, mbs_offset2
   and csize1 of the function it is used in, and mentions PTR more than
   once, so PTR should be free of side effects.  */
# define POINTER_TO_OFFSET(ptr)						\
  (FIRST_STRING_P (ptr)							\
   ? ((regoff_t)(mbs_offset1 != NULL? mbs_offset1[(ptr)-string1] : 0))	\
   : ((regoff_t)((mbs_offset2 != NULL? mbs_offset2[(ptr)-string2] : 0)	\
		 + csize1)))
#else /* BYTE */
/* This converts PTR, a pointer into one of the search strings `string1'
   and `string2', into an offset.  For a pointer into `string1' the
   offset is relative to the start of `string1'; for a pointer into
   `string2', size1 is added to the distance from the start of
   `string2'.

   The expansion refers to string1, string2 and size1 of the function
   it is used in, and mentions PTR more than once, so PTR should be
   free of side effects.  */
# define POINTER_TO_OFFSET(ptr)			\
  (FIRST_STRING_P (ptr)				\
   ? ((regoff_t) ((ptr) - string1))		\
   : ((regoff_t) ((ptr) - string2 + size1)))
#endif /* WCHAR */
5384 
/* Macros for dealing with the split strings in re_match_2.  They are
   expanded inside the matcher and use its local variables directly.  */

/* True when the end of the string currently being scanned (dend) is
   the match limit of the first string (end_match_1).  */
#define MATCHING_IN_FIRST_STRING  (dend == end_match_1)

/* Call before fetching a character with *d.  This switches over to
   string2 if necessary.

   This is a loop rather than a single test: after switching, d is
   compared against the new dend again, so if nothing of string2 may be
   matched (string2 == end_match_2) the second pass takes the
   `goto fail' branch.  The macro expands to a bare `while' statement
   and requires a `fail' label in the enclosing function.  */
#define PREFETCH()							\
  while (d == dend)						    	\
    {									\
      /* End of string2 => fail.  */					\
      if (dend == end_match_2) 						\
        goto fail;							\
      /* End of string1 => advance to string2.  */ 			\
      d = string2;						        \
      dend = end_match_2;						\
    }
5401 
/* Test if at very beginning or at very end of the virtual concatenation
   of `string1' and `string2'.  If only one string, it's `string2'.

   AT_STRINGS_BEG compares D with the start of `string1' when size1 is
   nonzero and with the start of `string2' otherwise; it is also true
   whenever size2 is zero, regardless of D.  AT_STRINGS_END compares D
   with end2 only.  */
#define AT_STRINGS_BEG(d) ((d) == (size1 ? string1 : string2) || !size2)
#define AT_STRINGS_END(d) ((d) == end2)
5406 
5407 
/* Test if D points to a character which is word-constituent.  We have
   two special cases to check for: if past the end of string1, look at
   the first character in string2; and if before the beginning of
   string2, look at the last character in string1.

   In both variants D appears several times in the expansion, so it
   should be free of side effects.  */
#ifdef WCHAR
/* Use internationalized API instead of SYNTAX: a wide character is
   word-constituent if iswalnum accepts it or if it is L'_'.  */
# define WORDCHAR_P(d)							\
  (iswalnum ((wint_t)((d) == end1 ? *string2				\
           : (d) == string2 - 1 ? *(end1 - 1) : *(d))) != 0		\
   || ((d) == end1 ? *string2						\
       : (d) == string2 - 1 ? *(end1 - 1) : *(d)) == L'_')
#else /* BYTE */
/* A byte is word-constituent if its SYNTAX class is Sword.  */
# define WORDCHAR_P(d)							\
  (SYNTAX ((d) == end1 ? *string2					\
           : (d) == string2 - 1 ? *(end1 - 1) : *(d))			\
   == Sword)
#endif /* WCHAR */

/* Disabled due to a compiler bug -- see comment at case wordbound.
   The definition below is compiled out and kept for reference only.  */
#if 0
/* Test if the character before D and the one at D differ with respect
   to being word-constituent.  */
#define AT_WORD_BOUNDARY(d)						\
  (AT_STRINGS_BEG (d) || AT_STRINGS_END (d)				\
   || WORDCHAR_P (d - 1) != WORDCHAR_P (d))
#endif
5434 
/* Free everything we malloc.

   FREE_VARIABLES releases the matcher's per-call working storage and is
   meant to be used on every exit path of the matching function.  It
   expands in terms of that function's variables (fail_stack, regstart,
   regend, ..., cant_free_wcs_buf) and comes in four variants:

     MATCH_MAY_ALLOCATE, WCHAR:  failure stack, register vectors, and
				 the wide-character buffers.
     MATCH_MAY_ALLOCATE, BYTE:   failure stack and register vectors.
     otherwise, WCHAR:		 only the wide-character buffers.
     otherwise, BYTE:		 nothing.

   In the WCHAR variants string1, string2, mbs_offset1 and mbs_offset2
   are released only when cant_free_wcs_buf is zero.  */
#ifdef MATCH_MAY_ALLOCATE
# ifdef WCHAR
#  define FREE_VARIABLES()						\
  do {									\
    REGEX_FREE_STACK (fail_stack.stack);				\
    FREE_VAR (regstart);						\
    FREE_VAR (regend);							\
    FREE_VAR (old_regstart);						\
    FREE_VAR (old_regend);						\
    FREE_VAR (best_regstart);						\
    FREE_VAR (best_regend);						\
    FREE_VAR (reg_info);						\
    FREE_VAR (reg_dummy);						\
    FREE_VAR (reg_info_dummy);						\
    if (!cant_free_wcs_buf)						\
      {									\
        FREE_VAR (string1);						\
        FREE_VAR (string2);						\
        FREE_VAR (mbs_offset1);						\
        FREE_VAR (mbs_offset2);						\
      }									\
  } while (0)
# else /* BYTE */
#  define FREE_VARIABLES()						\
  do {									\
    REGEX_FREE_STACK (fail_stack.stack);				\
    FREE_VAR (regstart);						\
    FREE_VAR (regend);							\
    FREE_VAR (old_regstart);						\
    FREE_VAR (old_regend);						\
    FREE_VAR (best_regstart);						\
    FREE_VAR (best_regend);						\
    FREE_VAR (reg_info);						\
    FREE_VAR (reg_dummy);						\
    FREE_VAR (reg_info_dummy);						\
  } while (0)
# endif /* WCHAR */
#else
/* Without MATCH_MAY_ALLOCATE the failure stack and register vectors
   are not released here.  */
# ifdef WCHAR
#  define FREE_VARIABLES()						\
  do {									\
    if (!cant_free_wcs_buf)						\
      {									\
        FREE_VAR (string1);						\
        FREE_VAR (string2);						\
        FREE_VAR (mbs_offset1);						\
        FREE_VAR (mbs_offset2);						\
      }									\
  } while (0)
# else /* BYTE */
#  define FREE_VARIABLES() ((void)0) /* Do nothing!  But inhibit gcc warning. */
# endif /* WCHAR */
#endif /* not MATCH_MAY_ALLOCATE */
5489 
/* Sentinel values meaning "no register is currently active"; the
   matcher initializes highest_active_reg and lowest_active_reg to them.

   These values must meet several constraints.  They must not be valid
   register values; since we have a limit of 255 registers (because
   we use only one byte in the pattern for the register number), we can
   use numbers larger than 255.  They must differ by 1, because of
   NUM_FAILURE_ITEMS above.  And the value for the lowest register must
   be larger than the value for the highest register, so we do not try
   to actually save any registers when none are active.  */
#define NO_HIGHEST_ACTIVE_REG (1 << BYTEWIDTH)
#define NO_LOWEST_ACTIVE_REG (NO_HIGHEST_ACTIVE_REG + 1)
5499 
5500 #else /* not INSIDE_RECURSION */
5501 /* Matching routines.  */
5502 
5503 #ifndef emacs   /* Emacs never uses this.  */
/* re_match is the single-string form of re_match_2: STRING (of length
   SIZE) is handed to the internal matcher as the second string, with
   an empty first string, and matching may run up to SIZE.  The
   internal matcher's return value is passed back unchanged.  */

int
re_match (bufp, string, size, pos, regs)
     struct re_pattern_buffer *bufp;
     const char *string;
     int size;
     int pos;
     struct re_registers *regs;
{
  int status;

  /* Single-byte locales (or builds without MBS_SUPPORT) use the byte
     matcher; everything else goes through the wide-character one,
     which is asked to build its own wide buffers (all-NULL tail).  */
# ifdef MBS_SUPPORT
  if (MB_CUR_MAX == 1)
# endif
    status = byte_re_match_2_internal (bufp, NULL, 0, string, size,
				       pos, regs, size);
# ifdef MBS_SUPPORT
  else
    status = wcs_re_match_2_internal (bufp, NULL, 0, string, size,
				      pos, regs, size,
				      NULL, 0, NULL, 0, NULL, NULL);
# endif

  /* Force an alloca cleanup now that the matcher has returned.  */
# if !defined REGEX_MALLOC && defined C_ALLOCA
  alloca (0);
# endif

  return status;
}
5530 # ifdef _LIBC
5531 weak_alias (__re_match, re_match)
5532 # endif
5533 #endif /* not emacs */
5534 
5535 #endif /* not INSIDE_RECURSION */
5536 
5537 #ifdef INSIDE_RECURSION
/* Forward declarations of static helpers.  Each name is wrapped in
   PREFIX() and the character types are CHAR_T/UCHAR_T, so one set of
   declarations is emitted per flavor of the matcher (presumably the
   same byte/wcs split as the *_re_match_2_internal functions --
   confirm at the definition of PREFIX).  */
static boolean PREFIX(group_match_null_string_p) _RE_ARGS ((UCHAR_T **p,
						    UCHAR_T *end,
					PREFIX(register_info_type) *reg_info));
static boolean PREFIX(alt_match_null_string_p) _RE_ARGS ((UCHAR_T *p,
						  UCHAR_T *end,
					PREFIX(register_info_type) *reg_info));
static boolean PREFIX(common_op_match_null_string_p) _RE_ARGS ((UCHAR_T **p,
							UCHAR_T *end,
					PREFIX(register_info_type) *reg_info));
static int PREFIX(bcmp_translate) _RE_ARGS ((const CHAR_T *s1, const CHAR_T *s2,
				     int len, char *translate));
5549 #else /* not INSIDE_RECURSION */
5550 
/* re_match_2 matches the compiled pattern in BUFP against the
   (virtual) concatenation of STRING1 and STRING2 (of length SIZE1
   and SIZE2, respectively).  We start matching at POS, and stop
   matching at STOP.

   If REGS is non-null and the `no_sub' field of BUFP is zero, we
   store offsets for the substring each group matched in REGS.  See the
   documentation for exactly how many groups we fill.

   We return -1 if no match, -2 if an internal error (such as the
   failure stack overflowing).  Otherwise, we return the length of the
   matched substring.  */

int
re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop)
     struct re_pattern_buffer *bufp;
     const char *string1;
     const char *string2;
     int size1;
     int size2;
     int pos;
     struct re_registers *regs;
     int stop;
{
  int status;

  /* Single-byte locales (or builds without MBS_SUPPORT) use the byte
     matcher; everything else goes through the wide-character one,
     which is asked to build its own wide buffers (all-NULL tail).  */
# ifdef MBS_SUPPORT
  if (MB_CUR_MAX == 1)
# endif
    status = byte_re_match_2_internal (bufp, string1, size1, string2, size2,
				       pos, regs, stop);
# ifdef MBS_SUPPORT
  else
    status = wcs_re_match_2_internal (bufp, string1, size1, string2, size2,
				      pos, regs, stop,
				      NULL, 0, NULL, 0, NULL, NULL);
# endif

  /* Force an alloca cleanup now that the matcher has returned.  */
#if !defined REGEX_MALLOC && defined C_ALLOCA
  alloca (0);
#endif

  return status;
}
5591 #ifdef _LIBC
5592 weak_alias (__re_match_2, re_match_2)
5593 #endif
5594 
5595 #endif /* not INSIDE_RECURSION */
5596 
5597 #ifdef INSIDE_RECURSION
5598 
5599 #ifdef WCHAR
5600 static int count_mbs_length PARAMS ((int *, int));
5601 
/* Translate a byte offset into a multibyte string into the number of
   wide characters that precede it.

   OFFSET_BUFFER is the offset table built for the string (see
   convert_mbs_to_wcs); this function relies on OFFSET_BUFFER[i] >= i
   for every i and on the entries being in ascending order, and it
   searches the table for an entry equal to LENGTH.

   Returns
     -1  if LENGTH is negative, or if no entry below index LENGTH
	 equals LENGTH (LENGTH is then not a character boundary);
      0  if OFFSET_BUFFER is NULL;
   and otherwise the index I for which OFFSET_BUFFER[I] == LENGTH.

   NOTE(review): OFFSET_BUFFER[LENGTH] is read unconditionally, so the
   table must hold at least LENGTH + 1 readable entries with meaningful
   contents -- confirm against convert_mbs_to_wcs and the callers.  */

static int
count_mbs_length(offset_buffer, length)
     int *offset_buffer;
     int length;
{
  int upper, lower;

  /* Check whether the size is valid.  */
  if (length < 0)
    return -1;

  if (offset_buffer == NULL)
    return 0;

  /* If there are no multibyte characters, offset_buffer[i] == i.
     Optimize for this case.  */
  if (offset_buffer[length] == length)
    return length;

  /* Binary search in the open interval (lower, upper).  LENGTH is a
     valid upper bound because offset_buffer[i] >= i for all i.  */
  upper = length;
  lower = 0;

  for (;;)
    {
      /* Written as lower + half the gap so the midpoint cannot
         overflow `int', unlike (lower + upper) / 2.  */
      int middle = lower + (upper - lower) / 2;

      /* The interval is empty once the midpoint collapses onto LOWER;
         with lower <= upper it can equal UPPER only when it also
         equals LOWER, so one test covers both.  */
      if (middle == lower)
	break;

      if (offset_buffer[middle] > length)
	upper = middle;
      else if (offset_buffer[middle] < length)
	lower = middle;
      else
	return middle;
    }

  return -1;
}
5645 #endif /* WCHAR */
5646 
5647 /* This is a separate function so that we can force an alloca cleanup
5648    afterwards.  */
5649 #ifdef WCHAR
5650 static int
wcs_re_match_2_internal(bufp,cstring1,csize1,cstring2,csize2,pos,regs,stop,string1,size1,string2,size2,mbs_offset1,mbs_offset2)5651 wcs_re_match_2_internal (bufp, cstring1, csize1, cstring2, csize2, pos,
5652 			 regs, stop, string1, size1, string2, size2,
5653 			 mbs_offset1, mbs_offset2)
5654      struct re_pattern_buffer *bufp;
5655      const char *cstring1, *cstring2;
5656      int csize1, csize2;
5657      int pos;
5658      struct re_registers *regs;
5659      int stop;
5660      /* string1 == string2 == NULL means string1/2, size1/2 and
5661 	mbs_offset1/2 need seting up in this function.  */
5662      /* We need wchar_t* buffers correspond to cstring1, cstring2.  */
5663      wchar_t *string1, *string2;
5664      /* We need the size of wchar_t buffers correspond to csize1, csize2.  */
5665      int size1, size2;
5666      /* offset buffer for optimizatoin. See convert_mbs_to_wc.  */
5667      int *mbs_offset1, *mbs_offset2;
5668 #else /* BYTE */
5669 static int
5670 byte_re_match_2_internal (bufp, string1, size1,string2, size2, pos,
5671 			  regs, stop)
5672      struct re_pattern_buffer *bufp;
5673      const char *string1, *string2;
5674      int size1, size2;
5675      int pos;
5676      struct re_registers *regs;
5677      int stop;
5678 #endif /* BYTE */
5679 {
5680   /* General temporaries.  */
5681   int mcnt;
5682   UCHAR_T *p1;
5683 #ifdef WCHAR
5684   /* They hold whether each wchar_t is binary data or not.  */
5685   char *is_binary = NULL;
5686   /* If true, we can't free string1/2, mbs_offset1/2.  */
5687   int cant_free_wcs_buf = 1;
5688 #endif /* WCHAR */
5689 
5690   /* Just past the end of the corresponding string.  */
5691   const CHAR_T *end1, *end2;
5692 
5693   /* Pointers into string1 and string2, just past the last characters in
5694      each to consider matching.  */
5695   const CHAR_T *end_match_1, *end_match_2;
5696 
5697   /* Where we are in the data, and the end of the current string.  */
5698   const CHAR_T *d, *dend;
5699 
5700   /* Where we are in the pattern, and the end of the pattern.  */
5701 #ifdef WCHAR
5702   UCHAR_T *pattern, *p;
5703   register UCHAR_T *pend;
5704 #else /* BYTE */
5705   UCHAR_T *p = bufp->buffer;
5706   register UCHAR_T *pend = p + bufp->used;
5707 #endif /* WCHAR */
5708 
5709   /* Mark the opcode just after a start_memory, so we can test for an
5710      empty subpattern when we get to the stop_memory.  */
5711   UCHAR_T *just_past_start_mem = 0;
5712 
5713   /* We use this to map every character in the string.  */
5714   RE_TRANSLATE_TYPE translate = bufp->translate;
5715 
5716   /* Failure point stack.  Each place that can handle a failure further
5717      down the line pushes a failure point on this stack.  It consists of
5718      restart, regend, and reg_info for all registers corresponding to
5719      the subexpressions we're currently inside, plus the number of such
5720      registers, and, finally, two char *'s.  The first char * is where
5721      to resume scanning the pattern; the second one is where to resume
5722      scanning the strings.  If the latter is zero, the failure point is
5723      a ``dummy''; if a failure happens and the failure point is a dummy,
5724      it gets discarded and the next next one is tried.  */
5725 #ifdef MATCH_MAY_ALLOCATE /* otherwise, this is global.  */
5726   PREFIX(fail_stack_type) fail_stack;
5727 #endif
5728 #ifdef DEBUG
5729   static unsigned failure_id;
5730   unsigned nfailure_points_pushed = 0, nfailure_points_popped = 0;
5731 #endif
5732 
5733 #ifdef REL_ALLOC
5734   /* This holds the pointer to the failure stack, when
5735      it is allocated relocatably.  */
5736   fail_stack_elt_t *failure_stack_ptr;
5737 #endif
5738 
5739   /* We fill all the registers internally, independent of what we
5740      return, for use in backreferences.  The number here includes
5741      an element for register zero.  */
5742   size_t num_regs = bufp->re_nsub + 1;
5743 
5744   /* The currently active registers.  */
5745   active_reg_t lowest_active_reg = NO_LOWEST_ACTIVE_REG;
5746   active_reg_t highest_active_reg = NO_HIGHEST_ACTIVE_REG;
5747 
5748   /* Information on the contents of registers. These are pointers into
5749      the input strings; they record just what was matched (on this
5750      attempt) by a subexpression part of the pattern, that is, the
5751      regnum-th regstart pointer points to where in the pattern we began
5752      matching and the regnum-th regend points to right after where we
5753      stopped matching the regnum-th subexpression.  (The zeroth register
5754      keeps track of what the whole pattern matches.)  */
5755 #ifdef MATCH_MAY_ALLOCATE /* otherwise, these are global.  */
5756   const CHAR_T **regstart, **regend;
5757 #endif
5758 
5759   /* If a group that's operated upon by a repetition operator fails to
5760      match anything, then the register for its start will need to be
5761      restored because it will have been set to wherever in the string we
5762      are when we last see its open-group operator.  Similarly for a
5763      register's end.  */
5764 #ifdef MATCH_MAY_ALLOCATE /* otherwise, these are global.  */
5765   const CHAR_T **old_regstart, **old_regend;
5766 #endif
5767 
5768   /* The is_active field of reg_info helps us keep track of which (possibly
5769      nested) subexpressions we are currently in. The matched_something
5770      field of reg_info[reg_num] helps us tell whether or not we have
5771      matched any of the pattern so far this time through the reg_num-th
5772      subexpression.  These two fields get reset each time through any
5773      loop their register is in.  */
5774 #ifdef MATCH_MAY_ALLOCATE /* otherwise, this is global.  */
5775   PREFIX(register_info_type) *reg_info;
5776 #endif
5777 
5778   /* The following record the register info as found in the above
5779      variables when we find a match better than any we've seen before.
5780      This happens as we backtrack through the failure points, which in
5781      turn happens only if we have not yet matched the entire string. */
5782   unsigned best_regs_set = false;
5783 #ifdef MATCH_MAY_ALLOCATE /* otherwise, these are global.  */
5784   const CHAR_T **best_regstart, **best_regend;
5785 #endif
5786 
5787   /* Logically, this is `best_regend[0]'.  But we don't want to have to
5788      allocate space for that if we're not allocating space for anything
5789      else (see below).  Also, we never need info about register 0 for
5790      any of the other register vectors, and it seems rather a kludge to
5791      treat `best_regend' differently than the rest.  So we keep track of
5792      the end of the best match so far in a separate variable.  We
5793      initialize this to NULL so that when we backtrack the first time
5794      and need to test it, it's not garbage.  */
5795   const CHAR_T *match_end = NULL;
5796 
5797   /* This helps SET_REGS_MATCHED avoid doing redundant work.  */
5798   int set_regs_matched_done = 0;
5799 
5800   /* Used when we pop values we don't care about.  */
5801 #ifdef MATCH_MAY_ALLOCATE /* otherwise, these are global.  */
5802   const CHAR_T **reg_dummy;
5803   PREFIX(register_info_type) *reg_info_dummy;
5804 #endif
5805 
5806 #ifdef DEBUG
5807   /* Counts the total number of registers pushed.  */
5808   unsigned num_regs_pushed = 0;
5809 #endif
5810 
5811   /* Definitions for state transitions.  More efficiently for gcc.  */
5812 #ifdef __GNUC__
5813 # if defined HAVE_SUBTRACT_LOCAL_LABELS && defined SHARED
5814 #  define NEXT \
5815       do								      \
5816 	{								      \
5817 	  int offset;							      \
5818 	  const void *__unbounded ptr;					      \
5819 	  offset = (p == pend						      \
5820 		    ? 0 : jmptable[SWITCH_ENUM_CAST ((re_opcode_t) *p++)]);   \
5821 	  ptr = &&end_of_pattern + offset;				      \
5822 	  goto *ptr;							      \
5823 	}								      \
5824       while (0)
5825 #  define REF(x) \
5826   &&label_##x - &&end_of_pattern
5827 #  define JUMP_TABLE_TYPE const int
5828 # else
5829 #  define NEXT \
5830       do								      \
5831 	{								      \
5832 	  const void *__unbounded ptr;					      \
5833 	  ptr = (p == pend ? &&end_of_pattern				      \
5834 		 : jmptable[SWITCH_ENUM_CAST ((re_opcode_t) *p++)]);	      \
5835 	  goto *ptr;							      \
5836 	}								      \
5837       while (0)
5838 #  define REF(x) \
5839   &&label_##x
5840 #  define JUMP_TABLE_TYPE const void *const
5841 # endif
5842 # define CASE(x) label_##x
5843   static JUMP_TABLE_TYPE jmptable[] =
5844     {
5845     REF (no_op),
5846     REF (succeed),
5847     REF (exactn),
5848 # ifdef MBS_SUPPORT
5849     REF (exactn_bin),
5850 # endif
5851     REF (anychar),
5852     REF (charset),
5853     REF (charset_not),
5854     REF (start_memory),
5855     REF (stop_memory),
5856     REF (duplicate),
5857     REF (begline),
5858     REF (endline),
5859     REF (begbuf),
5860     REF (endbuf),
5861     REF (jump),
5862     REF (jump_past_alt),
5863     REF (on_failure_jump),
5864     REF (on_failure_keep_string_jump),
5865     REF (pop_failure_jump),
5866     REF (maybe_pop_jump),
5867     REF (dummy_failure_jump),
5868     REF (push_dummy_failure),
5869     REF (succeed_n),
5870     REF (jump_n),
5871     REF (set_number_at),
5872     REF (wordchar),
5873     REF (notwordchar),
5874     REF (wordbeg),
5875     REF (wordend),
5876     REF (wordbound),
5877     REF (notwordbound)
5878 # ifdef emacs
5879     ,REF (before_dot),
5880     REF (at_dot),
5881     REF (after_dot),
5882     REF (syntaxspec),
5883     REF (notsyntaxspec)
5884 # endif
5885     };
5886 #else
5887 # define NEXT \
5888   break
5889 # define CASE(x) \
5890   case x
5891 #endif
5892 
5893   DEBUG_PRINT1 ("\n\nEntering re_match_2.\n");
5894 
5895   INIT_FAIL_STACK ();
5896 
5897 #ifdef MATCH_MAY_ALLOCATE
5898   /* Do not bother to initialize all the register variables if there are
5899      no groups in the pattern, as it takes a fair amount of time.  If
5900      there are groups, we include space for register 0 (the whole
5901      pattern), even though we never use it, since it simplifies the
5902      array indexing.  We should fix this.  */
5903   if (bufp->re_nsub)
5904     {
5905       regstart = REGEX_TALLOC (num_regs, const CHAR_T *);
5906       regend = REGEX_TALLOC (num_regs, const CHAR_T *);
5907       old_regstart = REGEX_TALLOC (num_regs, const CHAR_T *);
5908       old_regend = REGEX_TALLOC (num_regs, const CHAR_T *);
5909       best_regstart = REGEX_TALLOC (num_regs, const CHAR_T *);
5910       best_regend = REGEX_TALLOC (num_regs, const CHAR_T *);
5911       reg_info = REGEX_TALLOC (num_regs, PREFIX(register_info_type));
5912       reg_dummy = REGEX_TALLOC (num_regs, const CHAR_T *);
5913       reg_info_dummy = REGEX_TALLOC (num_regs, PREFIX(register_info_type));
5914 
5915       if (!(regstart && regend && old_regstart && old_regend && reg_info
5916             && best_regstart && best_regend && reg_dummy && reg_info_dummy))
5917         {
5918           FREE_VARIABLES ();
5919           return -2;
5920         }
5921     }
5922   else
5923     {
5924       /* We must initialize all our variables to NULL, so that
5925          `FREE_VARIABLES' doesn't try to free them.  */
5926       regstart = regend = old_regstart = old_regend = best_regstart
5927         = best_regend = reg_dummy = NULL;
5928       reg_info = reg_info_dummy = (PREFIX(register_info_type) *) NULL;
5929     }
5930 #endif /* MATCH_MAY_ALLOCATE */
5931 
5932   /* The starting position is bogus.  */
5933 #ifdef WCHAR
5934   if (pos < 0 || pos > csize1 + csize2)
5935 #else /* BYTE */
5936   if (pos < 0 || pos > size1 + size2)
5937 #endif
5938     {
5939       FREE_VARIABLES ();
5940       return -1;
5941     }
5942 
5943 #ifdef WCHAR
5944   /* Allocate wchar_t array for string1 and string2 and
5945      fill them with converted string.  */
5946   if (string1 == NULL && string2 == NULL)
5947     {
5948       /* We need seting up buffers here.  */
5949 
5950       /* We must free wcs buffers in this function.  */
5951       cant_free_wcs_buf = 0;
5952 
5953       if (csize1 != 0)
5954 	{
5955 	  string1 = REGEX_TALLOC (csize1 + 1, CHAR_T);
5956 	  mbs_offset1 = REGEX_TALLOC (csize1 + 1, int);
5957 	  is_binary = REGEX_TALLOC (csize1 + 1, char);
5958 	  if (!string1 || !mbs_offset1 || !is_binary)
5959 	    {
5960 	      FREE_VAR (string1);
5961 	      FREE_VAR (mbs_offset1);
5962 	      FREE_VAR (is_binary);
5963 	      return -2;
5964 	    }
5965 	}
5966       if (csize2 != 0)
5967 	{
5968 	  string2 = REGEX_TALLOC (csize2 + 1, CHAR_T);
5969 	  mbs_offset2 = REGEX_TALLOC (csize2 + 1, int);
5970 	  is_binary = REGEX_TALLOC (csize2 + 1, char);
5971 	  if (!string2 || !mbs_offset2 || !is_binary)
5972 	    {
5973 	      FREE_VAR (string1);
5974 	      FREE_VAR (mbs_offset1);
5975 	      FREE_VAR (string2);
5976 	      FREE_VAR (mbs_offset2);
5977 	      FREE_VAR (is_binary);
5978 	      return -2;
5979 	    }
5980 	  size2 = convert_mbs_to_wcs(string2, cstring2, csize2,
5981 				     mbs_offset2, is_binary);
5982 	  string2[size2] = L'\0'; /* for a sentinel  */
5983 	  FREE_VAR (is_binary);
5984 	}
5985     }
5986 
5987   /* We need to cast pattern to (wchar_t*), because we casted this compiled
5988      pattern to (char*) in regex_compile.  */
5989   p = pattern = (CHAR_T*)bufp->buffer;
5990   pend = (CHAR_T*)(bufp->buffer + bufp->used);
5991 
5992 #endif /* WCHAR */
5993 
5994   /* Initialize subexpression text positions to -1 to mark ones that no
5995      start_memory/stop_memory has been seen for. Also initialize the
5996      register information struct.  */
5997   for (mcnt = 1; (unsigned) mcnt < num_regs; mcnt++)
5998     {
5999       regstart[mcnt] = regend[mcnt]
6000         = old_regstart[mcnt] = old_regend[mcnt] = REG_UNSET_VALUE;
6001 
6002       REG_MATCH_NULL_STRING_P (reg_info[mcnt]) = MATCH_NULL_UNSET_VALUE;
6003       IS_ACTIVE (reg_info[mcnt]) = 0;
6004       MATCHED_SOMETHING (reg_info[mcnt]) = 0;
6005       EVER_MATCHED_SOMETHING (reg_info[mcnt]) = 0;
6006     }
6007 
6008   /* We move `string1' into `string2' if the latter's empty -- but not if
6009      `string1' is null.  */
6010   if (size2 == 0 && string1 != NULL)
6011     {
6012       string2 = string1;
6013       size2 = size1;
6014       string1 = 0;
6015       size1 = 0;
6016 #ifdef WCHAR
6017       mbs_offset2 = mbs_offset1;
6018       csize2 = csize1;
6019       mbs_offset1 = NULL;
6020       csize1 = 0;
6021 #endif
6022     }
6023   end1 = string1 + size1;
6024   end2 = string2 + size2;
6025 
6026   /* Compute where to stop matching, within the two strings.  */
6027 #ifdef WCHAR
6028   if (stop <= csize1)
6029     {
6030       mcnt = count_mbs_length(mbs_offset1, stop);
6031       end_match_1 = string1 + mcnt;
6032       end_match_2 = string2;
6033     }
6034   else
6035     {
6036       if (stop > csize1 + csize2)
6037 	stop = csize1 + csize2;
6038       end_match_1 = end1;
6039       mcnt = count_mbs_length(mbs_offset2, stop-csize1);
6040       end_match_2 = string2 + mcnt;
6041     }
6042   if (mcnt < 0)
6043     { /* count_mbs_length return error.  */
6044       FREE_VARIABLES ();
6045       return -1;
6046     }
6047 #else
6048   if (stop <= size1)
6049     {
6050       end_match_1 = string1 + stop;
6051       end_match_2 = string2;
6052     }
6053   else
6054     {
6055       end_match_1 = end1;
6056       end_match_2 = string2 + stop - size1;
6057     }
6058 #endif /* WCHAR */
6059 
6060   /* `p' scans through the pattern as `d' scans through the data.
6061      `dend' is the end of the input string that `d' points within.  `d'
6062      is advanced into the following input string whenever necessary, but
6063      this happens before fetching; therefore, at the beginning of the
6064      loop, `d' can be pointing at the end of a string, but it cannot
6065      equal `string2'.  */
6066 #ifdef WCHAR
6067   if (size1 > 0 && pos <= csize1)
6068     {
6069       mcnt = count_mbs_length(mbs_offset1, pos);
6070       d = string1 + mcnt;
6071       dend = end_match_1;
6072     }
6073   else
6074     {
6075       mcnt = count_mbs_length(mbs_offset2, pos-csize1);
6076       d = string2 + mcnt;
6077       dend = end_match_2;
6078     }
6079 
6080   if (mcnt < 0)
6081     { /* count_mbs_length return error.  */
6082       FREE_VARIABLES ();
6083       return -1;
6084     }
6085 #else
6086   if (size1 > 0 && pos <= size1)
6087     {
6088       d = string1 + pos;
6089       dend = end_match_1;
6090     }
6091   else
6092     {
6093       d = string2 + pos - size1;
6094       dend = end_match_2;
6095     }
6096 #endif /* WCHAR */
6097 
6098   DEBUG_PRINT1 ("The compiled pattern is:\n");
6099   DEBUG_PRINT_COMPILED_PATTERN (bufp, p, pend);
6100   DEBUG_PRINT1 ("The string to match is: `");
6101   DEBUG_PRINT_DOUBLE_STRING (d, string1, size1, string2, size2);
6102   DEBUG_PRINT1 ("'\n");
6103 
6104   /* This loops over pattern commands.  It exits by returning from the
6105      function if the match is complete, or it drops through if the match
6106      fails at this starting point in the input data.  */
6107   for (;;)
6108     {
6109 #ifdef _LIBC
6110       DEBUG_PRINT2 ("\n%p: ", p);
6111 #else
6112       DEBUG_PRINT2 ("\n0x%x: ", p);
6113 #endif
6114 
6115 #ifdef __GNUC__
6116       NEXT;
6117 #else
6118       if (p == pend)
6119 #endif
6120 	{
6121 #ifdef __GNUC__
6122 	end_of_pattern:
6123 #endif
6124 	  /* End of pattern means we might have succeeded.  */
6125 	  DEBUG_PRINT1 ("end of pattern ... ");
6126 
6127 	  /* If we haven't matched the entire string, and we want the
6128 	     longest match, try backtracking.  */
6129 	  if (d != end_match_2)
6130 	    {
6131 	      /* 1 if this match ends in the same string (string1 or string2)
6132 		 as the best previous match.  */
6133 	      boolean same_str_p = (FIRST_STRING_P (match_end)
6134 				    == MATCHING_IN_FIRST_STRING);
6135 	      /* 1 if this match is the best seen so far.  */
6136 	      boolean best_match_p;
6137 
6138 	      /* AIX compiler got confused when this was combined
6139 		 with the previous declaration.  */
6140 	      if (same_str_p)
6141 		best_match_p = d > match_end;
6142 	      else
6143 		best_match_p = !MATCHING_IN_FIRST_STRING;
6144 
6145 	      DEBUG_PRINT1 ("backtracking.\n");
6146 
6147 	      if (!FAIL_STACK_EMPTY ())
6148 		{ /* More failure points to try.  */
6149 
6150 		  /* If exceeds best match so far, save it.  */
6151 		  if (!best_regs_set || best_match_p)
6152 		    {
6153 		      best_regs_set = true;
6154 		      match_end = d;
6155 
6156 		      DEBUG_PRINT1 ("\nSAVING match as best so far.\n");
6157 
6158 		      for (mcnt = 1; (unsigned) mcnt < num_regs; mcnt++)
6159 			{
6160 			  best_regstart[mcnt] = regstart[mcnt];
6161 			  best_regend[mcnt] = regend[mcnt];
6162 			}
6163 		    }
6164 		  goto fail;
6165 		}
6166 
6167 	      /* If no failure points, don't restore garbage.  And if
6168 		 last match is real best match, don't restore second
6169 		 best one. */
6170 	      else if (best_regs_set && !best_match_p)
6171 		{
6172 		restore_best_regs:
6173 		  /* Restore best match.  It may happen that `dend ==
6174 		     end_match_1' while the restored d is in string2.
6175 		     For example, the pattern `x.*y.*z' against the
6176 		     strings `x-' and `y-z-', if the two strings are
6177 		     not consecutive in memory.  */
6178 		  DEBUG_PRINT1 ("Restoring best registers.\n");
6179 
6180 		  d = match_end;
6181 		  dend = ((d >= string1 && d <= end1)
6182 			  ? end_match_1 : end_match_2);
6183 
6184 		  for (mcnt = 1; (unsigned) mcnt < num_regs; mcnt++)
6185 		    {
6186 		      regstart[mcnt] = best_regstart[mcnt];
6187 		      regend[mcnt] = best_regend[mcnt];
6188 		    }
6189 		}
6190 	    } /* d != end_match_2 */
6191 
6192 	succeed_label:
6193 	  DEBUG_PRINT1 ("Accepting match.\n");
6194 	  /* If caller wants register contents data back, do it.  */
6195 	  if (regs && !bufp->no_sub)
6196 	    {
6197 	      /* Have the register data arrays been allocated?  */
6198 	      if (bufp->regs_allocated == REGS_UNALLOCATED)
6199 		{ /* No.  So allocate them with malloc.  We need one
6200 		     extra element beyond `num_regs' for the `-1' marker
6201 		     GNU code uses.  */
6202 		  regs->num_regs = MAX (RE_NREGS, num_regs + 1);
6203 		  regs->start = TALLOC (regs->num_regs, regoff_t);
6204 		  regs->end = TALLOC (regs->num_regs, regoff_t);
6205 		  if (regs->start == NULL || regs->end == NULL)
6206 		    {
6207 		      FREE_VARIABLES ();
6208 		      return -2;
6209 		    }
6210 		  bufp->regs_allocated = REGS_REALLOCATE;
6211 		}
6212 	      else if (bufp->regs_allocated == REGS_REALLOCATE)
6213 		{ /* Yes.  If we need more elements than were already
6214 		     allocated, reallocate them.  If we need fewer, just
6215 		     leave it alone.  */
6216 		  if (regs->num_regs < num_regs + 1)
6217 		    {
6218 		      regs->num_regs = num_regs + 1;
6219 		      RETALLOC (regs->start, regs->num_regs, regoff_t);
6220 		      RETALLOC (regs->end, regs->num_regs, regoff_t);
6221 		      if (regs->start == NULL || regs->end == NULL)
6222 			{
6223 			  FREE_VARIABLES ();
6224 			  return -2;
6225 			}
6226 		    }
6227 		}
6228 	      else
6229 		{
6230 		  /* These braces fend off a "empty body in an else-statement"
6231 		     warning under GCC when assert expands to nothing.  */
6232 		  assert (bufp->regs_allocated == REGS_FIXED);
6233 		}
6234 
6235 	      /* Convert the pointer data in `regstart' and `regend' to
6236 		 indices.  Register zero has to be set differently,
6237 		 since we haven't kept track of any info for it.  */
6238 	      if (regs->num_regs > 0)
6239 		{
6240 		  regs->start[0] = pos;
6241 #ifdef WCHAR
6242 		  if (MATCHING_IN_FIRST_STRING)
6243 		    regs->end[0] = (mbs_offset1 != NULL ?
6244 				    mbs_offset1[d-string1] : 0);
6245 		  else
6246 		    regs->end[0] = csize1 + (mbs_offset2 != NULL
6247 					     ? mbs_offset2[d-string2] : 0);
6248 #else
6249 		  regs->end[0] = (MATCHING_IN_FIRST_STRING
6250 				  ? ((regoff_t) (d - string1))
6251 				  : ((regoff_t) (d - string2 + size1)));
6252 #endif /* WCHAR */
6253 		}
6254 
6255 	      /* Go through the first `min (num_regs, regs->num_regs)'
6256 		 registers, since that is all we initialized.  */
6257 	      for (mcnt = 1; (unsigned) mcnt < MIN (num_regs, regs->num_regs);
6258 		   mcnt++)
6259 		{
6260 		  if (REG_UNSET (regstart[mcnt]) || REG_UNSET (regend[mcnt]))
6261 		    regs->start[mcnt] = regs->end[mcnt] = -1;
6262 		  else
6263 		    {
6264 		      regs->start[mcnt]
6265 			= (regoff_t) POINTER_TO_OFFSET (regstart[mcnt]);
6266 		      regs->end[mcnt]
6267 			= (regoff_t) POINTER_TO_OFFSET (regend[mcnt]);
6268 		    }
6269 		}
6270 
6271 	      /* If the regs structure we return has more elements than
6272 		 were in the pattern, set the extra elements to -1.  If
6273 		 we (re)allocated the registers, this is the case,
6274 		 because we always allocate enough to have at least one
6275 		 -1 at the end.  */
6276 	      for (mcnt = num_regs; (unsigned) mcnt < regs->num_regs; mcnt++)
6277 		regs->start[mcnt] = regs->end[mcnt] = -1;
6278 	    } /* regs && !bufp->no_sub */
6279 
6280 	  DEBUG_PRINT4 ("%u failure points pushed, %u popped (%u remain).\n",
6281 			nfailure_points_pushed, nfailure_points_popped,
6282 			nfailure_points_pushed - nfailure_points_popped);
6283 	  DEBUG_PRINT2 ("%u registers pushed.\n", num_regs_pushed);
6284 
6285 #ifdef WCHAR
6286 	  if (MATCHING_IN_FIRST_STRING)
6287 	    mcnt = mbs_offset1 != NULL ? mbs_offset1[d-string1] : 0;
6288 	  else
6289 	    mcnt = (mbs_offset2 != NULL ? mbs_offset2[d-string2] : 0) +
6290 	      csize1;
6291 	  mcnt -= pos;
6292 #else
6293 	  mcnt = d - pos - (MATCHING_IN_FIRST_STRING
6294 			    ? string1 : string2 - size1);
6295 #endif /* WCHAR */
6296 
6297 	  DEBUG_PRINT2 ("Returning %d from re_match_2.\n", mcnt);
6298 
6299 	  FREE_VARIABLES ();
6300 	  return mcnt;
6301 	}
6302 
6303 #ifndef __GNUC__
6304       /* Otherwise match next pattern command.  */
6305       switch (SWITCH_ENUM_CAST ((re_opcode_t) *p++))
6306 	{
6307 #endif
6308         /* Ignore these.  Used to ignore the n of succeed_n's which
6309            currently have n == 0.  */
6310         CASE (no_op):
6311           DEBUG_PRINT1 ("EXECUTING no_op.\n");
6312           NEXT;
6313 
6314 	CASE (succeed):
6315           DEBUG_PRINT1 ("EXECUTING succeed.\n");
6316 	  goto succeed_label;
6317 
6318         /* Match the next n pattern characters exactly.  The following
6319            byte in the pattern defines n, and the n bytes after that
6320            are the characters to match.  */
6321 	CASE (exactn):
6322 #ifdef MBS_SUPPORT
6323 	CASE (exactn_bin):
6324 #endif
6325 	  mcnt = *p++;
6326           DEBUG_PRINT2 ("EXECUTING exactn %d.\n", mcnt);
6327 
6328           /* This is written out as an if-else so we don't waste time
6329              testing `translate' inside the loop.  */
6330           if (translate)
6331 	    {
6332 	      do
6333 		{
6334 		  PREFETCH ();
6335 #ifdef WCHAR
6336 		  if (*d <= 0xff)
6337 		    {
6338 		      if ((UCHAR_T) translate[(unsigned char) *d++]
6339 			  != (UCHAR_T) *p++)
6340 			goto fail;
6341 		    }
6342 		  else
6343 		    {
6344 		      if (*d++ != (CHAR_T) *p++)
6345 			goto fail;
6346 		    }
6347 #else
6348 		  if ((UCHAR_T) translate[(unsigned char) *d++]
6349 		      != (UCHAR_T) *p++)
6350                     goto fail;
6351 #endif /* WCHAR */
6352 		}
6353 	      while (--mcnt);
6354 	    }
6355 	  else
6356 	    {
6357 	      do
6358 		{
6359 		  PREFETCH ();
6360 		  if (*d++ != (CHAR_T) *p++) goto fail;
6361 		}
6362 	      while (--mcnt);
6363 	    }
6364 	  SET_REGS_MATCHED ();
6365           NEXT;
6366 
6367 
6368         /* Match any character except possibly a newline or a null.  */
6369 	CASE (anychar):
6370           DEBUG_PRINT1 ("EXECUTING anychar.\n");
6371 
6372           PREFETCH ();
6373 
6374           if ((!(bufp->syntax & RE_DOT_NEWLINE) && TRANSLATE (*d) == '\n')
6375               || (bufp->syntax & RE_DOT_NOT_NULL && TRANSLATE (*d) == '\000'))
6376 	    goto fail;
6377 
6378           SET_REGS_MATCHED ();
6379           DEBUG_PRINT2 ("  Matched `%ld'.\n", (long int) *d);
6380           d++;
6381 	  NEXT;
6382 
6383 
6384 	CASE (charset):
6385 	CASE (charset_not):
6386 	  {
6387 	    register UCHAR_T c;
6388 #ifdef WCHAR
6389 	    unsigned int i, char_class_length, coll_symbol_length,
6390               equiv_class_length, ranges_length, chars_length, length;
6391 	    CHAR_T *workp, *workp2, *charset_top;
6392 #define WORK_BUFFER_SIZE 128
6393             CHAR_T str_buf[WORK_BUFFER_SIZE];
6394 # ifdef _LIBC
6395 	    uint32_t nrules;
6396 # endif /* _LIBC */
6397 #endif /* WCHAR */
6398 	    boolean not = (re_opcode_t) *(p - 1) == charset_not;
6399 
6400             DEBUG_PRINT2 ("EXECUTING charset%s.\n", not ? "_not" : "");
6401 	    PREFETCH ();
6402 	    c = TRANSLATE (*d); /* The character to match.  */
6403 #ifdef WCHAR
6404 # ifdef _LIBC
6405 	    nrules = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
6406 # endif /* _LIBC */
6407 	    charset_top = p - 1;
6408 	    char_class_length = *p++;
6409 	    coll_symbol_length = *p++;
6410 	    equiv_class_length = *p++;
6411 	    ranges_length = *p++;
6412 	    chars_length = *p++;
6413 	    /* p now points to charset[6], so the address of the next
6414 	       instruction (charset[6+l+m+n+2*o+p']) equals p[l+m+n+2*o+p'],
6415 	       where l=length of char_classes, m=length of collating_symbol,
6416 	       n=length of equivalence_class, o=length of char_range,
6417 	       p'=length of character.  */
6418 	    workp = p;
6419 	    /* Update p to indicate the next instruction.  */
6420 	    p += char_class_length + coll_symbol_length+ equiv_class_length +
6421               2*ranges_length + chars_length;
6422 
6423             /* match with char_class?  */
6424 	    for (i = 0; i < char_class_length ; i += CHAR_CLASS_SIZE)
6425 	      {
6426 		wctype_t wctype;
6427 		uintptr_t alignedp = ((uintptr_t)workp
6428 				      + __alignof__(wctype_t) - 1)
6429 		  		      & ~(uintptr_t)(__alignof__(wctype_t) - 1);
6430 		wctype = *((wctype_t*)alignedp);
6431 		workp += CHAR_CLASS_SIZE;
6432 		if (iswctype((wint_t)c, wctype))
6433 		  goto char_set_matched;
6434 	      }
6435 
6436             /* match with collating_symbol?  */
6437 # ifdef _LIBC
6438 	    if (nrules != 0)
6439 	      {
6440 		const unsigned char *extra = (const unsigned char *)
6441 		  _NL_CURRENT (LC_COLLATE, _NL_COLLATE_SYMB_EXTRAMB);
6442 
6443 		for (workp2 = workp + coll_symbol_length ; workp < workp2 ;
6444 		     workp++)
6445 		  {
6446 		    int32_t *wextra;
6447 		    wextra = (int32_t*)(extra + *workp++);
6448 		    for (i = 0; i < *wextra; ++i)
6449 		      if (TRANSLATE(d[i]) != wextra[1 + i])
6450 			break;
6451 
6452 		    if (i == *wextra)
6453 		      {
6454 			/* Update d, however d will be incremented at
6455 			   char_set_matched:, we decrement d here.  */
6456 			d += i - 1;
6457 			goto char_set_matched;
6458 		      }
6459 		  }
6460 	      }
6461 	    else /* (nrules == 0) */
6462 # endif
6463 	      /* If we can't look up collation data, we use wcscoll
6464 		 instead.  */
6465 	      {
6466 		for (workp2 = workp + coll_symbol_length ; workp < workp2 ;)
6467 		  {
6468 		    const CHAR_T *backup_d = d, *backup_dend = dend;
6469 		    length = wcslen (workp);
6470 
6471 		    /* If wcscoll(the collating symbol, whole string) > 0,
6472 		       any substring of the string never match with the
6473 		       collating symbol.  */
6474 		    if (wcscoll (workp, d) > 0)
6475 		      {
6476 			workp += length + 1;
6477 			continue;
6478 		      }
6479 
6480 		    /* First, we compare the collating symbol with
6481 		       the first character of the string.
6482 		       If it doesn't match, we add the next character to
6483 		       the compare buffer in turn.  */
6484 		    for (i = 0 ; i < WORK_BUFFER_SIZE-1 ; i++, d++)
6485 		      {
6486 			int match;
6487 			if (d == dend)
6488 			  {
6489 			    if (dend == end_match_2)
6490 			      break;
6491 			    d = string2;
6492 			    dend = end_match_2;
6493 			  }
6494 
6495 			/* add next character to the compare buffer.  */
6496 			str_buf[i] = TRANSLATE(*d);
6497 			str_buf[i+1] = '\0';
6498 
6499 			match = wcscoll (workp, str_buf);
6500 			if (match == 0)
6501 			  goto char_set_matched;
6502 
6503 			if (match < 0)
6504 			  /* (str_buf > workp) implies (str_buf + X > workp),
6505 			     because for all X (str_buf + X > str_buf).
6506 			     So we don't need to continue this loop.  */
6507 			  break;
6508 
6509 			/* Otherwise (str_buf < workp),
6510 			   (str_buf+next_character) may equal (workp).
6511 			   So we continue this loop.  */
6512 		      }
6513 		    /* not matched */
6514 		    d = backup_d;
6515 		    dend = backup_dend;
6516 		    workp += length + 1;
6517 		  }
6518               }
6519             /* match with equivalence_class?  */
6520 # ifdef _LIBC
6521 	    if (nrules != 0)
6522 	      {
6523                 const CHAR_T *backup_d = d, *backup_dend = dend;
6524 		/* Try to match the equivalence class against
6525 		   those known to the collate implementation.  */
6526 		const int32_t *table;
6527 		const int32_t *weights;
6528 		const int32_t *extra;
6529 		const int32_t *indirect;
6530 		int32_t idx, idx2;
6531 		wint_t *cp;
6532 		size_t len;
6533 
6534 		/* This #include defines a local function!  */
6535 #  include <locale/weightwc.h>
6536 
6537 		table = (const int32_t *)
6538 		  _NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEWC);
6539 		weights = (const wint_t *)
6540 		  _NL_CURRENT (LC_COLLATE, _NL_COLLATE_WEIGHTWC);
6541 		extra = (const wint_t *)
6542 		  _NL_CURRENT (LC_COLLATE, _NL_COLLATE_EXTRAWC);
6543 		indirect = (const int32_t *)
6544 		  _NL_CURRENT (LC_COLLATE, _NL_COLLATE_INDIRECTWC);
6545 
6546 		/* Write 1 collating element to str_buf, and
6547 		   get its index.  */
6548 		idx2 = 0;
6549 
6550 		for (i = 0 ; idx2 == 0 && i < WORK_BUFFER_SIZE - 1; i++)
6551 		  {
6552 		    cp = (wint_t*)str_buf;
6553 		    if (d == dend)
6554 		      {
6555 			if (dend == end_match_2)
6556 			  break;
6557 			d = string2;
6558 			dend = end_match_2;
6559 		      }
6560 		    str_buf[i] = TRANSLATE(*(d+i));
6561 		    str_buf[i+1] = '\0'; /* sentinel */
6562 		    idx2 = findidx ((const wint_t**)&cp);
6563 		  }
6564 
6565 		/* Update d, however d will be incremented at
6566 		   char_set_matched:, we decrement d here.  */
6567 		d = backup_d + ((wchar_t*)cp - (wchar_t*)str_buf - 1);
6568 		if (d >= dend)
6569 		  {
6570 		    if (dend == end_match_2)
6571 			d = dend;
6572 		    else
6573 		      {
6574 			d = string2;
6575 			dend = end_match_2;
6576 		      }
6577 		  }
6578 
6579 		len = weights[idx2];
6580 
6581 		for (workp2 = workp + equiv_class_length ; workp < workp2 ;
6582 		     workp++)
6583 		  {
6584 		    idx = (int32_t)*workp;
6585 		    /* We already checked idx != 0 in regex_compile. */
6586 
6587 		    if (idx2 != 0 && len == weights[idx])
6588 		      {
6589 			int cnt = 0;
6590 			while (cnt < len && (weights[idx + 1 + cnt]
6591 					     == weights[idx2 + 1 + cnt]))
6592 			  ++cnt;
6593 
6594 			if (cnt == len)
6595 			  goto char_set_matched;
6596 		      }
6597 		  }
6598 		/* not matched */
6599                 d = backup_d;
6600                 dend = backup_dend;
6601 	      }
6602 	    else /* (nrules == 0) */
6603 # endif
6604 	      /* If we can't look up collation data, we use wcscoll
6605 		 instead.  */
6606 	      {
6607 		for (workp2 = workp + equiv_class_length ; workp < workp2 ;)
6608 		  {
6609 		    const CHAR_T *backup_d = d, *backup_dend = dend;
6610 		    length = wcslen (workp);
6611 
6612 		    /* If wcscoll(the equivalence class, whole string) > 0,
6613 		       no substring of the string can ever match the
6614 		       equivalence class.  */
6615 		    if (wcscoll (workp, d) > 0)
6616 		      {
6617 			workp += length + 1;
6618 			break;
6619 		      }
6620 
6621 		    /* First, we compare the equivalence class with
6622 		       the first character of the string.
6623 		       If it doesn't match, we add the next character to
6624 		       the compare buffer in turn.  */
6625 		    for (i = 0 ; i < WORK_BUFFER_SIZE - 1 ; i++, d++)
6626 		      {
6627 			int match;
6628 			if (d == dend)
6629 			  {
6630 			    if (dend == end_match_2)
6631 			      break;
6632 			    d = string2;
6633 			    dend = end_match_2;
6634 			  }
6635 
6636 			/* add next character to the compare buffer.  */
6637 			str_buf[i] = TRANSLATE(*d);
6638 			str_buf[i+1] = '\0';
6639 
6640 			match = wcscoll (workp, str_buf);
6641 
6642 			if (match == 0)
6643 			  goto char_set_matched;
6644 
6645 			if (match < 0)
6646 			/* (str_buf > workp) implies (str_buf + X > workp),
6647 			   because for all X (str_buf + X > str_buf).
6648 			   So we don't need to continue this loop.  */
6649 			  break;
6650 
6651 			/* Otherwise (str_buf < workp),
6652 			   (str_buf+next_character) may equal (workp).
6653 			   So we continue this loop.  */
6654 		      }
6655 		    /* not matched */
6656 		    d = backup_d;
6657 		    dend = backup_dend;
6658 		    workp += length + 1;
6659 		  }
6660 	      }
6661 
6662             /* match with char_range?  */
6663 # ifdef _LIBC
6664 	    if (nrules != 0)
6665 	      {
6666 		uint32_t collseqval;
6667 		const char *collseq = (const char *)
6668 		  _NL_CURRENT(LC_COLLATE, _NL_COLLATE_COLLSEQWC);
6669 
6670 		collseqval = collseq_table_lookup (collseq, c);
6671 
6672 		for (; workp < p - chars_length ;)
6673 		  {
6674 		    uint32_t start_val, end_val;
6675 
6676 		    /* We already compute the collation sequence value
6677 		       of the characters (or collating symbols).  */
6678 		    start_val = (uint32_t) *workp++; /* range_start */
6679 		    end_val = (uint32_t) *workp++; /* range_end */
6680 
6681 		    if (start_val <= collseqval && collseqval <= end_val)
6682 		      goto char_set_matched;
6683 		  }
6684 	      }
6685 	    else
6686 # endif
6687 	      {
6688 		/* We set range_start_char at str_buf[0], range_end_char
6689 		   at str_buf[4], and compared char at str_buf[2].  */
6690 		str_buf[1] = 0;
6691 		str_buf[2] = c;
6692 		str_buf[3] = 0;
6693 		str_buf[5] = 0;
6694 		for (; workp < p - chars_length ;)
6695 		  {
6696 		    wchar_t *range_start_char, *range_end_char;
6697 
6698 		    /* match if (range_start_char <= c <= range_end_char).  */
6699 
6700 		    /* If range_start(or end) < 0, we assume -range_start(end)
6701 		       is the offset of the collating symbol which is specified
6702 		       as the character of the range start(end).  */
6703 
6704 		    /* range_start */
6705 		    if (*workp < 0)
6706 		      range_start_char = charset_top - (*workp++);
6707 		    else
6708 		      {
6709 			str_buf[0] = *workp++;
6710 			range_start_char = str_buf;
6711 		      }
6712 
6713 		    /* range_end */
6714 		    if (*workp < 0)
6715 		      range_end_char = charset_top - (*workp++);
6716 		    else
6717 		      {
6718 			str_buf[4] = *workp++;
6719 			range_end_char = str_buf + 4;
6720 		      }
6721 
6722 		    if (wcscoll (range_start_char, str_buf+2) <= 0
6723 			&& wcscoll (str_buf+2, range_end_char) <= 0)
6724 		      goto char_set_matched;
6725 		  }
6726 	      }
6727 
6728             /* match with char?  */
6729 	    for (; workp < p ; workp++)
6730 	      if (c == *workp)
6731 		goto char_set_matched;
6732 
6733 	    not = !not;
6734 
6735 	  char_set_matched:
6736 	    if (not) goto fail;
6737 #else
6738             /* Cast to `unsigned' instead of `unsigned char' in case the
6739                bit list is a full 32 bytes long.  */
6740 	    if (c < (unsigned) (*p * BYTEWIDTH)
6741 		&& p[1 + c / BYTEWIDTH] & (1 << (c % BYTEWIDTH)))
6742 	      not = !not;
6743 
6744 	    p += 1 + *p;
6745 
6746 	    if (!not) goto fail;
6747 #undef WORK_BUFFER_SIZE
6748 #endif /* WCHAR */
6749 	    SET_REGS_MATCHED ();
6750             d++;
6751 	    NEXT;
6752 	  }
6753 
6754 
6755         /* The beginning of a group is represented by start_memory.
6756            The arguments are the register number in the next byte, and the
6757            number of groups inner to this one in the next.  The text
6758            matched within the group is recorded (in the internal
6759            registers data structure) under the register number.  */
6760         CASE (start_memory):
6761 	  DEBUG_PRINT3 ("EXECUTING start_memory %ld (%ld):\n",
6762 			(long int) *p, (long int) p[1]);
6763 
6764           /* Find out if this group can match the empty string.  */
6765 	  p1 = p;		/* To send to group_match_null_string_p.  */
6766 
6767           if (REG_MATCH_NULL_STRING_P (reg_info[*p]) == MATCH_NULL_UNSET_VALUE)
6768             REG_MATCH_NULL_STRING_P (reg_info[*p])
6769               = PREFIX(group_match_null_string_p) (&p1, pend, reg_info);
6770 
6771           /* Save the position in the string where we were the last time
6772              we were at this open-group operator in case the group is
6773              operated upon by a repetition operator, e.g., with `(a*)*b'
6774              against `ab'; then we want to ignore where we are now in
6775              the string in case this attempt to match fails.  */
6776           old_regstart[*p] = REG_MATCH_NULL_STRING_P (reg_info[*p])
6777                              ? REG_UNSET (regstart[*p]) ? d : regstart[*p]
6778                              : regstart[*p];
6779 	  DEBUG_PRINT2 ("  old_regstart: %d\n",
6780 			 POINTER_TO_OFFSET (old_regstart[*p]));
6781 
6782           regstart[*p] = d;
6783 	  DEBUG_PRINT2 ("  regstart: %d\n", POINTER_TO_OFFSET (regstart[*p]));
6784 
6785           IS_ACTIVE (reg_info[*p]) = 1;
6786           MATCHED_SOMETHING (reg_info[*p]) = 0;
6787 
6788 	  /* Clear this whenever we change the register activity status.  */
6789 	  set_regs_matched_done = 0;
6790 
6791           /* This is the new highest active register.  */
6792           highest_active_reg = *p;
6793 
6794           /* If nothing was active before, this is the new lowest active
6795              register.  */
6796           if (lowest_active_reg == NO_LOWEST_ACTIVE_REG)
6797             lowest_active_reg = *p;
6798 
6799           /* Move past the register number and inner group count.  */
6800           p += 2;
6801 	  just_past_start_mem = p;
6802 
6803           NEXT;
6804 
6805 
6806         /* The stop_memory opcode represents the end of a group.  Its
6807            arguments are the same as start_memory's: the register
6808            number, and the number of inner groups.  */
6809 	CASE (stop_memory):
6810 	  DEBUG_PRINT3 ("EXECUTING stop_memory %ld (%ld):\n",
6811 			(long int) *p, (long int) p[1]);
6812 
6813           /* We need to save the string position the last time we were at
6814              this close-group operator in case the group is operated
6815              upon by a repetition operator, e.g., with `((a*)*(b*)*)*'
6816              against `aba'; then we want to ignore where we are now in
6817              the string in case this attempt to match fails.  */
6818           old_regend[*p] = REG_MATCH_NULL_STRING_P (reg_info[*p])
6819                            ? REG_UNSET (regend[*p]) ? d : regend[*p]
6820 			   : regend[*p];
6821 	  DEBUG_PRINT2 ("      old_regend: %d\n",
6822 			 POINTER_TO_OFFSET (old_regend[*p]));
6823 
6824           regend[*p] = d;
6825 	  DEBUG_PRINT2 ("      regend: %d\n", POINTER_TO_OFFSET (regend[*p]));
6826 
6827           /* This register isn't active anymore.  */
6828           IS_ACTIVE (reg_info[*p]) = 0;
6829 
6830 	  /* Clear this whenever we change the register activity status.  */
6831 	  set_regs_matched_done = 0;
6832 
6833           /* If this was the only register active, nothing is active
6834              anymore.  */
6835           if (lowest_active_reg == highest_active_reg)
6836             {
6837               lowest_active_reg = NO_LOWEST_ACTIVE_REG;
6838               highest_active_reg = NO_HIGHEST_ACTIVE_REG;
6839             }
6840           else
6841             { /* We must scan for the new highest active register, since
6842                  it isn't necessarily one less than now: consider
6843                  (a(b)c(d(e)f)g).  When group 3 ends, after the f), the
6844                  new highest active register is 1.  */
6845               UCHAR_T r = *p - 1;
6846               while (r > 0 && !IS_ACTIVE (reg_info[r]))
6847                 r--;
6848 
6849               /* If we end up at register zero, that means that we saved
6850                  the registers as the result of an `on_failure_jump', not
6851                  a `start_memory', and we jumped to past the innermost
6852                  `stop_memory'.  For example, in ((.)*) we save
6853                  registers 1 and 2 as a result of the *, but when we pop
6854                  back to the second ), we are at the stop_memory 1.
6855                  Thus, nothing is active.  */
6856 	      if (r == 0)
6857                 {
6858                   lowest_active_reg = NO_LOWEST_ACTIVE_REG;
6859                   highest_active_reg = NO_HIGHEST_ACTIVE_REG;
6860                 }
6861               else
6862                 highest_active_reg = r;
6863             }
6864 
6865           /* If just failed to match something this time around with a
6866              group that's operated on by a repetition operator, try to
6867              force exit from the ``loop'', and restore the register
6868              information for this group that we had before trying this
6869              last match.  */
6870           if ((!MATCHED_SOMETHING (reg_info[*p])
6871                || just_past_start_mem == p - 1)
6872 	      && (p + 2) < pend)
6873             {
6874               boolean is_a_jump_n = false;
6875 
6876               p1 = p + 2;
6877               mcnt = 0;
6878               switch ((re_opcode_t) *p1++)
6879                 {
6880                   case jump_n:
6881 		    is_a_jump_n = true;
6882                   case pop_failure_jump:
6883 		  case maybe_pop_jump:
6884 		  case jump:
6885 		  case dummy_failure_jump:
6886                     EXTRACT_NUMBER_AND_INCR (mcnt, p1);
6887 		    if (is_a_jump_n)
6888 		      p1 += OFFSET_ADDRESS_SIZE;
6889                     break;
6890 
6891                   default:
6892                     /* do nothing */ ;
6893                 }
6894 	      p1 += mcnt;
6895 
6896               /* If the next operation is a jump backwards in the pattern
6897 	         to an on_failure_jump right before the start_memory
6898                  corresponding to this stop_memory, exit from the loop
6899                  by forcing a failure after pushing on the stack the
6900                  on_failure_jump's jump in the pattern, and d.  */
6901               if (mcnt < 0 && (re_opcode_t) *p1 == on_failure_jump
6902                   && (re_opcode_t) p1[1+OFFSET_ADDRESS_SIZE] == start_memory
6903 		  && p1[2+OFFSET_ADDRESS_SIZE] == *p)
6904 		{
6905                   /* If this group ever matched anything, then restore
6906                      what its registers were before trying this last
6907                      failed match, e.g., with `(a*)*b' against `ab' for
6908                      regstart[1], and, e.g., with `((a*)*(b*)*)*'
6909                      against `aba' for regend[3].
6910 
6911                      Also restore the registers for inner groups for,
6912                      e.g., `((a*)(b*))*' against `aba' (register 3 would
6913                      otherwise get trashed).  */
6914 
6915                   if (EVER_MATCHED_SOMETHING (reg_info[*p]))
6916 		    {
6917 		      unsigned r;
6918 
6919                       EVER_MATCHED_SOMETHING (reg_info[*p]) = 0;
6920 
6921 		      /* Restore this and inner groups' (if any) registers.  */
6922                       for (r = *p; r < (unsigned) *p + (unsigned) *(p + 1);
6923 			   r++)
6924                         {
6925                           regstart[r] = old_regstart[r];
6926 
6927                           /* xx why this test?  */
6928                           if (old_regend[r] >= regstart[r])
6929                             regend[r] = old_regend[r];
6930                         }
6931                     }
6932 		  p1++;
6933                   EXTRACT_NUMBER_AND_INCR (mcnt, p1);
6934                   PUSH_FAILURE_POINT (p1 + mcnt, d, -2);
6935 
6936                   goto fail;
6937                 }
6938             }
6939 
6940           /* Move past the register number and the inner group count.  */
6941           p += 2;
6942           NEXT;
6943 
6944 
6945 	/* \<digit> has been turned into a `duplicate' command which is
6946            followed by the numeric value of <digit> as the register number.  */
6947         CASE (duplicate):
6948 	  {
6949 	    register const CHAR_T *d2, *dend2;
6950 	    int regno = *p++;   /* Get which register to match against.  */
6951 	    DEBUG_PRINT2 ("EXECUTING duplicate %d.\n", regno);
6952 
6953 	    /* Can't back reference a group which we've never matched.  */
6954             if (REG_UNSET (regstart[regno]) || REG_UNSET (regend[regno]))
6955               goto fail;
6956 
6957             /* Where in input to try to start matching.  */
6958             d2 = regstart[regno];
6959 
6960             /* Where to stop matching; if both the place to start and
6961                the place to stop matching are in the same string, then
6962                set to the place to stop, otherwise, for now have to use
6963                the end of the first string.  */
6964 
6965             dend2 = ((FIRST_STRING_P (regstart[regno])
6966 		      == FIRST_STRING_P (regend[regno]))
6967 		     ? regend[regno] : end_match_1);
6968 	    for (;;)
6969 	      {
6970 		/* If necessary, advance to next segment in register
6971                    contents.  */
6972 		while (d2 == dend2)
6973 		  {
6974 		    if (dend2 == end_match_2) break;
6975 		    if (dend2 == regend[regno]) break;
6976 
6977                     /* End of string1 => advance to string2. */
6978                     d2 = string2;
6979                     dend2 = regend[regno];
6980 		  }
6981 		/* At end of register contents => success */
6982 		if (d2 == dend2) break;
6983 
6984 		/* If necessary, advance to next segment in data.  */
6985 		PREFETCH ();
6986 
6987 		/* How many characters left in this segment to match.  */
6988 		mcnt = dend - d;
6989 
6990 		/* Want how many consecutive characters we can match in
6991                    one shot, so, if necessary, adjust the count.  */
6992                 if (mcnt > dend2 - d2)
6993 		  mcnt = dend2 - d2;
6994 
6995 		/* Compare that many; failure if mismatch, else move
6996                    past them.  */
6997 		if (translate
6998                     ? PREFIX(bcmp_translate) (d, d2, mcnt, translate)
6999                     : memcmp (d, d2, mcnt*sizeof(UCHAR_T)))
7000 		  goto fail;
7001 		d += mcnt, d2 += mcnt;
7002 
7003 		/* Do this because we've matched some characters.  */
7004 		SET_REGS_MATCHED ();
7005 	      }
7006 	  }
7007 	  NEXT;
7008 
7009 
7010         /* begline matches the empty string at the beginning of the string
7011            (unless `not_bol' is set in `bufp'), and, if
7012            `newline_anchor' is set, after newlines.  */
7013 	CASE (begline):
7014           DEBUG_PRINT1 ("EXECUTING begline.\n");
7015 
7016           if (AT_STRINGS_BEG (d))
7017             {
7018               if (!bufp->not_bol)
7019 		{
7020 		  NEXT;
7021 		}
7022             }
7023           else if (d[-1] == '\n' && bufp->newline_anchor)
7024             {
7025               NEXT;
7026             }
7027           /* In all other cases, we fail.  */
7028           goto fail;
7029 
7030 
7031         /* endline is the dual of begline.  */
7032 	CASE (endline):
7033           DEBUG_PRINT1 ("EXECUTING endline.\n");
7034 
7035           if (AT_STRINGS_END (d))
7036             {
7037               if (!bufp->not_eol)
7038 		{
7039 		  NEXT;
7040 		}
7041             }
7042 
7043           /* We have to ``prefetch'' the next character.  */
7044           else if ((d == end1 ? *string2 : *d) == '\n'
7045                    && bufp->newline_anchor)
7046             {
7047               NEXT;
7048             }
7049           goto fail;
7050 
7051 
7052 	/* Match at the very beginning of the data.  */
7053         CASE (begbuf):
7054           DEBUG_PRINT1 ("EXECUTING begbuf.\n");
7055           if (AT_STRINGS_BEG (d))
7056 	    {
7057 	      NEXT;
7058 	    }
7059           goto fail;
7060 
7061 
7062 	/* Match at the very end of the data.  */
7063         CASE (endbuf):
7064           DEBUG_PRINT1 ("EXECUTING endbuf.\n");
7065 	  if (AT_STRINGS_END (d))
7066 	    {
7067 	      NEXT;
7068 	    }
7069           goto fail;
7070 
7071 
7072         /* on_failure_keep_string_jump is used to optimize `.*\n'.  It
7073            pushes NULL as the value for the string on the stack.  Then
7074            `pop_failure_point' will keep the current value for the
7075            string, instead of restoring it.  To see why, consider
7076            matching `foo\nbar' against `.*\n'.  The .* matches the foo;
7077            then the . fails against the \n.  But the next thing we want
7078            to do is match the \n against the \n; if we restored the
7079            string value, we would be back at the foo.
7080 
7081            Because this is used only in specific cases, we don't need to
7082            check all the things that `on_failure_jump' does, to make
7083            sure the right things get saved on the stack.  Hence we don't
7084            share its code.  The only reason to push anything on the
7085            stack at all is that otherwise we would have to change
7086            `anychar's code to do something besides goto fail in this
7087            case; that seems worse than this.  */
7088         CASE (on_failure_keep_string_jump):
7089           DEBUG_PRINT1 ("EXECUTING on_failure_keep_string_jump");
7090 
7091           EXTRACT_NUMBER_AND_INCR (mcnt, p);
7092 #ifdef _LIBC
7093           DEBUG_PRINT3 (" %d (to %p):\n", mcnt, p + mcnt);
7094 #else
7095           DEBUG_PRINT3 (" %d (to 0x%x):\n", mcnt, p + mcnt);
7096 #endif
7097 
7098           PUSH_FAILURE_POINT (p + mcnt, NULL, -2);
7099           NEXT;
7100 
7101 
7102 	/* Uses of on_failure_jump:
7103 
7104            Each alternative starts with an on_failure_jump that points
7105            to the beginning of the next alternative.  Each alternative
7106            except the last ends with a jump that in effect jumps past
7107            the rest of the alternatives.  (They really jump to the
7108            ending jump of the following alternative, because tensioning
7109            these jumps is a hassle.)
7110 
7111            Repeats start with an on_failure_jump that points past both
7112            the repetition text and either the following jump or
7113            pop_failure_jump back to this on_failure_jump.  */
7114 	CASE (on_failure_jump):
7115         on_failure:
7116           DEBUG_PRINT1 ("EXECUTING on_failure_jump");
7117 
7118           EXTRACT_NUMBER_AND_INCR (mcnt, p);
7119 #ifdef _LIBC
7120           DEBUG_PRINT3 (" %d (to %p)", mcnt, p + mcnt);
7121 #else
7122           DEBUG_PRINT3 (" %d (to 0x%x)", mcnt, p + mcnt);
7123 #endif
7124 
7125           /* If this on_failure_jump comes right before a group (i.e.,
7126              the original * applied to a group), save the information
7127              for that group and all inner ones, so that if we fail back
7128              to this point, the group's information will be correct.
7129              For example, in \(a*\)*\1, we need the preceding group,
7130              and in \(zz\(a*\)b*\)\2, we need the inner group.  */
7131 
7132           /* We can't use `p' to check ahead because we push
7133              a failure point to `p + mcnt' after we do this.  */
7134           p1 = p;
7135 
7136           /* We need to skip no_op's before we look for the
7137              start_memory in case this on_failure_jump is happening as
7138              the result of a completed succeed_n, as in \(a\)\{1,3\}b\1
7139              against aba.  */
7140           while (p1 < pend && (re_opcode_t) *p1 == no_op)
7141             p1++;
7142 
7143           if (p1 < pend && (re_opcode_t) *p1 == start_memory)
7144             {
7145               /* We have a new highest active register now.  This will
7146                  get reset at the start_memory we are about to get to,
7147                  but we will have saved all the registers relevant to
7148                  this repetition op, as described above.  */
7149               highest_active_reg = *(p1 + 1) + *(p1 + 2);
7150               if (lowest_active_reg == NO_LOWEST_ACTIVE_REG)
7151                 lowest_active_reg = *(p1 + 1);
7152             }
7153 
7154           DEBUG_PRINT1 (":\n");
7155           PUSH_FAILURE_POINT (p + mcnt, d, -2);
7156           NEXT;
7157 
7158 
7159         /* A smart repeat ends with `maybe_pop_jump'.
7160 	   We change it to either `pop_failure_jump' or `jump'.  */
7161         CASE (maybe_pop_jump):
7162           EXTRACT_NUMBER_AND_INCR (mcnt, p);
7163           DEBUG_PRINT2 ("EXECUTING maybe_pop_jump %d.\n", mcnt);
7164           {
7165 	    register UCHAR_T *p2 = p;
7166 
7167             /* Compare the beginning of the repeat with what in the
7168                pattern follows its end. If we can establish that there
7169                is nothing that they would both match, i.e., that we
7170                would have to backtrack because of (as in, e.g., `a*a')
7171                then we can change to pop_failure_jump, because we'll
7172                never have to backtrack.
7173 
7174                This is not true in the case of alternatives: in
7175                `(a|ab)*' we do need to backtrack to the `ab' alternative
7176                (e.g., if the string was `ab').  But instead of trying to
7177                detect that here, the alternative has put on a dummy
7178                failure point which is what we will end up popping.  */
7179 
7180 	    /* Skip over open/close-group commands.
7181 	       If what follows this loop is a ...+ construct,
7182 	       look at what begins its body, since we will have to
7183 	       match at least one of that.  */
7184 	    while (1)
7185 	      {
7186 		if (p2 + 2 < pend
7187 		    && ((re_opcode_t) *p2 == stop_memory
7188 			|| (re_opcode_t) *p2 == start_memory))
7189 		  p2 += 3;
7190 		else if (p2 + 2 + 2 * OFFSET_ADDRESS_SIZE < pend
7191 			 && (re_opcode_t) *p2 == dummy_failure_jump)
7192 		  p2 += 2 + 2 * OFFSET_ADDRESS_SIZE;
7193 		else
7194 		  break;
7195 	      }
7196 
7197 	    p1 = p + mcnt;
7198 	    /* p1[0] ... p1[2] are the `on_failure_jump' corresponding
7199 	       to the `maybe_finalize_jump' of this case.  Examine what
7200 	       follows.  */
7201 
7202             /* If we're at the end of the pattern, we can change.  */
7203             if (p2 == pend)
7204 	      {
7205 		/* Consider what happens when matching ":\(.*\)"
7206 		   against ":/".  I don't really understand this code
7207 		   yet.  */
7208   	        p[-(1+OFFSET_ADDRESS_SIZE)] = (UCHAR_T)
7209 		  pop_failure_jump;
7210                 DEBUG_PRINT1
7211                   ("  End of pattern: change to `pop_failure_jump'.\n");
7212               }
7213 
7214             else if ((re_opcode_t) *p2 == exactn
7215 #ifdef MBS_SUPPORT
7216 		     || (re_opcode_t) *p2 == exactn_bin
7217 #endif
7218 		     || (bufp->newline_anchor && (re_opcode_t) *p2 == endline))
7219 	      {
7220 		register UCHAR_T c
7221                   = *p2 == (UCHAR_T) endline ? '\n' : p2[2];
7222 
7223                 if (((re_opcode_t) p1[1+OFFSET_ADDRESS_SIZE] == exactn
7224 #ifdef MBS_SUPPORT
7225 		     || (re_opcode_t) p1[1+OFFSET_ADDRESS_SIZE] == exactn_bin
7226 #endif
7227 		    ) && p1[3+OFFSET_ADDRESS_SIZE] != c)
7228                   {
7229   		    p[-(1+OFFSET_ADDRESS_SIZE)] = (UCHAR_T)
7230 		      pop_failure_jump;
7231 #ifdef WCHAR
7232 		      DEBUG_PRINT3 ("  %C != %C => pop_failure_jump.\n",
7233 				    (wint_t) c,
7234 				    (wint_t) p1[3+OFFSET_ADDRESS_SIZE]);
7235 #else
7236 		      DEBUG_PRINT3 ("  %c != %c => pop_failure_jump.\n",
7237 				    (char) c,
7238 				    (char) p1[3+OFFSET_ADDRESS_SIZE]);
7239 #endif
7240                   }
7241 
7242 #ifndef WCHAR
7243 		else if ((re_opcode_t) p1[3] == charset
7244 			 || (re_opcode_t) p1[3] == charset_not)
7245 		  {
7246 		    int not = (re_opcode_t) p1[3] == charset_not;
7247 
7248 		    if (c < (unsigned) (p1[4] * BYTEWIDTH)
7249 			&& p1[5 + c / BYTEWIDTH] & (1 << (c % BYTEWIDTH)))
7250 		      not = !not;
7251 
7252                     /* `not' is equal to 1 if c would match, which means
7253                         that we can't change to pop_failure_jump.  */
7254 		    if (!not)
7255                       {
7256   		        p[-3] = (unsigned char) pop_failure_jump;
7257                         DEBUG_PRINT1 ("  No match => pop_failure_jump.\n");
7258                       }
7259 		  }
7260 #endif /* not WCHAR */
7261 	      }
7262 #ifndef WCHAR
7263             else if ((re_opcode_t) *p2 == charset)
7264 	      {
7265 		/* We win if the first character of the loop is not part
7266                    of the charset.  */
7267                 if ((re_opcode_t) p1[3] == exactn
7268  		    && ! ((int) p2[1] * BYTEWIDTH > (int) p1[5]
7269  			  && (p2[2 + p1[5] / BYTEWIDTH]
7270  			      & (1 << (p1[5] % BYTEWIDTH)))))
7271 		  {
7272 		    p[-3] = (unsigned char) pop_failure_jump;
7273 		    DEBUG_PRINT1 ("  No match => pop_failure_jump.\n");
7274                   }
7275 
7276 		else if ((re_opcode_t) p1[3] == charset_not)
7277 		  {
7278 		    int idx;
7279 		    /* We win if the charset_not inside the loop
7280 		       lists every character listed in the charset after.  */
7281 		    for (idx = 0; idx < (int) p2[1]; idx++)
7282 		      if (! (p2[2 + idx] == 0
7283 			     || (idx < (int) p1[4]
7284 				 && ((p2[2 + idx] & ~ p1[5 + idx]) == 0))))
7285 			break;
7286 
7287 		    if (idx == p2[1])
7288                       {
7289   		        p[-3] = (unsigned char) pop_failure_jump;
7290                         DEBUG_PRINT1 ("  No match => pop_failure_jump.\n");
7291                       }
7292 		  }
7293 		else if ((re_opcode_t) p1[3] == charset)
7294 		  {
7295 		    int idx;
7296 		    /* We win if the charset inside the loop
7297 		       has no overlap with the one after the loop.  */
7298 		    for (idx = 0;
7299 			 idx < (int) p2[1] && idx < (int) p1[4];
7300 			 idx++)
7301 		      if ((p2[2 + idx] & p1[5 + idx]) != 0)
7302 			break;
7303 
7304 		    if (idx == p2[1] || idx == p1[4])
7305                       {
7306   		        p[-3] = (unsigned char) pop_failure_jump;
7307                         DEBUG_PRINT1 ("  No match => pop_failure_jump.\n");
7308                       }
7309 		  }
7310 	      }
7311 #endif /* not WCHAR */
7312 	  }
7313 	  p -= OFFSET_ADDRESS_SIZE;	/* Point at relative address again.  */
7314 	  if ((re_opcode_t) p[-1] != pop_failure_jump)
7315 	    {
7316 	      p[-1] = (UCHAR_T) jump;
7317               DEBUG_PRINT1 ("  Match => jump.\n");
7318 	      goto unconditional_jump;
7319 	    }
7320         /* Note fall through.  */
7321 
7322 
7323 	/* The end of a simple repeat has a pop_failure_jump back to
7324            its matching on_failure_jump, where the latter will push a
7325            failure point.  The pop_failure_jump takes off failure
7326            points put on by this pop_failure_jump's matching
7327            on_failure_jump; we got through the pattern to here from the
7328            matching on_failure_jump, so didn't fail.  */
7329         CASE (pop_failure_jump):
7330           {
7331             /* We need to pass separate storage for the lowest and
7332                highest registers, even though we don't care about the
7333                actual values.  Otherwise, we will restore only one
7334                register from the stack, since lowest will == highest in
7335                `pop_failure_point'.  */
7336             active_reg_t dummy_low_reg, dummy_high_reg;
7337             UCHAR_T *pdummy = NULL;
7338             const CHAR_T *sdummy = NULL;
7339 
7340             DEBUG_PRINT1 ("EXECUTING pop_failure_jump.\n");
7341             POP_FAILURE_POINT (sdummy, pdummy,
7342                                dummy_low_reg, dummy_high_reg,
7343                                reg_dummy, reg_dummy, reg_info_dummy);
7344           }
7345 	  /* Note fall through.  */
7346 
7347 	unconditional_jump:
7348 #ifdef _LIBC
7349 	  DEBUG_PRINT2 ("\n%p: ", p);
7350 #else
7351 	  DEBUG_PRINT2 ("\n0x%x: ", p);
7352 #endif
7353           /* Note fall through.  */
7354 
7355         /* Unconditionally jump (without popping any failure points).  */
7356         CASE (jump):
7357 	  EXTRACT_NUMBER_AND_INCR (mcnt, p);	/* Get the amount to jump.  */
7358           DEBUG_PRINT2 ("EXECUTING jump %d ", mcnt);
7359 	  p += mcnt;				/* Do the jump.  */
7360 #ifdef _LIBC
7361           DEBUG_PRINT2 ("(to %p).\n", p);
7362 #else
7363           DEBUG_PRINT2 ("(to 0x%x).\n", p);
7364 #endif
7365 	  NEXT;
7366 
7367 
7368         /* We need this opcode so we can detect where alternatives end
7369            in `group_match_null_string_p' et al.  */
7370         CASE (jump_past_alt):
7371           DEBUG_PRINT1 ("EXECUTING jump_past_alt.\n");
7372           goto unconditional_jump;
7373 
7374 
7375         /* Normally, the on_failure_jump pushes a failure point, which
7376            then gets popped at pop_failure_jump.  We will end up at
7377            pop_failure_jump, also, and with a pattern of, say, `a+', we
7378            are skipping over the on_failure_jump, so we have to push
7379            something meaningless for pop_failure_jump to pop.  */
7380         CASE (dummy_failure_jump):
7381           DEBUG_PRINT1 ("EXECUTING dummy_failure_jump.\n");
7382           /* It doesn't matter what we push for the string here.  What
7383              the code at `fail' tests is the value for the pattern.  */
7384           PUSH_FAILURE_POINT (NULL, NULL, -2);
7385           goto unconditional_jump;
7386 
7387 
7388         /* At the end of an alternative, we need to push a dummy failure
7389            point in case we are followed by a `pop_failure_jump', because
7390            we don't want the failure point for the alternative to be
7391            popped.  For example, matching `(a|ab)*' against `aab'
7392            requires that we match the `ab' alternative.  */
7393         CASE (push_dummy_failure):
7394           DEBUG_PRINT1 ("EXECUTING push_dummy_failure.\n");
7395           /* See comments just above at `dummy_failure_jump' about the
7396              two zeroes.  */
7397           PUSH_FAILURE_POINT (NULL, NULL, -2);
7398           NEXT;
7399 
7400         /* Have to succeed matching what follows at least n times.
7401            After that, handle like `on_failure_jump'.  */
7402         CASE (succeed_n):
7403           EXTRACT_NUMBER (mcnt, p + OFFSET_ADDRESS_SIZE);
7404           DEBUG_PRINT2 ("EXECUTING succeed_n %d.\n", mcnt);
7405 
7406           assert (mcnt >= 0);
7407           /* Originally, this is how many times we HAVE to succeed.  */
7408           if (mcnt > 0)
7409             {
7410                mcnt--;
7411 	       p += OFFSET_ADDRESS_SIZE;
7412                STORE_NUMBER_AND_INCR (p, mcnt);
7413 #ifdef _LIBC
7414                DEBUG_PRINT3 ("  Setting %p to %d.\n", p - OFFSET_ADDRESS_SIZE
7415 			     , mcnt);
7416 #else
7417                DEBUG_PRINT3 ("  Setting 0x%x to %d.\n", p - OFFSET_ADDRESS_SIZE
7418 			     , mcnt);
7419 #endif
7420             }
7421 	  else if (mcnt == 0)
7422             {
7423 #ifdef _LIBC
7424               DEBUG_PRINT2 ("  Setting two bytes from %p to no_op.\n",
7425 			    p + OFFSET_ADDRESS_SIZE);
7426 #else
7427               DEBUG_PRINT2 ("  Setting two bytes from 0x%x to no_op.\n",
7428 			    p + OFFSET_ADDRESS_SIZE);
7429 #endif /* _LIBC */
7430 
7431 #ifdef WCHAR
7432 	      p[1] = (UCHAR_T) no_op;
7433 #else
7434 	      p[2] = (UCHAR_T) no_op;
7435               p[3] = (UCHAR_T) no_op;
7436 #endif /* WCHAR */
7437               goto on_failure;
7438             }
7439           NEXT;
7440 
7441         CASE (jump_n):
7442           EXTRACT_NUMBER (mcnt, p + OFFSET_ADDRESS_SIZE);
7443           DEBUG_PRINT2 ("EXECUTING jump_n %d.\n", mcnt);
7444 
7445           /* Originally, this is how many times we CAN jump.  */
7446           if (mcnt)
7447             {
7448                mcnt--;
7449                STORE_NUMBER (p + OFFSET_ADDRESS_SIZE, mcnt);
7450 
7451 #ifdef _LIBC
7452                DEBUG_PRINT3 ("  Setting %p to %d.\n", p + OFFSET_ADDRESS_SIZE,
7453 			     mcnt);
7454 #else
7455                DEBUG_PRINT3 ("  Setting 0x%x to %d.\n", p + OFFSET_ADDRESS_SIZE,
7456 			     mcnt);
7457 #endif /* _LIBC */
7458 	       goto unconditional_jump;
7459             }
7460           /* If don't have to jump any more, skip over the rest of command.  */
7461 	  else
7462 	    p += 2 * OFFSET_ADDRESS_SIZE;
7463           NEXT;
7464 
7465 	CASE (set_number_at):
7466 	  {
7467             DEBUG_PRINT1 ("EXECUTING set_number_at.\n");
7468 
7469             EXTRACT_NUMBER_AND_INCR (mcnt, p);
7470             p1 = p + mcnt;
7471             EXTRACT_NUMBER_AND_INCR (mcnt, p);
7472 #ifdef _LIBC
7473             DEBUG_PRINT3 ("  Setting %p to %d.\n", p1, mcnt);
7474 #else
7475             DEBUG_PRINT3 ("  Setting 0x%x to %d.\n", p1, mcnt);
7476 #endif
7477 	    STORE_NUMBER (p1, mcnt);
7478             NEXT;
7479           }
7480 
7481 #if 0
7482 	/* The DEC Alpha C compiler 3.x generates incorrect code for the
7483 	   test  WORDCHAR_P (d - 1) != WORDCHAR_P (d)  in the expansion of
7484 	   AT_WORD_BOUNDARY, so this code is disabled.  Expanding the
7485 	   macro and introducing temporary variables works around the bug.  */
7486 
7487 	CASE (wordbound):
7488 	  DEBUG_PRINT1 ("EXECUTING wordbound.\n");
7489 	  if (AT_WORD_BOUNDARY (d))
7490 	    {
7491 	      NEXT;
7492 	    }
7493 	  goto fail;
7494 
7495 	CASE (notwordbound):
7496 	  DEBUG_PRINT1 ("EXECUTING notwordbound.\n");
7497 	  if (AT_WORD_BOUNDARY (d))
7498 	    goto fail;
7499 	  NEXT;
7500 #else
7501 	CASE (wordbound):
7502 	{
7503 	  boolean prevchar, thischar;
7504 
7505 	  DEBUG_PRINT1 ("EXECUTING wordbound.\n");
7506 	  if (AT_STRINGS_BEG (d) || AT_STRINGS_END (d))
7507 	    {
7508 	      NEXT;
7509 	    }
7510 
7511 	  prevchar = WORDCHAR_P (d - 1);
7512 	  thischar = WORDCHAR_P (d);
7513 	  if (prevchar != thischar)
7514 	    {
7515 	      NEXT;
7516 	    }
7517 	  goto fail;
7518 	}
7519 
7520       CASE (notwordbound):
7521 	{
7522 	  boolean prevchar, thischar;
7523 
7524 	  DEBUG_PRINT1 ("EXECUTING notwordbound.\n");
7525 	  if (AT_STRINGS_BEG (d) || AT_STRINGS_END (d))
7526 	    goto fail;
7527 
7528 	  prevchar = WORDCHAR_P (d - 1);
7529 	  thischar = WORDCHAR_P (d);
7530 	  if (prevchar != thischar)
7531 	    goto fail;
7532 	  NEXT;
7533 	}
7534 #endif
7535 
7536 	CASE (wordbeg):
7537           DEBUG_PRINT1 ("EXECUTING wordbeg.\n");
7538 	  if (!AT_STRINGS_END (d) && WORDCHAR_P (d)
7539 	      && (AT_STRINGS_BEG (d) || !WORDCHAR_P (d - 1)))
7540 	    {
7541 	      NEXT;
7542 	    }
7543           goto fail;
7544 
7545 	CASE (wordend):
7546           DEBUG_PRINT1 ("EXECUTING wordend.\n");
7547 	  if (!AT_STRINGS_BEG (d) && WORDCHAR_P (d - 1)
7548               && (AT_STRINGS_END (d) || !WORDCHAR_P (d)))
7549 	    {
7550 	      NEXT;
7551 	    }
7552           goto fail;
7553 
7554 #ifdef emacs
7555   	CASE (before_dot):
7556           DEBUG_PRINT1 ("EXECUTING before_dot.\n");
7557  	  if (PTR_CHAR_POS ((unsigned char *) d) >= point)
7558   	    goto fail;
7559   	  NEXT;
7560 
7561   	CASE (at_dot):
7562           DEBUG_PRINT1 ("EXECUTING at_dot.\n");
7563  	  if (PTR_CHAR_POS ((unsigned char *) d) != point)
7564   	    goto fail;
7565   	  NEXT;
7566 
7567   	CASE (after_dot):
7568           DEBUG_PRINT1 ("EXECUTING after_dot.\n");
7569           if (PTR_CHAR_POS ((unsigned char *) d) <= point)
7570   	    goto fail;
7571   	  NEXT;
7572 
7573 	CASE (syntaxspec):
7574           DEBUG_PRINT2 ("EXECUTING syntaxspec %d.\n", mcnt);
7575 	  mcnt = *p++;
7576 	  goto matchsyntax;
7577 
7578         CASE (wordchar):
7579           DEBUG_PRINT1 ("EXECUTING Emacs wordchar.\n");
7580 	  mcnt = (int) Sword;
7581         matchsyntax:
7582 	  PREFETCH ();
7583 	  /* Can't use *d++ here; SYNTAX may be an unsafe macro.  */
7584 	  d++;
7585 	  if (SYNTAX (d[-1]) != (enum syntaxcode) mcnt)
7586 	    goto fail;
7587           SET_REGS_MATCHED ();
7588 	  NEXT;
7589 
7590 	CASE (notsyntaxspec):
7591           DEBUG_PRINT2 ("EXECUTING notsyntaxspec %d.\n", mcnt);
7592 	  mcnt = *p++;
7593 	  goto matchnotsyntax;
7594 
7595         CASE (notwordchar):
7596           DEBUG_PRINT1 ("EXECUTING Emacs notwordchar.\n");
7597 	  mcnt = (int) Sword;
7598         matchnotsyntax:
7599 	  PREFETCH ();
7600 	  /* Can't use *d++ here; SYNTAX may be an unsafe macro.  */
7601 	  d++;
7602 	  if (SYNTAX (d[-1]) == (enum syntaxcode) mcnt)
7603 	    goto fail;
7604 	  SET_REGS_MATCHED ();
7605           NEXT;
7606 
7607 #else /* not emacs */
7608 	CASE (wordchar):
7609           DEBUG_PRINT1 ("EXECUTING non-Emacs wordchar.\n");
7610 	  PREFETCH ();
7611           if (!WORDCHAR_P (d))
7612             goto fail;
7613 	  SET_REGS_MATCHED ();
7614           d++;
7615 	  NEXT;
7616 
7617 	CASE (notwordchar):
7618           DEBUG_PRINT1 ("EXECUTING non-Emacs notwordchar.\n");
7619 	  PREFETCH ();
7620 	  if (WORDCHAR_P (d))
7621             goto fail;
7622           SET_REGS_MATCHED ();
7623           d++;
7624 	  NEXT;
7625 #endif /* not emacs */
7626 
7627 #ifndef __GNUC__
7628         default:
7629           abort ();
7630 	}
7631       continue;  /* Successfully executed one pattern command; keep going.  */
7632 #endif
7633 
7634 
7635     /* We goto here if a matching operation fails. */
7636     fail:
7637       if (!FAIL_STACK_EMPTY ())
7638 	{ /* A restart point is known.  Restore to that state.  */
7639           DEBUG_PRINT1 ("\nFAIL:\n");
7640           POP_FAILURE_POINT (d, p,
7641                              lowest_active_reg, highest_active_reg,
7642                              regstart, regend, reg_info);
7643 
7644           /* If this failure point is a dummy, try the next one.  */
7645           if (!p)
7646 	    goto fail;
7647 
7648           /* If we failed to the end of the pattern, don't examine *p.  */
7649 	  assert (p <= pend);
7650           if (p < pend)
7651             {
7652               boolean is_a_jump_n = false;
7653 
7654               /* If failed to a backwards jump that's part of a repetition
7655                  loop, need to pop this failure point and use the next one.  */
7656               switch ((re_opcode_t) *p)
7657                 {
7658                 case jump_n:
7659                   is_a_jump_n = true;
7660                 case maybe_pop_jump:
7661                 case pop_failure_jump:
7662                 case jump:
7663                   p1 = p + 1;
7664                   EXTRACT_NUMBER_AND_INCR (mcnt, p1);
7665                   p1 += mcnt;
7666 
7667                   if ((is_a_jump_n && (re_opcode_t) *p1 == succeed_n)
7668                       || (!is_a_jump_n
7669                           && (re_opcode_t) *p1 == on_failure_jump))
7670                     goto fail;
7671                   break;
7672                 default:
7673                   /* do nothing */ ;
7674                 }
7675             }
7676 
7677           if (d >= string1 && d <= end1)
7678 	    dend = end_match_1;
7679         }
7680       else
7681         break;   /* Matching at this starting point really fails.  */
7682     } /* for (;;) */
7683 
7684   if (best_regs_set)
7685     goto restore_best_regs;
7686 
7687   FREE_VARIABLES ();
7688 
7689   return -1;         			/* Failure to match.  */
7690 } /* re_match_2 */
7691 
7692 /* Subroutine definitions for re_match_2.  */
7693 
7694 
7695 /* We are passed P pointing to a register number after a start_memory.
7696 
7697    Return true if the pattern up to the corresponding stop_memory can
7698    match the empty string, and false otherwise.
7699 
7700    If we find the matching stop_memory, sets P to point to one past its number.
7701    Otherwise, sets P to an undefined byte less than or equal to END.
7702 
7703    We don't handle duplicates properly (yet).  */
7704 
7705 static boolean
7706 PREFIX(group_match_null_string_p) (p, end, reg_info)
7707     UCHAR_T **p, *end;
PREFIX(register_info_type)7708     PREFIX(register_info_type) *reg_info;
7709 {
7710   int mcnt;
7711   /* Point to after the args to the start_memory.  */
7712   UCHAR_T *p1 = *p + 2;
7713 
7714   while (p1 < end)
7715     {
7716       /* Skip over opcodes that can match nothing, and return true or
7717 	 false, as appropriate, when we get to one that can't, or to the
7718          matching stop_memory.  */
7719 
7720       switch ((re_opcode_t) *p1)
7721         {
7722         /* Could be either a loop or a series of alternatives.  */
7723         case on_failure_jump:
7724           p1++;
7725           EXTRACT_NUMBER_AND_INCR (mcnt, p1);
7726 
7727           /* If the next operation is not a jump backwards in the
7728 	     pattern.  */
7729 
7730 	  if (mcnt >= 0)
7731 	    {
7732               /* Go through the on_failure_jumps of the alternatives,
7733                  seeing if any of the alternatives cannot match nothing.
7734                  The last alternative starts with only a jump,
7735                  whereas the rest start with on_failure_jump and end
7736                  with a jump, e.g., here is the pattern for `a|b|c':
7737 
7738                  /on_failure_jump/0/6/exactn/1/a/jump_past_alt/0/6
7739                  /on_failure_jump/0/6/exactn/1/b/jump_past_alt/0/3
7740                  /exactn/1/c
7741 
7742                  So, we have to first go through the first (n-1)
7743                  alternatives and then deal with the last one separately.  */
7744 
7745 
7746               /* Deal with the first (n-1) alternatives, which start
7747                  with an on_failure_jump (see above) that jumps to right
7748                  past a jump_past_alt.  */
7749 
7750               while ((re_opcode_t) p1[mcnt-(1+OFFSET_ADDRESS_SIZE)] ==
7751 		     jump_past_alt)
7752                 {
7753                   /* `mcnt' holds how many bytes long the alternative
7754                      is, including the ending `jump_past_alt' and
7755                      its number.  */
7756 
7757                   if (!PREFIX(alt_match_null_string_p) (p1, p1 + mcnt -
7758 						(1 + OFFSET_ADDRESS_SIZE),
7759 						reg_info))
7760                     return false;
7761 
7762                   /* Move to right after this alternative, including the
7763 		     jump_past_alt.  */
7764                   p1 += mcnt;
7765 
7766                   /* Break if it's the beginning of an n-th alternative
7767                      that doesn't begin with an on_failure_jump.  */
7768                   if ((re_opcode_t) *p1 != on_failure_jump)
7769                     break;
7770 
7771 		  /* Still have to check that it's not an n-th
7772 		     alternative that starts with an on_failure_jump.  */
7773 		  p1++;
7774                   EXTRACT_NUMBER_AND_INCR (mcnt, p1);
7775                   if ((re_opcode_t) p1[mcnt-(1+OFFSET_ADDRESS_SIZE)] !=
7776 		      jump_past_alt)
7777                     {
7778 		      /* Get to the beginning of the n-th alternative.  */
7779                       p1 -= 1 + OFFSET_ADDRESS_SIZE;
7780                       break;
7781                     }
7782                 }
7783 
7784               /* Deal with the last alternative: go back and get number
7785                  of the `jump_past_alt' just before it.  `mcnt' contains
7786                  the length of the alternative.  */
7787               EXTRACT_NUMBER (mcnt, p1 - OFFSET_ADDRESS_SIZE);
7788 
7789               if (!PREFIX(alt_match_null_string_p) (p1, p1 + mcnt, reg_info))
7790                 return false;
7791 
7792               p1 += mcnt;	/* Get past the n-th alternative.  */
7793             } /* if mcnt > 0 */
7794           break;
7795 
7796 
7797         case stop_memory:
7798 	  assert (p1[1] == **p);
7799           *p = p1 + 2;
7800           return true;
7801 
7802 
7803         default:
7804           if (!PREFIX(common_op_match_null_string_p) (&p1, end, reg_info))
7805             return false;
7806         }
7807     } /* while p1 < end */
7808 
7809   return false;
7810 } /* group_match_null_string_p */
7811 
7812 
7813 /* Similar to group_match_null_string_p, but doesn't deal with alternatives:
7814    It expects P to be the first byte of a single alternative and END one
7815    byte past the last. The alternative can contain groups.  */
7816 
7817 static boolean
7818 PREFIX(alt_match_null_string_p) (p, end, reg_info)
7819     UCHAR_T *p, *end;
PREFIX(register_info_type)7820     PREFIX(register_info_type) *reg_info;
7821 {
7822   int mcnt;
7823   UCHAR_T *p1 = p;
7824 
7825   while (p1 < end)
7826     {
7827       /* Skip over opcodes that can match nothing, and break when we get
7828          to one that can't.  */
7829 
7830       switch ((re_opcode_t) *p1)
7831         {
7832 	/* It's a loop.  */
7833         case on_failure_jump:
7834           p1++;
7835           EXTRACT_NUMBER_AND_INCR (mcnt, p1);
7836           p1 += mcnt;
7837           break;
7838 
7839 	default:
7840           if (!PREFIX(common_op_match_null_string_p) (&p1, end, reg_info))
7841             return false;
7842         }
7843     }  /* while p1 < end */
7844 
7845   return true;
7846 } /* alt_match_null_string_p */
7847 
7848 
7849 /* Deals with the ops common to group_match_null_string_p and
7850    alt_match_null_string_p.
7851 
7852    Sets P to one after the op and its arguments, if any.  */
7853 
7854 static boolean
7855 PREFIX(common_op_match_null_string_p) (p, end, reg_info)
7856     UCHAR_T **p, *end;
PREFIX(register_info_type)7857     PREFIX(register_info_type) *reg_info;
7858 {
7859   int mcnt;
7860   boolean ret;
7861   int reg_no;
7862   UCHAR_T *p1 = *p;
7863 
7864   switch ((re_opcode_t) *p1++)
7865     {
7866     case no_op:
7867     case begline:
7868     case endline:
7869     case begbuf:
7870     case endbuf:
7871     case wordbeg:
7872     case wordend:
7873     case wordbound:
7874     case notwordbound:
7875 #ifdef emacs
7876     case before_dot:
7877     case at_dot:
7878     case after_dot:
7879 #endif
7880       break;
7881 
7882     case start_memory:
7883       reg_no = *p1;
7884       assert (reg_no > 0 && reg_no <= MAX_REGNUM);
7885       ret = PREFIX(group_match_null_string_p) (&p1, end, reg_info);
7886 
7887       /* Have to set this here in case we're checking a group which
7888          contains a group and a back reference to it.  */
7889 
7890       if (REG_MATCH_NULL_STRING_P (reg_info[reg_no]) == MATCH_NULL_UNSET_VALUE)
7891         REG_MATCH_NULL_STRING_P (reg_info[reg_no]) = ret;
7892 
7893       if (!ret)
7894         return false;
7895       break;
7896 
7897     /* If this is an optimized succeed_n for zero times, make the jump.  */
7898     case jump:
7899       EXTRACT_NUMBER_AND_INCR (mcnt, p1);
7900       if (mcnt >= 0)
7901         p1 += mcnt;
7902       else
7903         return false;
7904       break;
7905 
7906     case succeed_n:
7907       /* Get to the number of times to succeed.  */
7908       p1 += OFFSET_ADDRESS_SIZE;
7909       EXTRACT_NUMBER_AND_INCR (mcnt, p1);
7910 
7911       if (mcnt == 0)
7912         {
7913           p1 -= 2 * OFFSET_ADDRESS_SIZE;
7914           EXTRACT_NUMBER_AND_INCR (mcnt, p1);
7915           p1 += mcnt;
7916         }
7917       else
7918         return false;
7919       break;
7920 
7921     case duplicate:
7922       if (!REG_MATCH_NULL_STRING_P (reg_info[*p1]))
7923         return false;
7924       break;
7925 
7926     case set_number_at:
7927       p1 += 2 * OFFSET_ADDRESS_SIZE;
7928 
7929     default:
7930       /* All other opcodes mean we cannot match the empty string.  */
7931       return false;
7932   }
7933 
7934   *p = p1;
7935   return true;
7936 } /* common_op_match_null_string_p */
7937 
7938 
7939 /* Return zero if TRANSLATE[S1] and TRANSLATE[S2] are identical for LEN
7940    bytes; nonzero otherwise.  */
7941 
7942 static int
7943 PREFIX(bcmp_translate) (s1, s2, len, translate)
7944      const CHAR_T *s1, *s2;
7945      register int len;
7946      RE_TRANSLATE_TYPE translate;
7947 {
7948   register const UCHAR_T *p1 = (const UCHAR_T *) s1;
7949   register const UCHAR_T *p2 = (const UCHAR_T *) s2;
7950   while (len)
7951     {
7952 #ifdef WCHAR
7953       if (((*p1<=0xff)?translate[*p1++]:*p1++)
7954 	  != ((*p2<=0xff)?translate[*p2++]:*p2++))
7955 	return 1;
7956 #else /* BYTE */
7957       if (translate[*p1++] != translate[*p2++]) return 1;
7958 #endif /* WCHAR */
7959       len--;
7960     }
7961   return 0;
7962 }
7963 
7964 
7965 #else /* not INSIDE_RECURSION */
7966 
7967 /* Entry points for GNU code.  */
7968 
7969 /* re_compile_pattern is the GNU regular expression compiler: it
7970    compiles PATTERN (of length SIZE) and puts the result in BUFP.
7971    Returns 0 if the pattern was valid, otherwise an error string.
7972 
7973    Assumes the `allocated' (and perhaps `buffer') and `translate' fields
7974    are set in BUFP on entry.
7975 
7976    We call regex_compile to do the actual compilation.  */
7977 
7978 const char *
7979 re_compile_pattern (pattern, length, bufp)
7980      const char *pattern;
7981      size_t length;
7982      struct re_pattern_buffer *bufp;
7983 {
7984   reg_errcode_t ret;
7985 
7986   /* GNU code is written to assume at least RE_NREGS registers will be set
7987      (and at least one extra will be -1).  */
7988   bufp->regs_allocated = REGS_UNALLOCATED;
7989 
7990   /* And GNU code determines whether or not to get register information
7991      by passing null for the REGS argument to re_match, etc., not by
7992      setting no_sub.  */
7993   bufp->no_sub = 0;
7994 
7995   /* Match anchors at newline.  */
7996   bufp->newline_anchor = 1;
7997 
7998 # ifdef MBS_SUPPORT
7999   if (MB_CUR_MAX != 1)
8000     ret = wcs_regex_compile (pattern, length, re_syntax_options, bufp);
8001   else
8002 # endif
8003     ret = byte_regex_compile (pattern, length, re_syntax_options, bufp);
8004 
8005   if (!ret)
8006     return NULL;
8007   return gettext (re_error_msgid + re_error_msgid_idx[(int) ret]);
8008 }
8009 #ifdef _LIBC
8010 weak_alias (__re_compile_pattern, re_compile_pattern)
8011 #endif
8012 
8013 /* Entry points compatible with 4.2 BSD regex library.  We don't define
8014    them unless specifically requested.  */
8015 
8016 #if defined _REGEX_RE_COMP || defined _LIBC
8017 
8018 /* BSD has one and only one pattern buffer.  */
8019 static struct re_pattern_buffer re_comp_buf;
8020 
8021 char *
8022 #ifdef _LIBC
8023 /* Make these definitions weak in libc, so POSIX programs can redefine
8024    these names if they don't use our functions, and still use
8025    regcomp/regexec below without link errors.  */
8026 weak_function
8027 #endif
8028 re_comp (s)
8029     const char *s;
8030 {
8031   reg_errcode_t ret;
8032 
8033   if (!s)
8034     {
8035       if (!re_comp_buf.buffer)
8036 	return gettext ("No previous regular expression");
8037       return 0;
8038     }
8039 
8040   if (!re_comp_buf.buffer)
8041     {
8042       re_comp_buf.buffer = (unsigned char *) malloc (200);
8043       if (re_comp_buf.buffer == NULL)
8044         return (char *) gettext (re_error_msgid
8045 				 + re_error_msgid_idx[(int) REG_ESPACE]);
8046       re_comp_buf.allocated = 200;
8047 
8048       re_comp_buf.fastmap = (char *) malloc (1 << BYTEWIDTH);
8049       if (re_comp_buf.fastmap == NULL)
8050 	return (char *) gettext (re_error_msgid
8051 				 + re_error_msgid_idx[(int) REG_ESPACE]);
8052     }
8053 
8054   /* Since `re_exec' always passes NULL for the `regs' argument, we
8055      don't need to initialize the pattern buffer fields which affect it.  */
8056 
8057   /* Match anchors at newlines.  */
8058   re_comp_buf.newline_anchor = 1;
8059 
8060 # ifdef MBS_SUPPORT
8061   if (MB_CUR_MAX != 1)
8062     ret = wcs_regex_compile (s, strlen (s), re_syntax_options, &re_comp_buf);
8063   else
8064 # endif
8065     ret = byte_regex_compile (s, strlen (s), re_syntax_options, &re_comp_buf);
8066 
8067   if (!ret)
8068     return NULL;
8069 
8070   /* Yes, we're discarding `const' here if !HAVE_LIBINTL.  */
8071   return (char *) gettext (re_error_msgid + re_error_msgid_idx[(int) ret]);
8072 }
8073 
8074 
8075 int
8076 #ifdef _LIBC
8077 weak_function
8078 #endif
8079 re_exec (s)
8080     const char *s;
8081 {
8082   const int len = strlen (s);
8083   return
8084     0 <= re_search (&re_comp_buf, s, len, 0, len, (struct re_registers *) 0);
8085 }
8086 
8087 #endif /* _REGEX_RE_COMP */
8088 
8089 /* POSIX.2 functions.  Don't define these for Emacs.  */
8090 
8091 #ifndef emacs
8092 
8093 /* regcomp takes a regular expression as a string and compiles it.
8094 
8095    PREG is a regex_t *.  We do not expect any fields to be initialized,
8096    since POSIX says we shouldn't.  Thus, we set
8097 
8098      `buffer' to the compiled pattern;
8099      `used' to the length of the compiled pattern;
8100      `syntax' to RE_SYNTAX_POSIX_EXTENDED if the
8101        REG_EXTENDED bit in CFLAGS is set; otherwise, to
8102        RE_SYNTAX_POSIX_BASIC;
8103      `newline_anchor' to REG_NEWLINE being set in CFLAGS;
8104      `fastmap' to an allocated space for the fastmap;
8105      `fastmap_accurate' to zero;
8106      `re_nsub' to the number of subexpressions in PATTERN.
8107 
8108    PATTERN is the address of the pattern string.
8109 
8110    CFLAGS is a series of bits which affect compilation.
8111 
8112      If REG_EXTENDED is set, we use POSIX extended syntax; otherwise, we
8113      use POSIX basic syntax.
8114 
8115      If REG_NEWLINE is set, then . and [^...] don't match newline.
8116      Also, regexec will try a match beginning after every newline.
8117 
8118      If REG_ICASE is set, then we considers upper- and lowercase
8119      versions of letters to be equivalent when matching.
8120 
8121      If REG_NOSUB is set, then when PREG is passed to regexec, that
8122      routine will report only success or failure, and nothing about the
8123      registers.
8124 
8125    It returns 0 if it succeeds, nonzero if it doesn't.  (See regex.h for
8126    the return codes and their meanings.)  */
8127 
8128 int
8129 regcomp (preg, pattern, cflags)
8130     regex_t *preg;
8131     const char *pattern;
8132     int cflags;
8133 {
8134   reg_errcode_t ret;
8135   reg_syntax_t syntax
8136     = (cflags & REG_EXTENDED) ?
8137       RE_SYNTAX_POSIX_EXTENDED : RE_SYNTAX_POSIX_BASIC;
8138 
8139   /* regex_compile will allocate the space for the compiled pattern.  */
8140   preg->buffer = 0;
8141   preg->allocated = 0;
8142   preg->used = 0;
8143 
8144   /* Try to allocate space for the fastmap.  */
8145   preg->fastmap = (char *) malloc (1 << BYTEWIDTH);
8146 
8147   if (cflags & REG_ICASE)
8148     {
8149       unsigned i;
8150 
8151       preg->translate
8152 	= (RE_TRANSLATE_TYPE) malloc (CHAR_SET_SIZE
8153 				      * sizeof (*(RE_TRANSLATE_TYPE)0));
8154       if (preg->translate == NULL)
8155         return (int) REG_ESPACE;
8156 
8157       /* Map uppercase characters to corresponding lowercase ones.  */
8158       for (i = 0; i < CHAR_SET_SIZE; i++)
8159         preg->translate[i] = ISUPPER (i) ? TOLOWER (i) : i;
8160     }
8161   else
8162     preg->translate = NULL;
8163 
8164   /* If REG_NEWLINE is set, newlines are treated differently.  */
8165   if (cflags & REG_NEWLINE)
8166     { /* REG_NEWLINE implies neither . nor [^...] match newline.  */
8167       syntax &= ~RE_DOT_NEWLINE;
8168       syntax |= RE_HAT_LISTS_NOT_NEWLINE;
8169       /* It also changes the matching behavior.  */
8170       preg->newline_anchor = 1;
8171     }
8172   else
8173     preg->newline_anchor = 0;
8174 
8175   preg->no_sub = !!(cflags & REG_NOSUB);
8176 
8177   /* POSIX says a null character in the pattern terminates it, so we
8178      can use strlen here in compiling the pattern.  */
8179 # ifdef MBS_SUPPORT
8180   if (MB_CUR_MAX != 1)
8181     ret = wcs_regex_compile (pattern, strlen (pattern), syntax, preg);
8182   else
8183 # endif
8184     ret = byte_regex_compile (pattern, strlen (pattern), syntax, preg);
8185 
8186   /* POSIX doesn't distinguish between an unmatched open-group and an
8187      unmatched close-group: both are REG_EPAREN.  */
8188   if (ret == REG_ERPAREN) ret = REG_EPAREN;
8189 
8190   if (ret == REG_NOERROR && preg->fastmap)
8191     {
8192       /* Compute the fastmap now, since regexec cannot modify the pattern
8193 	 buffer.  */
8194       if (re_compile_fastmap (preg) == -2)
8195 	{
8196 	  /* Some error occurred while computing the fastmap, just forget
8197 	     about it.  */
8198 	  free (preg->fastmap);
8199 	  preg->fastmap = NULL;
8200 	}
8201     }
8202 
8203   return (int) ret;
8204 }
8205 #ifdef _LIBC
8206 weak_alias (__regcomp, regcomp)
8207 #endif
8208 
8209 
8210 /* regexec searches for a given pattern, specified by PREG, in the
8211    string STRING.
8212 
8213    If NMATCH is zero or REG_NOSUB was set in the cflags argument to
8214    `regcomp', we ignore PMATCH.  Otherwise, we assume PMATCH has at
8215    least NMATCH elements, and we set them to the offsets of the
8216    corresponding matched substrings.
8217 
8218    EFLAGS specifies `execution flags' which affect matching: if
8219    REG_NOTBOL is set, then ^ does not match at the beginning of the
8220    string; if REG_NOTEOL is set, then $ does not match at the end.
8221 
8222    We return 0 if we find a match and REG_NOMATCH if not.  */
8223 
8224 int
8225 regexec (preg, string, nmatch, pmatch, eflags)
8226     const regex_t *preg;
8227     const char *string;
8228     size_t nmatch;
8229     regmatch_t pmatch[];
8230     int eflags;
8231 {
8232   int ret;
8233   struct re_registers regs;
8234   regex_t private_preg;
8235   int len = strlen (string);
8236   boolean want_reg_info = !preg->no_sub && nmatch > 0;
8237 
8238   private_preg = *preg;
8239 
8240   private_preg.not_bol = !!(eflags & REG_NOTBOL);
8241   private_preg.not_eol = !!(eflags & REG_NOTEOL);
8242 
8243   /* The user has told us exactly how many registers to return
8244      information about, via `nmatch'.  We have to pass that on to the
8245      matching routines.  */
8246   private_preg.regs_allocated = REGS_FIXED;
8247 
8248   if (want_reg_info)
8249     {
8250       regs.num_regs = nmatch;
8251       regs.start = TALLOC (nmatch * 2, regoff_t);
8252       if (regs.start == NULL)
8253         return (int) REG_NOMATCH;
8254       regs.end = regs.start + nmatch;
8255     }
8256 
8257   /* Perform the searching operation.  */
8258   ret = re_search (&private_preg, string, len,
8259                    /* start: */ 0, /* range: */ len,
8260                    want_reg_info ? &regs : (struct re_registers *) 0);
8261 
8262   /* Copy the register information to the POSIX structure.  */
8263   if (want_reg_info)
8264     {
8265       if (ret >= 0)
8266         {
8267           unsigned r;
8268 
8269           for (r = 0; r < nmatch; r++)
8270             {
8271               pmatch[r].rm_so = regs.start[r];
8272               pmatch[r].rm_eo = regs.end[r];
8273             }
8274         }
8275 
8276       /* If we needed the temporary register info, free the space now.  */
8277       free (regs.start);
8278     }
8279 
8280   /* We want zero return to mean success, unlike `re_search'.  */
8281   return ret >= 0 ? (int) REG_NOERROR : (int) REG_NOMATCH;
8282 }
8283 #ifdef _LIBC
8284 weak_alias (__regexec, regexec)
8285 #endif
8286 
8287 
8288 /* Returns a message corresponding to an error code, ERRCODE, returned
8289    from either regcomp or regexec.   We don't use PREG here.  */
8290 
8291 size_t
8292 regerror (errcode, preg, errbuf, errbuf_size)
8293     int errcode;
8294     const regex_t *preg;
8295     char *errbuf;
8296     size_t errbuf_size;
8297 {
8298   const char *msg;
8299   size_t msg_size;
8300 
8301   if (errcode < 0
8302       || errcode >= (int) (sizeof (re_error_msgid_idx)
8303 			   / sizeof (re_error_msgid_idx[0])))
8304     /* Only error codes returned by the rest of the code should be passed
8305        to this routine.  If we are given anything else, or if other regex
8306        code generates an invalid error code, then the program has a bug.
8307        Dump core so we can fix it.  */
8308     abort ();
8309 
8310   msg = gettext (re_error_msgid + re_error_msgid_idx[errcode]);
8311 
8312   msg_size = strlen (msg) + 1; /* Includes the null.  */
8313 
8314   if (errbuf_size != 0)
8315     {
8316       if (msg_size > errbuf_size)
8317         {
8318 #if defined HAVE_MEMPCPY || defined _LIBC
8319 	  *((char *) __mempcpy (errbuf, msg, errbuf_size - 1)) = '\0';
8320 #else
8321           memcpy (errbuf, msg, errbuf_size - 1);
8322           errbuf[errbuf_size - 1] = 0;
8323 #endif
8324         }
8325       else
8326         memcpy (errbuf, msg, msg_size);
8327     }
8328 
8329   return msg_size;
8330 }
8331 #ifdef _LIBC
8332 weak_alias (__regerror, regerror)
8333 #endif
8334 
8335 
8336 /* Free dynamically allocated space used by PREG.  */
8337 
8338 void
8339 regfree (preg)
8340     regex_t *preg;
8341 {
8342   if (preg->buffer != NULL)
8343     free (preg->buffer);
8344   preg->buffer = NULL;
8345 
8346   preg->allocated = 0;
8347   preg->used = 0;
8348 
8349   if (preg->fastmap != NULL)
8350     free (preg->fastmap);
8351   preg->fastmap = NULL;
8352   preg->fastmap_accurate = 0;
8353 
8354   if (preg->translate != NULL)
8355     free (preg->translate);
8356   preg->translate = NULL;
8357 }
8358 #ifdef _LIBC
8359 weak_alias (__regfree, regfree)
8360 #endif
8361 
8362 #endif /* not emacs  */
8363 
8364 #endif /* not INSIDE_RECURSION */
8365 
8366 
/* Remove the helper macros used by the code above.  This list sits
   after the `#endif' of the INSIDE_RECURSION conditional, so it is
   processed on every pass over the file.  Presumably the file is
   included more than once with different BYTE/WCHAR settings and the
   macros must be cleared between passes -- TODO confirm against the
   include site.  */
#undef STORE_NUMBER
#undef STORE_NUMBER_AND_INCR
#undef EXTRACT_NUMBER
#undef EXTRACT_NUMBER_AND_INCR

#undef DEBUG_PRINT_COMPILED_PATTERN
#undef DEBUG_PRINT_DOUBLE_STRING

#undef INIT_FAIL_STACK
#undef RESET_FAIL_STACK
#undef DOUBLE_FAIL_STACK
#undef PUSH_PATTERN_OP
#undef PUSH_FAILURE_POINTER
#undef PUSH_FAILURE_INT
#undef PUSH_FAILURE_ELT
#undef POP_FAILURE_POINTER
#undef POP_FAILURE_INT
#undef POP_FAILURE_ELT
#undef DEBUG_PUSH
#undef DEBUG_POP
#undef PUSH_FAILURE_POINT
#undef POP_FAILURE_POINT

#undef REG_UNSET_VALUE
#undef REG_UNSET

#undef PATFETCH
#undef PATFETCH_RAW
#undef PATUNFETCH
#undef TRANSLATE

#undef INIT_BUF_SIZE
#undef GET_BUFFER_SPACE
#undef BUF_PUSH
#undef BUF_PUSH_2
#undef BUF_PUSH_3
#undef STORE_JUMP
#undef STORE_JUMP2
#undef INSERT_JUMP
#undef INSERT_JUMP2
#undef EXTEND_BUFFER
#undef GET_UNSIGNED_NUMBER
#undef FREE_STACK_RETURN

# undef POINTER_TO_OFFSET
/* NOTE(review): `MATCHING_IN_FRST_STRING' looks like a misspelling of
   `MATCHING_IN_FIRST_STRING'; if so, this #undef has no effect --
   confirm against the macro's definition earlier in the file.  */
# undef MATCHING_IN_FRST_STRING
# undef PREFETCH
# undef AT_STRINGS_BEG
# undef AT_STRINGS_END
# undef WORDCHAR_P
# undef FREE_VAR
# undef FREE_VARIABLES
# undef NO_HIGHEST_ACTIVE_REG
# undef NO_LOWEST_ACTIVE_REG

/* The per-variant configuration macros (character types, name prefix,
   BYTE/WCHAR selectors) are cleared as well.  */
# undef CHAR_T
# undef UCHAR_T
# undef COMPILED_BUFFER_VAR
# undef OFFSET_ADDRESS_SIZE
# undef CHAR_CLASS_SIZE
# undef PREFIX
# undef ARG_PREFIX
# undef PUT_CHAR
# undef BYTE
# undef WCHAR

/* Marker that the end of the file has been reached at least once.  */
# define DEFINED_ONCE
8434