xref: /netbsd-src/external/gpl2/gettext/dist/gettext-tools/libgrep/regex.c (revision 946379e7b37692fc43f68eb0d1c10daa0a7f3b6c)
1 /* Extended regular expression matching and search library,
2    version 0.12.
3    (Implements POSIX draft P1003.2/D11.2, except for some of the
4    internationalization features.)
5 
6    Copyright (C) 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
7    2002, 2003, 2004, 2006 Free Software Foundation, Inc.
8 
9    This program is free software; you can redistribute it and/or modify
10    it under the terms of the GNU General Public License as published by
11    the Free Software Foundation; either version 2, or (at your option)
12    any later version.
13 
14    This program is distributed in the hope that it will be useful,
15    but WITHOUT ANY WARRANTY; without even the implied warranty of
16    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
17    GNU General Public License for more details.
18 
19    You should have received a copy of the GNU General Public License along
20    with this program; if not, write to the Free Software Foundation,
21    Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.  */
22 
23 /* AIX requires this to be the first thing in the file. */
24 #if defined _AIX && !defined REGEX_MALLOC
25   #pragma alloca
26 #endif
27 
28 #undef	_GNU_SOURCE
29 #define _GNU_SOURCE
30 
31 #ifdef HAVE_CONFIG_H
32 # include <config.h>
33 #endif
34 
35 #ifndef INSIDE_RECURSION
36 
37 # include <stddef.h>
38 
39 # define WIDE_CHAR_SUPPORT (HAVE_WCTYPE_H && HAVE_WCHAR_H && HAVE_BTOWC)
40 
41 /* For platforms that support the ISO C Amendment 1 functionality we
42    support user-defined character classes.  */
43 # if defined _LIBC || WIDE_CHAR_SUPPORT
44 /* Solaris 2.5 has a bug: <wchar.h> must be included before <wctype.h>.  */
45 #  include <wchar.h>
46 #  include <wctype.h>
47 # endif
48 
49 # ifdef _LIBC
50 /* We have to keep the namespace clean.  */
51 #  define regfree(preg) __regfree (preg)
52 #  define regexec(pr, st, nm, pm, ef) __regexec (pr, st, nm, pm, ef)
53 #  define regcomp(preg, pattern, cflags) __regcomp (preg, pattern, cflags)
54 #  define regerror(errcode, preg, errbuf, errbuf_size) \
55 	__regerror(errcode, preg, errbuf, errbuf_size)
56 #  define re_set_registers(bu, re, nu, st, en) \
57 	__re_set_registers (bu, re, nu, st, en)
58 #  define re_match_2(bufp, string1, size1, string2, size2, pos, regs, stop) \
59 	__re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop)
60 #  define re_match(bufp, string, size, pos, regs) \
61 	__re_match (bufp, string, size, pos, regs)
62 #  define re_search(bufp, string, size, startpos, range, regs) \
63 	__re_search (bufp, string, size, startpos, range, regs)
64 #  define re_compile_pattern(pattern, length, bufp) \
65 	__re_compile_pattern (pattern, length, bufp)
66 #  define re_set_syntax(syntax) __re_set_syntax (syntax)
67 #  define re_search_2(bufp, st1, s1, st2, s2, startpos, range, regs, stop) \
68 	__re_search_2 (bufp, st1, s1, st2, s2, startpos, range, regs, stop)
69 #  define re_compile_fastmap(bufp) __re_compile_fastmap (bufp)
70 
71 #  define btowc __btowc
72 #  define iswctype __iswctype
73 #  define mbrtowc __mbrtowc
74 #  define wcslen __wcslen
75 #  define wcscoll __wcscoll
76 #  define wcrtomb __wcrtomb
77 #  define mempcpy __mempcpy
78 
79 /* We are also using some library internals.  */
80 #  include <locale/localeinfo.h>
81 #  include <locale/elem-hash.h>
82 #  include <langinfo.h>
83 #  include <locale/coll-lookup.h>
84 # endif
85 
86 # ifdef _LIBC
87 #  include <libintl.h>
88 #  undef gettext
89 #  define gettext(msgid) __dcgettext ("libc", msgid, LC_MESSAGES)
90    /* This define is so xgettext can find the internationalizable strings.  */
91 #  define gettext_noop(msgid) msgid
92 # else
93 /* This is for other GNU distributions with internationalized messages.  */
94 #  include "gettext.h"
95 # endif
96 
97 /* Support for bounded pointers.  */
98 # if !defined _LIBC && !defined __BOUNDED_POINTERS__
99 #  define __bounded	/* nothing */
100 #  define __unbounded	/* nothing */
101 #  define __ptrvalue	/* nothing */
102 # endif
103 
104 /* The `emacs' switch turns on certain matching commands
105    that make sense only in Emacs. */
106 # ifdef emacs
107 
108 #  include "lisp.h"
109 #  include "buffer.h"
110 #  include "syntax.h"
111 
112 # else  /* not emacs */
113 
114 /* If we are not linking with Emacs proper,
115    we can't use the relocating allocator
116    even if config.h says that we can.  */
117 #  undef REL_ALLOC
118 
119 #  include <stdlib.h>
120 
121 /* When used in Emacs's lib-src, we need to get bzero and bcopy somehow.
122    If nothing else has been done, use the method below.  */
123 #  ifdef INHIBIT_STRING_HEADER
124 #   if !(defined HAVE_BZERO && defined HAVE_BCOPY)
125 #    if !defined bzero && !defined bcopy
126 #     undef INHIBIT_STRING_HEADER
127 #    endif
128 #   endif
129 #  endif
130 
131 /* This is the normal way of making sure we have a bcopy and a bzero.
132    This is used in most programs--a few other programs avoid this
133    by defining INHIBIT_STRING_HEADER.  */
134 #  ifndef INHIBIT_STRING_HEADER
135 #   include <string.h>
136 #   ifndef bzero
137 #    ifndef _LIBC
138 #     define bzero(s, n)	(memset (s, '\0', n), (s))
139 #    else
140 #     define bzero(s, n)	__bzero (s, n)
141 #    endif
142 #   endif
143 #  endif
144 
145 /* Define the syntax stuff for \<, \>, etc.  */
146 
147 /* This must be nonzero for the wordchar and notwordchar pattern
148    commands in re_match_2.  */
149 #  ifndef Sword
150 #   define Sword 1
151 #  endif
152 
153 #  ifdef SWITCH_ENUM_BUG
154 #   define SWITCH_ENUM_CAST(x) ((int)(x))
155 #  else
156 #   define SWITCH_ENUM_CAST(x) (x)
157 #  endif
158 
159 # endif /* not emacs */
160 
161 # include <limits.h>
162 
163 # ifndef MB_LEN_MAX
164 #  define MB_LEN_MAX 1
165 # endif
166 
167 /* Get the interface, including the syntax bits.  */
168 # include <regex.h>
169 
170 /* isalpha etc. are used for the character classes.  */
171 # include <ctype.h>
172 
173 /* Jim Meyering writes:
174 
175    "... Some ctype macros are valid only for character codes that
176    isascii says are ASCII (SGI's IRIX-4.0.5 is one such system --when
177    using /bin/cc or gcc but without giving an ansi option).  So, all
178    ctype uses should be through macros like ISPRINT...  If
179    STDC_HEADERS is defined, then autoconf has verified that the ctype
180    macros don't need to be guarded with references to isascii. ...
181    Defining isascii to 1 should let any compiler worth its salt
182    eliminate the && through constant folding."
183    Solaris defines some of these symbols so we must undefine them first.  */
184 
185 # if defined STDC_HEADERS || (!defined isascii && !defined HAVE_ISASCII)
186 #  define IN_CTYPE_DOMAIN(c) 1
187 # else
188 #  define IN_CTYPE_DOMAIN(c) isascii(c)
189 # endif
190 
191 # ifdef isblank
192 #  define ISBLANK(c) (IN_CTYPE_DOMAIN (c) && isblank (c))
193 # else
194 #  define ISBLANK(c) ((c) == ' ' || (c) == '\t')
195 # endif
196 # ifdef isgraph
197 #  define ISGRAPH(c) (IN_CTYPE_DOMAIN (c) && isgraph (c))
198 # else
199 #  define ISGRAPH(c) (IN_CTYPE_DOMAIN (c) && isprint (c) && !isspace (c))
200 # endif
201 
202 # undef ISPRINT
203 # define ISPRINT(c) (IN_CTYPE_DOMAIN (c) && isprint (c))
204 # define ISDIGIT(c) (IN_CTYPE_DOMAIN (c) && isdigit (c))
205 # define ISALNUM(c) (IN_CTYPE_DOMAIN (c) && isalnum (c))
206 # define ISALPHA(c) (IN_CTYPE_DOMAIN (c) && isalpha (c))
207 # define ISCNTRL(c) (IN_CTYPE_DOMAIN (c) && iscntrl (c))
208 # define ISLOWER(c) (IN_CTYPE_DOMAIN (c) && islower (c))
209 # define ISPUNCT(c) (IN_CTYPE_DOMAIN (c) && ispunct (c))
210 # define ISSPACE(c) (IN_CTYPE_DOMAIN (c) && isspace (c))
211 # define ISUPPER(c) (IN_CTYPE_DOMAIN (c) && isupper (c))
212 # define ISXDIGIT(c) (IN_CTYPE_DOMAIN (c) && isxdigit (c))
213 
214 # ifdef _tolower
215 #  define TOLOWER(c) _tolower(c)
216 # else
217 #  define TOLOWER(c) tolower(c)
218 # endif
219 
220 # ifndef emacs
221 /* How many characters in the character set.  */
222 #  define CHAR_SET_SIZE 256
223 
224 #  ifdef SYNTAX_TABLE
225 
226 extern char *re_syntax_table;
227 
228 #  else /* not SYNTAX_TABLE */
229 
230 static char re_syntax_table[CHAR_SET_SIZE];
231 
232 static void
init_syntax_once(void)233 init_syntax_once (void)
234 {
235    register int c;
236    static int done = 0;
237 
238    if (done)
239      return;
240    bzero (re_syntax_table, sizeof re_syntax_table);
241 
242    for (c = 0; c < CHAR_SET_SIZE; ++c)
243      if (ISALNUM (c))
244 	re_syntax_table[c] = Sword;
245 
246    re_syntax_table['_'] = Sword;
247 
248    done = 1;
249 }
250 
251 #  endif /* not SYNTAX_TABLE */
252 
253 #  define SYNTAX(c) re_syntax_table[(unsigned char) (c)]
254 
255 # endif /* emacs */
256 
257 /* Should we use malloc or alloca?  If REGEX_MALLOC is not defined, we
258    use `alloca' instead of `malloc'.  This is because using malloc in
259    re_search* or re_match* could cause memory leaks when C-g is used in
260    Emacs; also, malloc is slower and causes storage fragmentation.  On
261    the other hand, malloc is more portable, and easier to debug.
262 
263    Because we sometimes use alloca, some routines have to be macros,
264    not functions -- `alloca'-allocated space disappears at the end of the
265    function it is called in.  */
266 
267 # ifdef REGEX_MALLOC
268 
269 #  define REGEX_ALLOCATE malloc
270 #  define REGEX_REALLOCATE(source, osize, nsize) realloc (source, nsize)
271 #  define REGEX_FREE free
272 
273 # else /* not REGEX_MALLOC  */
274 
275 /* Emacs already defines alloca, sometimes.  */
276 #  ifndef alloca
277 
278 /* Make alloca work the best possible way.  */
279 #   include <alloca.h>
280 
281 #  endif /* not alloca */
282 
283 #  define REGEX_ALLOCATE alloca
284 
285 /* Assumes a `char *destination' variable.  */
286 #  define REGEX_REALLOCATE(source, osize, nsize)			\
287   (destination = (char *) alloca (nsize),				\
288    memcpy (destination, source, osize))
289 
290 /* No need to do anything to free, after alloca.  */
291 #  define REGEX_FREE(arg) ((void)0) /* Do nothing!  But inhibit gcc warning.  */
292 
293 # endif /* not REGEX_MALLOC */
294 
295 /* Define how to allocate the failure stack.  */
296 
297 # if defined REL_ALLOC && defined REGEX_MALLOC
298 
299 #  define REGEX_ALLOCATE_STACK(size)				\
300   r_alloc (&failure_stack_ptr, (size))
301 #  define REGEX_REALLOCATE_STACK(source, osize, nsize)		\
302   r_re_alloc (&failure_stack_ptr, (nsize))
303 #  define REGEX_FREE_STACK(ptr)					\
304   r_alloc_free (&failure_stack_ptr)
305 
306 # else /* not using relocating allocator */
307 
308 #  ifdef REGEX_MALLOC
309 
310 #   define REGEX_ALLOCATE_STACK malloc
311 #   define REGEX_REALLOCATE_STACK(source, osize, nsize) realloc (source, nsize)
312 #   define REGEX_FREE_STACK free
313 
314 #  else /* not REGEX_MALLOC */
315 
316 #   define REGEX_ALLOCATE_STACK alloca
317 
318 #   define REGEX_REALLOCATE_STACK(source, osize, nsize)			\
319    REGEX_REALLOCATE (source, osize, nsize)
320 /* No need to explicitly free anything.  */
321 #   define REGEX_FREE_STACK(arg)
322 
323 #  endif /* not REGEX_MALLOC */
324 # endif /* not using relocating allocator */
325 
326 
327 /* True if `size1' is nonzero and PTR is pointing anywhere inside
328    `string1' or just past its end.  This works if PTR is NULL, which is
329    a good thing.  */
330 # define FIRST_STRING_P(ptr) 					\
331   (size1 && string1 <= (ptr) && (ptr) <= string1 + size1)
332 
333 /* (Re)Allocate N items of type T using malloc, or fail.  */
334 # define TALLOC(n, t) ((t *) malloc ((n) * sizeof (t)))
335 # define RETALLOC(addr, n, t) ((addr) = (t *) realloc (addr, (n) * sizeof (t)))
336 # define RETALLOC_IF(addr, n, t) \
337   if (addr) RETALLOC((addr), (n), t); else (addr) = TALLOC ((n), t)
338 # define REGEX_TALLOC(n, t) ((t *) REGEX_ALLOCATE ((n) * sizeof (t)))
339 
340 # define BYTEWIDTH 8 /* In bits.  */
341 
342 # define STREQ(s1, s2) ((strcmp (s1, s2) == 0))
343 
344 # undef MAX
345 # undef MIN
346 # define MAX(a, b) ((a) > (b) ? (a) : (b))
347 # define MIN(a, b) ((a) < (b) ? (a) : (b))
348 
349 typedef char boolean;
350 # define false 0
351 # define true 1
352 
353 static reg_errcode_t byte_regex_compile (const char *pattern, size_t size,
354 					 reg_syntax_t syntax,
355 					 struct re_pattern_buffer *bufp);
356 
357 static int byte_re_match_2_internal (struct re_pattern_buffer *bufp,
358 				     const char *string1, int size1,
359 				     const char *string2, int size2,
360 				     int pos,
361 				     struct re_registers *regs,
362 				     int stop);
363 static int byte_re_search_2 (struct re_pattern_buffer *bufp,
364 			     const char *string1, int size1,
365 			     const char *string2, int size2,
366 			     int startpos, int range,
367 			     struct re_registers *regs, int stop);
368 static int byte_re_compile_fastmap (struct re_pattern_buffer *bufp);
369 
370 #ifdef MBS_SUPPORT
371 static reg_errcode_t wcs_regex_compile (const char *pattern, size_t size,
372 					reg_syntax_t syntax,
373 					struct re_pattern_buffer *bufp);
374 
375 
376 static int wcs_re_match_2_internal (struct re_pattern_buffer *bufp,
377 				    const char *cstring1, int csize1,
378 				    const char *cstring2, int csize2,
379 				    int pos,
380 				    struct re_registers *regs,
381 				    int stop,
382 				    wchar_t *string1, int size1,
383 				    wchar_t *string2, int size2,
384 				    int *mbs_offset1, int *mbs_offset2);
385 static int wcs_re_search_2 (struct re_pattern_buffer *bufp,
386 			    const char *string1, int size1,
387 			    const char *string2, int size2,
388 			    int startpos, int range,
389 			    struct re_registers *regs, int stop);
390 static int wcs_re_compile_fastmap (struct re_pattern_buffer *bufp);
391 #endif
392 
393 /* These are the command codes (opcodes) that appear in compiled regular
394    expressions.  Some opcodes are followed by argument bytes.  A
395    command code can specify any interpretation whatsoever for its
396    arguments.  Zero bytes may appear in the compiled regular expression.  */
397 
398 typedef enum
399 {
400   no_op = 0,
401 
402   /* Succeed right away--no more backtracking.  */
403   succeed,
404 
405         /* Followed by one byte giving n, then by n literal bytes.  */
406   exactn,
407 
408 # ifdef MBS_SUPPORT
409 	/* Same as exactn, but contains binary data.  */
410   exactn_bin,
411 # endif
412 
413         /* Matches any (more or less) character.  */
414   anychar,
415 
416         /* Matches any one char belonging to the specified set.  The first
417            following byte is the number of bitmap bytes.  Then come bytes
418            for a bitmap saying which chars are in.  Bits in each byte
419            are ordered low-bit-first.  A character is in the set if its
420            bit is 1.  A character too large to have a bit in the map is
421            automatically not in the set.  */
422         /* If MBS_SUPPORT is defined, the following elements are the length
423 	   of character classes, length of collating symbols, length of
424 	   equivalence classes, length of character ranges, and length of characters.
425 	   Next, character class elements, collating symbol elements,
426 	   equivalence class elements, range elements, and character
427 	   elements follow.
428 	   See the regex_compile function.  */
429   charset,
430 
431         /* Same parameters as charset, but match any character that is
432            not one of those specified.  */
433   charset_not,
434 
435         /* Start remembering the text that is matched, for storing in a
436            register.  Followed by one byte with the register number, in
437            the range 0 to one less than the pattern buffer's re_nsub
438            field.  Then followed by one byte with the number of groups
439            inner to this one.  (This last has to be part of the
440            start_memory only because we need it in the on_failure_jump
441            of re_match_2.)  */
442   start_memory,
443 
444         /* Stop remembering the text that is matched and store it in a
445            memory register.  Followed by one byte with the register
446            number, in the range 0 to one less than `re_nsub' in the
447            pattern buffer, and one byte with the number of inner groups,
448            just like `start_memory'.  (We need the number of inner
449            groups here because we don't have any easy way of finding the
450            corresponding start_memory when we're at a stop_memory.)  */
451   stop_memory,
452 
453         /* Match a duplicate of something remembered.  Followed by one
454            byte containing the register number.  */
455   duplicate,
456 
457         /* Fail unless at beginning of line.  */
458   begline,
459 
460         /* Fail unless at end of line.  */
461   endline,
462 
463         /* Succeeds if at beginning of buffer (if emacs) or at beginning
464            of string to be matched (if not).  */
465   begbuf,
466 
467         /* Analogously, for end of buffer/string.  */
468   endbuf,
469 
470         /* Followed by a two-byte relative address to which to jump.  */
471   jump,
472 
473 	/* Same as jump, but marks the end of an alternative.  */
474   jump_past_alt,
475 
476         /* Followed by two-byte relative address of place to resume at
477            in case of failure.  */
478         /* If MBS_SUPPORT is defined, the address occupies one element.  */
479   on_failure_jump,
480 
481         /* Like on_failure_jump, but pushes a placeholder instead of the
482            current string position when executed.  */
483   on_failure_keep_string_jump,
484 
485         /* Throw away latest failure point and then jump to following
486            two-byte relative address.  */
487         /* If MBS_SUPPORT is defined, the address occupies one element.  */
488   pop_failure_jump,
489 
490         /* Change to pop_failure_jump if we know we won't have to backtrack
491            to match; otherwise change to jump.  This is used to jump
492            back to the beginning of a repeat.  If what follows this jump
493            clearly won't match what the repeat does, such that we can be
494            sure that there is no use backtracking out of repetitions
495            already matched, then we change it to a pop_failure_jump.
496            Followed by two-byte address.  */
497         /* If MBS_SUPPORT is defined, the address occupies one element.  */
498   maybe_pop_jump,
499 
500         /* Jump to following two-byte address, and push a dummy failure
501            point.  This failure point will be thrown away if an attempt
502            is made to use it for a failure.  A `+' construct makes this
503            before the first repeat.  Also used as an intermediary kind
504            of jump when compiling an alternative.  */
505         /* If MBS_SUPPORT is defined, the address occupies one element.  */
506   dummy_failure_jump,
507 
508 	/* Push a dummy failure point and continue.  Used at the end of
509 	   alternatives.  */
510   push_dummy_failure,
511 
512         /* Followed by two-byte relative address and two-byte number n.
513            After matching N times, jump to the address upon failure.  */
514         /* If MBS_SUPPORT is defined, the address occupies one element.  */
515   succeed_n,
516 
517         /* Followed by two-byte relative address, and two-byte number n.
518            Jump to the address N times, then fail.  */
519         /* If MBS_SUPPORT is defined, the address occupies one element.  */
520   jump_n,
521 
522         /* Set the following two-byte relative address to the
523            subsequent two-byte number.  The address *includes* the two
524            bytes of number.  */
525         /* If MBS_SUPPORT is defined, the address occupies one element.  */
526   set_number_at,
527 
528   wordchar,	/* Matches any word-constituent character.  */
529   notwordchar,	/* Matches any char that is not a word-constituent.  */
530 
531   wordbeg,	/* Succeeds if at word beginning.  */
532   wordend,	/* Succeeds if at word end.  */
533 
534   wordbound,	/* Succeeds if at a word boundary.  */
535   notwordbound	/* Succeeds if not at a word boundary.  */
536 
537 # ifdef emacs
538   ,before_dot,	/* Succeeds if before point.  */
539   at_dot,	/* Succeeds if at point.  */
540   after_dot,	/* Succeeds if after point.  */
541 
542 	/* Matches any character whose syntax is specified.  Followed by
543            a byte which contains a syntax code, e.g., Sword.  */
544   syntaxspec,
545 
546 	/* Matches any character whose syntax is not that specified.  */
547   notsyntaxspec
548 # endif /* emacs */
549 } re_opcode_t;
550 #endif /* not INSIDE_RECURSION */
551 
552 
553 #ifdef BYTE
554 # define CHAR_T char
555 # define UCHAR_T unsigned char
556 # define COMPILED_BUFFER_VAR bufp->buffer
557 # define OFFSET_ADDRESS_SIZE 2
558 # define PREFIX(name) byte_##name
559 # define ARG_PREFIX(name) name
560 # define PUT_CHAR(c) putchar (c)
561 #else
562 # ifdef WCHAR
563 #  define CHAR_T wchar_t
564 #  define UCHAR_T wchar_t
565 #  define COMPILED_BUFFER_VAR wc_buffer
566 #  define OFFSET_ADDRESS_SIZE 1 /* the size which STORE_NUMBER macro use */
567 #  define CHAR_CLASS_SIZE ((__alignof__(wctype_t)+sizeof(wctype_t))/sizeof(CHAR_T)+1)
568 #  define PREFIX(name) wcs_##name
569 #  define ARG_PREFIX(name) c##name
570 /* Should we use wide stream??  */
571 #  define PUT_CHAR(c) printf ("%C", c);
572 #  define TRUE 1
573 #  define FALSE 0
574 # else
575 #  ifdef MBS_SUPPORT
576 #   define WCHAR
577 #   define INSIDE_RECURSION
578 #   include "regex.c"
579 #   undef INSIDE_RECURSION
580 #  endif
581 #  define BYTE
582 #  define INSIDE_RECURSION
583 #  include "regex.c"
584 #  undef INSIDE_RECURSION
585 # endif
586 #endif
587 
588 #if USE_UNLOCKED_IO
589 # include "unlocked-io.h"
590 #endif
591 
592 #ifdef INSIDE_RECURSION
593 /* Common operations on the compiled pattern.  */
594 
595 /* Store NUMBER in two contiguous bytes starting at DESTINATION.  */
596 /* ifdef MBS_SUPPORT, we store NUMBER in 1 element.  */
597 
598 # ifdef WCHAR
599 #  define STORE_NUMBER(destination, number)				\
600   do {									\
601     *(destination) = (UCHAR_T)(number);				\
602   } while (0)
603 # else /* BYTE */
604 #  define STORE_NUMBER(destination, number)				\
605   do {									\
606     (destination)[0] = (number) & 0377;					\
607     (destination)[1] = (number) >> 8;					\
608   } while (0)
609 # endif /* WCHAR */
610 
611 /* Same as STORE_NUMBER, except increment DESTINATION to
612    the byte after where the number is stored.  Therefore, DESTINATION
613    must be an lvalue.  */
614 /* ifdef MBS_SUPPORT, we store NUMBER in 1 element.  */
615 
616 # define STORE_NUMBER_AND_INCR(destination, number)			\
617   do {									\
618     STORE_NUMBER (destination, number);					\
619     (destination) += OFFSET_ADDRESS_SIZE;				\
620   } while (0)
621 
/* Put into DESTINATION a number stored in two contiguous bytes starting
   at SOURCE.  The first byte holds the low eight bits; the second byte is
   read as a signed char and supplies the (possibly negative) high part.  */
/* ifdef MBS_SUPPORT, we store NUMBER in 1 element.  */

# ifdef WCHAR
#  define EXTRACT_NUMBER(destination, source)				\
  do {									\
    (destination) = *(source);						\
  } while (0)
# else /* BYTE */
/* The high byte is scaled with a multiplication rather than `<< 8':
   left-shifting a negative value is undefined behavior in C, and the
   high byte is negative for every negative stored number.  */
#  define EXTRACT_NUMBER(destination, source)				\
  do {									\
    (destination) = *(source) & 0377;					\
    (destination) += (signed char) (*((source) + 1)) * 256;		\
  } while (0)
# endif
638 
639 # ifdef DEBUG
640 static void
PREFIX(extract_number)641 PREFIX(extract_number) (int *dest, UCHAR_T *source)
642 {
643 #  ifdef WCHAR
644   *dest = *source;
645 #  else /* BYTE */
646   signed char temp = source[1];
647   *dest = *source & 0377;
648   *dest += temp << 8;
649 #  endif
650 }
651 
652 #  ifndef EXTRACT_MACROS /* To debug the macros.  */
653 #   undef EXTRACT_NUMBER
654 #   define EXTRACT_NUMBER(dest, src) PREFIX(extract_number) (&dest, src)
655 #  endif /* not EXTRACT_MACROS */
656 
657 # endif /* DEBUG */
658 
659 /* Same as EXTRACT_NUMBER, except increment SOURCE to after the number.
660    SOURCE must be an lvalue.  */
661 
662 # define EXTRACT_NUMBER_AND_INCR(destination, source)			\
663   do {									\
664     EXTRACT_NUMBER (destination, source);				\
665     (source) += OFFSET_ADDRESS_SIZE; 					\
666   } while (0)
667 
668 # ifdef DEBUG
669 static void
PREFIX(extract_number_and_incr)670 PREFIX(extract_number_and_incr) (int *destination, UCHAR_T **source)
671 {
672   PREFIX(extract_number) (destination, *source);
673   *source += OFFSET_ADDRESS_SIZE;
674 }
675 
676 #  ifndef EXTRACT_MACROS
677 #   undef EXTRACT_NUMBER_AND_INCR
678 #   define EXTRACT_NUMBER_AND_INCR(dest, src) \
679   PREFIX(extract_number_and_incr) (&dest, &src)
680 #  endif /* not EXTRACT_MACROS */
681 
682 # endif /* DEBUG */
683 
684 
685 
686 /* If DEBUG is defined, Regex prints many voluminous messages about what
687    it is doing (if the variable `debug' is nonzero).  If linked with the
688    main program in `iregex.c', you can enter patterns and strings
689    interactively.  And if linked with the main program in `main.c' and
690    the other test files, you can run the already-written tests.  */
691 
692 # ifdef DEBUG
693 
#  ifndef DEFINED_ONCE

/* We use standard I/O for debugging.  */
#   include <stdio.h>

/* It is useful to test things that ``must'' be true when debugging.  */
#   include <assert.h>

/* The printing macros below produce output only while this is nonzero.  */
static int debug;

/* DEBUG_STATEMENT expands to its argument unchanged.  DEBUG_PRINTn calls
   printf with n arguments when `debug' is nonzero.

   Each printing macro is wrapped in do { } while (0) so that it forms
   exactly one statement: with a bare `if (debug) ...' expansion, an
   `else' written after the macro call would attach to the hidden `if'
   instead of the caller's own `if'.  */
#   define DEBUG_STATEMENT(e) e
#   define DEBUG_PRINT1(x) \
  do { if (debug) printf (x); } while (0)
#   define DEBUG_PRINT2(x1, x2) \
  do { if (debug) printf (x1, x2); } while (0)
#   define DEBUG_PRINT3(x1, x2, x3) \
  do { if (debug) printf (x1, x2, x3); } while (0)
#   define DEBUG_PRINT4(x1, x2, x3, x4) \
  do { if (debug) printf (x1, x2, x3, x4); } while (0)
#  endif /* not DEFINED_ONCE */

/* Per-pass printers: when `debug' is nonzero, call the byte_/wcs_ variant
   selected by PREFIX.  The P argument of DEBUG_PRINT_COMPILED_PATTERN is
   accepted but not used.  */
#  define DEBUG_PRINT_COMPILED_PATTERN(p, s, e) 			\
  do {									\
    if (debug) PREFIX(print_partial_compiled_pattern) (s, e);		\
  } while (0)
#  define DEBUG_PRINT_DOUBLE_STRING(w, s1, sz1, s2, sz2)		\
  do {									\
    if (debug) PREFIX(print_double_string) (w, s1, sz1, s2, sz2);	\
  } while (0)
715 
716 
717 /* Print the fastmap in human-readable form.  */
718 
719 #  ifndef DEFINED_ONCE
720 void
print_fastmap(char * fastmap)721 print_fastmap (char *fastmap)
722 {
723   unsigned was_a_range = 0;
724   unsigned i = 0;
725 
726   while (i < (1 << BYTEWIDTH))
727     {
728       if (fastmap[i++])
729 	{
730 	  was_a_range = 0;
731           putchar (i - 1);
732           while (i < (1 << BYTEWIDTH)  &&  fastmap[i])
733             {
734               was_a_range = 1;
735               i++;
736             }
737 	  if (was_a_range)
738             {
739               printf ("-");
740               putchar (i - 1);
741             }
742         }
743     }
744   putchar ('\n');
745 }
746 #  endif /* not DEFINED_ONCE */
747 
748 
749 /* Print a compiled pattern string in human-readable form, starting at
750    the START pointer into it and ending just before the pointer END.  */
751 
752 void
PREFIX(print_partial_compiled_pattern)753 PREFIX(print_partial_compiled_pattern) (UCHAR_T *start, UCHAR_T *end)
754 {
755   int mcnt, mcnt2;
756   UCHAR_T *p1;
757   UCHAR_T *p = start;
758   UCHAR_T *pend = end;
759 
760   if (start == NULL)
761     {
762       printf ("(null)\n");
763       return;
764     }
765 
766   /* Loop over pattern commands.  */
767   while (p < pend)
768     {
769 #  ifdef _LIBC
770       printf ("%td:\t", p - start);
771 #  else
772       printf ("%ld:\t", (long int) (p - start));
773 #  endif
774 
775       switch ((re_opcode_t) *p++)
776 	{
777         case no_op:
778           printf ("/no_op");
779           break;
780 
781 	case exactn:
782 	  mcnt = *p++;
783           printf ("/exactn/%d", mcnt);
784           do
785 	    {
786               putchar ('/');
787 	      PUT_CHAR (*p++);
788             }
789           while (--mcnt);
790           break;
791 
792 #  ifdef MBS_SUPPORT
793 	case exactn_bin:
794 	  mcnt = *p++;
795 	  printf ("/exactn_bin/%d", mcnt);
796           do
797 	    {
798 	      printf("/%lx", (long int) *p++);
799             }
800           while (--mcnt);
801           break;
802 #  endif /* MBS_SUPPORT */
803 
804 	case start_memory:
805           mcnt = *p++;
806           printf ("/start_memory/%d/%ld", mcnt, (long int) *p++);
807           break;
808 
809 	case stop_memory:
810           mcnt = *p++;
811 	  printf ("/stop_memory/%d/%ld", mcnt, (long int) *p++);
812           break;
813 
814 	case duplicate:
815 	  printf ("/duplicate/%ld", (long int) *p++);
816 	  break;
817 
818 	case anychar:
819 	  printf ("/anychar");
820 	  break;
821 
822 	case charset:
823         case charset_not:
824           {
825 #  ifdef WCHAR
826 	    int i, length;
827 	    wchar_t *workp = p;
828 	    printf ("/charset [%s",
829 	            (re_opcode_t) *(workp - 1) == charset_not ? "^" : "");
830 	    p += 5;
831 	    length = *workp++; /* the length of char_classes */
832 	    for (i=0 ; i<length ; i++)
833 	      printf("[:%lx:]", (long int) *p++);
834 	    length = *workp++; /* the length of collating_symbol */
835 	    for (i=0 ; i<length ;)
836 	      {
837 		printf("[.");
838 		while(*p != 0)
839 		  PUT_CHAR((i++,*p++));
840 		i++,p++;
841 		printf(".]");
842 	      }
843 	    length = *workp++; /* the length of equivalence_class */
844 	    for (i=0 ; i<length ;)
845 	      {
846 		printf("[=");
847 		while(*p != 0)
848 		  PUT_CHAR((i++,*p++));
849 		i++,p++;
850 		printf("=]");
851 	      }
852 	    length = *workp++; /* the length of char_range */
853 	    for (i=0 ; i<length ; i++)
854 	      {
855 		wchar_t range_start = *p++;
856 		wchar_t range_end = *p++;
857 		printf("%C-%C", range_start, range_end);
858 	      }
859 	    length = *workp++; /* the length of char */
860 	    for (i=0 ; i<length ; i++)
861 	      printf("%C", *p++);
862 	    putchar (']');
863 #  else
864             register int c, last = -100;
865 	    register int in_range = 0;
866 
867 	    printf ("/charset [%s",
868 	            (re_opcode_t) *(p - 1) == charset_not ? "^" : "");
869 
870             assert (p + *p < pend);
871 
872             for (c = 0; c < 256; c++)
873 	      if (c / 8 < *p
874 		  && (p[1 + (c/8)] & (1 << (c % 8))))
875 		{
876 		  /* Are we starting a range?  */
877 		  if (last + 1 == c && ! in_range)
878 		    {
879 		      putchar ('-');
880 		      in_range = 1;
881 		    }
882 		  /* Have we broken a range?  */
883 		  else if (last + 1 != c && in_range)
884               {
885 		      putchar (last);
886 		      in_range = 0;
887 		    }
888 
889 		  if (! in_range)
890 		    putchar (c);
891 
892 		  last = c;
893               }
894 
895 	    if (in_range)
896 	      putchar (last);
897 
898 	    putchar (']');
899 
900 	    p += 1 + *p;
901 #  endif /* WCHAR */
902 	  }
903 	  break;
904 
905 	case begline:
906 	  printf ("/begline");
907           break;
908 
909 	case endline:
910           printf ("/endline");
911           break;
912 
913 	case on_failure_jump:
914           PREFIX(extract_number_and_incr) (&mcnt, &p);
915 #  ifdef _LIBC
916   	  printf ("/on_failure_jump to %td", p + mcnt - start);
917 #  else
918   	  printf ("/on_failure_jump to %ld", (long int) (p + mcnt - start));
919 #  endif
920           break;
921 
922 	case on_failure_keep_string_jump:
923           PREFIX(extract_number_and_incr) (&mcnt, &p);
924 #  ifdef _LIBC
925   	  printf ("/on_failure_keep_string_jump to %td", p + mcnt - start);
926 #  else
927   	  printf ("/on_failure_keep_string_jump to %ld",
928 		  (long int) (p + mcnt - start));
929 #  endif
930           break;
931 
932 	case dummy_failure_jump:
933           PREFIX(extract_number_and_incr) (&mcnt, &p);
934 #  ifdef _LIBC
935   	  printf ("/dummy_failure_jump to %td", p + mcnt - start);
936 #  else
937   	  printf ("/dummy_failure_jump to %ld", (long int) (p + mcnt - start));
938 #  endif
939           break;
940 
941 	case push_dummy_failure:
942           printf ("/push_dummy_failure");
943           break;
944 
945         case maybe_pop_jump:
946           PREFIX(extract_number_and_incr) (&mcnt, &p);
947 #  ifdef _LIBC
948   	  printf ("/maybe_pop_jump to %td", p + mcnt - start);
949 #  else
950   	  printf ("/maybe_pop_jump to %ld", (long int) (p + mcnt - start));
951 #  endif
952 	  break;
953 
954         case pop_failure_jump:
955 	  PREFIX(extract_number_and_incr) (&mcnt, &p);
956 #  ifdef _LIBC
957   	  printf ("/pop_failure_jump to %td", p + mcnt - start);
958 #  else
959   	  printf ("/pop_failure_jump to %ld", (long int) (p + mcnt - start));
960 #  endif
961 	  break;
962 
963         case jump_past_alt:
964 	  PREFIX(extract_number_and_incr) (&mcnt, &p);
965 #  ifdef _LIBC
966   	  printf ("/jump_past_alt to %td", p + mcnt - start);
967 #  else
968   	  printf ("/jump_past_alt to %ld", (long int) (p + mcnt - start));
969 #  endif
970 	  break;
971 
972         case jump:
973 	  PREFIX(extract_number_and_incr) (&mcnt, &p);
974 #  ifdef _LIBC
975   	  printf ("/jump to %td", p + mcnt - start);
976 #  else
977   	  printf ("/jump to %ld", (long int) (p + mcnt - start));
978 #  endif
979 	  break;
980 
981         case succeed_n:
982           PREFIX(extract_number_and_incr) (&mcnt, &p);
983 	  p1 = p + mcnt;
984           PREFIX(extract_number_and_incr) (&mcnt2, &p);
985 #  ifdef _LIBC
986 	  printf ("/succeed_n to %td, %d times", p1 - start, mcnt2);
987 #  else
988 	  printf ("/succeed_n to %ld, %d times",
989 		  (long int) (p1 - start), mcnt2);
990 #  endif
991           break;
992 
993         case jump_n:
994           PREFIX(extract_number_and_incr) (&mcnt, &p);
995 	  p1 = p + mcnt;
996           PREFIX(extract_number_and_incr) (&mcnt2, &p);
997 	  printf ("/jump_n to %d, %d times", p1 - start, mcnt2);
998           break;
999 
1000         case set_number_at:
1001           PREFIX(extract_number_and_incr) (&mcnt, &p);
1002 	  p1 = p + mcnt;
1003           PREFIX(extract_number_and_incr) (&mcnt2, &p);
1004 #  ifdef _LIBC
1005 	  printf ("/set_number_at location %td to %d", p1 - start, mcnt2);
1006 #  else
1007 	  printf ("/set_number_at location %ld to %d",
1008 		  (long int) (p1 - start), mcnt2);
1009 #  endif
1010           break;
1011 
1012         case wordbound:
1013 	  printf ("/wordbound");
1014 	  break;
1015 
1016 	case notwordbound:
1017 	  printf ("/notwordbound");
1018           break;
1019 
1020 	case wordbeg:
1021 	  printf ("/wordbeg");
1022 	  break;
1023 
1024 	case wordend:
1025 	  printf ("/wordend");
1026 	  break;
1027 
1028 #  ifdef emacs
1029 	case before_dot:
1030 	  printf ("/before_dot");
1031           break;
1032 
1033 	case at_dot:
1034 	  printf ("/at_dot");
1035           break;
1036 
1037 	case after_dot:
1038 	  printf ("/after_dot");
1039           break;
1040 
1041 	case syntaxspec:
1042           printf ("/syntaxspec");
1043 	  mcnt = *p++;
1044 	  printf ("/%d", mcnt);
1045           break;
1046 
1047 	case notsyntaxspec:
1048           printf ("/notsyntaxspec");
1049 	  mcnt = *p++;
1050 	  printf ("/%d", mcnt);
1051 	  break;
1052 #  endif /* emacs */
1053 
1054 	case wordchar:
1055 	  printf ("/wordchar");
1056           break;
1057 
1058 	case notwordchar:
1059 	  printf ("/notwordchar");
1060           break;
1061 
1062 	case begbuf:
1063 	  printf ("/begbuf");
1064           break;
1065 
1066 	case endbuf:
1067 	  printf ("/endbuf");
1068           break;
1069 
1070         default:
1071           printf ("?%ld", (long int) *(p-1));
1072 	}
1073 
1074       putchar ('\n');
1075     }
1076 
1077 #  ifdef _LIBC
1078   printf ("%td:\tend of pattern.\n", p - start);
1079 #  else
1080   printf ("%ld:\tend of pattern.\n", (long int) (p - start));
1081 #  endif
1082 }
1083 
1084 
1085 void
PREFIX(print_compiled_pattern)1086 PREFIX(print_compiled_pattern) (struct re_pattern_buffer *bufp)
1087 {
1088   UCHAR_T *buffer = (UCHAR_T*) bufp->buffer;
1089 
1090   PREFIX(print_partial_compiled_pattern) (buffer, buffer
1091 				  + bufp->used / sizeof(UCHAR_T));
1092   printf ("%ld bytes used/%ld bytes allocated.\n",
1093 	  bufp->used, bufp->allocated);
1094 
1095   if (bufp->fastmap_accurate && bufp->fastmap)
1096     {
1097       printf ("fastmap: ");
1098       print_fastmap (bufp->fastmap);
1099     }
1100 
1101 #  ifdef _LIBC
1102   printf ("re_nsub: %Zd\t", bufp->re_nsub);
1103 #  else
1104   printf ("re_nsub: %ld\t", (long int) bufp->re_nsub);
1105 #  endif
1106   printf ("regs_alloc: %d\t", bufp->regs_allocated);
1107   printf ("can_be_null: %d\t", bufp->can_be_null);
1108   printf ("newline_anchor: %d\n", bufp->newline_anchor);
1109   printf ("no_sub: %d\t", bufp->no_sub);
1110   printf ("not_bol: %d\t", bufp->not_bol);
1111   printf ("not_eol: %d\t", bufp->not_eol);
1112   printf ("syntax: %lx\n", bufp->syntax);
1113   /* Perhaps we should print the translate table?  */
1114 }
1115 
1116 
1117 void
PREFIX(print_double_string)1118 PREFIX(print_double_string) (const CHAR_T *where,
1119 			     const CHAR_T *string1,
1120 			     const CHAR_T *string2,
1121 			     int size1,
1122 			     int size2)
1123 {
1124   int this_char;
1125 
1126   if (where == NULL)
1127     printf ("(null)");
1128   else
1129     {
1130       int cnt;
1131 
1132       if (FIRST_STRING_P (where))
1133         {
1134           for (this_char = where - string1; this_char < size1; this_char++)
1135 	    PUT_CHAR (string1[this_char]);
1136 
1137           where = string2;
1138         }
1139 
1140       cnt = 0;
1141       for (this_char = where - string2; this_char < size2; this_char++)
1142 	{
1143 	  PUT_CHAR (string2[this_char]);
1144 	  if (++cnt > 100)
1145 	    {
1146 	      fputs ("...", stdout);
1147 	      break;
1148 	    }
1149 	}
1150     }
1151 }
1152 
1153 #  ifndef DEFINED_ONCE
/* Write the single character C to stderr.  */
void
printchar (int c)
{
  putc (c, stderr);
}
1160 #  endif
1161 
1162 # else /* not DEBUG */
1163 
#  ifndef DEFINED_ONCE
/* Non-DEBUG build: assert and every DEBUG_* hook below expand to
   nothing, so their arguments are dropped by the preprocessor and are
   never evaluated.  */
#   undef assert
#   define assert(e)

#   define DEBUG_STATEMENT(e)
#   define DEBUG_PRINT1(x)
#   define DEBUG_PRINT2(x1, x2)
#   define DEBUG_PRINT3(x1, x2, x3)
#   define DEBUG_PRINT4(x1, x2, x3, x4)
#  endif /* not DEFINED_ONCE */
/* These two sit outside the DEFINED_ONCE guard, so they are (re)defined
   on every pass over this section; here they are empty as well.  */
#  define DEBUG_PRINT_COMPILED_PATTERN(p, s, e)
#  define DEBUG_PRINT_DOUBLE_STRING(w, s1, sz1, s2, sz2)
1176 
1177 # endif /* not DEBUG */
1178 
1179 
1180 
1181 # ifdef WCHAR
1182 /* This  convert a multibyte string to a wide character string.
1183    And write their correspondances to offset_buffer(see below)
1184    and write whether each wchar_t is binary data to is_binary.
1185    This assume invalid multibyte sequences as binary data.
1186    We assume offset_buffer and is_binary is already allocated
1187    enough space.  */
1188 
1189 static size_t
convert_mbs_to_wcs(CHAR_T * dest,const unsigned char * src,size_t len,int * offset_buffer,char * is_binary)1190 convert_mbs_to_wcs (CHAR_T *dest,
1191 		    const unsigned char* src,
1192 
1193 		    /* The length of multibyte string.  */
1194 		    size_t len,
1195 
1196 		    /* Correspondences between src(char string) and
1197 		       dest(wchar_t string) for optimization.  E.g.:
1198 		       src  = "xxxyzz"
1199 		       dest = {'X', 'Y', 'Z'}
1200 			 (each "xxx", "y" and "zz" represent one
1201 			  multibyte character corresponding to 'X',
1202 			  'Y' and 'Z'.)
1203 		       offset_buffer = {0, 0+3("xxx"), 0+3+1("y"),
1204 					0+3+1+2("zz")}
1205 				     = {0, 3, 4, 6} */
1206 		    int *offset_buffer,
1207 
1208 		    char *is_binary)
1209 {
1210   wchar_t *pdest = dest;
1211   const unsigned char *psrc = src;
1212   size_t wc_count = 0;
1213 
1214   mbstate_t mbs;
1215   int i, consumed;
1216   size_t mb_remain = len;
1217   size_t mb_count = 0;
1218 
1219   /* Initialize the conversion state.  */
1220   memset (&mbs, 0, sizeof (mbstate_t));
1221 
1222   offset_buffer[0] = 0;
1223   for( ; mb_remain > 0 ; ++wc_count, ++pdest, mb_remain -= consumed,
1224 	 psrc += consumed)
1225     {
1226       consumed = mbrtowc (pdest, psrc, mb_remain, &mbs);
1227 
1228       if (consumed <= 0)
1229 	/* failed to convert. maybe src contains binary data.
1230 	   So we consume 1 byte manualy.  */
1231 	{
1232 	  *pdest = *psrc;
1233 	  consumed = 1;
1234 	  is_binary[wc_count] = TRUE;
1235 	}
1236       else
1237 	is_binary[wc_count] = FALSE;
1238       /* In sjis encoding, we use yen sign as escape character in
1239 	 place of reverse solidus. So we convert 0x5c(yen sign in
1240 	 sjis) to not 0xa5(yen sign in UCS2) but 0x5c(reverse
1241 	 solidus in UCS2).  */
1242       if (consumed == 1 && (int) *psrc == 0x5c && (int) *pdest == 0xa5)
1243 	*pdest = (wchar_t) *psrc;
1244 
1245       offset_buffer[wc_count + 1] = mb_count += consumed;
1246     }
1247 
1248   /* Fill remain of the buffer with sentinel.  */
1249   for (i = wc_count + 1 ; i <= len ; i++)
1250     offset_buffer[i] = mb_count + 1;
1251 
1252   return wc_count;
1253 }
1254 
1255 # endif /* WCHAR */
1256 
1257 #else /* not INSIDE_RECURSION */
1258 
1259 /* Set by `re_set_syntax' to the current regexp syntax to recognize.  Can
1260    also be assigned to arbitrarily: each pattern buffer stores its own
1261    syntax, so it can be changed between regex compilations.  */
1262 /* This has no initializer because initialized variables in Emacs
1263    become read-only after dumping.  */
1264 reg_syntax_t re_syntax_options;
1265 
1266 
1267 /* Specify the precise syntax of regexps for compilation.  This provides
1268    for compatibility for various utilities which historically have
1269    different, incompatible syntaxes.
1270 
1271    The argument SYNTAX is a bit mask comprised of the various bits
1272    defined in regex.h.  We return the old syntax.  */
1273 
1274 reg_syntax_t
re_set_syntax(reg_syntax_t syntax)1275 re_set_syntax (reg_syntax_t syntax)
1276 {
1277   reg_syntax_t ret = re_syntax_options;
1278 
1279   re_syntax_options = syntax;
1280 # ifdef DEBUG
1281   if (syntax & RE_DEBUG)
1282     debug = 1;
1283   else if (debug) /* was on but now is not */
1284     debug = 0;
1285 # endif /* DEBUG */
1286   return ret;
1287 }
1288 # ifdef _LIBC
1289 weak_alias (__re_set_syntax, re_set_syntax)
1290 # endif
1291 
/* This table gives an error message for each of the error codes listed
   in regex.h.  Obviously the order here has to be same as there.
   POSIX doesn't require that we do anything for REG_NOERROR,
   but why not be nice?

   All messages live in one char array, separated by NUL bytes.  Each
   *_IDX macro is the byte offset of its message inside the array: the
   previous offset plus `sizeof' the previous message literal, which
   already counts that literal's terminating NUL.  The `sizeof' operand
   must therefore be kept textually identical to the message it
   follows.  The last message needs no explicit "\0"; the terminator of
   the concatenated literal ends it.  */

static const char re_error_msgid[] =
  {
# define REG_NOERROR_IDX	0
    gettext_noop ("Success")	/* REG_NOERROR */
    "\0"
# define REG_NOMATCH_IDX (REG_NOERROR_IDX + sizeof "Success")
    gettext_noop ("No match")	/* REG_NOMATCH */
    "\0"
# define REG_BADPAT_IDX	(REG_NOMATCH_IDX + sizeof "No match")
    gettext_noop ("Invalid regular expression") /* REG_BADPAT */
    "\0"
# define REG_ECOLLATE_IDX (REG_BADPAT_IDX + sizeof "Invalid regular expression")
    gettext_noop ("Invalid collation character") /* REG_ECOLLATE */
    "\0"
# define REG_ECTYPE_IDX	(REG_ECOLLATE_IDX + sizeof "Invalid collation character")
    gettext_noop ("Invalid character class name") /* REG_ECTYPE */
    "\0"
# define REG_EESCAPE_IDX	(REG_ECTYPE_IDX + sizeof "Invalid character class name")
    gettext_noop ("Trailing backslash") /* REG_EESCAPE */
    "\0"
# define REG_ESUBREG_IDX	(REG_EESCAPE_IDX + sizeof "Trailing backslash")
    gettext_noop ("Invalid back reference") /* REG_ESUBREG */
    "\0"
# define REG_EBRACK_IDX	(REG_ESUBREG_IDX + sizeof "Invalid back reference")
    gettext_noop ("Unmatched [ or [^")	/* REG_EBRACK */
    "\0"
# define REG_EPAREN_IDX	(REG_EBRACK_IDX + sizeof "Unmatched [ or [^")
    gettext_noop ("Unmatched ( or \\(") /* REG_EPAREN */
    "\0"
# define REG_EBRACE_IDX	(REG_EPAREN_IDX + sizeof "Unmatched ( or \\(")
    gettext_noop ("Unmatched \\{") /* REG_EBRACE */
    "\0"
# define REG_BADBR_IDX	(REG_EBRACE_IDX + sizeof "Unmatched \\{")
    gettext_noop ("Invalid content of \\{\\}") /* REG_BADBR */
    "\0"
# define REG_ERANGE_IDX	(REG_BADBR_IDX + sizeof "Invalid content of \\{\\}")
    gettext_noop ("Invalid range end")	/* REG_ERANGE */
    "\0"
# define REG_ESPACE_IDX	(REG_ERANGE_IDX + sizeof "Invalid range end")
    gettext_noop ("Memory exhausted") /* REG_ESPACE */
    "\0"
# define REG_BADRPT_IDX	(REG_ESPACE_IDX + sizeof "Memory exhausted")
    gettext_noop ("Invalid preceding regular expression") /* REG_BADRPT */
    "\0"
# define REG_EEND_IDX	(REG_BADRPT_IDX + sizeof "Invalid preceding regular expression")
    gettext_noop ("Premature end of regular expression") /* REG_EEND */
    "\0"
# define REG_ESIZE_IDX	(REG_EEND_IDX + sizeof "Premature end of regular expression")
    gettext_noop ("Regular expression too big") /* REG_ESIZE */
    "\0"
# define REG_ERPAREN_IDX	(REG_ESIZE_IDX + sizeof "Regular expression too big")
    gettext_noop ("Unmatched ) or \\)") /* REG_ERPAREN */
  };
1350 
/* Byte offsets into re_error_msgid, one per message and in the same
   order as the messages appear there.  Presumably indexed by the
   numeric error code from regex.h -- confirm against the users of this
   table.  */
static const size_t re_error_msgid_idx[] =
  {
    REG_NOERROR_IDX,
    REG_NOMATCH_IDX,
    REG_BADPAT_IDX,
    REG_ECOLLATE_IDX,
    REG_ECTYPE_IDX,
    REG_EESCAPE_IDX,
    REG_ESUBREG_IDX,
    REG_EBRACK_IDX,
    REG_EPAREN_IDX,
    REG_EBRACE_IDX,
    REG_BADBR_IDX,
    REG_ERANGE_IDX,
    REG_ESPACE_IDX,
    REG_BADRPT_IDX,
    REG_EEND_IDX,
    REG_ESIZE_IDX,
    REG_ERPAREN_IDX
  };
1371 
1372 #endif /* INSIDE_RECURSION */
1373 
1374 #ifndef DEFINED_ONCE
/* Avoiding alloca during matching, to placate r_alloc.  */

/* Define MATCH_MAY_ALLOCATE unless we need to make sure that the
   searching and matching functions should not call alloca.  On some
   systems, alloca is implemented in terms of malloc, and if we're
   using the relocating allocator routines, then malloc could cause a
   relocation, which might (if the strings being searched are in the
   ralloc heap) shift the data out from underneath the regexp
   routines.

   Here's another reason to avoid allocation: Emacs
   processes input from X in a signal handler; processing X input may
   call malloc; if input arrives while a matching routine is calling
   malloc, then we are in trouble.  But Emacs can't just block input
   while calling matching routines; then we don't notice interrupts when
   they come in.  So, Emacs blocks input around all regexp calls
   except the matching calls, which it leaves unprotected, in the
   faith that they will not malloc.  */

/* Normally, this is fine.  */
# define MATCH_MAY_ALLOCATE

/* When using GNU C, we are not REALLY using the C alloca, no matter
   what config.h may say.  So don't take precautions for it.  */
# ifdef __GNUC__
#  undef C_ALLOCA
# endif

/* The match routines may not allocate if (1) they would do it with malloc
   and (2) it's not safe for them to use malloc.
   Note that if REL_ALLOC is defined, matching would not use malloc for the
   failure stack, but we would still use it for the register vectors;
   so REL_ALLOC should not affect this.  */
# if (defined C_ALLOCA || defined REGEX_MALLOC) && defined emacs
#  undef MATCH_MAY_ALLOCATE
# endif
1411 #endif /* not DEFINED_ONCE */
1412 
1413 #ifdef INSIDE_RECURSION
1414 /* Failure stack declarations and macros; both re_compile_fastmap and
1415    re_match_2 use a failure stack.  These have to be macros because of
1416    REGEX_ALLOCATE_STACK.  */
1417 
1418 
/* Number of failure points for which to initially allocate space
   when matching.  If this number is exceeded, we allocate more
   space, so it is not a hard limit.  Note that INIT_FAIL_STACK uses
   this value directly as the initial number of stack elements.  */
# ifndef INIT_FAILURE_ALLOC
#  define INIT_FAILURE_ALLOC 5
# endif

/* Roughly the maximum number of failure points on the stack.  Would be
   exactly that if always used MAX_FAILURE_ITEMS items each time we failed.
   This is a variable only so users of regex can assign to it; we never
   change it ourselves.

   The two branches below differ only in integer width: with
   INT_IS_16BIT the limit, the integer stack item and the stack
   counters are (unsigned) long; otherwise they are (unsigned) int.  */

# ifdef INT_IS_16BIT

#  ifndef DEFINED_ONCE
#   if defined MATCH_MAY_ALLOCATE
/* 4400 was enough to cause a crash on Alpha OSF/1,
   whose default stack limit is 2mb.  */
long int re_max_failures = 4000;
#   else
long int re_max_failures = 2000;
#   endif
#  endif

/* One slot of the failure stack: holds either a pointer or an
   integer.  */
union PREFIX(fail_stack_elt)
{
  UCHAR_T *pointer;
  long int integer;
};

typedef union PREFIX(fail_stack_elt) PREFIX(fail_stack_elt_t);

/* The failure stack itself.  `size' and `avail' count elements, not
   bytes.  */
typedef struct
{
  PREFIX(fail_stack_elt_t) *stack;
  unsigned long int size;
  unsigned long int avail;		/* Offset of next open position.  */
} PREFIX(fail_stack_type);

# else /* not INT_IS_16BIT */

#  ifndef DEFINED_ONCE
#   if defined MATCH_MAY_ALLOCATE
/* 4400 was enough to cause a crash on Alpha OSF/1,
   whose default stack limit is 2mb.  */
int re_max_failures = 4000;
#   else
int re_max_failures = 2000;
#   endif
#  endif

/* One slot of the failure stack: holds either a pointer or an
   integer.  */
union PREFIX(fail_stack_elt)
{
  UCHAR_T *pointer;
  int integer;
};

typedef union PREFIX(fail_stack_elt) PREFIX(fail_stack_elt_t);

/* The failure stack itself.  `size' and `avail' count elements, not
   bytes.  */
typedef struct
{
  PREFIX(fail_stack_elt_t) *stack;
  unsigned size;
  unsigned avail;			/* Offset of next open position.  */
} PREFIX(fail_stack_type);

# endif /* INT_IS_16BIT */
1486 
# ifndef DEFINED_ONCE
/* State predicates for the failure stack.  They take no argument and
   refer to a variable literally named `fail_stack' (or, for the _PTR_
   variant, a pointer named `fail_stack_ptr') in the enclosing scope.  */
#  define FAIL_STACK_EMPTY()     (fail_stack.avail == 0)
#  define FAIL_STACK_PTR_EMPTY() (fail_stack_ptr->avail == 0)
#  define FAIL_STACK_FULL()      (fail_stack.avail == fail_stack.size)
# endif
1492 
1493 
/* Define macros to initialize and free the failure stack.
   Do `return -2' if the alloc fails.

   Both operate on a variable named `fail_stack' in the enclosing
   scope.  When MATCH_MAY_ALLOCATE is defined, INIT_FAIL_STACK
   allocates room for INIT_FAILURE_ALLOC elements through
   REGEX_ALLOCATE_STACK and RESET_FAIL_STACK releases it through
   REGEX_FREE_STACK.  Otherwise INIT_FAIL_STACK only resets `avail'
   and RESET_FAIL_STACK does nothing.  */

# ifdef MATCH_MAY_ALLOCATE
#  define INIT_FAIL_STACK()						\
  do {									\
    fail_stack.stack = (PREFIX(fail_stack_elt_t) *)		\
      REGEX_ALLOCATE_STACK (INIT_FAILURE_ALLOC * sizeof (PREFIX(fail_stack_elt_t))); \
									\
    if (fail_stack.stack == NULL)				\
      return -2;							\
									\
    fail_stack.size = INIT_FAILURE_ALLOC;			\
    fail_stack.avail = 0;					\
  } while (0)

#  define RESET_FAIL_STACK()  REGEX_FREE_STACK (fail_stack.stack)
# else
#  define INIT_FAIL_STACK()						\
  do {									\
    fail_stack.avail = 0;					\
  } while (0)

#  define RESET_FAIL_STACK()
# endif
1519 
1520 
/* Double the size of FAIL_STACK, up to approximately `re_max_failures' items.

   Return 1 if succeeds, and 0 if either ran out of memory
   allocating space for it or it was already too large.

   REGEX_REALLOCATE_STACK requires `destination' be declared.

   This is an expression macro.  The limit is compared against the size
   before doubling, which is why the bound is only approximate.  Note
   that the result of REGEX_REALLOCATE_STACK is stored into
   (fail_stack).stack before it is tested, so when reallocation fails
   the member is left NULL while `size' keeps its old value.  */

# define DOUBLE_FAIL_STACK(fail_stack)					\
  ((fail_stack).size > (unsigned) (re_max_failures * MAX_FAILURE_ITEMS)	\
   ? 0									\
   : ((fail_stack).stack = (PREFIX(fail_stack_elt_t) *)			\
        REGEX_REALLOCATE_STACK ((fail_stack).stack, 			\
          (fail_stack).size * sizeof (PREFIX(fail_stack_elt_t)),	\
          ((fail_stack).size << 1) * sizeof (PREFIX(fail_stack_elt_t))),\
									\
      (fail_stack).stack == NULL					\
      ? 0								\
      : ((fail_stack).size <<= 1, 					\
         1)))
1540 
1541 
/* Push pointer POINTER on FAIL_STACK.
   Return 1 if was able to do so and 0 if ran out of memory allocating
   space to do so.

   The stack is grown with DOUBLE_FAIL_STACK only when it is full.
   NOTE: the fullness test, FAIL_STACK_FULL (), always inspects the
   variable named `fail_stack', whatever is passed as FAIL_STACK, so
   the argument is expected to be that very variable.  */
# define PUSH_PATTERN_OP(POINTER, FAIL_STACK)				\
  ((FAIL_STACK_FULL ()							\
    && !DOUBLE_FAIL_STACK (FAIL_STACK))					\
   ? 0									\
   : ((FAIL_STACK).stack[(FAIL_STACK).avail++].pointer = POINTER,	\
      1))
1551 
/* Push a pointer value onto the failure stack.
   Assumes the variable `fail_stack'.  Probably should only
   be called from within `PUSH_FAILURE_POINT'.
   None of the PUSH_FAILURE_* macros checks for room; the caller must
   have made sure the stack is large enough.  */
# define PUSH_FAILURE_POINTER(item)					\
  fail_stack.stack[fail_stack.avail++].pointer = (UCHAR_T *) (item)

/* This pushes an integer-valued item onto the failure stack.
   Assumes the variable `fail_stack'.  Probably should only
   be called from within `PUSH_FAILURE_POINT'.  */
# define PUSH_FAILURE_INT(item)					\
  fail_stack.stack[fail_stack.avail++].integer = (item)

/* Push a fail_stack_elt_t value onto the failure stack.
   Assumes the variable `fail_stack'.  Probably should only
   be called from within `PUSH_FAILURE_POINT'.  */
# define PUSH_FAILURE_ELT(item)					\
  fail_stack.stack[fail_stack.avail++] =  (item)

/* These three POP... operations complement the three PUSH... operations.
   All assume that `fail_stack' is nonempty.  Each one is an expression
   that decrements `avail' and yields the element (or the selected
   member of it) that was on top.  */
# define POP_FAILURE_POINTER() fail_stack.stack[--fail_stack.avail].pointer
# define POP_FAILURE_INT() fail_stack.stack[--fail_stack.avail].integer
# define POP_FAILURE_ELT() fail_stack.stack[--fail_stack.avail]

/* Used to omit pushing failure point id's when we're not debugging.
   With DEBUG, DEBUG_PUSH pushes an integer and DEBUG_POP stores the
   popped integer through ITEM_ADDR; without DEBUG both expand to
   nothing.  */
# ifdef DEBUG
#  define DEBUG_PUSH PUSH_FAILURE_INT
#  define DEBUG_POP(item_addr) *(item_addr) = POP_FAILURE_INT ()
# else
#  define DEBUG_PUSH(item)
#  define DEBUG_POP(item_addr)
# endif
1584 
1585 
/* Push the information about the state we will need
   if we ever fail back to it.

   Requires variables fail_stack, regstart, regend, reg_info, and
   num_regs_pushed be declared.  DOUBLE_FAIL_STACK requires `destination'
   be declared.

   Does `return FAILURE_CODE' if runs out of memory.

   The stack is first grown until NUM_FAILURE_ITEMS slots are free.
   Then, in this order, the macro pushes: for every register from
   lowest_active_reg to highest_active_reg its regstart, regend and
   reg_info word; lowest_active_reg; highest_active_reg;
   PATTERN_PLACE; STRING_PLACE; and, in DEBUG builds only, the failure
   id.  POP_FAILURE_POINT pops the same items in reverse.  */

# define PUSH_FAILURE_POINT(pattern_place, string_place, failure_code)	\
  do {									\
    char *destination;							\
    /* Must be int, so when we don't save any registers, the arithmetic	\
       of 0 + -1 isn't done as unsigned.  */				\
    /* Can't be int, since there is not a shred of a guarantee that int	\
       is wide enough to hold a value of something to which pointer can	\
       be assigned */							\
    active_reg_t this_reg;						\
    									\
    DEBUG_STATEMENT (failure_id++);					\
    DEBUG_STATEMENT (nfailure_points_pushed++);				\
    DEBUG_PRINT2 ("\nPUSH_FAILURE_POINT #%u:\n", failure_id);		\
    DEBUG_PRINT2 ("  Before push, next avail: %d\n", (fail_stack).avail);\
    DEBUG_PRINT2 ("                     size: %d\n", (fail_stack).size);\
									\
    DEBUG_PRINT2 ("  slots needed: %ld\n", NUM_FAILURE_ITEMS);		\
    DEBUG_PRINT2 ("     available: %d\n", REMAINING_AVAIL_SLOTS);	\
									\
    /* Ensure we have enough space allocated for what we will push.  */	\
    while (REMAINING_AVAIL_SLOTS < NUM_FAILURE_ITEMS)			\
      {									\
        if (!DOUBLE_FAIL_STACK (fail_stack))				\
          return failure_code;						\
									\
        DEBUG_PRINT2 ("\n  Doubled stack; size now: %d\n",		\
		       (fail_stack).size);				\
        DEBUG_PRINT2 ("  slots available: %d\n", REMAINING_AVAIL_SLOTS);\
      }									\
									\
    /* Push the info, starting with the registers.  */			\
    DEBUG_PRINT1 ("\n");						\
									\
    if (1)								\
      for (this_reg = lowest_active_reg; this_reg <= highest_active_reg; \
	   this_reg++)							\
	{								\
	  DEBUG_PRINT2 ("  Pushing reg: %lu\n", this_reg);		\
	  DEBUG_STATEMENT (num_regs_pushed++);				\
									\
	  DEBUG_PRINT2 ("    start: %p\n", regstart[this_reg]);		\
	  PUSH_FAILURE_POINTER (regstart[this_reg]);			\
									\
	  DEBUG_PRINT2 ("    end: %p\n", regend[this_reg]);		\
	  PUSH_FAILURE_POINTER (regend[this_reg]);			\
									\
	  DEBUG_PRINT2 ("    info: %p\n      ",				\
			reg_info[this_reg].word.pointer);		\
	  DEBUG_PRINT2 (" match_null=%d",				\
			REG_MATCH_NULL_STRING_P (reg_info[this_reg]));	\
	  DEBUG_PRINT2 (" active=%d", IS_ACTIVE (reg_info[this_reg]));	\
	  DEBUG_PRINT2 (" matched_something=%d",			\
			MATCHED_SOMETHING (reg_info[this_reg]));	\
	  DEBUG_PRINT2 (" ever_matched=%d",				\
			EVER_MATCHED_SOMETHING (reg_info[this_reg]));	\
	  DEBUG_PRINT1 ("\n");						\
	  PUSH_FAILURE_ELT (reg_info[this_reg].word);			\
	}								\
									\
    DEBUG_PRINT2 ("  Pushing  low active reg: %ld\n", lowest_active_reg);\
    PUSH_FAILURE_INT (lowest_active_reg);				\
									\
    DEBUG_PRINT2 ("  Pushing high active reg: %ld\n", highest_active_reg);\
    PUSH_FAILURE_INT (highest_active_reg);				\
									\
    DEBUG_PRINT2 ("  Pushing pattern %p:\n", pattern_place);		\
    DEBUG_PRINT_COMPILED_PATTERN (bufp, pattern_place, pend);		\
    PUSH_FAILURE_POINTER (pattern_place);				\
									\
    DEBUG_PRINT2 ("  Pushing string %p: `", string_place);		\
    DEBUG_PRINT_DOUBLE_STRING (string_place, string1, size1, string2,   \
				 size2);				\
    DEBUG_PRINT1 ("'\n");						\
    PUSH_FAILURE_POINTER (string_place);				\
									\
    DEBUG_PRINT2 ("  Pushing failure id: %u\n", failure_id);		\
    DEBUG_PUSH (failure_id);						\
  } while (0)
1673 
# ifndef DEFINED_ONCE
/* This is the number of items that are pushed and popped on the stack
   for each register.  */
#  define NUM_REG_ITEMS  3

/* Individual items aside from the registers.  */
#  ifdef DEBUG
#   define NUM_NONREG_ITEMS 5 /* Includes failure point id.  */
#  else
#   define NUM_NONREG_ITEMS 4
#  endif

/* We push at most this many items on the stack.  */
/* We used to use (num_regs - 1), which is the number of registers
   this regexp will save; but that was changed to 5
   to avoid stack overflow for a regexp with lots of parens.  */
#  define MAX_FAILURE_ITEMS (5 * NUM_REG_ITEMS + NUM_NONREG_ITEMS)

/* We actually push this many items.  The `0 ? 0 : ...' condition is
   constant, so the second arm -- the number of currently active
   registers -- is always the one used.  Reads the variables
   highest_active_reg and lowest_active_reg from the enclosing
   scope.  */
#  define NUM_FAILURE_ITEMS				\
  (((0							\
     ? 0 : highest_active_reg - lowest_active_reg + 1)	\
    * NUM_REG_ITEMS)					\
   + NUM_NONREG_ITEMS)

/* How many items can still be added to the stack without overflowing it.  */
#  define REMAINING_AVAIL_SLOTS ((fail_stack).size - (fail_stack).avail)
# endif /* not DEFINED_ONCE */
1702 
1703 
/* Pops what PUSH_FAILURE_POINT pushes.

   We restore into the parameters, all of which should be lvalues:
     STR -- the saved data position.
     PAT -- the saved pattern position.
     LOW_REG, HIGH_REG -- the lowest and highest active registers.
     REGSTART, REGEND -- arrays of string positions.
     REG_INFO -- array of information about each subexpression.

   Also assumes the variables `fail_stack' and (if debugging), `bufp',
   `pend', `string1', `size1', `string2', and `size2'.

   Items come off in the reverse of the push order: failure id (DEBUG
   only), string position, pattern position, high register, low
   register, then for each register from HIGH_REG down to LOW_REG its
   info word, end and start.  The macro also clears
   `set_regs_matched_done'.  It expands to a bare brace block, not a
   `do ... while (0)' statement, and the `else' arm of its `if (1)' is
   never executed.  */
# define POP_FAILURE_POINT(str, pat, low_reg, high_reg, regstart, regend, reg_info)\
{									\
  DEBUG_STATEMENT (unsigned failure_id;)				\
  active_reg_t this_reg;						\
  const UCHAR_T *string_temp;						\
									\
  assert (!FAIL_STACK_EMPTY ());					\
									\
  /* Remove failure points and point to how many regs pushed.  */	\
  DEBUG_PRINT1 ("POP_FAILURE_POINT:\n");				\
  DEBUG_PRINT2 ("  Before pop, next avail: %d\n", fail_stack.avail);	\
  DEBUG_PRINT2 ("                    size: %d\n", fail_stack.size);	\
									\
  assert (fail_stack.avail >= NUM_NONREG_ITEMS);			\
									\
  DEBUG_POP (&failure_id);						\
  DEBUG_PRINT2 ("  Popping failure id: %u\n", failure_id);		\
									\
  /* If the saved string location is NULL, it came from an		\
     on_failure_keep_string_jump opcode, and we want to throw away the	\
     saved NULL, thus retaining our current position in the string.  */	\
  string_temp = POP_FAILURE_POINTER ();					\
  if (string_temp != NULL)						\
    str = (const CHAR_T *) string_temp;					\
									\
  DEBUG_PRINT2 ("  Popping string %p: `", str);				\
  DEBUG_PRINT_DOUBLE_STRING (str, string1, size1, string2, size2);	\
  DEBUG_PRINT1 ("'\n");							\
									\
  pat = (UCHAR_T *) POP_FAILURE_POINTER ();				\
  DEBUG_PRINT2 ("  Popping pattern %p:\n", pat);			\
  DEBUG_PRINT_COMPILED_PATTERN (bufp, pat, pend);			\
									\
  /* Restore register info.  */						\
  high_reg = (active_reg_t) POP_FAILURE_INT ();				\
  DEBUG_PRINT2 ("  Popping high active reg: %ld\n", high_reg);		\
									\
  low_reg = (active_reg_t) POP_FAILURE_INT ();				\
  DEBUG_PRINT2 ("  Popping  low active reg: %ld\n", low_reg);		\
									\
  if (1)								\
    for (this_reg = high_reg; this_reg >= low_reg; this_reg--)		\
      {									\
	DEBUG_PRINT2 ("    Popping reg: %ld\n", this_reg);		\
									\
	reg_info[this_reg].word = POP_FAILURE_ELT ();			\
	DEBUG_PRINT2 ("      info: %p\n",				\
		      reg_info[this_reg].word.pointer);			\
									\
	regend[this_reg] = (const CHAR_T *) POP_FAILURE_POINTER ();	\
	DEBUG_PRINT2 ("      end: %p\n", regend[this_reg]);		\
									\
	regstart[this_reg] = (const CHAR_T *) POP_FAILURE_POINTER ();	\
	DEBUG_PRINT2 ("      start: %p\n", regstart[this_reg]);		\
      }									\
  else									\
    {									\
      for (this_reg = highest_active_reg; this_reg > high_reg; this_reg--) \
	{								\
	  reg_info[this_reg].word.integer = 0;				\
	  regend[this_reg] = 0;						\
	  regstart[this_reg] = 0;					\
	}								\
      highest_active_reg = high_reg;					\
    }									\
									\
  set_regs_matched_done = 0;						\
  DEBUG_STATEMENT (nfailure_points_popped++);				\
} /* POP_FAILURE_POINT */
1784 
/* Structure for per-register (a.k.a. per-group) information.
   Other register information, such as the
   starting and ending positions (which are addresses), and the list of
   inner groups (which is a bits list) are maintained in separate
   variables.

   We are making a (strictly speaking) nonportable assumption here: that
   the compiler will pack our bit fields into something that fits into
   the type of `word', i.e., is something that fits into one item on the
   failure stack.  */


/* Declarations and macros for re_match_2.  */

typedef union
{
  /* Whole-value view; this is the member PUSH_FAILURE_POINT saves and
     POP_FAILURE_POINT restores.  */
  PREFIX(fail_stack_elt_t) word;
  /* Flag view; read and written through the accessor macros
     REG_MATCH_NULL_STRING_P, IS_ACTIVE, MATCHED_SOMETHING and
     EVER_MATCHED_SOMETHING.  */
  struct
  {
      /* This field is one if this group can match the empty string,
         zero if not.  If not yet determined,  `MATCH_NULL_UNSET_VALUE'.  */
# define MATCH_NULL_UNSET_VALUE 3
    unsigned match_null_string_p : 2;
    unsigned is_active : 1;
    unsigned matched_something : 1;
    unsigned ever_matched_something : 1;
  } bits;
} PREFIX(register_info_type);
1813 
# ifndef DEFINED_ONCE
/* Accessors for the flag bits of a register_info_type value R.  Each
   expands to the bit-field itself, so it can be read or assigned.  */
#  define REG_MATCH_NULL_STRING_P(R)  ((R).bits.match_null_string_p)
#  define IS_ACTIVE(R)  ((R).bits.is_active)
#  define MATCHED_SOMETHING(R)  ((R).bits.matched_something)
#  define EVER_MATCHED_SOMETHING(R)  ((R).bits.ever_matched_something)


/* Call this when have matched a real character; it sets `matched' flags
   for the subexpressions which we are currently inside.  Also records
   that those subexprs have matched.

   Uses the variables set_regs_matched_done, lowest_active_reg,
   highest_active_reg and reg_info from the enclosing scope.  The work
   is skipped while set_regs_matched_done is nonzero; the flag is set
   here and cleared again by POP_FAILURE_POINT.  */
#  define SET_REGS_MATCHED()						\
  do									\
    {									\
      if (!set_regs_matched_done)					\
	{								\
	  active_reg_t r;						\
	  set_regs_matched_done = 1;					\
	  for (r = lowest_active_reg; r <= highest_active_reg; r++)	\
	    {								\
	      MATCHED_SOMETHING (reg_info[r])				\
		= EVER_MATCHED_SOMETHING (reg_info[r])			\
		= 1;							\
	    }								\
	}								\
    }									\
  while (0)
# endif /* not DEFINED_ONCE */
1841 
1842 /* Registers are set to a sentinel when they haven't yet matched.  The
   sentinel is the address of a dedicated static object, and REG_UNSET (E)
   tests for it with a plain pointer comparison.  */
1843 static CHAR_T PREFIX(reg_unset_dummy);
1844 # define REG_UNSET_VALUE (&PREFIX(reg_unset_dummy))
1845 # define REG_UNSET(e) ((e) == REG_UNSET_VALUE)
1846 
1847 /* Subroutine declarations and macros for regex_compile.  */
/* store_op1/store_op2 are what STORE_JUMP/STORE_JUMP2 expand to: they take
   an opcode, a location LOC in the compiled buffer and one or two integer
   arguments.  */
1848 static void PREFIX(store_op1) (re_opcode_t op, UCHAR_T *loc, int arg);
1849 static void PREFIX(store_op2) (re_opcode_t op, UCHAR_T *loc,
1850 			       int arg1, int arg2);
/* insert_op1/insert_op2 are what INSERT_JUMP/INSERT_JUMP2 expand to; those
   macros pass the current buffer end `b' as END.  */
1851 static void PREFIX(insert_op1) (re_opcode_t op, UCHAR_T *loc,
1852 				int arg, UCHAR_T *end);
1853 static void PREFIX(insert_op2) (re_opcode_t op, UCHAR_T *loc,
1854 				int arg1, int arg2, UCHAR_T *end);
/* Context tests that regex_compile consults when deciding whether `^' and
   `$' act as anchors or as ordinary characters.  */
1855 static boolean PREFIX(at_begline_loc_p) (const CHAR_T *pattern,
1856 					 const CHAR_T *p,
1857 					 reg_syntax_t syntax);
1858 static boolean PREFIX(at_endline_loc_p) (const CHAR_T *p,
1859 					 const CHAR_T *pend,
1860 					 reg_syntax_t syntax);
/* Range compilation has one variant per build: the WCHAR variant takes an
   extra CHAR_SET argument, the BYTE variant works on plain chars.  */
1861 # ifdef WCHAR
1862 static reg_errcode_t wcs_compile_range (CHAR_T range_start,
1863 					const CHAR_T **p_ptr,
1864 					const CHAR_T *pend,
1865 					char *translate,
1866 					reg_syntax_t syntax,
1867 					UCHAR_T *b,
1868 					CHAR_T *char_set);
1869 static void insert_space (int num, CHAR_T *loc, CHAR_T *end);
1870 # else /* BYTE */
1871 static reg_errcode_t byte_compile_range (unsigned int range_start,
1872 					 const char **p_ptr,
1873 					 const char *pend,
1874 					 char *translate,
1875 					 reg_syntax_t syntax,
1876 					 unsigned char *b);
1877 # endif /* WCHAR */
1878 
1879 /* Fetch the next character in the uncompiled pattern---translating it
1880    if necessary.  Also cast from a signed character in the constant
1881    string passed to us by the user to an unsigned char that we can use
1882    as an array index (in, e.g., `translate').
   The macro uses `p', `pend' and `translate' from the expansion site,
   advances `p', and makes the ENCLOSING FUNCTION return REG_EEND when the
   pattern is already exhausted (p == pend).  */
1883 /* In the WCHAR variant we translate only if the character is <= 0xff,
1884    because it is impossible to allocate a 4GB array for some encodings
1885    which have a 4-byte character set like UCS4.  */
1886 # ifndef PATFETCH
1887 #  ifdef WCHAR
1888 #   define PATFETCH(c)							\
1889   do {if (p == pend) return REG_EEND;					\
1890     c = (UCHAR_T) *p++;							\
1891     if (translate && (c <= 0xff)) c = (UCHAR_T) translate[c];		\
1892   } while (0)
1893 #  else /* BYTE */
1894 #   define PATFETCH(c)							\
1895   do {if (p == pend) return REG_EEND;					\
1896     c = (unsigned char) *p++;						\
1897     if (translate) c = (unsigned char) translate[c];			\
1898   } while (0)
1899 #  endif /* WCHAR */
1900 # endif
1901 
1902 /* Fetch the next character in the uncompiled pattern, with no
1903    translation.  Like PATFETCH, this uses `p' and `pend' from the
   expansion site and makes the enclosing function return REG_EEND when
   p == pend.  */
1904 # define PATFETCH_RAW(c)						\
1905   do {if (p == pend) return REG_EEND;					\
1906     c = (UCHAR_T) *p++; 	       					\
1907   } while (0)
1908 
1909 /* Go backwards one character in the pattern.  Expands to a bare `p--'
   with no bounds check, so it is only meaningful right after a fetch.  */
1910 # define PATUNFETCH p--
1911 
1912 
1913 /* If `translate' is non-null, return translate[D], else just D.  We
1914    cast the subscript to translate because some data is declared as
1915    `char *', to avoid warnings when a string constant is passed.  But
1916    when we use a character as a subscript we must make it unsigned.
   The translated value is cast to `char'.  `translate' is taken from the
   expansion site.  */
1917 /* In the WCHAR variant we translate only if the character is <= 0xff,
1918    because it is impossible to allocate a 4GB array for some encodings
1919    which have a 4-byte character set like UCS4.  That variant mentions
   D both in the range test and in the selected branch, so D may be
   evaluated twice; do not pass an argument with side effects.  */
1920 
1921 # ifndef TRANSLATE
1922 #  ifdef WCHAR
1923 #   define TRANSLATE(d) \
1924   ((translate && ((UCHAR_T) (d)) <= 0xff) \
1925    ? (char) translate[(unsigned char) (d)] : (d))
1926 # else /* BYTE */
1927 #   define TRANSLATE(d) \
1928   (translate ? (char) translate[(unsigned char) (d)] : (d))
1929 #  endif /* WCHAR */
1930 # endif
1931 
1932 
1933 /* Macros for outputting the compiled pattern into `buffer'.  All of
   them use `b' (the append position) and `bufp' from the expansion
   site.  */
1934 
1935 /* If the buffer isn't allocated when it comes in, use this.  The value
   is a size in bytes: room for 32 UCHAR_T elements.  */
1936 # define INIT_BUF_SIZE  (32 * sizeof(UCHAR_T))
1937 
1938 /* Make sure we have room for at least N more elements in the buffer.
   N counts UCHAR_T-sized elements: the WCHAR variant scales it by
   sizeof (CHAR_T) before comparing with the byte count in
   bufp->allocated, while in the BYTE variant N is added to the pointer
   difference directly.  EXTEND_BUFFER is invoked repeatedly until the
   request fits; it can make the enclosing function return an error.  */
1939 # ifdef WCHAR
1940 #  define GET_BUFFER_SPACE(n)						\
1941     while (((unsigned long)b - (unsigned long)COMPILED_BUFFER_VAR	\
1942             + (n)*sizeof(CHAR_T)) > bufp->allocated)			\
1943       EXTEND_BUFFER ()
1944 # else /* BYTE */
1945 #  define GET_BUFFER_SPACE(n)						\
1946     while ((unsigned long) (b - bufp->buffer + (n)) > bufp->allocated)	\
1947       EXTEND_BUFFER ()
1948 # endif /* WCHAR */
1949 
1950 /* Make sure we have room for one more element and then append C to the
   buffer, advancing `b'.  */
1951 # define BUF_PUSH(c)							\
1952   do {									\
1953     GET_BUFFER_SPACE (1);						\
1954     *b++ = (UCHAR_T) (c);						\
1955   } while (0)
1956 
1957 
1958 /* Ensure we have room for two more elements and then append C1 and C2.  */
1959 # define BUF_PUSH_2(c1, c2)						\
1960   do {									\
1961     GET_BUFFER_SPACE (2);						\
1962     *b++ = (UCHAR_T) (c1);						\
1963     *b++ = (UCHAR_T) (c2);						\
1964   } while (0)
1965 
1966 
1967 /* As with BUF_PUSH_2, except for three elements.  */
1968 # define BUF_PUSH_3(c1, c2, c3)						\
1969   do {									\
1970     GET_BUFFER_SPACE (3);						\
1971     *b++ = (UCHAR_T) (c1);						\
1972     *b++ = (UCHAR_T) (c2);						\
1973     *b++ = (UCHAR_T) (c3);						\
1974   } while (0)
1975 
1976 /* Store a jump with opcode OP at LOC to location TO.  We store a
1977    relative address: the distance from LOC to TO, less the
   1 + OFFSET_ADDRESS_SIZE elements that the jump itself occupies.  */
1978 # define STORE_JUMP(op, loc, to) \
1979  PREFIX(store_op1) (op, loc, (int) ((to) - (loc) - (1 + OFFSET_ADDRESS_SIZE)))
1980 
1981 /* Likewise, for a two-argument jump; ARG is passed through unchanged as
   the second operand.  */
1982 # define STORE_JUMP2(op, loc, to, arg) \
1983   PREFIX(store_op2) (op, loc, (int) ((to) - (loc) - (1 + OFFSET_ADDRESS_SIZE)), arg)
1984 
1985 /* Like `STORE_JUMP', but for inserting.  Assume `b' is the buffer end.  */
1986 # define INSERT_JUMP(op, loc, to) \
1987   PREFIX(insert_op1) (op, loc, (int) ((to) - (loc) - (1 + OFFSET_ADDRESS_SIZE)), b)
1988 
1989 /* Like `STORE_JUMP2', but for inserting.  Assume `b' is the buffer end.  */
1990 # define INSERT_JUMP2(op, loc, to, arg) \
1991   PREFIX(insert_op2) (op, loc, (int) ((to) - (loc) - (1 + OFFSET_ADDRESS_SIZE)),\
1992 	      arg, b)
1993 
1994 /* This is not an arbitrary limit: the arguments which represent offsets
1995    into the pattern are two bytes long.  So if 2^16 bytes turns out to
1996    be too small, many things would have to change.  */
1997 /* Any other compiler which, like MSC, has an allocation limit below 2^16
1998    bytes will have to use an approach similar to what was done below for
1999    MSC and drop MAX_BUF_SIZE a bit.  Otherwise you may end up
2000    reallocating to 0 bytes.  Such a thing is not going to work too well.
2001    You have been warned!!  */
2002 # ifndef DEFINED_ONCE
2003 #  if defined _MSC_VER  && !defined WIN32
2004 /* Microsoft C 16-bit versions limit malloc to approx 65512 bytes.
2005    The REALLOC define eliminates a flurry of conversion warnings,
2006    but is not required. */
2007 #   define MAX_BUF_SIZE  65500L
2008 #   define REALLOC(p,s) realloc ((p), (size_t) (s))
2009 #  else
2010 #   define MAX_BUF_SIZE (1L << 16)
2011 #   define REALLOC(p,s) realloc ((p), (s))
2012 #  endif
2013 
2014 /* EXTEND_BUFFER doubles the buffer's allocated size via realloc and
2015    resets the pointers that pointed into the old block to point to the
2016    correct places in the new one.  The new size is clamped to
2017    MAX_BUF_SIZE; if the buffer cannot grow any further, REG_ESIZE is
   returned instead.
   The helpers below serve that macro.  MOVE_BUFFER_POINTER (P) shifts one
   pointer by `incr', a variable that must exist at the expansion site.
   With __BOUNDED_POINTERS__ the pointer's bounds are adjusted as well,
   and ELSE_EXTEND_BUFFER_HIGH_BOUND supplies an `else' branch that only
   refreshes the high bounds when the block did not move; without it that
   macro expands to nothing.  */
2018 #  if __BOUNDED_POINTERS__
2019 #   define SET_HIGH_BOUND(P) (__ptrhigh (P) = __ptrlow (P) + bufp->allocated)
2020 #   define MOVE_BUFFER_POINTER(P) \
2021   (__ptrlow (P) += incr, SET_HIGH_BOUND (P), __ptrvalue (P) += incr)
2022 #   define ELSE_EXTEND_BUFFER_HIGH_BOUND	\
2023   else						\
2024     {						\
2025       SET_HIGH_BOUND (b);			\
2026       SET_HIGH_BOUND (begalt);			\
2027       if (fixup_alt_jump)			\
2028 	SET_HIGH_BOUND (fixup_alt_jump);	\
2029       if (laststart)				\
2030 	SET_HIGH_BOUND (laststart);		\
2031       if (pending_exact)			\
2032 	SET_HIGH_BOUND (pending_exact);		\
2033     }
2034 #  else
2035 #   define MOVE_BUFFER_POINTER(P) (P) += incr
2036 #   define ELSE_EXTEND_BUFFER_HIGH_BOUND
2037 #  endif
2038 # endif /* not DEFINED_ONCE */
2039 
/* EXTEND_BUFFER ()

   Grow the compiled-pattern buffer.  bufp->allocated is doubled (and
   clamped to MAX_BUF_SIZE), the block is reallocated, and, if it moved,
   every pointer into it -- `b', `begalt' and, when non-null,
   `fixup_alt_jump', `laststart' and `pending_exact' -- is shifted by the
   distance between the old and the new block.  All of these names, plus
   `bufp', are taken from the expansion site.

   The macro expands to a statement that makes the ENCLOSING FUNCTION
   return REG_ESIZE when the buffer is already at its maximum size, and
   REG_ESPACE when reallocation fails.

   `incr' is a ptrdiff_t rather than an int: where pointers are wider than
   int, the distance between two heap blocks need not fit in an int, and a
   truncated distance would corrupt every relocated pointer.

   NOTE(review): the early returns are plain `return's, so they do not go
   through FREE_STACK_RETURN, and after a failed reallocation the buffer
   pointer stored in BUFP is null while the old block has not been freed
   here.  Callers should be checked for the resulting leaks.  */
# ifdef WCHAR
#  define EXTEND_BUFFER()						\
  do {									\
    UCHAR_T *old_buffer = COMPILED_BUFFER_VAR;				\
    int wchar_count;							\
    if (bufp->allocated + sizeof(UCHAR_T) > MAX_BUF_SIZE)		\
      return REG_ESIZE;							\
    bufp->allocated <<= 1;						\
    if (bufp->allocated > MAX_BUF_SIZE)					\
      bufp->allocated = MAX_BUF_SIZE;					\
    /* How many characters can the new buffer hold?  */		\
    wchar_count = bufp->allocated / sizeof(UCHAR_T);			\
    if (wchar_count == 0) wchar_count = 1;				\
    /* Truncate the size to a whole number of CHAR_T elements.  */	\
    bufp->allocated = wchar_count * sizeof(UCHAR_T);			\
    RETALLOC (COMPILED_BUFFER_VAR, wchar_count, UCHAR_T);		\
    bufp->buffer = (char*)COMPILED_BUFFER_VAR;				\
    if (COMPILED_BUFFER_VAR == NULL)					\
      return REG_ESPACE;						\
    /* If the buffer moved, move all the pointers into it.  */		\
    if (old_buffer != COMPILED_BUFFER_VAR)				\
      {									\
	ptrdiff_t incr = COMPILED_BUFFER_VAR - old_buffer;		\
	MOVE_BUFFER_POINTER (b);					\
	MOVE_BUFFER_POINTER (begalt);					\
	if (fixup_alt_jump)						\
	  MOVE_BUFFER_POINTER (fixup_alt_jump);				\
	if (laststart)							\
	  MOVE_BUFFER_POINTER (laststart);				\
	if (pending_exact)						\
	  MOVE_BUFFER_POINTER (pending_exact);				\
      }									\
    ELSE_EXTEND_BUFFER_HIGH_BOUND					\
  } while (0)
# else /* BYTE */
#  define EXTEND_BUFFER()						\
  do {									\
    UCHAR_T *old_buffer = COMPILED_BUFFER_VAR;				\
    if (bufp->allocated == MAX_BUF_SIZE)				\
      return REG_ESIZE;							\
    bufp->allocated <<= 1;						\
    if (bufp->allocated > MAX_BUF_SIZE)					\
      bufp->allocated = MAX_BUF_SIZE;					\
    bufp->buffer = REALLOC (COMPILED_BUFFER_VAR, bufp->allocated);	\
    if (COMPILED_BUFFER_VAR == NULL)					\
      return REG_ESPACE;						\
    /* If the buffer moved, move all the pointers into it.  */		\
    if (old_buffer != COMPILED_BUFFER_VAR)				\
      {									\
	ptrdiff_t incr = COMPILED_BUFFER_VAR - old_buffer;		\
	MOVE_BUFFER_POINTER (b);					\
	MOVE_BUFFER_POINTER (begalt);					\
	if (fixup_alt_jump)						\
	  MOVE_BUFFER_POINTER (fixup_alt_jump);				\
	if (laststart)							\
	  MOVE_BUFFER_POINTER (laststart);				\
	if (pending_exact)						\
	  MOVE_BUFFER_POINTER (pending_exact);				\
      }									\
    ELSE_EXTEND_BUFFER_HIGH_BOUND					\
  } while (0)
# endif /* WCHAR */
2102 
2103 # ifndef DEFINED_ONCE
2104 /* Since we have one byte reserved for the register number argument to
2105    {start,stop}_memory, the maximum number of groups we can report
2106    things about is what fits in that byte.  */
2107 #  define MAX_REGNUM 255
2108 
2109 /* But patterns can have more than `MAX_REGNUM' registers.  We just
2110    ignore the excess.  */
2111 typedef unsigned regnum_t;
2112 
2113 
2114 /* Macros for the compile stack.  */
2115 
2116 /* Since offsets can go either forwards or backwards, this type needs to
2117    be able to hold values from -(MAX_BUF_SIZE - 1) to MAX_BUF_SIZE - 1.  */
2118 /* int may not be enough when sizeof(int) == 2.  */
2119 typedef long pattern_offset_t;
2120 
/* One compile-stack entry.  NOTE(review): the fields presumably save the
   like-named regex_compile locals (begalt, fixup_alt_jump, laststart,
   regnum) as offsets when a group is opened -- confirm at the push
   site.  */
2121 typedef struct
2122 {
2123   pattern_offset_t begalt_offset;
2124   pattern_offset_t fixup_alt_jump;
2125   pattern_offset_t inner_group_offset;
2126   pattern_offset_t laststart_offset;
2127   regnum_t regnum;
2128 } compile_stack_elt_t;
2129 
2130 
/* The compile stack itself: an array of entries, its capacity (`size')
   and the number of entries in use (`avail').  */
2131 typedef struct
2132 {
2133   compile_stack_elt_t *stack;
2134   unsigned size;
2135   unsigned avail;			/* Offset of next open position.  */
2136 } compile_stack_type;
2137 
2138 
/* Number of entries regex_compile allocates for a fresh compile stack.  */
2139 #  define INIT_COMPILE_STACK_SIZE 32
2140 
/* These macros operate on a variable named `compile_stack' at the
   expansion site.  */
2141 #  define COMPILE_STACK_EMPTY  (compile_stack.avail == 0)
2142 #  define COMPILE_STACK_FULL  (compile_stack.avail == compile_stack.size)
2143 
2144 /* The next available element, i.e. the slot at index `avail', not the
   most recently pushed entry.  */
2145 #  define COMPILE_STACK_TOP (compile_stack.stack[compile_stack.avail])
2146 
2147 # endif /* not DEFINED_ONCE */
2148 
/* Set the bit for character C in a list.  The list is the bitmap that `b'
   (taken from the expansion site) points at: byte C / BYTEWIDTH, bit
   C % BYTEWIDTH, with C first reduced to an unsigned char.

   C is parenthesized at both uses so that the cast applies to the whole
   argument even when an expression is passed.  C is evaluated twice, so
   do not pass an argument with side effects.  */
# ifndef DEFINED_ONCE
#  define SET_LIST_BIT(c)                               \
  (b[((unsigned char) (c)) / BYTEWIDTH]               \
   |= 1 << (((unsigned char) (c)) % BYTEWIDTH))
# endif /* not DEFINED_ONCE */
2155 
/* Get the next unsigned number in the uncompiled pattern.

   Characters are fetched with PATFETCH into `c' for as long as they are
   decimal digits and the pattern is not exhausted; `p', `pend' and `c'
   come from the expansion site.  On exit `c' holds the last character
   fetched (the first non-digit, if there was one).

   NUM must be a modifiable integer lvalue.  If no digit is seen, NUM is
   left untouched, so callers can preset it to -1 to detect a missing
   number.  A negative NUM is reset to 0 before the first digit is
   accumulated.  Once NUM exceeds RE_DUP_MAX further digits are still
   consumed but no longer accumulated, which keeps the value from
   overflowing while leaving it recognizably out of range.

   The body is wrapped in do { } while (0) so that the macro behaves like
   a single statement, e.g. in an unbraced if/else.  */
# define GET_UNSIGNED_NUMBER(num) \
  do									\
    {									\
      while (p != pend)							\
	{								\
	  PATFETCH (c);							\
	  if (c < '0' || c > '9')					\
	    break;							\
	  if ((num) <= RE_DUP_MAX)					\
	    {								\
	      if ((num) < 0)						\
		(num) = 0;						\
	      (num) = (num) * 10 + c - '0';				\
	    }								\
	}								\
    }									\
  while (0)
2172 
2173 # ifndef DEFINED_ONCE
2174 #  if defined _LIBC || WIDE_CHAR_SUPPORT
2175 /* The GNU C library provides support for user-defined character classes
2176    and the functions from ISO C Amendment 1.  In this configuration
   IS_CHAR_CLASS defers to wctype, so its result is a wctype_t that is
   nonzero for a known class name.  */
2177 #   ifdef CHARCLASS_NAME_MAX
2178 #    define CHAR_CLASS_MAX_LENGTH CHARCLASS_NAME_MAX
2179 #   else
2180 /* This shouldn't happen but some implementations might still have this
2181    problem.  Use a reasonable default value.  */
2182 #    define CHAR_CLASS_MAX_LENGTH 256
2183 #   endif
2184 
2185 #   ifdef _LIBC
2186 #    define IS_CHAR_CLASS(string) __wctype (string)
2187 #   else
2188 #    define IS_CHAR_CLASS(string) wctype (string)
2189 #   endif
2190 #  else
/* Without wide-character support only the twelve class names listed
   below are recognized, and IS_CHAR_CLASS is a plain boolean test.  */
2191 #   define CHAR_CLASS_MAX_LENGTH  6 /* Namely, `xdigit'.  */
2192 
2193 #   define IS_CHAR_CLASS(string)					\
2194    (STREQ (string, "alpha") || STREQ (string, "upper")			\
2195     || STREQ (string, "lower") || STREQ (string, "digit")		\
2196     || STREQ (string, "alnum") || STREQ (string, "xdigit")		\
2197     || STREQ (string, "space") || STREQ (string, "print")		\
2198     || STREQ (string, "punct") || STREQ (string, "graph")		\
2199     || STREQ (string, "cntrl") || STREQ (string, "blank"))
2200 #  endif
2201 # endif /* not DEFINED_ONCE */
2202 
2203 # ifndef MATCH_MAY_ALLOCATE
2204 
2205 /* If we cannot allocate large objects within re_match_2_internal,
2206    we make the fail stack and register vectors global.
2207    The fail stack, we grow to the maximum size when a regexp
2208    is compiled.
2209    The register vectors, we adjust in size each time we
2210    compile a regexp, according to the number of registers it needs.  */
2211 
2212 static PREFIX(fail_stack_type) fail_stack;
2213 
2214 /* Size with which the following vectors are currently allocated.
2215    That is so we can make them bigger as needed,
2216    but never make them smaller.
   These objects carry no PREFIX and are shared; they are declared only
   in the pass where DEFINED_ONCE is defined.  */
2217 #  ifdef DEFINED_ONCE
2218 static int regs_allocated_size;
2219 
2220 static const char **     regstart, **     regend;
2221 static const char ** old_regstart, ** old_regend;
2222 static const char **best_regstart, **best_regend;
2223 static const char **reg_dummy;
2224 #  endif /* DEFINED_ONCE */
2225 
/* The register-info vectors, by contrast, exist once per PREFIX.  */
2226 static PREFIX(register_info_type) *PREFIX(reg_info);
2227 static PREFIX(register_info_type) *PREFIX(reg_info_dummy);
2228 
2229 /* Make the register vectors big enough for NUM_REGS registers,
2230    but don't make them smaller.
   Nothing happens unless NUM_REGS exceeds regs_allocated_size; otherwise
   each of the nine vectors is passed to RETALLOC_IF with the new element
   count and regs_allocated_size is updated.
   NOTE(review): no allocation-failure check is visible here and the
   function returns void; whether RETALLOC_IF reports failure some other
   way is not shown in this part of the file.  */
2231 
2232 static void
PREFIX(regex_grow_registers)2233 PREFIX(regex_grow_registers) (int num_regs)
2234 {
2235   if (num_regs > regs_allocated_size)
2236     {
2237       RETALLOC_IF (regstart,	 num_regs, const char *);
2238       RETALLOC_IF (regend,	 num_regs, const char *);
2239       RETALLOC_IF (old_regstart, num_regs, const char *);
2240       RETALLOC_IF (old_regend,	 num_regs, const char *);
2241       RETALLOC_IF (best_regstart, num_regs, const char *);
2242       RETALLOC_IF (best_regend,	 num_regs, const char *);
2243       RETALLOC_IF (PREFIX(reg_info), num_regs, PREFIX(register_info_type));
2244       RETALLOC_IF (reg_dummy,	 num_regs, const char *);
2245       RETALLOC_IF (PREFIX(reg_info_dummy), num_regs, PREFIX(register_info_type));
2246 
2247       regs_allocated_size = num_regs;
2248     }
2249 }
2250 
2251 # endif /* not MATCH_MAY_ALLOCATE */
2252 
2253 # ifndef DEFINED_ONCE
/* Forward declaration.  Note that the compile stack is passed by value
   (the struct itself, not a pointer to it).  NOTE(review): judging by the
   name, this reports whether group REGNUM is still open on the compile
   stack -- confirm against the definition.  */
2254 static boolean group_in_compile_stack (compile_stack_type
2255 				       compile_stack,
2256 				       regnum_t regnum);
2257 # endif /* not DEFINED_ONCE */
2258 
2259 /* `regex_compile' compiles PATTERN (of length SIZE) according to SYNTAX.
2260    Returns one of error codes defined in `regex.h', or zero for success.
2261 
2262    Assumes the `allocated' (and perhaps `buffer') and `translate'
2263    fields are set in BUFP on entry.
2264 
2265    If it succeeds, results are put in BUFP (if it returns an error, the
2266    contents of BUFP are undefined):
2267      `buffer' is the compiled pattern;
2268      `syntax' is set to SYNTAX;
2269      `used' is set to the length of the compiled pattern;
2270      `fastmap_accurate' is zero;
2271      `re_nsub' is the number of subexpressions in PATTERN;
2272      `not_bol' and `not_eol' are zero;
2273 
2274    The `fastmap' and `newline_anchor' fields are neither
2275    examined nor set.  */
2276 
2277 /* Return, freeing storage we allocated.
   Expands to a `return' statement in the enclosing function.  The frees
   and VALUE are joined with the comma operator, so everything is released
   before VALUE is yielded.  The BYTE variant frees only
   compile_stack.stack; the WCHAR variant also frees the temporaries
   `pattern', `mbs_offset' and `is_binary'.  The compiled-pattern buffer
   itself is not freed here.  */
2278 # ifdef WCHAR
2279 #  define FREE_STACK_RETURN(value)		\
2280   return (free(pattern), free(mbs_offset), free(is_binary), free (compile_stack.stack), value)
2281 # else
2282 #  define FREE_STACK_RETURN(value)		\
2283   return (free (compile_stack.stack), value)
2284 # endif /* WCHAR */
2285 
2286 static reg_errcode_t
PREFIX(regex_compile)2287 PREFIX(regex_compile) (const char *ARG_PREFIX(pattern),
2288 		       size_t ARG_PREFIX(size),
2289 		       reg_syntax_t syntax,
2290 		       struct re_pattern_buffer *bufp)
2291 {
2292   /* We fetch characters from PATTERN here.  Even though PATTERN is
2293      `char *' (i.e., signed), we declare these variables as unsigned, so
2294      they can be reliably used as array indices.  */
2295   register UCHAR_T c, c1;
2296 
2297 #ifdef WCHAR
2298   /* A temporary space to keep wchar_t pattern and compiled pattern.  */
2299   CHAR_T *pattern, *COMPILED_BUFFER_VAR;
2300   size_t size;
2301   /* offset buffer for optimization. See convert_mbs_to_wc.  */
2302   int *mbs_offset = NULL;
2303   /* It hold whether each wchar_t is binary data or not.  */
2304   char *is_binary = NULL;
2305   /* A flag whether exactn is handling binary data or not.  */
2306   char is_exactn_bin = FALSE;
2307 #endif /* WCHAR */
2308 
2309   /* A random temporary spot in PATTERN.  */
2310   const CHAR_T *p1;
2311 
2312   /* Points to the end of the buffer, where we should append.  */
2313   register UCHAR_T *b;
2314 
2315   /* Keeps track of unclosed groups.  */
2316   compile_stack_type compile_stack;
2317 
2318   /* Points to the current (ending) position in the pattern.  */
2319 #ifdef WCHAR
2320   const CHAR_T *p;
2321   const CHAR_T *pend;
2322 #else /* BYTE */
2323   const CHAR_T *p = pattern;
2324   const CHAR_T *pend = pattern + size;
2325 #endif /* WCHAR */
2326 
2327   /* How to translate the characters in the pattern.  */
2328   RE_TRANSLATE_TYPE translate = bufp->translate;
2329 
2330   /* Address of the count-byte of the most recently inserted `exactn'
2331      command.  This makes it possible to tell if a new exact-match
2332      character can be added to that command or if the character requires
2333      a new `exactn' command.  */
2334   UCHAR_T *pending_exact = 0;
2335 
2336   /* Address of start of the most recently finished expression.
2337      This tells, e.g., postfix * where to find the start of its
2338      operand.  Reset at the beginning of groups and alternatives.  */
2339   UCHAR_T *laststart = 0;
2340 
2341   /* Address of beginning of regexp, or inside of last group.  */
2342   UCHAR_T *begalt;
2343 
2344   /* Address of the place where a forward jump should go to the end of
2345      the containing expression.  Each alternative of an `or' -- except the
2346      last -- ends with a forward jump of this sort.  */
2347   UCHAR_T *fixup_alt_jump = 0;
2348 
2349   /* Counts open-groups as they are encountered.  Remembered for the
2350      matching close-group on the compile stack, so the same register
2351      number is put in the stop_memory as the start_memory.  */
2352   regnum_t regnum = 0;
2353 
2354 #ifdef WCHAR
2355   /* Initialize the wchar_t PATTERN and offset_buffer.  */
2356   p = pend = pattern = TALLOC(csize + 1, CHAR_T);
2357   mbs_offset = TALLOC(csize + 1, int);
2358   is_binary = TALLOC(csize + 1, char);
2359   if (pattern == NULL || mbs_offset == NULL || is_binary == NULL)
2360     {
2361       free(pattern);
2362       free(mbs_offset);
2363       free(is_binary);
2364       return REG_ESPACE;
2365     }
2366   pattern[csize] = L'\0';	/* sentinel */
2367   size = convert_mbs_to_wcs(pattern, cpattern, csize, mbs_offset, is_binary);
2368   pend = p + size;
2369   if (size < 0)
2370     {
2371       free(pattern);
2372       free(mbs_offset);
2373       free(is_binary);
2374       return REG_BADPAT;
2375     }
2376 #endif
2377 
2378 #ifdef DEBUG
2379   DEBUG_PRINT1 ("\nCompiling pattern: ");
2380   if (debug)
2381     {
2382       unsigned debug_count;
2383 
2384       for (debug_count = 0; debug_count < size; debug_count++)
2385         PUT_CHAR (pattern[debug_count]);
2386       putchar ('\n');
2387     }
2388 #endif /* DEBUG */
2389 
2390   /* Initialize the compile stack.  */
2391   compile_stack.stack = TALLOC (INIT_COMPILE_STACK_SIZE, compile_stack_elt_t);
2392   if (compile_stack.stack == NULL)
2393     {
2394 #ifdef WCHAR
2395       free(pattern);
2396       free(mbs_offset);
2397       free(is_binary);
2398 #endif
2399       return REG_ESPACE;
2400     }
2401 
2402   compile_stack.size = INIT_COMPILE_STACK_SIZE;
2403   compile_stack.avail = 0;
2404 
2405   /* Initialize the pattern buffer.  */
2406   bufp->syntax = syntax;
2407   bufp->fastmap_accurate = 0;
2408   bufp->not_bol = bufp->not_eol = 0;
2409 
2410   /* Set `used' to zero, so that if we return an error, the pattern
2411      printer (for debugging) will think there's no pattern.  We reset it
2412      at the end.  */
2413   bufp->used = 0;
2414 
2415   /* Always count groups, whether or not bufp->no_sub is set.  */
2416   bufp->re_nsub = 0;
2417 
2418 #if !defined emacs && !defined SYNTAX_TABLE
2419   /* Initialize the syntax table.  */
2420    init_syntax_once ();
2421 #endif
2422 
2423   if (bufp->allocated == 0)
2424     {
2425       if (bufp->buffer)
2426 	{ /* If zero allocated, but buffer is non-null, try to realloc
2427              enough space.  This loses if buffer's address is bogus, but
2428              that is the user's responsibility.  */
2429 #ifdef WCHAR
2430 	  /* Free bufp->buffer and allocate an array for wchar_t pattern
2431 	     buffer.  */
2432           free(bufp->buffer);
2433           COMPILED_BUFFER_VAR = TALLOC (INIT_BUF_SIZE/sizeof(UCHAR_T),
2434 					UCHAR_T);
2435 #else
2436           RETALLOC (COMPILED_BUFFER_VAR, INIT_BUF_SIZE, UCHAR_T);
2437 #endif /* WCHAR */
2438         }
2439       else
2440         { /* Caller did not allocate a buffer.  Do it for them.  */
2441           COMPILED_BUFFER_VAR = TALLOC (INIT_BUF_SIZE / sizeof(UCHAR_T),
2442 					UCHAR_T);
2443         }
2444 
2445       if (!COMPILED_BUFFER_VAR) FREE_STACK_RETURN (REG_ESPACE);
2446 #ifdef WCHAR
2447       bufp->buffer = (char*)COMPILED_BUFFER_VAR;
2448 #endif /* WCHAR */
2449       bufp->allocated = INIT_BUF_SIZE;
2450     }
2451 #ifdef WCHAR
2452   else
2453     COMPILED_BUFFER_VAR = (UCHAR_T*) bufp->buffer;
2454 #endif
2455 
2456   begalt = b = COMPILED_BUFFER_VAR;
2457 
2458   /* Loop through the uncompiled pattern until we're at the end.  */
2459   while (p != pend)
2460     {
2461       PATFETCH (c);
2462 
2463       switch (c)
2464         {
2465         case '^':
2466           {
2467             if (   /* If at start of pattern, it's an operator.  */
2468                    p == pattern + 1
2469                    /* If context independent, it's an operator.  */
2470                 || syntax & RE_CONTEXT_INDEP_ANCHORS
2471                    /* Otherwise, depends on what's come before.  */
2472                 || PREFIX(at_begline_loc_p) (pattern, p, syntax))
2473               BUF_PUSH (begline);
2474             else
2475               goto normal_char;
2476           }
2477           break;
2478 
2479 
2480         case '$':
2481           {
2482             if (   /* If at end of pattern, it's an operator.  */
2483                    p == pend
2484                    /* If context independent, it's an operator.  */
2485                 || syntax & RE_CONTEXT_INDEP_ANCHORS
2486                    /* Otherwise, depends on what's next.  */
2487                 || PREFIX(at_endline_loc_p) (p, pend, syntax))
2488                BUF_PUSH (endline);
2489              else
2490                goto normal_char;
2491            }
2492            break;
2493 
2494 
2495 	case '+':
2496         case '?':
2497           if ((syntax & RE_BK_PLUS_QM)
2498               || (syntax & RE_LIMITED_OPS))
2499             goto normal_char;
2500         handle_plus:
2501         case '*':
2502           /* If there is no previous pattern... */
2503           if (!laststart)
2504             {
2505               if (syntax & RE_CONTEXT_INVALID_OPS)
2506                 FREE_STACK_RETURN (REG_BADRPT);
2507               else if (!(syntax & RE_CONTEXT_INDEP_OPS))
2508                 goto normal_char;
2509             }
2510 
2511           {
2512             /* Are we optimizing this jump?  */
2513             boolean keep_string_p = false;
2514 
2515             /* 1 means zero (many) matches is allowed.  */
2516             char zero_times_ok = 0, many_times_ok = 0;
2517 
2518             /* If there is a sequence of repetition chars, collapse it
2519                down to just one (the right one).  We can't combine
2520                interval operators with these because of, e.g., `a{2}*',
2521                which should only match an even number of `a's.  */
2522 
2523             for (;;)
2524               {
2525                 zero_times_ok |= c != '+';
2526                 many_times_ok |= c != '?';
2527 
2528                 if (p == pend)
2529                   break;
2530 
2531                 PATFETCH (c);
2532 
2533                 if (c == '*'
2534                     || (!(syntax & RE_BK_PLUS_QM) && (c == '+' || c == '?')))
2535                   ;
2536 
2537                 else if (syntax & RE_BK_PLUS_QM  &&  c == '\\')
2538                   {
2539                     if (p == pend) FREE_STACK_RETURN (REG_EESCAPE);
2540 
2541                     PATFETCH (c1);
2542                     if (!(c1 == '+' || c1 == '?'))
2543                       {
2544                         PATUNFETCH;
2545                         PATUNFETCH;
2546                         break;
2547                       }
2548 
2549                     c = c1;
2550                   }
2551                 else
2552                   {
2553                     PATUNFETCH;
2554                     break;
2555                   }
2556 
2557                 /* If we get here, we found another repeat character.  */
2558                }
2559 
2560             /* Star, etc. applied to an empty pattern is equivalent
2561                to an empty pattern.  */
2562             if (!laststart)
2563               break;
2564 
2565             /* Now we know whether or not zero matches is allowed
2566                and also whether or not two or more matches is allowed.  */
2567             if (many_times_ok)
2568               { /* More than one repetition is allowed, so put in at the
2569                    end a backward relative jump from `b' to before the next
2570                    jump we're going to put in below (which jumps from
2571                    laststart to after this jump).
2572 
2573                    But if we are at the `*' in the exact sequence `.*\n',
2574                    insert an unconditional jump backwards to the .,
2575                    instead of the beginning of the loop.  This way we only
2576                    push a failure point once, instead of every time
2577                    through the loop.  */
2578                 assert (p - 1 > pattern);
2579 
2580                 /* Allocate the space for the jump.  */
2581                 GET_BUFFER_SPACE (1 + OFFSET_ADDRESS_SIZE);
2582 
2583                 /* We know we are not at the first character of the pattern,
2584                    because laststart was nonzero.  And we've already
2585                    incremented `p', by the way, to be the character after
2586                    the `*'.  Do we have to do something analogous here
2587                    for null bytes, because of RE_DOT_NOT_NULL?  */
2588                 if (TRANSLATE (*(p - 2)) == TRANSLATE ('.')
2589 		    && zero_times_ok
2590                     && p < pend && TRANSLATE (*p) == TRANSLATE ('\n')
2591                     && !(syntax & RE_DOT_NEWLINE))
2592                   { /* We have .*\n.  */
2593                     STORE_JUMP (jump, b, laststart);
2594                     keep_string_p = true;
2595                   }
2596                 else
2597                   /* Anything else.  */
2598                   STORE_JUMP (maybe_pop_jump, b, laststart -
2599 			      (1 + OFFSET_ADDRESS_SIZE));
2600 
2601                 /* We've added more stuff to the buffer.  */
2602                 b += 1 + OFFSET_ADDRESS_SIZE;
2603               }
2604 
2605             /* On failure, jump from laststart to b + 3, which will be the
2606                end of the buffer after this jump is inserted.  */
2607 	    /* ifdef WCHAR, 'b + 1 + OFFSET_ADDRESS_SIZE' instead of
2608 	       'b + 3'.  */
2609             GET_BUFFER_SPACE (1 + OFFSET_ADDRESS_SIZE);
2610             INSERT_JUMP (keep_string_p ? on_failure_keep_string_jump
2611                                        : on_failure_jump,
2612                          laststart, b + 1 + OFFSET_ADDRESS_SIZE);
2613             pending_exact = 0;
2614             b += 1 + OFFSET_ADDRESS_SIZE;
2615 
2616             if (!zero_times_ok)
2617               {
2618                 /* At least one repetition is required, so insert a
2619                    `dummy_failure_jump' before the initial
2620                    `on_failure_jump' instruction of the loop. This
2621                    effects a skip over that instruction the first time
2622                    we hit that loop.  */
2623                 GET_BUFFER_SPACE (1 + OFFSET_ADDRESS_SIZE);
2624                 INSERT_JUMP (dummy_failure_jump, laststart, laststart +
2625 			     2 + 2 * OFFSET_ADDRESS_SIZE);
2626                 b += 1 + OFFSET_ADDRESS_SIZE;
2627               }
2628             }
2629 	  break;
2630 
2631 
2632 	case '.':
2633           laststart = b;
2634           BUF_PUSH (anychar);
2635           break;
2636 
2637 
2638         case '[':
2639           {
2640             boolean had_char_class = false;
2641 #ifdef WCHAR
2642 	    CHAR_T range_start = 0xffffffff;
2643 #else
2644 	    unsigned int range_start = 0xffffffff;
2645 #endif
2646             if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
2647 
2648 #ifdef WCHAR
2649 	    /* We assume a charset(_not) structure as a wchar_t array.
2650 	       charset[0] = (re_opcode_t) charset(_not)
2651                charset[1] = l (= length of char_classes)
2652                charset[2] = m (= length of collating_symbols)
2653                charset[3] = n (= length of equivalence_classes)
2654 	       charset[4] = o (= length of char_ranges)
2655 	       charset[5] = p (= length of chars)
2656 
2657                charset[6] = char_class (wctype_t)
2658                charset[6+CHAR_CLASS_SIZE] = char_class (wctype_t)
2659                          ...
2660                charset[l+5]  = char_class (wctype_t)
2661 
2662                charset[l+6]  = collating_symbol (wchar_t)
2663                             ...
2664                charset[l+m+5]  = collating_symbol (wchar_t)
2665 					ifdef _LIBC we use the index in
2666 					_NL_COLLATE_SYMB_EXTRAMB instead of
2667 					wchar_t string.
2668 
2669                charset[l+m+6]  = equivalence_classes (wchar_t)
2670                               ...
2671                charset[l+m+n+5]  = equivalence_classes (wchar_t)
2672 					ifdef _LIBC we use the index in
2673 					_NL_COLLATE_WEIGHT instead of
2674 					wchar_t string.
2675 
2676 	       charset[l+m+n+6] = range_start
2677 	       charset[l+m+n+7] = range_end
2678 	                       ...
2679 	       charset[l+m+n+2o+4] = range_start
2680 	       charset[l+m+n+2o+5] = range_end
2681 					ifdef _LIBC we use the value looked up
2682 					in _NL_COLLATE_COLLSEQ instead of
2683 					wchar_t character.
2684 
2685 	       charset[l+m+n+2o+6] = char
2686 	                          ...
2687 	       charset[l+m+n+2o+p+5] = char
2688 
2689 	     */
2690 
2691 	    /* We need at least 6 spaces: the opcode, the length of
2692                char_classes, the length of collating_symbols, the length of
2693                equivalence_classes, the length of char_ranges, the length of
2694                chars.  */
2695 	    GET_BUFFER_SPACE (6);
2696 
2697 	    /* Save b as laststart, and use laststart as the pointer
2698 	       to the first element of the charset here.
2699 	       In other words, laststart[i] indicates charset[i].  */
2700             laststart = b;
2701 
2702             /* We test `*p == '^' twice, instead of using an if
2703                statement, so we only need one BUF_PUSH.  */
2704             BUF_PUSH (*p == '^' ? charset_not : charset);
2705             if (*p == '^')
2706               p++;
2707 
2708             /* Push the length of char_classes, the length of
2709                collating_symbols, the length of equivalence_classes, the
2710                length of char_ranges and the length of chars.  */
2711             BUF_PUSH_3 (0, 0, 0);
2712             BUF_PUSH_2 (0, 0);
2713 
2714             /* Remember the first position in the bracket expression.  */
2715             p1 = p;
2716 
2717             /* charset_not matches newline according to a syntax bit.  */
2718             if ((re_opcode_t) b[-6] == charset_not
2719                 && (syntax & RE_HAT_LISTS_NOT_NEWLINE))
2720 	      {
2721 		BUF_PUSH('\n');
2722 		laststart[5]++; /* Update the length of characters  */
2723 	      }
2724 
2725             /* Read in characters and ranges, setting map bits.  */
2726             for (;;)
2727               {
2728                 if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
2729 
2730                 PATFETCH (c);
2731 
2732                 /* \ might escape characters inside [...] and [^...].  */
2733                 if ((syntax & RE_BACKSLASH_ESCAPE_IN_LISTS) && c == '\\')
2734                   {
2735                     if (p == pend) FREE_STACK_RETURN (REG_EESCAPE);
2736 
2737                     PATFETCH (c1);
2738 		    BUF_PUSH(c1);
2739 		    laststart[5]++; /* Update the length of chars  */
2740 		    range_start = c1;
2741                     continue;
2742                   }
2743 
2744                 /* Could be the end of the bracket expression.  If it's
2745                    not (i.e., when the bracket expression is `[]' so
2746                    far), the ']' character bit gets set way below.  */
2747                 if (c == ']' && p != p1 + 1)
2748                   break;
2749 
2750                 /* Look ahead to see if it's a range when the last thing
2751                    was a character class.  */
2752                 if (had_char_class && c == '-' && *p != ']')
2753                   FREE_STACK_RETURN (REG_ERANGE);
2754 
2755                 /* Look ahead to see if it's a range when the last thing
2756                    was a character: if this is a hyphen not at the
2757                    beginning or the end of a list, then it's the range
2758                    operator.  */
2759                 if (c == '-'
2760                     && !(p - 2 >= pattern && p[-2] == '[')
2761                     && !(p - 3 >= pattern && p[-3] == '[' && p[-2] == '^')
2762                     && *p != ']')
2763                   {
2764                     reg_errcode_t ret;
2765 		    /* Allocate the space for range_start and range_end.  */
2766 		    GET_BUFFER_SPACE (2);
2767 		    /* Update the pointer to indicate end of buffer.  */
2768                     b += 2;
2769                     ret = wcs_compile_range (range_start, &p, pend, translate,
2770                                          syntax, b, laststart);
2771                     if (ret != REG_NOERROR) FREE_STACK_RETURN (ret);
2772                     range_start = 0xffffffff;
2773                   }
2774                 else if (p[0] == '-' && p[1] != ']')
2775                   { /* This handles ranges made up of characters only.  */
2776                     reg_errcode_t ret;
2777 
2778 		    /* Move past the `-'.  */
2779                     PATFETCH (c1);
2780 		    /* Allocate the space for range_start and range_end.  */
2781 		    GET_BUFFER_SPACE (2);
2782 		    /* Update the pointer to indicate end of buffer.  */
2783                     b += 2;
2784                     ret = wcs_compile_range (c, &p, pend, translate, syntax, b,
2785                                          laststart);
2786                     if (ret != REG_NOERROR) FREE_STACK_RETURN (ret);
2787 		    range_start = 0xffffffff;
2788                   }
2789 
2790                 /* See if we're at the beginning of a possible character
2791                    class.  */
2792                 else if (syntax & RE_CHAR_CLASSES && c == '[' && *p == ':')
2793                   { /* Leave room for the null.  */
2794                     char str[CHAR_CLASS_MAX_LENGTH + 1];
2795 
2796                     PATFETCH (c);
2797                     c1 = 0;
2798 
2799                     /* If pattern is `[[:'.  */
2800                     if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
2801 
2802                     for (;;)
2803                       {
2804                         PATFETCH (c);
2805                         if ((c == ':' && *p == ']') || p == pend)
2806                           break;
2807 			if (c1 < CHAR_CLASS_MAX_LENGTH)
2808 			  str[c1++] = c;
2809 			else
2810 			  /* This is in any case an invalid class name.  */
2811 			  str[0] = '\0';
2812                       }
2813                     str[c1] = '\0';
2814 
2815                     /* If isn't a word bracketed by `[:' and `:]':
2816                        undo the ending character, the letters, and leave
2817                        the leading `:' and `[' (but store them as character).  */
2818                     if (c == ':' && *p == ']')
2819                       {
2820 			wctype_t wt;
2821 			uintptr_t alignedp;
2822 
2823 			/* Query the character class as wctype_t.  */
2824 			wt = IS_CHAR_CLASS (str);
2825 			if (wt == 0)
2826 			  FREE_STACK_RETURN (REG_ECTYPE);
2827 
2828                         /* Throw away the ] at the end of the character
2829                            class.  */
2830                         PATFETCH (c);
2831 
2832                         if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
2833 
2834 			/* Allocate the space for character class.  */
2835                         GET_BUFFER_SPACE(CHAR_CLASS_SIZE);
2836 			/* Update the pointer to indicate end of buffer.  */
2837                         b += CHAR_CLASS_SIZE;
2838 			/* Move data which follow character classes
2839 			    not to violate the data.  */
2840                         insert_space(CHAR_CLASS_SIZE,
2841 				     laststart + 6 + laststart[1],
2842 				     b - 1);
2843 			alignedp = ((uintptr_t)(laststart + 6 + laststart[1])
2844 				    + __alignof__(wctype_t) - 1)
2845 			  	    & ~(uintptr_t)(__alignof__(wctype_t) - 1);
2846 			/* Store the character class.  */
2847                         *((wctype_t*)alignedp) = wt;
2848                         /* Update length of char_classes */
2849                         laststart[1] += CHAR_CLASS_SIZE;
2850 
2851                         had_char_class = true;
2852                       }
2853                     else
2854                       {
2855                         c1++;
2856                         while (c1--)
2857                           PATUNFETCH;
2858                         BUF_PUSH ('[');
2859                         BUF_PUSH (':');
2860                         laststart[5] += 2; /* Update the length of characters  */
2861 			range_start = ':';
2862                         had_char_class = false;
2863                       }
2864                   }
2865                 else if (syntax & RE_CHAR_CLASSES && c == '[' && (*p == '='
2866 							  || *p == '.'))
2867 		  {
2868 		    CHAR_T str[128];	/* Should be large enough.  */
2869 		    CHAR_T delim = *p; /* '=' or '.'  */
2870 # ifdef _LIBC
2871 		    uint32_t nrules =
2872 		      _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
2873 # endif
2874 		    PATFETCH (c);
2875 		    c1 = 0;
2876 
2877 		    /* If pattern is `[[=' or '[[.'.  */
2878 		    if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
2879 
2880 		    for (;;)
2881 		      {
2882 			PATFETCH (c);
2883 			if ((c == delim && *p == ']') || p == pend)
2884 			  break;
2885 			if (c1 < sizeof (str) - 1)
2886 			  str[c1++] = c;
2887 			else
2888 			  /* This is in any case an invalid class name.  */
2889 			  str[0] = '\0';
2890                       }
2891 		    str[c1] = '\0';
2892 
2893 		    if (c == delim && *p == ']' && str[0] != '\0')
2894 		      {
2895                         unsigned int i, offset;
2896 			/* If we have no collation data we use the default
2897 			   collation in which each character is in a class
2898 			   by itself.  It also means that ASCII is the
2899 			   character set and therefore we cannot have character
2900 			   with more than one byte in the multibyte
2901 			   representation.  */
2902 
2903                         /* If not defined _LIBC, we push the name and
2904 			   `\0' for the sake of matching performance.  */
2905 			int datasize = c1 + 1;
2906 
2907 # ifdef _LIBC
2908 			int32_t idx = 0;
2909 			if (nrules == 0)
2910 # endif
2911 			  {
2912 			    if (c1 != 1)
2913 			      FREE_STACK_RETURN (REG_ECOLLATE);
2914 			  }
2915 # ifdef _LIBC
2916 			else
2917 			  {
2918 			    const int32_t *table;
2919 			    const int32_t *weights;
2920 			    const int32_t *extra;
2921 			    const int32_t *indirect;
2922 			    wint_t *cp;
2923 
2924 			    /* This #include defines a local function!  */
2925 #  include <locale/weightwc.h>
2926 
2927 			    if(delim == '=')
2928 			      {
2929 				/* We push the index for equivalence class.  */
2930 				cp = (wint_t*)str;
2931 
2932 				table = (const int32_t *)
2933 				  _NL_CURRENT (LC_COLLATE,
2934 					       _NL_COLLATE_TABLEWC);
2935 				weights = (const int32_t *)
2936 				  _NL_CURRENT (LC_COLLATE,
2937 					       _NL_COLLATE_WEIGHTWC);
2938 				extra = (const int32_t *)
2939 				  _NL_CURRENT (LC_COLLATE,
2940 					       _NL_COLLATE_EXTRAWC);
2941 				indirect = (const int32_t *)
2942 				  _NL_CURRENT (LC_COLLATE,
2943 					       _NL_COLLATE_INDIRECTWC);
2944 
2945 				idx = findidx ((const wint_t**)&cp);
2946 				if (idx == 0 || cp < (wint_t*) str + c1)
2947 				  /* This is no valid character.  */
2948 				  FREE_STACK_RETURN (REG_ECOLLATE);
2949 
2950 				str[0] = (wchar_t)idx;
2951 			      }
2952 			    else /* delim == '.' */
2953 			      {
2954 				/* We push collation sequence value
2955 				   for collating symbol.  */
2956 				int32_t table_size;
2957 				const int32_t *symb_table;
2958 				const unsigned char *extra;
2959 				int32_t idx;
2960 				int32_t elem;
2961 				int32_t second;
2962 				int32_t hash;
2963 				char char_str[c1];
2964 
2965 				/* We have to convert the name to a single-byte
2966 				   string.  This is possible since the names
2967 				   consist of ASCII characters and the internal
2968 				   representation is UCS4.  */
2969 				for (i = 0; i < c1; ++i)
2970 				  char_str[i] = str[i];
2971 
2972 				table_size =
2973 				  _NL_CURRENT_WORD (LC_COLLATE,
2974 						    _NL_COLLATE_SYMB_HASH_SIZEMB);
2975 				symb_table = (const int32_t *)
2976 				  _NL_CURRENT (LC_COLLATE,
2977 					       _NL_COLLATE_SYMB_TABLEMB);
2978 				extra = (const unsigned char *)
2979 				  _NL_CURRENT (LC_COLLATE,
2980 					       _NL_COLLATE_SYMB_EXTRAMB);
2981 
2982 				/* Locate the character in the hashing table.  */
2983 				hash = elem_hash (char_str, c1);
2984 
2985 				idx = 0;
2986 				elem = hash % table_size;
2987 				second = hash % (table_size - 2);
2988 				while (symb_table[2 * elem] != 0)
2989 				  {
2990 				    /* First compare the hashing value.  */
2991 				    if (symb_table[2 * elem] == hash
2992 					&& c1 == extra[symb_table[2 * elem + 1]]
2993 					&& memcmp (char_str,
2994 						   &extra[symb_table[2 * elem + 1]
2995 							 + 1], c1) == 0)
2996 				      {
2997 					/* Yep, this is the entry.  */
2998 					idx = symb_table[2 * elem + 1];
2999 					idx += 1 + extra[idx];
3000 					break;
3001 				      }
3002 
3003 				    /* Next entry.  */
3004 				    elem += second;
3005 				  }
3006 
3007 				if (symb_table[2 * elem] != 0)
3008 				  {
3009 				    /* Compute the index of the byte sequence
3010 				       in the table.  */
3011 				    idx += 1 + extra[idx];
3012 				    /* Adjust for the alignment.  */
3013 				    idx = (idx + 3) & ~3;
3014 
3015 				    str[0] = (wchar_t) idx + 4;
3016 				  }
3017 				else if (symb_table[2 * elem] == 0 && c1 == 1)
3018 				  {
3019 				    /* No valid character.  Match it as a
3020 				       single byte character.  */
3021 				    had_char_class = false;
3022 				    BUF_PUSH(str[0]);
3023 				    /* Update the length of characters  */
3024 				    laststart[5]++;
3025 				    range_start = str[0];
3026 
3027 				    /* Throw away the ] at the end of the
3028 				       collating symbol.  */
3029 				    PATFETCH (c);
3030 				    /* Go on with the next list element.  */
3031 				    continue;
3032 				  }
3033 				else
3034 				  FREE_STACK_RETURN (REG_ECOLLATE);
3035 			      }
3036 			    datasize = 1;
3037 			  }
3038 # endif
3039                         /* Throw away the ] at the end of the equivalence
3040                            class (or collating symbol).  */
3041                         PATFETCH (c);
3042 
3043 			/* Allocate the space for the equivalence class
3044 			   (or collating symbol) (and '\0' if needed).  */
3045                         GET_BUFFER_SPACE(datasize);
3046 			/* Update the pointer to indicate end of buffer.  */
3047                         b += datasize;
3048 
3049 			if (delim == '=')
3050 			  { /* equivalence class  */
3051 			    /* Calculate the offset of char_ranges,
3052 			       which is next to equivalence_classes.  */
3053 			    offset = laststart[1] + laststart[2]
3054 			      + laststart[3] +6;
3055 			    /* Insert space.  */
3056 			    insert_space(datasize, laststart + offset, b - 1);
3057 
3058 			    /* Write the equivalence_class and \0.  */
3059 			    for (i = 0 ; i < datasize ; i++)
3060 			      laststart[offset + i] = str[i];
3061 
3062 			    /* Update the length of equivalence_classes.  */
3063 			    laststart[3] += datasize;
3064 			    had_char_class = true;
3065 			  }
3066 			else /* delim == '.' */
3067 			  { /* collating symbol  */
3068 			    /* Calculate the offset of the equivalence_classes,
3069 			       which is next to collating_symbols.  */
3070 			    offset = laststart[1] + laststart[2] + 6;
3071 			    /* Insert space and write the collating_symbol
3072 			       and \0.  */
3073 			    insert_space(datasize, laststart + offset, b-1);
3074 			    for (i = 0 ; i < datasize ; i++)
3075 			      laststart[offset + i] = str[i];
3076 
3077 			    /* In re_match_2_internal if range_start < -1, we
3078 			       assume -range_start is the offset of the
3079 			       collating symbol which is specified as
3080 			       the character of the range start.  So we assign
3081 			       -(laststart[1] + laststart[2] + 6) to
3082 			       range_start.  */
3083 			    range_start = -(laststart[1] + laststart[2] + 6);
3084 			    /* Update the length of collating_symbol.  */
3085 			    laststart[2] += datasize;
3086 			    had_char_class = false;
3087 			  }
3088 		      }
3089                     else
3090                       {
3091                         c1++;
3092                         while (c1--)
3093                           PATUNFETCH;
3094                         BUF_PUSH ('[');
3095                         BUF_PUSH (delim);
3096                         laststart[5] += 2; /* Update the length of characters  */
3097 			range_start = delim;
3098                         had_char_class = false;
3099                       }
3100 		  }
3101                 else
3102                   {
3103                     had_char_class = false;
3104 		    BUF_PUSH(c);
3105 		    laststart[5]++;  /* Update the length of characters  */
3106 		    range_start = c;
3107                   }
3108 	      }
3109 
3110 #else /* BYTE */
3111             /* Ensure that we have enough space to push a charset: the
3112                opcode, the length count, and the bitset; 34 bytes in all.  */
3113 	    GET_BUFFER_SPACE (34);
3114 
3115             laststart = b;
3116 
3117             /* We test `*p == '^' twice, instead of using an if
3118                statement, so we only need one BUF_PUSH.  */
3119             BUF_PUSH (*p == '^' ? charset_not : charset);
3120             if (*p == '^')
3121               p++;
3122 
3123             /* Remember the first position in the bracket expression.  */
3124             p1 = p;
3125 
3126             /* Push the number of bytes in the bitmap.  */
3127             BUF_PUSH ((1 << BYTEWIDTH) / BYTEWIDTH);
3128 
3129             /* Clear the whole map.  */
3130             bzero (b, (1 << BYTEWIDTH) / BYTEWIDTH);
3131 
3132             /* charset_not matches newline according to a syntax bit.  */
3133             if ((re_opcode_t) b[-2] == charset_not
3134                 && (syntax & RE_HAT_LISTS_NOT_NEWLINE))
3135               SET_LIST_BIT ('\n');
3136 
3137             /* Read in characters and ranges, setting map bits.  */
3138             for (;;)
3139               {
3140                 if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
3141 
3142                 PATFETCH (c);
3143 
3144                 /* \ might escape characters inside [...] and [^...].  */
3145                 if ((syntax & RE_BACKSLASH_ESCAPE_IN_LISTS) && c == '\\')
3146                   {
3147                     if (p == pend) FREE_STACK_RETURN (REG_EESCAPE);
3148 
3149                     PATFETCH (c1);
3150                     SET_LIST_BIT (c1);
3151 		    range_start = c1;
3152                     continue;
3153                   }
3154 
3155                 /* Could be the end of the bracket expression.  If it's
3156                    not (i.e., when the bracket expression is `[]' so
3157                    far), the ']' character bit gets set way below.  */
3158                 if (c == ']' && p != p1 + 1)
3159                   break;
3160 
3161                 /* Look ahead to see if it's a range when the last thing
3162                    was a character class.  */
3163                 if (had_char_class && c == '-' && *p != ']')
3164                   FREE_STACK_RETURN (REG_ERANGE);
3165 
3166                 /* Look ahead to see if it's a range when the last thing
3167                    was a character: if this is a hyphen not at the
3168                    beginning or the end of a list, then it's the range
3169                    operator.  */
3170                 if (c == '-'
3171                     && !(p - 2 >= pattern && p[-2] == '[')
3172                     && !(p - 3 >= pattern && p[-3] == '[' && p[-2] == '^')
3173                     && *p != ']')
3174                   {
3175                     reg_errcode_t ret
3176                       = byte_compile_range (range_start, &p, pend, translate,
3177 					    syntax, b);
3178                     if (ret != REG_NOERROR) FREE_STACK_RETURN (ret);
3179 		    range_start = 0xffffffff;
3180                   }
3181 
3182                 else if (p[0] == '-' && p[1] != ']')
3183                   { /* This handles ranges made up of characters only.  */
3184                     reg_errcode_t ret;
3185 
3186 		    /* Move past the `-'.  */
3187                     PATFETCH (c1);
3188 
3189                     ret = byte_compile_range (c, &p, pend, translate, syntax, b);
3190                     if (ret != REG_NOERROR) FREE_STACK_RETURN (ret);
3191 		    range_start = 0xffffffff;
3192                   }
3193 
3194                 /* See if we're at the beginning of a possible character
3195                    class.  */
3196 
3197                 else if (syntax & RE_CHAR_CLASSES && c == '[' && *p == ':')
3198                   { /* Leave room for the null.  */
3199                     char str[CHAR_CLASS_MAX_LENGTH + 1];
3200 
3201                     PATFETCH (c);
3202                     c1 = 0;
3203 
3204                     /* If pattern is `[[:'.  */
3205                     if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
3206 
3207                     for (;;)
3208                       {
3209                         PATFETCH (c);
3210                         if ((c == ':' && *p == ']') || p == pend)
3211                           break;
3212 			if (c1 < CHAR_CLASS_MAX_LENGTH)
3213 			  str[c1++] = c;
3214 			else
3215 			  /* This is in any case an invalid class name.  */
3216 			  str[0] = '\0';
3217                       }
3218                     str[c1] = '\0';
3219 
3220                     /* If isn't a word bracketed by `[:' and `:]':
3221                        undo the ending character, the letters, and leave
3222                        the leading `:' and `[' (but set bits for them).  */
3223                     if (c == ':' && *p == ']')
3224                       {
3225 # if defined _LIBC || WIDE_CHAR_SUPPORT
3226                         boolean is_lower = STREQ (str, "lower");
3227                         boolean is_upper = STREQ (str, "upper");
3228 			wctype_t wt;
3229                         int ch;
3230 
3231 			wt = IS_CHAR_CLASS (str);
3232 			if (wt == 0)
3233 			  FREE_STACK_RETURN (REG_ECTYPE);
3234 
3235                         /* Throw away the ] at the end of the character
3236                            class.  */
3237                         PATFETCH (c);
3238 
3239                         if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
3240 
3241                         for (ch = 0; ch < 1 << BYTEWIDTH; ++ch)
3242 			  {
3243 			    if (iswctype (btowc (ch), wt))
3244 			      SET_LIST_BIT (ch);
3245 
3246 			    if (translate && (is_upper || is_lower)
3247 				&& (ISUPPER (ch) || ISLOWER (ch)))
3248 			      SET_LIST_BIT (ch);
3249 			  }
3250 
3251                         had_char_class = true;
3252 # else
3253                         int ch;
3254                         boolean is_alnum = STREQ (str, "alnum");
3255                         boolean is_alpha = STREQ (str, "alpha");
3256                         boolean is_blank = STREQ (str, "blank");
3257                         boolean is_cntrl = STREQ (str, "cntrl");
3258                         boolean is_digit = STREQ (str, "digit");
3259                         boolean is_graph = STREQ (str, "graph");
3260                         boolean is_lower = STREQ (str, "lower");
3261                         boolean is_print = STREQ (str, "print");
3262                         boolean is_punct = STREQ (str, "punct");
3263                         boolean is_space = STREQ (str, "space");
3264                         boolean is_upper = STREQ (str, "upper");
3265                         boolean is_xdigit = STREQ (str, "xdigit");
3266 
3267                         if (!IS_CHAR_CLASS (str))
3268 			  FREE_STACK_RETURN (REG_ECTYPE);
3269 
3270                         /* Throw away the ] at the end of the character
3271                            class.  */
3272                         PATFETCH (c);
3273 
3274                         if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
3275 
3276                         for (ch = 0; ch < 1 << BYTEWIDTH; ch++)
3277                           {
3278 			    /* This was split into 3 if's to
3279 			       avoid an arbitrary limit in some compiler.  */
3280                             if (   (is_alnum  && ISALNUM (ch))
3281                                 || (is_alpha  && ISALPHA (ch))
3282                                 || (is_blank  && ISBLANK (ch))
3283                                 || (is_cntrl  && ISCNTRL (ch)))
3284 			      SET_LIST_BIT (ch);
3285 			    if (   (is_digit  && ISDIGIT (ch))
3286                                 || (is_graph  && ISGRAPH (ch))
3287                                 || (is_lower  && ISLOWER (ch))
3288                                 || (is_print  && ISPRINT (ch)))
3289 			      SET_LIST_BIT (ch);
3290 			    if (   (is_punct  && ISPUNCT (ch))
3291                                 || (is_space  && ISSPACE (ch))
3292                                 || (is_upper  && ISUPPER (ch))
3293                                 || (is_xdigit && ISXDIGIT (ch)))
3294 			      SET_LIST_BIT (ch);
3295 			    if (   translate && (is_upper || is_lower)
3296 				&& (ISUPPER (ch) || ISLOWER (ch)))
3297 			      SET_LIST_BIT (ch);
3298                           }
3299                         had_char_class = true;
3300 # endif	/* libc || wctype.h */
3301                       }
3302                     else
3303                       {
3304                         c1++;
3305                         while (c1--)
3306                           PATUNFETCH;
3307                         SET_LIST_BIT ('[');
3308                         SET_LIST_BIT (':');
3309 			range_start = ':';
3310                         had_char_class = false;
3311                       }
3312                   }
3313                 else if (syntax & RE_CHAR_CLASSES && c == '[' && *p == '=')
3314 		  {
3315 		    unsigned char str[MB_LEN_MAX + 1];
3316 # ifdef _LIBC
3317 		    uint32_t nrules =
3318 		      _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
3319 # endif
3320 
3321 		    PATFETCH (c);
3322 		    c1 = 0;
3323 
3324 		    /* If pattern is `[[='.  */
3325 		    if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
3326 
3327 		    for (;;)
3328 		      {
3329 			PATFETCH (c);
3330 			if ((c == '=' && *p == ']') || p == pend)
3331 			  break;
3332 			if (c1 < MB_LEN_MAX)
3333 			  str[c1++] = c;
3334 			else
3335 			  /* This is in any case an invalid class name.  */
3336 			  str[0] = '\0';
3337                       }
3338 		    str[c1] = '\0';
3339 
3340 		    if (c == '=' && *p == ']' && str[0] != '\0')
3341 		      {
3342 			/* If we have no collation data we use the default
3343 			   collation in which each character is in a class
3344 			   by itself.  It also means that ASCII is the
3345 			   character set and therefore we cannot have character
3346 			   with more than one byte in the multibyte
3347 			   representation.  */
3348 # ifdef _LIBC
3349 			if (nrules == 0)
3350 # endif
3351 			  {
3352 			    if (c1 != 1)
3353 			      FREE_STACK_RETURN (REG_ECOLLATE);
3354 
3355 			    /* Throw away the ] at the end of the equivalence
3356 			       class.  */
3357 			    PATFETCH (c);
3358 
3359 			    /* Set the bit for the character.  */
3360 			    SET_LIST_BIT (str[0]);
3361 			  }
3362 # ifdef _LIBC
3363 			else
3364 			  {
3365 			    /* Try to match the byte sequence in `str' against
3366 			       those known to the collate implementation.
3367 			       First find out whether the bytes in `str' are
3368 			       actually from exactly one character.  */
3369 			    const int32_t *table;
3370 			    const unsigned char *weights;
3371 			    const unsigned char *extra;
3372 			    const int32_t *indirect;
3373 			    int32_t idx;
3374 			    const unsigned char *cp = str;
3375 			    int ch;
3376 
3377 			    /* This #include defines a local function!  */
3378 #  include <locale/weight.h>
3379 
3380 			    table = (const int32_t *)
3381 			      _NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEMB);
3382 			    weights = (const unsigned char *)
3383 			      _NL_CURRENT (LC_COLLATE, _NL_COLLATE_WEIGHTMB);
3384 			    extra = (const unsigned char *)
3385 			      _NL_CURRENT (LC_COLLATE, _NL_COLLATE_EXTRAMB);
3386 			    indirect = (const int32_t *)
3387 			      _NL_CURRENT (LC_COLLATE, _NL_COLLATE_INDIRECTMB);
3388 
3389 			    idx = findidx (&cp);
3390 			    if (idx == 0 || cp < str + c1)
3391 			      /* This is no valid character.  */
3392 			      FREE_STACK_RETURN (REG_ECOLLATE);
3393 
3394 			    /* Throw away the ] at the end of the equivalence
3395 			       class.  */
3396 			    PATFETCH (c);
3397 
3398 			    /* Now we have to go through the whole table
3399 			       and find all characters which have the same
3400 			       first level weight.
3401 
3402 			       XXX Note that this is not entirely correct.
3403 			       we would have to match multibyte sequences
3404 			       but this is not possible with the current
3405 			       implementation.  */
3406 			    for (ch = 1; ch < 256; ++ch)
3407 			      /* XXX This test would have to be changed if we
3408 				 would allow matching multibyte sequences.  */
3409 			      if (table[ch] > 0)
3410 				{
3411 				  int32_t idx2 = table[ch];
3412 				  size_t len = weights[idx2];
3413 
3414 				  /* Test whether the lengths match.  */
3415 				  if (weights[idx] == len)
3416 				    {
3417 				      /* They do.  Now compare the bytes of
3418 					 the weight.  */
3419 				      size_t cnt = 0;
3420 
3421 				      while (cnt < len
3422 					     && (weights[idx + 1 + cnt]
3423 						 == weights[idx2 + 1 + cnt]))
3424 					++cnt;
3425 
3426 				      if (cnt == len)
3427 					/* They match.  Mark the character as
3428 					   acceptable.  */
3429 					SET_LIST_BIT (ch);
3430 				    }
3431 				}
3432 			  }
3433 # endif
3434 			had_char_class = true;
3435 		      }
3436                     else
3437                       {
3438                         c1++;
3439                         while (c1--)
3440                           PATUNFETCH;
3441                         SET_LIST_BIT ('[');
3442                         SET_LIST_BIT ('=');
3443 			range_start = '=';
3444                         had_char_class = false;
3445                       }
3446 		  }
3447                 else if (syntax & RE_CHAR_CLASSES && c == '[' && *p == '.')
3448 		  {
3449 		    unsigned char str[128];	/* Should be large enough.  */
3450 # ifdef _LIBC
3451 		    uint32_t nrules =
3452 		      _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
3453 # endif
3454 
3455 		    PATFETCH (c);
3456 		    c1 = 0;
3457 
3458 		    /* If pattern is `[[.'.  */
3459 		    if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
3460 
3461 		    for (;;)
3462 		      {
3463 			PATFETCH (c);
3464 			if ((c == '.' && *p == ']') || p == pend)
3465 			  break;
3466 			if (c1 < sizeof (str))
3467 			  str[c1++] = c;
3468 			else
3469 			  /* This is in any case an invalid class name.  */
3470 			  str[0] = '\0';
3471                       }
3472 		    str[c1] = '\0';
3473 
3474 		    if (c == '.' && *p == ']' && str[0] != '\0')
3475 		      {
3476 			/* If we have no collation data we use the default
3477 			   collation in which each character is the name
3478 			   for its own class which contains only the one
3479 			   character.  It also means that ASCII is the
3480 			   character set and therefore we cannot have character
3481 			   with more than one byte in the multibyte
3482 			   representation.  */
3483 # ifdef _LIBC
3484 			if (nrules == 0)
3485 # endif
3486 			  {
3487 			    if (c1 != 1)
3488 			      FREE_STACK_RETURN (REG_ECOLLATE);
3489 
3490 			    /* Throw away the ] at the end of the equivalence
3491 			       class.  */
3492 			    PATFETCH (c);
3493 
3494 			    /* Set the bit for the character.  */
3495 			    SET_LIST_BIT (str[0]);
3496 			    range_start = ((const unsigned char *) str)[0];
3497 			  }
3498 # ifdef _LIBC
3499 			else
3500 			  {
3501 			    /* Try to match the byte sequence in `str' against
3502 			       those known to the collate implementation.
3503 			       First find out whether the bytes in `str' are
3504 			       actually from exactly one character.  */
3505 			    int32_t table_size;
3506 			    const int32_t *symb_table;
3507 			    const unsigned char *extra;
3508 			    int32_t idx;
3509 			    int32_t elem;
3510 			    int32_t second;
3511 			    int32_t hash;
3512 
3513 			    table_size =
3514 			      _NL_CURRENT_WORD (LC_COLLATE,
3515 						_NL_COLLATE_SYMB_HASH_SIZEMB);
3516 			    symb_table = (const int32_t *)
3517 			      _NL_CURRENT (LC_COLLATE,
3518 					   _NL_COLLATE_SYMB_TABLEMB);
3519 			    extra = (const unsigned char *)
3520 			      _NL_CURRENT (LC_COLLATE,
3521 					   _NL_COLLATE_SYMB_EXTRAMB);
3522 
3523 			    /* Locate the character in the hashing table.  */
3524 			    hash = elem_hash (str, c1);
3525 
3526 			    idx = 0;
3527 			    elem = hash % table_size;
3528 			    second = hash % (table_size - 2);
3529 			    while (symb_table[2 * elem] != 0)
3530 			      {
3531 				/* First compare the hashing value.  */
3532 				if (symb_table[2 * elem] == hash
3533 				    && c1 == extra[symb_table[2 * elem + 1]]
3534 				    && memcmp (str,
3535 					       &extra[symb_table[2 * elem + 1]
3536 						     + 1],
3537 					       c1) == 0)
3538 				  {
3539 				    /* Yep, this is the entry.  */
3540 				    idx = symb_table[2 * elem + 1];
3541 				    idx += 1 + extra[idx];
3542 				    break;
3543 				  }
3544 
3545 				/* Next entry.  */
3546 				elem += second;
3547 			      }
3548 
3549 			    if (symb_table[2 * elem] == 0)
3550 			      /* This is no valid character.  */
3551 			      FREE_STACK_RETURN (REG_ECOLLATE);
3552 
3553 			    /* Throw away the ] at the end of the equivalence
3554 			       class.  */
3555 			    PATFETCH (c);
3556 
3557 			    /* Now add the multibyte character(s) we found
3558 			       to the accept list.
3559 
3560 			       XXX Note that this is not entirely correct.
3561 			       we would have to match multibyte sequences
3562 			       but this is not possible with the current
3563 			       implementation.  Also, we have to match
3564 			       collating symbols, which expand to more than
3565 			       one file, as a whole and not allow the
3566 			       individual bytes.  */
3567 			    c1 = extra[idx++];
3568 			    if (c1 == 1)
3569 			      range_start = extra[idx];
3570 			    while (c1-- > 0)
3571 			      {
3572 				SET_LIST_BIT (extra[idx]);
3573 				++idx;
3574 			      }
3575 			  }
3576 # endif
3577 			had_char_class = false;
3578 		      }
3579                     else
3580                       {
3581                         c1++;
3582                         while (c1--)
3583                           PATUNFETCH;
3584                         SET_LIST_BIT ('[');
3585                         SET_LIST_BIT ('.');
3586 			range_start = '.';
3587                         had_char_class = false;
3588                       }
3589 		  }
3590                 else
3591                   {
3592                     had_char_class = false;
3593                     SET_LIST_BIT (c);
3594 		    range_start = c;
3595                   }
3596               }
3597 
3598             /* Discard any (non)matching list bytes that are all 0 at the
3599                end of the map.  Decrease the map-length byte too.  */
3600             while ((int) b[-1] > 0 && b[b[-1] - 1] == 0)
3601               b[-1]--;
3602             b += b[-1];
3603 #endif /* WCHAR */
3604           }
3605           break;
3606 
3607 
3608 	case '(':
3609           if (syntax & RE_NO_BK_PARENS)
3610             goto handle_open;
3611           else
3612             goto normal_char;
3613 
3614 
3615         case ')':
3616           if (syntax & RE_NO_BK_PARENS)
3617             goto handle_close;
3618           else
3619             goto normal_char;
3620 
3621 
3622         case '\n':
3623           if (syntax & RE_NEWLINE_ALT)
3624             goto handle_alt;
3625           else
3626             goto normal_char;
3627 
3628 
3629 	case '|':
3630           if (syntax & RE_NO_BK_VBAR)
3631             goto handle_alt;
3632           else
3633             goto normal_char;
3634 
3635 
3636         case '{':
3637            if (syntax & RE_INTERVALS && syntax & RE_NO_BK_BRACES)
3638              goto handle_interval;
3639            else
3640              goto normal_char;
3641 
3642 
3643         case '\\':
3644           if (p == pend) FREE_STACK_RETURN (REG_EESCAPE);
3645 
3646           /* Do not translate the character after the \, so that we can
3647              distinguish, e.g., \B from \b, even if we normally would
3648              translate, e.g., B to b.  */
3649           PATFETCH_RAW (c);
3650 
3651           switch (c)
3652             {
3653             case '(':
3654               if (syntax & RE_NO_BK_PARENS)
3655                 goto normal_backslash;
3656 
3657             handle_open:
3658               bufp->re_nsub++;
3659               regnum++;
3660 
3661               if (COMPILE_STACK_FULL)
3662                 {
3663                   RETALLOC (compile_stack.stack, compile_stack.size << 1,
3664                             compile_stack_elt_t);
3665                   if (compile_stack.stack == NULL) return REG_ESPACE;
3666 
3667                   compile_stack.size <<= 1;
3668                 }
3669 
3670               /* These are the values to restore when we hit end of this
3671                  group.  They are all relative offsets, so that if the
3672                  whole pattern moves because of realloc, they will still
3673                  be valid.  */
3674               COMPILE_STACK_TOP.begalt_offset = begalt - COMPILED_BUFFER_VAR;
3675               COMPILE_STACK_TOP.fixup_alt_jump
3676                 = fixup_alt_jump ? fixup_alt_jump - COMPILED_BUFFER_VAR + 1 : 0;
3677               COMPILE_STACK_TOP.laststart_offset = b - COMPILED_BUFFER_VAR;
3678               COMPILE_STACK_TOP.regnum = regnum;
3679 
3680               /* We will eventually replace the 0 with the number of
3681                  groups inner to this one.  But do not push a
3682                  start_memory for groups beyond the last one we can
3683                  represent in the compiled pattern.  */
3684               if (regnum <= MAX_REGNUM)
3685                 {
3686                   COMPILE_STACK_TOP.inner_group_offset = b
3687 		    - COMPILED_BUFFER_VAR + 2;
3688                   BUF_PUSH_3 (start_memory, regnum, 0);
3689                 }
3690 
3691               compile_stack.avail++;
3692 
3693               fixup_alt_jump = 0;
3694               laststart = 0;
3695               begalt = b;
3696 	      /* If we've reached MAX_REGNUM groups, then this open
3697 		 won't actually generate any code, so we'll have to
3698 		 clear pending_exact explicitly.  */
3699 	      pending_exact = 0;
3700               break;
3701 
3702 
3703             case ')':
3704               if (syntax & RE_NO_BK_PARENS) goto normal_backslash;
3705 
3706               if (COMPILE_STACK_EMPTY)
3707 		{
3708 		  if (syntax & RE_UNMATCHED_RIGHT_PAREN_ORD)
3709 		    goto normal_backslash;
3710 		  else
3711 		    FREE_STACK_RETURN (REG_ERPAREN);
3712 		}
3713 
3714             handle_close:
3715               if (fixup_alt_jump)
3716                 { /* Push a dummy failure point at the end of the
3717                      alternative for a possible future
3718                      `pop_failure_jump' to pop.  See comments at
3719                      `push_dummy_failure' in `re_match_2'.  */
3720                   BUF_PUSH (push_dummy_failure);
3721 
3722                   /* We allocated space for this jump when we assigned
3723                      to `fixup_alt_jump', in the `handle_alt' case below.  */
3724                   STORE_JUMP (jump_past_alt, fixup_alt_jump, b - 1);
3725                 }
3726 
3727               /* See similar code for backslashed left paren above.  */
3728               if (COMPILE_STACK_EMPTY)
3729 		{
3730 		  if (syntax & RE_UNMATCHED_RIGHT_PAREN_ORD)
3731 		    goto normal_char;
3732 		  else
3733 		    FREE_STACK_RETURN (REG_ERPAREN);
3734 		}
3735 
3736               /* Since we just checked for an empty stack above, this
3737                  ``can't happen''.  */
3738               assert (compile_stack.avail != 0);
3739               {
3740                 /* We don't just want to restore into `regnum', because
3741                    later groups should continue to be numbered higher,
3742                    as in `(ab)c(de)' -- the second group is #2.  */
3743                 regnum_t this_group_regnum;
3744 
3745                 compile_stack.avail--;
3746                 begalt = COMPILED_BUFFER_VAR + COMPILE_STACK_TOP.begalt_offset;
3747                 fixup_alt_jump
3748                   = COMPILE_STACK_TOP.fixup_alt_jump
3749                     ? COMPILED_BUFFER_VAR + COMPILE_STACK_TOP.fixup_alt_jump - 1
3750                     : 0;
3751                 laststart = COMPILED_BUFFER_VAR + COMPILE_STACK_TOP.laststart_offset;
3752                 this_group_regnum = COMPILE_STACK_TOP.regnum;
3753 		/* If we've reached MAX_REGNUM groups, then this open
3754 		   won't actually generate any code, so we'll have to
3755 		   clear pending_exact explicitly.  */
3756 		pending_exact = 0;
3757 
3758                 /* We're at the end of the group, so now we know how many
3759                    groups were inside this one.  */
3760                 if (this_group_regnum <= MAX_REGNUM)
3761                   {
3762 		    UCHAR_T *inner_group_loc
3763                       = COMPILED_BUFFER_VAR + COMPILE_STACK_TOP.inner_group_offset;
3764 
3765                     *inner_group_loc = regnum - this_group_regnum;
3766                     BUF_PUSH_3 (stop_memory, this_group_regnum,
3767                                 regnum - this_group_regnum);
3768                   }
3769               }
3770               break;
3771 
3772 
3773             case '|':					/* `\|'.  */
3774               if (syntax & RE_LIMITED_OPS || syntax & RE_NO_BK_VBAR)
3775                 goto normal_backslash;
3776             handle_alt:
3777               if (syntax & RE_LIMITED_OPS)
3778                 goto normal_char;
3779 
3780               /* Insert before the previous alternative a jump which
3781                  jumps to this alternative if the former fails.  */
3782               GET_BUFFER_SPACE (1 + OFFSET_ADDRESS_SIZE);
3783               INSERT_JUMP (on_failure_jump, begalt,
3784 			   b + 2 + 2 * OFFSET_ADDRESS_SIZE);
3785               pending_exact = 0;
3786               b += 1 + OFFSET_ADDRESS_SIZE;
3787 
3788               /* The alternative before this one has a jump after it
3789                  which gets executed if it gets matched.  Adjust that
3790                  jump so it will jump to this alternative's analogous
3791                  jump (put in below, which in turn will jump to the next
3792                  (if any) alternative's such jump, etc.).  The last such
3793                  jump jumps to the correct final destination.  A picture:
3794                           _____ _____
3795                           |   | |   |
3796                           |   v |   v
3797                          a | b   | c
3798 
3799                  If we are at `b', then fixup_alt_jump right now points to a
3800                  three-byte space after `a'.  We'll put in the jump, set
3801                  fixup_alt_jump to right after `b', and leave behind three
3802                  bytes which we'll fill in when we get to after `c'.  */
3803 
3804               if (fixup_alt_jump)
3805                 STORE_JUMP (jump_past_alt, fixup_alt_jump, b);
3806 
3807               /* Mark and leave space for a jump after this alternative,
3808                  to be filled in later either by next alternative or
3809                  when know we're at the end of a series of alternatives.  */
3810               fixup_alt_jump = b;
3811               GET_BUFFER_SPACE (1 + OFFSET_ADDRESS_SIZE);
3812               b += 1 + OFFSET_ADDRESS_SIZE;
3813 
3814               laststart = 0;
3815               begalt = b;
3816               break;
3817 
3818 
3819             case '{':
3820               /* If \{ is a literal.  */
3821               if (!(syntax & RE_INTERVALS)
3822                      /* If we're at `\{' and it's not the open-interval
3823                         operator.  */
3824 		  || (syntax & RE_NO_BK_BRACES))
3825                 goto normal_backslash;
3826 
3827             handle_interval:
3828               {
3829                 /* If got here, then the syntax allows intervals.  */
3830 
3831                 /* At least (most) this many matches must be made.  */
3832                 int lower_bound = -1, upper_bound = -1;
3833 
3834 		/* Place in the uncompiled pattern (i.e., just after
3835 		   the '{') to go back to if the interval is invalid.  */
3836 		const CHAR_T *beg_interval = p;
3837 
3838                 if (p == pend)
3839 		  goto invalid_interval;
3840 
3841                 GET_UNSIGNED_NUMBER (lower_bound);
3842 
3843                 if (c == ',')
3844                   {
3845                     GET_UNSIGNED_NUMBER (upper_bound);
3846 		    if (upper_bound < 0)
3847 		      upper_bound = RE_DUP_MAX;
3848                   }
3849                 else
3850                   /* Interval such as `{1}' => match exactly once. */
3851                   upper_bound = lower_bound;
3852 
3853                 if (! (0 <= lower_bound && lower_bound <= upper_bound))
3854 		  goto invalid_interval;
3855 
3856                 if (!(syntax & RE_NO_BK_BRACES))
3857                   {
3858 		    if (c != '\\' || p == pend)
3859 		      goto invalid_interval;
3860                     PATFETCH (c);
3861                   }
3862 
3863                 if (c != '}')
3864 		  goto invalid_interval;
3865 
3866                 /* If it's invalid to have no preceding re.  */
3867                 if (!laststart)
3868                   {
3869 		    if (syntax & RE_CONTEXT_INVALID_OPS
3870 			&& !(syntax & RE_INVALID_INTERVAL_ORD))
3871                       FREE_STACK_RETURN (REG_BADRPT);
3872                     else if (syntax & RE_CONTEXT_INDEP_OPS)
3873                       laststart = b;
3874                     else
3875                       goto unfetch_interval;
3876                   }
3877 
3878                 /* We just parsed a valid interval.  */
3879 
3880                 if (RE_DUP_MAX < upper_bound)
3881 		  FREE_STACK_RETURN (REG_BADBR);
3882 
3883                 /* If the upper bound is zero, don't want to succeed at
3884                    all; jump from `laststart' to `b + 3', which will be
3885 		   the end of the buffer after we insert the jump.  */
3886 		/* ifdef WCHAR, 'b + 1 + OFFSET_ADDRESS_SIZE'
3887 		   instead of 'b + 3'.  */
3888                  if (upper_bound == 0)
3889                    {
3890                      GET_BUFFER_SPACE (1 + OFFSET_ADDRESS_SIZE);
3891                      INSERT_JUMP (jump, laststart, b + 1
3892 				  + OFFSET_ADDRESS_SIZE);
3893                      b += 1 + OFFSET_ADDRESS_SIZE;
3894                    }
3895 
3896                  /* Otherwise, we have a nontrivial interval.  When
3897                     we're all done, the pattern will look like:
3898                       set_number_at <jump count> <upper bound>
3899                       set_number_at <succeed_n count> <lower bound>
3900                       succeed_n <after jump addr> <succeed_n count>
3901                       <body of loop>
3902                       jump_n <succeed_n addr> <jump count>
3903                     (The upper bound and `jump_n' are omitted if
3904                     `upper_bound' is 1, though.)  */
3905                  else
3906                    { /* If the upper bound is > 1, we need to insert
3907                         more at the end of the loop.  */
3908                      unsigned nbytes = 2 + 4 * OFFSET_ADDRESS_SIZE +
3909 		       (upper_bound > 1) * (2 + 4 * OFFSET_ADDRESS_SIZE);
3910 
3911                      GET_BUFFER_SPACE (nbytes);
3912 
3913                      /* Initialize lower bound of the `succeed_n', even
3914                         though it will be set during matching by its
3915                         attendant `set_number_at' (inserted next),
3916                         because `re_compile_fastmap' needs to know.
3917                         Jump to the `jump_n' we might insert below.  */
3918                      INSERT_JUMP2 (succeed_n, laststart,
3919                                    b + 1 + 2 * OFFSET_ADDRESS_SIZE
3920 				   + (upper_bound > 1) * (1 + 2 * OFFSET_ADDRESS_SIZE)
3921 				   , lower_bound);
3922                      b += 1 + 2 * OFFSET_ADDRESS_SIZE;
3923 
3924                      /* Code to initialize the lower bound.  Insert
3925                         before the `succeed_n'.  The `5' is the last two
3926                         bytes of this `set_number_at', plus 3 bytes of
3927                         the following `succeed_n'.  */
3928 		     /* ifdef WCHAR, The '1+2*OFFSET_ADDRESS_SIZE'
3929 			is the 'set_number_at', plus '1+OFFSET_ADDRESS_SIZE'
3930 			of the following `succeed_n'.  */
3931                      PREFIX(insert_op2) (set_number_at, laststart, 1
3932 				 + 2 * OFFSET_ADDRESS_SIZE, lower_bound, b);
3933                      b += 1 + 2 * OFFSET_ADDRESS_SIZE;
3934 
3935                      if (upper_bound > 1)
3936                        { /* More than one repetition is allowed, so
3937                             append a backward jump to the `succeed_n'
3938                             that starts this interval.
3939 
3940                             When we've reached this during matching,
3941                             we'll have matched the interval once, so
3942                             jump back only `upper_bound - 1' times.  */
3943                          STORE_JUMP2 (jump_n, b, laststart
3944 				      + 2 * OFFSET_ADDRESS_SIZE + 1,
3945                                       upper_bound - 1);
3946                          b += 1 + 2 * OFFSET_ADDRESS_SIZE;
3947 
3948                          /* The location we want to set is the second
3949                             parameter of the `jump_n'; that is `b-2' as
3950                             an absolute address.  `laststart' will be
3951                             the `set_number_at' we're about to insert;
3952                             `laststart+3' the number to set, the source
3953                             for the relative address.  But we are
3954                             inserting into the middle of the pattern --
3955                             so everything is getting moved up by 5.
3956                             Conclusion: (b - 2) - (laststart + 3) + 5,
3957                             i.e., b - laststart.
3958 
3959                             We insert this at the beginning of the loop
3960                             so that if we fail during matching, we'll
3961                             reinitialize the bounds.  */
3962                          PREFIX(insert_op2) (set_number_at, laststart,
3963 					     b - laststart,
3964 					     upper_bound - 1, b);
3965                          b += 1 + 2 * OFFSET_ADDRESS_SIZE;
3966                        }
3967                    }
3968                 pending_exact = 0;
3969 		break;
3970 
3971 	      invalid_interval:
3972 		if (!(syntax & RE_INVALID_INTERVAL_ORD))
3973 		  FREE_STACK_RETURN (p == pend ? REG_EBRACE : REG_BADBR);
3974 	      unfetch_interval:
3975 		/* Match the characters as literals.  */
3976 		p = beg_interval;
3977 		c = '{';
3978 		if (syntax & RE_NO_BK_BRACES)
3979 		  goto normal_char;
3980 		else
3981 		  goto normal_backslash;
3982 	      }
3983 
3984 #ifdef emacs
3985             /* There is no way to specify the before_dot and after_dot
3986                operators.  rms says this is ok.  --karl  */
3987             case '=':
3988               BUF_PUSH (at_dot);
3989               break;
3990 
3991             case 's':
3992               laststart = b;
3993               PATFETCH (c);
3994               BUF_PUSH_2 (syntaxspec, syntax_spec_code[c]);
3995               break;
3996 
3997             case 'S':
3998               laststart = b;
3999               PATFETCH (c);
4000               BUF_PUSH_2 (notsyntaxspec, syntax_spec_code[c]);
4001               break;
4002 #endif /* emacs */
4003 
4004 
4005             case 'w':
4006 	      if (syntax & RE_NO_GNU_OPS)
4007 		goto normal_char;
4008               laststart = b;
4009               BUF_PUSH (wordchar);
4010               break;
4011 
4012 
4013             case 'W':
4014 	      if (syntax & RE_NO_GNU_OPS)
4015 		goto normal_char;
4016               laststart = b;
4017               BUF_PUSH (notwordchar);
4018               break;
4019 
4020 
4021             case '<':
4022 	      if (syntax & RE_NO_GNU_OPS)
4023 		goto normal_char;
4024               BUF_PUSH (wordbeg);
4025               break;
4026 
4027             case '>':
4028 	      if (syntax & RE_NO_GNU_OPS)
4029 		goto normal_char;
4030               BUF_PUSH (wordend);
4031               break;
4032 
4033             case 'b':
4034 	      if (syntax & RE_NO_GNU_OPS)
4035 		goto normal_char;
4036               BUF_PUSH (wordbound);
4037               break;
4038 
4039             case 'B':
4040 	      if (syntax & RE_NO_GNU_OPS)
4041 		goto normal_char;
4042               BUF_PUSH (notwordbound);
4043               break;
4044 
4045             case '`':
4046 	      if (syntax & RE_NO_GNU_OPS)
4047 		goto normal_char;
4048               BUF_PUSH (begbuf);
4049               break;
4050 
4051             case '\'':
4052 	      if (syntax & RE_NO_GNU_OPS)
4053 		goto normal_char;
4054               BUF_PUSH (endbuf);
4055               break;
4056 
4057             case '1': case '2': case '3': case '4': case '5':
4058             case '6': case '7': case '8': case '9':
4059               if (syntax & RE_NO_BK_REFS)
4060                 goto normal_char;
4061 
4062               c1 = c - '0';
4063 
4064               if (c1 > regnum)
4065                 FREE_STACK_RETURN (REG_ESUBREG);
4066 
4067               /* Can't back reference to a subexpression if inside of it.  */
4068               if (group_in_compile_stack (compile_stack, (regnum_t) c1))
4069                 goto normal_char;
4070 
4071               laststart = b;
4072               BUF_PUSH_2 (duplicate, c1);
4073               break;
4074 
4075 
4076             case '+':
4077             case '?':
4078               if (syntax & RE_BK_PLUS_QM)
4079                 goto handle_plus;
4080               else
4081                 goto normal_backslash;
4082 
4083             default:
4084             normal_backslash:
4085               /* You might think it would be useful for \ to mean
4086                  not to translate; but if we don't translate it
4087                  it will never match anything.  */
4088               c = TRANSLATE (c);
4089               goto normal_char;
4090             }
4091           break;
4092 
4093 
4094 	default:
4095         /* Expects the character in `c'.  */
4096 	normal_char:
4097 	      /* If no exactn currently being built.  */
4098           if (!pending_exact
4099 #ifdef WCHAR
4100 	      /* If last exactn handle binary(or character) and
4101 		 new exactn handle character(or binary).  */
4102 	      || is_exactn_bin != is_binary[p - 1 - pattern]
4103 #endif /* WCHAR */
4104 
4105               /* If last exactn not at current position.  */
4106               || pending_exact + *pending_exact + 1 != b
4107 
4108               /* We have only one byte following the exactn for the count.  */
4109 	      || *pending_exact == (1 << BYTEWIDTH) - 1
4110 
4111               /* If followed by a repetition operator.  */
4112               || *p == '*' || *p == '^'
4113 	      || ((syntax & RE_BK_PLUS_QM)
4114 		  ? *p == '\\' && (p[1] == '+' || p[1] == '?')
4115 		  : (*p == '+' || *p == '?'))
4116 	      || ((syntax & RE_INTERVALS)
4117                   && ((syntax & RE_NO_BK_BRACES)
4118 		      ? *p == '{'
4119                       : (p[0] == '\\' && p[1] == '{'))))
4120 	    {
4121 	      /* Start building a new exactn.  */
4122 
4123               laststart = b;
4124 
4125 #ifdef WCHAR
4126 	      /* Is this exactn binary data or character? */
4127 	      is_exactn_bin = is_binary[p - 1 - pattern];
4128 	      if (is_exactn_bin)
4129 		  BUF_PUSH_2 (exactn_bin, 0);
4130 	      else
4131 		  BUF_PUSH_2 (exactn, 0);
4132 #else
4133 	      BUF_PUSH_2 (exactn, 0);
4134 #endif /* WCHAR */
4135 	      pending_exact = b - 1;
4136             }
4137 
4138 	  BUF_PUSH (c);
4139           (*pending_exact)++;
4140 	  break;
4141         } /* switch (c) */
4142     } /* while p != pend */
4143 
4144 
4145   /* Through the pattern now.  */
4146 
4147   if (fixup_alt_jump)
4148     STORE_JUMP (jump_past_alt, fixup_alt_jump, b);
4149 
4150   if (!COMPILE_STACK_EMPTY)
4151     FREE_STACK_RETURN (REG_EPAREN);
4152 
4153   /* If we don't want backtracking, force success
4154      the first time we reach the end of the compiled pattern.  */
4155   if (syntax & RE_NO_POSIX_BACKTRACKING)
4156     BUF_PUSH (succeed);
4157 
4158 #ifdef WCHAR
4159   free (pattern);
4160   free (mbs_offset);
4161   free (is_binary);
4162 #endif
4163   free (compile_stack.stack);
4164 
4165   /* We have succeeded; set the length of the buffer.  */
4166 #ifdef WCHAR
4167   bufp->used = (uintptr_t) b - (uintptr_t) COMPILED_BUFFER_VAR;
4168 #else
4169   bufp->used = b - bufp->buffer;
4170 #endif
4171 
4172 #ifdef DEBUG
4173   if (debug)
4174     {
4175       DEBUG_PRINT1 ("\nCompiled pattern: \n");
4176       PREFIX(print_compiled_pattern) (bufp);
4177     }
4178 #endif /* DEBUG */
4179 
4180 #ifndef MATCH_MAY_ALLOCATE
4181   /* Initialize the failure stack to the largest possible stack.  This
4182      isn't necessary unless we're trying to avoid calling alloca in
4183      the search and match routines.  */
4184   {
4185     int num_regs = bufp->re_nsub + 1;
4186 
4187     /* Since DOUBLE_FAIL_STACK refuses to double only if the current size
4188        is strictly greater than re_max_failures, the largest possible stack
4189        is 2 * re_max_failures failure points.  */
4190     if (fail_stack.size < (2 * re_max_failures * MAX_FAILURE_ITEMS))
4191       {
4192 	fail_stack.size = (2 * re_max_failures * MAX_FAILURE_ITEMS);
4193 
4194 # ifdef emacs
4195 	if (! fail_stack.stack)
4196 	  fail_stack.stack
4197 	    = (PREFIX(fail_stack_elt_t) *) xmalloc (fail_stack.size
4198 				    * sizeof (PREFIX(fail_stack_elt_t)));
4199 	else
4200 	  fail_stack.stack
4201 	    = (PREFIX(fail_stack_elt_t) *) xrealloc (fail_stack.stack,
4202 				     (fail_stack.size
4203 				      * sizeof (PREFIX(fail_stack_elt_t))));
4204 # else /* not emacs */
4205 	if (! fail_stack.stack)
4206 	  fail_stack.stack
4207 	    = malloc (fail_stack.size * sizeof (PREFIX(fail_stack_elt_t)));
4208 	else
4209 	  fail_stack.stack
4210 	    = realloc (fail_stack.stack,
4211 		       fail_stack.size * sizeof (PREFIX(fail_stack_elt_t)));
4212 # endif /* not emacs */
4213       }
4214 
4215    PREFIX(regex_grow_registers) (num_regs);
4216   }
4217 #endif /* not MATCH_MAY_ALLOCATE */
4218 
4219   return REG_NOERROR;
4220 } /* regex_compile */
4221 
4222 /* Subroutines for `regex_compile'.  */
4223 
4224 /* Store OP at LOC followed by two-byte integer parameter ARG.  */
4225 /* ifdef WCHAR, integer parameter is 1 wchar_t.  */
4226 
4227 static void
PREFIX(store_op1)4228 PREFIX(store_op1) (re_opcode_t op, UCHAR_T *loc, int arg)
4229 {
4230   *loc = (UCHAR_T) op;
4231   STORE_NUMBER (loc + 1, arg);
4232 }
4233 
4234 
4235 /* Like `store_op1', but for two two-byte parameters ARG1 and ARG2.  */
4236 /* ifdef WCHAR, integer parameter is 1 wchar_t.  */
4237 
4238 static void
PREFIX(store_op2)4239 PREFIX(store_op2) (re_opcode_t op, UCHAR_T *loc, int arg1, int arg2)
4240 {
4241   *loc = (UCHAR_T) op;
4242   STORE_NUMBER (loc + 1, arg1);
4243   STORE_NUMBER (loc + 1 + OFFSET_ADDRESS_SIZE, arg2);
4244 }
4245 
4246 
4247 /* Copy the bytes from LOC to END to open up three bytes of space at LOC
4248    for OP followed by two-byte integer parameter ARG.  */
4249 /* ifdef WCHAR, integer parameter is 1 wchar_t.  */
4250 
4251 static void
PREFIX(insert_op1)4252 PREFIX(insert_op1) (re_opcode_t op, UCHAR_T *loc, int arg, UCHAR_T *end)
4253 {
4254   register UCHAR_T *pfrom = end;
4255   register UCHAR_T *pto = end + 1 + OFFSET_ADDRESS_SIZE;
4256 
4257   while (pfrom != loc)
4258     *--pto = *--pfrom;
4259 
4260   PREFIX(store_op1) (op, loc, arg);
4261 }
4262 
4263 
4264 /* Like `insert_op1', but for two two-byte parameters ARG1 and ARG2.  */
4265 /* ifdef WCHAR, integer parameter is 1 wchar_t.  */
4266 
4267 static void
PREFIX(insert_op2)4268 PREFIX(insert_op2) (re_opcode_t op, UCHAR_T *loc, int arg1, int arg2,
4269 		    UCHAR_T *end)
4270 {
4271   register UCHAR_T *pfrom = end;
4272   register UCHAR_T *pto = end + 1 + 2 * OFFSET_ADDRESS_SIZE;
4273 
4274   while (pfrom != loc)
4275     *--pto = *--pfrom;
4276 
4277   PREFIX(store_op2) (op, loc, arg1, arg2);
4278 }
4279 
4280 
4281 /* P points to just after a ^ in PATTERN.  Return true if that ^ comes
4282    after an alternative or a begin-subexpression.  We assume there is at
4283    least one character before the ^.  */
4284 
4285 static boolean
PREFIX(at_begline_loc_p)4286 PREFIX(at_begline_loc_p) (const CHAR_T *pattern, const CHAR_T *p,
4287 			  reg_syntax_t syntax)
4288 {
4289   const CHAR_T *prev = p - 2;
4290   boolean prev_prev_backslash = prev > pattern && prev[-1] == '\\';
4291 
4292   return
4293        /* After a subexpression?  */
4294        (*prev == '(' && (syntax & RE_NO_BK_PARENS || prev_prev_backslash))
4295        /* After an alternative?  */
4296     || (*prev == '|' && (syntax & RE_NO_BK_VBAR || prev_prev_backslash));
4297 }
4298 
4299 
4300 /* The dual of at_begline_loc_p.  This one is for $.  We assume there is
4301    at least one character after the $, i.e., `P < PEND'.  */
4302 
4303 static boolean
PREFIX(at_endline_loc_p)4304 PREFIX(at_endline_loc_p) (const CHAR_T *p, const CHAR_T *pend,
4305 			  reg_syntax_t syntax)
4306 {
4307   const CHAR_T *next = p;
4308   boolean next_backslash = *next == '\\';
4309   const CHAR_T *next_next = p + 1 < pend ? p + 1 : 0;
4310 
4311   return
4312        /* Before a subexpression?  */
4313        (syntax & RE_NO_BK_PARENS ? *next == ')'
4314         : next_backslash && next_next && *next_next == ')')
4315        /* Before an alternative?  */
4316     || (syntax & RE_NO_BK_VBAR ? *next == '|'
4317         : next_backslash && next_next && *next_next == '|');
4318 }
4319 
4320 #else /* not INSIDE_RECURSION */
4321 
4322 /* Returns true if REGNUM is in one of COMPILE_STACK's elements and
4323    false if it's not.  */
4324 
4325 static boolean
group_in_compile_stack(compile_stack_type compile_stack,regnum_t regnum)4326 group_in_compile_stack (compile_stack_type compile_stack,
4327 			regnum_t regnum)
4328 {
4329   int this_element;
4330 
4331   for (this_element = compile_stack.avail - 1;
4332        this_element >= 0;
4333        this_element--)
4334     if (compile_stack.stack[this_element].regnum == regnum)
4335       return true;
4336 
4337   return false;
4338 }
4339 #endif /* not INSIDE_RECURSION */
4340 
4341 #ifdef INSIDE_RECURSION
4342 
4343 #ifdef WCHAR
4344 /* This insert space, which size is "num", into the pattern at "loc".
4345    "end" must point the end of the allocated buffer.  */
4346 static void
insert_space(int num,CHAR_T * loc,CHAR_T * end)4347 insert_space (int num, CHAR_T *loc, CHAR_T *end)
4348 {
4349   register CHAR_T *pto = end;
4350   register CHAR_T *pfrom = end - num;
4351 
4352   while (pfrom >= loc)
4353     *pto-- = *pfrom--;
4354 }
4355 #endif /* WCHAR */
4356 
4357 #ifdef WCHAR
/* Wide-character version of the bracket-expression range compiler.

   RANGE_START_CHAR is the start of the range and (*P_PTR)[0] is its
   ending character; PEND is the end of the pattern.  A (start, end) pair
   is inserted into the character set being built: two cells are opened
   with insert_space at B - CHAR_SET[5] - 2, the pair is stored there,
   and CHAR_SET[4] is incremented (the inline comments call that region
   `char_ranges' and that counter `ranges_index').  In the _LIBC build
   with collation rules present, the stored values are collation sequence
   values rather than characters.

   *P_PTR is advanced past the ending character unless the pattern is
   already exhausted.

   Returns REG_ERANGE if P == PEND, or if SYNTAX has RE_NO_EMPTY_RANGES
   set and the start sorts after the end; REG_NOERROR otherwise.  Note
   that in the empty-range case the pair has still been inserted by the
   time the error is returned.

   NOTE(review): the exact layout of CHAR_SET (what indices 4 and 5 hold)
   is defined by the caller and is only inferred here from the inline
   comments -- confirm against regex_compile.  */
4358 static reg_errcode_t
wcs_compile_range(CHAR_T range_start_char,const CHAR_T ** p_ptr,const CHAR_T * pend,RE_TRANSLATE_TYPE translate,reg_syntax_t syntax,CHAR_T * b,CHAR_T * char_set)4359 wcs_compile_range (CHAR_T range_start_char,
4360 		   const CHAR_T **p_ptr, const CHAR_T *pend,
4361 		   RE_TRANSLATE_TYPE translate, reg_syntax_t syntax,
4362 		   CHAR_T *b, CHAR_T *char_set)
4363 {
4364   const CHAR_T *p = *p_ptr;
4365   CHAR_T range_start, range_end;
4366   reg_errcode_t ret;
4367 # ifdef _LIBC
4368   uint32_t nrules;
4369   uint32_t start_val, end_val;
4370 # endif
  /* A range needs an ending character; running out of pattern is an
     error.  */
4371   if (p == pend)
4372     return REG_ERANGE;
4373 
4374 # ifdef _LIBC
4375   nrules = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
4376   if (nrules != 0)
4377     {
4378       const char *collseq = (const char *) _NL_CURRENT(LC_COLLATE,
4379 						       _NL_COLLATE_COLLSEQWC);
4380       const unsigned char *extra = (const unsigned char *)
4381 	_NL_CURRENT (LC_COLLATE, _NL_COLLATE_SYMB_EXTRAMB);
4382 
4383       if (range_start_char < -1)
4384 	{
4385 	  /* range_start is a collating symbol.  */
4386 	  int32_t *wextra;
4387 	  /* Retrieve the index and get collation sequence value.  */
4388 	  wextra = (int32_t*)(extra + char_set[-range_start_char]);
4389 	  start_val = wextra[1 + *wextra];
4390 	}
4391       else
4392 	start_val = collseq_table_lookup(collseq, TRANSLATE(range_start_char));
4393 
4394       end_val = collseq_table_lookup (collseq, TRANSLATE (p[0]));
4395 
4396       /* Report an error if the range is empty and the syntax prohibits
4397 	 this.  */
4398       ret = ((syntax & RE_NO_EMPTY_RANGES)
4399 	     && (start_val > end_val))? REG_ERANGE : REG_NOERROR;
4400 
4401       /* Insert space to the end of the char_ranges.  */
4402       insert_space(2, b - char_set[5] - 2, b - 1);
4403       *(b - char_set[5] - 2) = (wchar_t)start_val;
4404       *(b - char_set[5] - 1) = (wchar_t)end_val;
4405       char_set[4]++; /* ranges_index */
4406     }
4407   else
4408 # endif
4409     {
      /* Negative start values are stored as they are; only real
	 characters go through the translate table.  */
4410       range_start = (range_start_char >= 0)? TRANSLATE (range_start_char):
4411 	range_start_char;
4412       range_end = TRANSLATE (p[0]);
4413       /* Report an error if the range is empty and the syntax prohibits
4414 	 this.  */
4415       ret = ((syntax & RE_NO_EMPTY_RANGES)
4416 	     && (range_start > range_end))? REG_ERANGE : REG_NOERROR;
4417 
4418       /* Insert space to the end of the char_ranges.  */
4419       insert_space(2, b - char_set[5] - 2, b - 1);
4420       *(b - char_set[5] - 2) = range_start;
4421       *(b - char_set[5] - 1) = range_end;
4422       char_set[4]++; /* ranges_index */
4423     }
4424   /* Have to increment the pointer into the pattern string, so the
4425      caller isn't still at the ending character.  */
4426   (*p_ptr)++;
4427 
4428   return ret;
4429 }
4430 #else /* BYTE */
4431 /* Read the ending character of a range (in a bracket expression) from the
4432    uncompiled pattern *P_PTR (which ends at PEND).  We assume the
4433    starting character is in `P[-2]'.  (`P[-1]' is the character `-'.)
4434    Then we set the translation of all bits between the starting and
4435    ending characters (inclusive) in the compiled pattern B.
4436 
4437    Return an error code.
4438 
4439    We use these short variable names so we can use the same macros as
4440    `regex_compile' itself.  */
4441 
4442 static reg_errcode_t
byte_compile_range(unsigned int range_start_char,const char ** p_ptr,const char * pend,RE_TRANSLATE_TYPE translate,reg_syntax_t syntax,unsigned char * b)4443 byte_compile_range (unsigned int range_start_char,
4444 		    const char **p_ptr, const char *pend,
4445 		    RE_TRANSLATE_TYPE translate, reg_syntax_t syntax,
4446 		    unsigned char *b)
4447 {
4448   unsigned this_char;
4449   const char *p = *p_ptr;
4450   reg_errcode_t ret;
4451 # if _LIBC
4452   const unsigned char *collseq;
4453   unsigned int start_colseq;
4454   unsigned int end_colseq;
4455 # else
4456   unsigned end_char;
4457 # endif
4458 
4459   if (p == pend)
4460     return REG_ERANGE;
4461 
4462   /* Have to increment the pointer into the pattern string, so the
4463      caller isn't still at the ending character.  */
4464   (*p_ptr)++;
4465 
4466   /* Report an error if the range is empty and the syntax prohibits this.  */
4467   ret = syntax & RE_NO_EMPTY_RANGES ? REG_ERANGE : REG_NOERROR;
4468 
4469 # if _LIBC
4470   collseq = (const unsigned char *) _NL_CURRENT (LC_COLLATE,
4471 						 _NL_COLLATE_COLLSEQMB);
4472 
4473   start_colseq = collseq[(unsigned char) TRANSLATE (range_start_char)];
4474   end_colseq = collseq[(unsigned char) TRANSLATE (p[0])];
4475   for (this_char = 0; this_char <= (unsigned char) -1; ++this_char)
4476     {
4477       unsigned int this_colseq = collseq[(unsigned char) TRANSLATE (this_char)];
4478 
4479       if (start_colseq <= this_colseq && this_colseq <= end_colseq)
4480 	{
4481 	  SET_LIST_BIT (TRANSLATE (this_char));
4482 	  ret = REG_NOERROR;
4483 	}
4484     }
4485 # else
4486   /* Here we see why `this_char' has to be larger than an `unsigned
4487      char' -- we would otherwise go into an infinite loop, since all
4488      characters <= 0xff.  */
4489   range_start_char = TRANSLATE (range_start_char);
4490   /* TRANSLATE(p[0]) is casted to char (not unsigned char) in TRANSLATE,
4491      and some compilers cast it to int implicitly, so following for_loop
4492      may fall to (almost) infinite loop.
4493      e.g. If translate[p[0]] = 0xff, end_char may equals to 0xffffffff.
4494      To avoid this, we cast p[0] to unsigned int and truncate it.  */
4495   end_char = ((unsigned)TRANSLATE(p[0]) & ((1 << BYTEWIDTH) - 1));
4496 
4497   for (this_char = range_start_char; this_char <= end_char; ++this_char)
4498     {
4499       SET_LIST_BIT (TRANSLATE (this_char));
4500       ret = REG_NOERROR;
4501     }
4502 # endif
4503 
4504   return ret;
4505 }
4506 #endif /* WCHAR */
4507 
4508 /* re_compile_fastmap computes a ``fastmap'' for the compiled pattern in
4509    BUFP.  A fastmap records which of the (1 << BYTEWIDTH) possible
4510    characters can start a string that matches the pattern.  This fastmap
4511    is used by re_search to skip quickly over impossible starting points.
4512 
4513    The caller must supply the address of a (1 << BYTEWIDTH)-byte data
4514    area as BUFP->fastmap.
4515 
4516    We set the `fastmap', `fastmap_accurate', and `can_be_null' fields in
4517    the pattern buffer.
4518 
4519    Returns 0 if we succeed, -2 if an internal error.   */
4520 
4521 #ifdef WCHAR
4522 /* local function for re_compile_fastmap.
4523    truncate wchar_t character to char.  */
4524 
4525 static unsigned char
truncate_wchar(CHAR_T c)4526 truncate_wchar (CHAR_T c)
4527 {
4528   unsigned char buf[MB_CUR_MAX];
4529   mbstate_t state;
4530   int retval;
4531   memset (&state, '\0', sizeof (state));
4532   retval = wcrtomb (buf, c, &state);
4533   return retval > 0 ? buf[0] : (unsigned char) c;
4534 }
4535 #endif /* WCHAR */
4536 
/* Worker behind re_compile_fastmap for one character representation
   (PREFIX selects the variant).

   Walks the compiled pattern in BUFP->buffer opcode by opcode and marks
   in BUFP->fastmap every byte value that can begin a match.  Alternative
   paths (targets of on_failure_jump and friends) are remembered on a
   local failure stack and explored one after another.  On the way it
   also fills in BUFP->fastmap_accurate and BUFP->can_be_null.

   Returns 0 on success and -2 if a path could not be pushed onto the
   failure stack.  */
4537 static int
PREFIX(re_compile_fastmap)4538 PREFIX(re_compile_fastmap) (struct re_pattern_buffer *bufp)
4539 {
4540   int j, k;
4541 #ifdef MATCH_MAY_ALLOCATE
4542   PREFIX(fail_stack_type) fail_stack;
4543 #endif
4544 #ifndef REGEX_MALLOC
4545   char *destination;
4546 #endif
4547 
4548   register char *fastmap = bufp->fastmap;
4549 
4550 #ifdef WCHAR
4551   /* We need to cast pattern to (wchar_t*), because we casted this compiled
4552      pattern to (char*) in regex_compile.  */
4553   UCHAR_T *pattern = (UCHAR_T*)bufp->buffer;
4554   register UCHAR_T *pend = (UCHAR_T*) (bufp->buffer + bufp->used);
4555 #else /* BYTE */
4556   UCHAR_T *pattern = bufp->buffer;
4557   register UCHAR_T *pend = pattern + bufp->used;
4558 #endif /* WCHAR */
4559   UCHAR_T *p = pattern;
4560 
4561 #ifdef REL_ALLOC
4562   /* This holds the pointer to the failure stack, when
4563      it is allocated relocatably.  */
4564   fail_stack_elt_t *failure_stack_ptr;
4565 #endif
4566 
4567   /* Assume that each path through the pattern can be null until
4568      proven otherwise.  We set this false at the bottom of switch
4569      statement, to which we get only if a particular path doesn't
4570      match the empty string.  */
4571   boolean path_can_be_null = true;
4572 
4573   /* We aren't doing a `succeed_n' to begin with.  */
4574   boolean succeed_n_p = false;
4575 
4576   assert (fastmap != NULL && p != NULL);
4577 
4578   INIT_FAIL_STACK ();
4579   bzero (fastmap, 1 << BYTEWIDTH);  /* Assume nothing's valid.  */
4580   bufp->fastmap_accurate = 1;	    /* It will be when we're done.  */
4581   bufp->can_be_null = 0;
4582 
4583   while (1)
4584     {
4585       if (p == pend || *p == succeed)
4586 	{
4587 	  /* We have reached the (effective) end of pattern.  */
4588 	  if (!FAIL_STACK_EMPTY ())
4589 	    {
4590 	      bufp->can_be_null |= path_can_be_null;
4591 
4592 	      /* Reset for next path.  */
4593 	      path_can_be_null = true;
4594 
	      /* Resume at the most recently saved alternative.  */
4595 	      p = fail_stack.stack[--fail_stack.avail].pointer;
4596 
4597 	      continue;
4598 	    }
4599 	  else
4600 	    break;
4601 	}
4602 
4603       /* We should never be about to go beyond the end of the pattern.  */
4604       assert (p < pend);
4605 
4606       switch (SWITCH_ENUM_CAST ((re_opcode_t) *p++))
4607 	{
4608 
4609         /* I guess the idea here is to simply not bother with a fastmap
4610            if a backreference is used, since it's too hard to figure out
4611            the fastmap for the corresponding group.  Setting
4612            `can_be_null' stops `re_search_2' from using the fastmap, so
4613            that is all we do.  */
4614 	case duplicate:
4615 	  bufp->can_be_null = 1;
4616           goto done;
4617 
4618 
4619       /* Following are the cases which match a character.  These end
4620          with `break'.  */
4621 
	/* NOTE(review): p[1] is used as the first literal character, so
	   p[0] is presumably the operand count of exactn -- confirm
	   against the opcode encoding.  */
4622 #ifdef WCHAR
4623 	case exactn:
4624           fastmap[truncate_wchar(p[1])] = 1;
4625 	  break;
4626 #else /* BYTE */
4627 	case exactn:
4628           fastmap[p[1]] = 1;
4629 	  break;
4630 #endif /* WCHAR */
4631 #ifdef MBS_SUPPORT
4632 	case exactn_bin:
4633 	  fastmap[p[1]] = 1;
4634 	  break;
4635 #endif
4636 
4637 #ifdef WCHAR
4638         /* It is hard to distinguish fastmap from (multi byte) characters
4639            which depends on current locale.  */
4640         case charset:
4641 	case charset_not:
4642 	case wordchar:
4643 	case notwordchar:
4644           bufp->can_be_null = 1;
4645           goto done;
4646 #else /* BYTE */
	/* *p is the bitmap length in bytes; every set bit marks a byte
	   value that may start a match.  */
4647         case charset:
4648           for (j = *p++ * BYTEWIDTH - 1; j >= 0; j--)
4649 	    if (p[j / BYTEWIDTH] & (1 << (j % BYTEWIDTH)))
4650               fastmap[j] = 1;
4651 	  break;
4652 
4653 
4654 	case charset_not:
4655 	  /* Chars beyond end of map must be allowed.  */
4656 	  for (j = *p * BYTEWIDTH; j < (1 << BYTEWIDTH); j++)
4657             fastmap[j] = 1;
4658 
	  /* Within the map, every clear bit is an allowed byte.  */
4659 	  for (j = *p++ * BYTEWIDTH - 1; j >= 0; j--)
4660 	    if (!(p[j / BYTEWIDTH] & (1 << (j % BYTEWIDTH))))
4661               fastmap[j] = 1;
4662           break;
4663 
4664 
4665 	case wordchar:
4666 	  for (j = 0; j < (1 << BYTEWIDTH); j++)
4667 	    if (SYNTAX (j) == Sword)
4668 	      fastmap[j] = 1;
4669 	  break;
4670 
4671 
4672 	case notwordchar:
4673 	  for (j = 0; j < (1 << BYTEWIDTH); j++)
4674 	    if (SYNTAX (j) != Sword)
4675 	      fastmap[j] = 1;
4676 	  break;
4677 #endif /* WCHAR */
4678 
4679         case anychar:
4680 	  {
	    /* Remember the newline entry so it can be restored if `.'
	       does not match newline under this syntax.  */
4681 	    int fastmap_newline = fastmap['\n'];
4682 
4683 	    /* `.' matches anything ...  */
4684 	    for (j = 0; j < (1 << BYTEWIDTH); j++)
4685 	      fastmap[j] = 1;
4686 
4687 	    /* ... except perhaps newline.  */
4688 	    if (!(bufp->syntax & RE_DOT_NEWLINE))
4689 	      fastmap['\n'] = fastmap_newline;
4690 
4691 	    /* Return if we have already set `can_be_null'; if we have,
4692 	       then the fastmap is irrelevant.  Something's wrong here.  */
4693 	    else if (bufp->can_be_null)
4694 	      goto done;
4695 
4696 	    /* Otherwise, have to check alternative paths.  */
4697 	    break;
4698 	  }
4699 
4700 #ifdef emacs
4701         case syntaxspec:
4702 	  k = *p++;
4703 	  for (j = 0; j < (1 << BYTEWIDTH); j++)
4704 	    if (SYNTAX (j) == (enum syntaxcode) k)
4705 	      fastmap[j] = 1;
4706 	  break;
4707 
4708 
4709 	case notsyntaxspec:
4710 	  k = *p++;
4711 	  for (j = 0; j < (1 << BYTEWIDTH); j++)
4712 	    if (SYNTAX (j) != (enum syntaxcode) k)
4713 	      fastmap[j] = 1;
4714 	  break;
4715 
4716 
4717       /* All cases after this match the empty string.  These end with
4718          `continue'.  */
4719 
4720 
4721 	case before_dot:
4722 	case at_dot:
4723 	case after_dot:
4724           continue;
4725 #endif /* emacs */
4726 
4727 
4728         case no_op:
4729         case begline:
4730         case endline:
4731 	case begbuf:
4732 	case endbuf:
4733 	case wordbound:
4734 	case notwordbound:
4735 	case wordbeg:
4736 	case wordend:
4737         case push_dummy_failure:
4738           continue;
4739 
4740 
4741 	case jump_n:
4742         case pop_failure_jump:
4743 	case maybe_pop_jump:
4744 	case jump:
4745         case jump_past_alt:
4746 	case dummy_failure_jump:
4747           EXTRACT_NUMBER_AND_INCR (j, p);
4748 	  p += j;
4749 	  if (j > 0)
4750 	    continue;
4751 
4752           /* Jump backward implies we just went through the body of a
4753              loop and matched nothing.  Opcode jumped to should be
4754              `on_failure_jump' or `succeed_n'.  Just treat it like an
4755              ordinary jump.  For a * loop, it has pushed its failure
4756              point already; if so, discard that as redundant.  */
4757           if ((re_opcode_t) *p != on_failure_jump
4758 	      && (re_opcode_t) *p != succeed_n)
4759 	    continue;
4760 
4761           p++;
4762           EXTRACT_NUMBER_AND_INCR (j, p);
4763           p += j;
4764 
4765           /* If what's on the stack is where we are now, pop it.  */
4766           if (!FAIL_STACK_EMPTY ()
4767 	      && fail_stack.stack[fail_stack.avail - 1].pointer == p)
4768             fail_stack.avail--;
4769 
4770           continue;
4771 
4772 
4773         case on_failure_jump:
4774         case on_failure_keep_string_jump:
4775 	handle_on_failure_jump:
4776           EXTRACT_NUMBER_AND_INCR (j, p);
4777 
4778           /* For some patterns, e.g., `(a?)?', `p+j' here points to the
4779              end of the pattern.  We don't want to push such a point,
4780              since when we restore it above, entering the switch will
4781              increment `p' past the end of the pattern.  We don't need
4782              to push such a point since we obviously won't find any more
4783              fastmap entries beyond `pend'.  Such a pattern can match
4784              the null string, though.  */
4785           if (p + j < pend)
4786             {
4787               if (!PUSH_PATTERN_OP (p + j, fail_stack))
4788 		{
		  /* Could not record the alternative: give up with the
		     internal-error code.  */
4789 		  RESET_FAIL_STACK ();
4790 		  return -2;
4791 		}
4792             }
4793           else
4794             bufp->can_be_null = 1;
4795 
	  /* Reached from `succeed_n' below: step over its count operand
	     as well.  */
4796           if (succeed_n_p)
4797             {
4798               EXTRACT_NUMBER_AND_INCR (k, p);	/* Skip the n.  */
4799               succeed_n_p = false;
4800 	    }
4801 
4802           continue;
4803 
4804 
4805 	case succeed_n:
4806           /* Get to the number of times to succeed.  */
4807           p += OFFSET_ADDRESS_SIZE;
4808 
4809           /* Increment p past the n for when k != 0.  */
4810           EXTRACT_NUMBER_AND_INCR (k, p);
4811           if (k == 0)
4812 	    {
	      /* A zero count behaves like on_failure_jump: rewind to
		 the jump offset and share that code.  */
4813               p -= 2 * OFFSET_ADDRESS_SIZE;
4814   	      succeed_n_p = true;  /* Spaghetti code alert.  */
4815               goto handle_on_failure_jump;
4816             }
4817           continue;
4818 
4819 
4820 	case set_number_at:
4821           p += 2 * OFFSET_ADDRESS_SIZE;
4822           continue;
4823 
4824 
4825 	case start_memory:
4826         case stop_memory:
4827 	  p += 2;
4828 	  continue;
4829 
4830 
4831 	default:
4832           abort (); /* We have listed all the cases.  */
4833         } /* switch *p++ */
4834 
4835       /* Getting here means we have found the possible starting
4836          characters for one path of the pattern -- and that the empty
4837          string does not match.  We need not follow this path further.
4838          Instead, look at the next alternative (remembered on the
4839          stack), or quit if no more.  The test at the top of the loop
4840          does these things.  */
4841       path_can_be_null = false;
4842       p = pend;
4843     } /* while p */
4844 
4845   /* Set `can_be_null' for the last path (also the first path, if the
4846      pattern is empty).  */
4847   bufp->can_be_null |= path_can_be_null;
4848 
4849  done:
4850   RESET_FAIL_STACK ();
4851   return 0;
4852 }
4853 
4854 #else /* not INSIDE_RECURSION */
4855 
/* Public entry point: build the fastmap for BUFP by handing it to the
   wide-character worker when MBS_SUPPORT is compiled in and the current
   locale is multibyte (MB_CUR_MAX != 1), and to the byte worker
   otherwise.  The worker's return value is passed through unchanged.  */
int
re_compile_fastmap (struct re_pattern_buffer *bufp)
{
# ifdef MBS_SUPPORT
  if (MB_CUR_MAX != 1)
    return wcs_re_compile_fastmap (bufp);
# endif

  return byte_re_compile_fastmap (bufp);
} /* re_compile_fastmap */
4866 #ifdef _LIBC
weak_alias(__re_compile_fastmap,re_compile_fastmap)4867 weak_alias (__re_compile_fastmap, re_compile_fastmap)
4868 #endif
4869 
4870 
4871 /* Set REGS to hold NUM_REGS registers, storing them in STARTS and
4872    ENDS.  Subsequent matches using PATTERN_BUFFER and REGS will use
4873    this memory for recording register information.  STARTS and ENDS
4874    must be allocated using the malloc library routine, and must each
4875    be at least NUM_REGS * sizeof (regoff_t) bytes long.
4876 
4877    If NUM_REGS == 0, then subsequent matches should allocate their own
4878    register data.
4879 
4880    Unless this function is called, the first search or match using
4881    PATTERN_BUFFER will allocate its own register data, without
4882    freeing the old data.  */
4883 
4884 void
4885 re_set_registers (struct re_pattern_buffer *bufp,
4886 		  struct re_registers *regs,
4887 		  unsigned int num_regs,
4888 		  regoff_t *starts, regoff_t *ends)
4889 {
4890   if (num_regs)
4891     {
4892       bufp->regs_allocated = REGS_REALLOCATE;
4893       regs->num_regs = num_regs;
4894       regs->start = starts;
4895       regs->end = ends;
4896     }
4897   else
4898     {
4899       bufp->regs_allocated = REGS_UNALLOCATED;
4900       regs->num_regs = 0;
4901       regs->start = regs->end = (regoff_t *) 0;
4902     }
4903 }
4904 #ifdef _LIBC
weak_alias(__re_set_registers,re_set_registers)4905 weak_alias (__re_set_registers, re_set_registers)
4906 #endif
4907 
4908 /* Searching routines.  */
4909 
/* Single-string convenience form of re_search_2.  STRING (SIZE bytes)
   is passed as the second string with an empty first string, and the
   stop index is SIZE, i.e. there is no separate limit on where matching
   may end.  The result is whatever re_search_2 returns.  */

int
re_search (struct re_pattern_buffer *bufp,
	   const char *string,
	   int size, int startpos, int range,
	   struct re_registers *regs)
{
  const char *no_first_string = NULL;
  const int stop_at = size;

  return re_search_2 (bufp, no_first_string, 0, string, size,
		      startpos, range, regs, stop_at);
}
4922 #ifdef _LIBC
weak_alias(__re_search,re_search)4923 weak_alias (__re_search, re_search)
4924 #endif
4925 
4926 
/* Search for the compiled pattern in BUFP->buffer within the virtual
   concatenation of STRING1 (SIZE1 bytes) and STRING2 (SIZE2 bytes).

   A match is attempted first at index STARTPOS, then at STARTPOS + 1,
   and so on.  RANGE says how far to scan: RANGE = 0 tries only
   STARTPOS, and in general the last start position tried is
   STARTPOS + RANGE.

   REGS receives the indices, relative to the virtual concatenation, of
   the text matched by the whole pattern and by its subexpressions.

   Matching does not consider anything past index STOP of the virtual
   concatenation.

   Returns the position at which the match was found, -1 if there is no
   match, or -2 on error (such as failure stack overflow).

   The work is done by the wide-character worker when MBS_SUPPORT is
   compiled in and the locale is multibyte (MB_CUR_MAX != 1), and by the
   byte worker otherwise.  */

int
re_search_2 (struct re_pattern_buffer *bufp,
	     const char *string1, int size1,
	     const char *string2, int size2,
	     int startpos, int range,
	     struct re_registers *regs,
	     int stop)
{
# ifdef MBS_SUPPORT
  if (MB_CUR_MAX != 1)
    return wcs_re_search_2 (bufp, string1, size1, string2, size2,
			    startpos, range, regs, stop);
# endif

  return byte_re_search_2 (bufp, string1, size1, string2, size2,
			   startpos, range, regs, stop);
} /* re_search_2 */
4965 #ifdef _LIBC
weak_alias(__re_search_2,re_search_2)4966 weak_alias (__re_search_2, re_search_2)
4967 #endif
4968 
4969 #endif /* not INSIDE_RECURSION */
4970 
4971 #ifdef INSIDE_RECURSION
4972 
/* Release VAR if it is set, then reset it to NULL.  VAR is evaluated
   more than once, so it must be a plain lvalue without side effects.

   The body is wrapped in do { } while (0) so that the macro expands to
   exactly one statement: it can be used as the branch of an unbraced
   `if' or `else' without the assignment escaping the condition.  */
#ifdef MATCH_MAY_ALLOCATE
# define FREE_VAR(var)							\
  do { if (var) REGEX_FREE (var); (var) = NULL; } while (0)
#else
# define FREE_VAR(var)							\
  do { if (var) free (var); (var) = NULL; } while (0)
#endif
4978 
4979 #ifdef WCHAR
/* Threshold on the byte length of an input string: the search routine
   below uses TALLOC for the wide-character work buffers of a string
   longer than this and REGEX_TALLOC for shorter ones.  */
4980 # define MAX_ALLOCA_SIZE	2000
4981 
/* Release the wide-character copies and offset tables of both input
   strings.  For each string the release method mirrors the way the
   buffers were obtained: plain free() when the string is longer than
   MAX_ALLOCA_SIZE, FREE_VAR otherwise.  The macro refers to size1,
   size2, wcs_string1/2 and mbs_offset1/2 by name, so it can only be
   used where those identifiers are in scope.  */
4982 # define FREE_WCS_BUFFERS() \
4983   do {									      \
4984     if (size1 > MAX_ALLOCA_SIZE)					      \
4985       {									      \
4986 	free (wcs_string1);						      \
4987 	free (mbs_offset1);						      \
4988       }									      \
4989     else								      \
4990       {									      \
4991 	FREE_VAR (wcs_string1);						      \
4992 	FREE_VAR (mbs_offset1);						      \
4993       }									      \
4994     if (size2 > MAX_ALLOCA_SIZE) 					      \
4995       {									      \
4996 	free (wcs_string2);						      \
4997 	free (mbs_offset2);						      \
4998       }									      \
4999     else								      \
5000       {									      \
5001 	FREE_VAR (wcs_string2);						      \
5002 	FREE_VAR (mbs_offset2);						      \
5003       }									      \
5004   } while (0)
5005 
5006 #endif
5007 
5008 
5009 static int
5010 PREFIX(re_search_2) (struct re_pattern_buffer *bufp,
5011 		     const char *string1, int size1,
5012 		     const char *string2, int size2,
5013 		     int startpos, int range,
5014 		     struct re_registers *regs,
5015 		     int stop)
5016 {
5017   int val;
5018   register char *fastmap = bufp->fastmap;
5019   register RE_TRANSLATE_TYPE translate = bufp->translate;
5020   int total_size = size1 + size2;
5021   int endpos = startpos + range;
5022 #ifdef WCHAR
5023   /* We need wchar_t* buffers correspond to cstring1, cstring2.  */
5024   wchar_t *wcs_string1 = NULL, *wcs_string2 = NULL;
5025   /* We need the size of wchar_t buffers correspond to csize1, csize2.  */
5026   int wcs_size1 = 0, wcs_size2 = 0;
5027   /* offset buffer for optimizatoin. See convert_mbs_to_wc.  */
5028   int *mbs_offset1 = NULL, *mbs_offset2 = NULL;
5029   /* They hold whether each wchar_t is binary data or not.  */
5030   char *is_binary = NULL;
5031 #endif /* WCHAR */
5032 
5033   /* Check for out-of-range STARTPOS.  */
5034   if (startpos < 0 || startpos > total_size)
5035     return -1;
5036 
5037   /* Fix up RANGE if it might eventually take us outside
5038      the virtual concatenation of STRING1 and STRING2.
5039      Make sure we won't move STARTPOS below 0 or above TOTAL_SIZE.  */
5040   if (endpos < 0)
5041     range = 0 - startpos;
5042   else if (endpos > total_size)
5043     range = total_size - startpos;
5044 
5045   /* If the search isn't to be a backwards one, don't waste time in a
5046      search for a pattern that must be anchored.  */
5047   if (bufp->used > 0 && range > 0
5048       && ((re_opcode_t) bufp->buffer[0] == begbuf
5049 	  /* `begline' is like `begbuf' if it cannot match at newlines.  */
5050 	  || ((re_opcode_t) bufp->buffer[0] == begline
5051 	      && !bufp->newline_anchor)))
5052     {
5053       if (startpos > 0)
5054 	return -1;
5055       else
5056 	range = 1;
5057     }
5058 
5059 #ifdef emacs
5060   /* In a forward search for something that starts with \=.
5061      don't keep searching past point.  */
5062   if (bufp->used > 0 && (re_opcode_t) bufp->buffer[0] == at_dot && range > 0)
5063     {
5064       range = PT - startpos;
5065       if (range <= 0)
5066 	return -1;
5067     }
5068 #endif /* emacs */
5069 
5070   /* Update the fastmap now if not correct already.  */
5071   if (fastmap && !bufp->fastmap_accurate)
5072     if (re_compile_fastmap (bufp) == -2)
5073       return -2;
5074 
5075 #ifdef WCHAR
5076   /* Allocate wchar_t array for wcs_string1 and wcs_string2 and
5077      fill them with converted string.  */
5078   if (size1 != 0)
5079     {
5080       if (size1 > MAX_ALLOCA_SIZE)
5081 	{
5082 	  wcs_string1 = TALLOC (size1 + 1, CHAR_T);
5083 	  mbs_offset1 = TALLOC (size1 + 1, int);
5084 	  is_binary = TALLOC (size1 + 1, char);
5085 	}
5086       else
5087 	{
5088 	  wcs_string1 = REGEX_TALLOC (size1 + 1, CHAR_T);
5089 	  mbs_offset1 = REGEX_TALLOC (size1 + 1, int);
5090 	  is_binary = REGEX_TALLOC (size1 + 1, char);
5091 	}
5092       if (!wcs_string1 || !mbs_offset1 || !is_binary)
5093 	{
5094 	  if (size1 > MAX_ALLOCA_SIZE)
5095 	    {
5096 	      free (wcs_string1);
5097 	      free (mbs_offset1);
5098 	      free (is_binary);
5099 	    }
5100 	  else
5101 	    {
5102 	      FREE_VAR (wcs_string1);
5103 	      FREE_VAR (mbs_offset1);
5104 	      FREE_VAR (is_binary);
5105 	    }
5106 	  return -2;
5107 	}
5108       wcs_size1 = convert_mbs_to_wcs(wcs_string1, string1, size1,
5109 				     mbs_offset1, is_binary);
5110       wcs_string1[wcs_size1] = L'\0'; /* for a sentinel  */
5111       if (size1 > MAX_ALLOCA_SIZE)
5112 	free (is_binary);
5113       else
5114 	FREE_VAR (is_binary);
5115     }
5116   if (size2 != 0)
5117     {
5118       if (size2 > MAX_ALLOCA_SIZE)
5119 	{
5120 	  wcs_string2 = TALLOC (size2 + 1, CHAR_T);
5121 	  mbs_offset2 = TALLOC (size2 + 1, int);
5122 	  is_binary = TALLOC (size2 + 1, char);
5123 	}
5124       else
5125 	{
5126 	  wcs_string2 = REGEX_TALLOC (size2 + 1, CHAR_T);
5127 	  mbs_offset2 = REGEX_TALLOC (size2 + 1, int);
5128 	  is_binary = REGEX_TALLOC (size2 + 1, char);
5129 	}
5130       if (!wcs_string2 || !mbs_offset2 || !is_binary)
5131 	{
5132 	  FREE_WCS_BUFFERS ();
5133 	  if (size2 > MAX_ALLOCA_SIZE)
5134 	    free (is_binary);
5135 	  else
5136 	    FREE_VAR (is_binary);
5137 	  return -2;
5138 	}
5139       wcs_size2 = convert_mbs_to_wcs(wcs_string2, string2, size2,
5140 				     mbs_offset2, is_binary);
5141       wcs_string2[wcs_size2] = L'\0'; /* for a sentinel  */
5142       if (size2 > MAX_ALLOCA_SIZE)
5143 	free (is_binary);
5144       else
5145 	FREE_VAR (is_binary);
5146     }
5147 #endif /* WCHAR */
5148 
5149 
5150   /* Loop through the string, looking for a place to start matching.  */
5151   for (;;)
5152     {
5153       /* If a fastmap is supplied, skip quickly over characters that
5154          cannot be the start of a match.  If the pattern can match the
5155          null string, however, we don't need to skip characters; we want
5156          the first null string.  */
5157       if (fastmap && startpos < total_size && !bufp->can_be_null)
5158 	{
5159 	  if (range > 0)	/* Searching forwards.  */
5160 	    {
5161 	      register const char *d;
5162 	      register int lim = 0;
5163 	      int irange = range;
5164 
5165               if (startpos < size1 && startpos + range >= size1)
5166                 lim = range - (size1 - startpos);
5167 
5168 	      d = (startpos >= size1 ? string2 - size1 : string1) + startpos;
5169 
5170               /* Written out as an if-else to avoid testing `translate'
5171                  inside the loop.  */
5172 	      if (translate)
5173                 while (range > lim
5174                        && !fastmap[(unsigned char)
5175 				   translate[(unsigned char) *d++]])
5176                   range--;
5177 	      else
5178                 while (range > lim && !fastmap[(unsigned char) *d++])
5179                   range--;
5180 
5181 	      startpos += irange - range;
5182 	    }
5183 	  else				/* Searching backwards.  */
5184 	    {
5185 	      register CHAR_T c = (size1 == 0 || startpos >= size1
5186 				      ? string2[startpos - size1]
5187 				      : string1[startpos]);
5188 
5189 	      if (!fastmap[(unsigned char) TRANSLATE (c)])
5190 		goto advance;
5191 	    }
5192 	}
5193 
5194       /* If can't match the null string, and that's all we have left, fail.  */
5195       if (range >= 0 && startpos == total_size && fastmap
5196           && !bufp->can_be_null)
5197        {
5198 #ifdef WCHAR
5199          FREE_WCS_BUFFERS ();
5200 #endif
5201          return -1;
5202        }
5203 
5204 #ifdef WCHAR
5205       val = wcs_re_match_2_internal (bufp, string1, size1, string2,
5206 				     size2, startpos, regs, stop,
5207 				     wcs_string1, wcs_size1,
5208 				     wcs_string2, wcs_size2,
5209 				     mbs_offset1, mbs_offset2);
5210 #else /* BYTE */
5211       val = byte_re_match_2_internal (bufp, string1, size1, string2,
5212 				      size2, startpos, regs, stop);
5213 #endif /* BYTE */
5214 
5215 #ifndef REGEX_MALLOC
5216 # ifdef C_ALLOCA
5217       alloca (0);
5218 # endif
5219 #endif
5220 
5221       if (val >= 0)
5222 	{
5223 #ifdef WCHAR
5224 	  FREE_WCS_BUFFERS ();
5225 #endif
5226 	  return startpos;
5227 	}
5228 
5229       if (val == -2)
5230 	{
5231 #ifdef WCHAR
5232 	  FREE_WCS_BUFFERS ();
5233 #endif
5234 	  return -2;
5235 	}
5236 
5237     advance:
5238       if (!range)
5239         break;
5240       else if (range > 0)
5241         {
5242           range--;
5243           startpos++;
5244         }
5245       else
5246         {
5247           range++;
5248           startpos--;
5249         }
5250     }
5251 #ifdef WCHAR
5252   FREE_WCS_BUFFERS ();
5253 #endif
5254   return -1;
5255 }
5256 
5257 #ifdef WCHAR
5258 /* This converts PTR, a pointer into one of the search wchar_t strings
5259    `string1' and `string2' into a multibyte string offset from the
5260    beginning of that string. We use mbs_offset to optimize.
5261    See convert_mbs_to_wcs.  */
/* A missing offset table (NULL) counts as offset 0.  For a pointer into
   string2 the result is shifted by csize1.  The macro relies on
   FIRST_STRING_P, string1, string2, mbs_offset1, mbs_offset2 and csize1
   being visible where it is expanded.  */
5262 # define POINTER_TO_OFFSET(ptr)						\
5263   (FIRST_STRING_P (ptr)							\
5264    ? ((regoff_t)(mbs_offset1 != NULL? mbs_offset1[(ptr)-string1] : 0))	\
5265    : ((regoff_t)((mbs_offset2 != NULL? mbs_offset2[(ptr)-string2] : 0)	\
5266 		 + csize1)))
5267 #else /* BYTE */
5268 /* This converts PTR, a pointer into one of the search strings `string1'
5269    and `string2' into an offset from the beginning of that string.  */
/* For a pointer into string2 the result is shifted by size1.  The macro
   relies on FIRST_STRING_P, string1, string2 and size1 being visible
   where it is expanded.  */
5270 # define POINTER_TO_OFFSET(ptr)			\
5271   (FIRST_STRING_P (ptr)				\
5272    ? ((regoff_t) ((ptr) - string1))		\
5273    : ((regoff_t) ((ptr) - string2 + size1)))
5274 #endif /* WCHAR */
5275 
/* Helpers for walking the two-part (string1, string2) subject in
   re_match_2.  */

/* Nonzero while the segment currently being scanned ends at
   end_match_1.  */
#define MATCHING_IN_FIRST_STRING  (end_match_1 == dend)

/* Invoke before reading a character through *d.  When d has reached the
   end of the current segment this either moves on to string2 or, if
   string2 is already exhausted, jumps to `fail'.  */
#define PREFETCH()							\
  while (d == dend)							\
    {									\
      if (dend == end_match_2)						\
	goto fail;	/* Nothing left in string2 either.  */		\
      /* string1 is used up: carry on at the start of string2.  */	\
      d = string2;							\
      dend = end_match_2;						\
    }

/* Tests for the very beginning and the very end of the virtual
   concatenation of `string1' and `string2'.  When there is only one
   string, it is `string2'.  */
#define AT_STRINGS_BEG(d) ((size1 ? string1 : string2) == (d) || size2 == 0)
#define AT_STRINGS_END(d) (end2 == (d))
5297 
5298 
/* Nonzero if D points at a word-constituent character.  Two positions
   need redirecting before the character is fetched: when D is at the
   end of string1 the character examined is the first one of string2,
   and when D is one before string2 it is the last one of string1.  */
#ifdef WCHAR
/* Use internationalized API instead of SYNTAX.  */
# define WORDCHAR_P(d)							\
  (iswalnum ((wint_t) ((d) == end1					\
		       ? *string2					\
		       : ((d) == string2 - 1 ? *(end1 - 1) : *(d))))	\
   || L'_' == ((d) == end1						\
	       ? *string2						\
	       : ((d) == string2 - 1 ? *(end1 - 1) : *(d))))
#else /* BYTE */
# define WORDCHAR_P(d)							\
  (Sword == SYNTAX ((d) == end1						\
		    ? *string2						\
		    : ((d) == string2 - 1 ? *(end1 - 1) : *(d))))
#endif /* WCHAR */
5316 
/* Disabled due to a compiler bug -- see comment at case wordbound */
#if 0
/* Nonzero if D is at either end of the strings, or if the character
   before D and the character at D disagree about being
   word-constituent.  */
#define AT_WORD_BOUNDARY(d)						\
  (AT_STRINGS_BEG (d)							\
   || AT_STRINGS_END (d)						\
   || WORDCHAR_P (d - 1) != WORDCHAR_P (d))
#endif
5325 
/* Release everything the matcher allocated.  With MATCH_MAY_ALLOCATE
   that is the failure stack and the register vectors.  The
   wide-character build also releases the converted strings and their
   offset tables, unless cant_free_wcs_buf says they are not ours to
   free.  */
#ifdef WCHAR
# ifdef MATCH_MAY_ALLOCATE
#  define FREE_VARIABLES()						\
  do									\
    {									\
      REGEX_FREE_STACK (fail_stack.stack);				\
      FREE_VAR (regstart);						\
      FREE_VAR (regend);						\
      FREE_VAR (old_regstart);						\
      FREE_VAR (old_regend);						\
      FREE_VAR (best_regstart);						\
      FREE_VAR (best_regend);						\
      FREE_VAR (reg_info);						\
      FREE_VAR (reg_dummy);						\
      FREE_VAR (reg_info_dummy);					\
      if (!cant_free_wcs_buf)						\
	{								\
	  FREE_VAR (string1);						\
	  FREE_VAR (string2);						\
	  FREE_VAR (mbs_offset1);					\
	  FREE_VAR (mbs_offset2);					\
	}								\
    }									\
  while (0)
# else /* not MATCH_MAY_ALLOCATE */
#  define FREE_VARIABLES()						\
  do									\
    {									\
      if (!cant_free_wcs_buf)						\
	{								\
	  FREE_VAR (string1);						\
	  FREE_VAR (string2);						\
	  FREE_VAR (mbs_offset1);					\
	  FREE_VAR (mbs_offset2);					\
	}								\
    }									\
  while (0)
# endif /* not MATCH_MAY_ALLOCATE */
#else /* BYTE */
# ifdef MATCH_MAY_ALLOCATE
#  define FREE_VARIABLES()						\
  do									\
    {									\
      REGEX_FREE_STACK (fail_stack.stack);				\
      FREE_VAR (regstart);						\
      FREE_VAR (regend);						\
      FREE_VAR (old_regstart);						\
      FREE_VAR (old_regend);						\
      FREE_VAR (best_regstart);						\
      FREE_VAR (best_regend);						\
      FREE_VAR (reg_info);						\
      FREE_VAR (reg_dummy);						\
      FREE_VAR (reg_info_dummy);					\
    }									\
  while (0)
# else /* not MATCH_MAY_ALLOCATE */
#  define FREE_VARIABLES() ((void)0) /* Do nothing!  But inhibit gcc warning. */
# endif /* not MATCH_MAY_ALLOCATE */
#endif /* WCHAR */
5380 
/* Sentinels meaning "no register is active".  Constraints on the values:
   - they must not be valid register numbers; a register number occupies
     one byte of the pattern, so there are at most 255 registers and
     anything above 255 is safe;
   - they must differ by exactly 1, because of NUM_FAILURE_ITEMS above;
   - the "lowest" sentinel must exceed the "highest" one, so that no
     registers are saved when none are active.  */
#define NO_HIGHEST_ACTIVE_REG (1 << BYTEWIDTH)
#define NO_LOWEST_ACTIVE_REG (1 + NO_HIGHEST_ACTIVE_REG)
5390 
5391 #else /* not INSIDE_RECURSION */
5392 /* Matching routines.  */
5393 
5394 #ifndef emacs   /* Emacs never uses this.  */
5395 /* re_match is like re_match_2 except it takes only a single string.  */
5396 
5397 int
5398 re_match (struct re_pattern_buffer *bufp,
5399 	  const char *string,
5400 	  int size, int pos,
5401 	  struct re_registers *regs)
5402 {
5403   int result;
5404 # ifdef MBS_SUPPORT
5405   if (MB_CUR_MAX != 1)
5406     result = wcs_re_match_2_internal (bufp, NULL, 0, string, size,
5407 				      pos, regs, size,
5408 				      NULL, 0, NULL, 0, NULL, NULL);
5409   else
5410 # endif
5411     result = byte_re_match_2_internal (bufp, NULL, 0, string, size,
5412 				  pos, regs, size);
5413 # ifndef REGEX_MALLOC
5414 #  ifdef C_ALLOCA
5415   alloca (0);
5416 #  endif
5417 # endif
5418   return result;
5419 }
5420 # ifdef _LIBC
5421 weak_alias (__re_match, re_match)
5422 # endif
5423 #endif /* not emacs */
5424 
5425 #endif /* not INSIDE_RECURSION */
5426 
5427 #ifdef INSIDE_RECURSION
5428 static boolean PREFIX(group_match_null_string_p) (UCHAR_T **p,
5429 						  UCHAR_T *end,
5430 					PREFIX(register_info_type) *reg_info);
5431 static boolean PREFIX(alt_match_null_string_p) (UCHAR_T *p,
5432 						UCHAR_T *end,
5433 					PREFIX(register_info_type) *reg_info);
5434 static boolean PREFIX(common_op_match_null_string_p) (UCHAR_T **p,
5435 						      UCHAR_T *end,
5436 					PREFIX(register_info_type) *reg_info);
5437 static int PREFIX(bcmp_translate) (const CHAR_T *s1, const CHAR_T *s2,
5438 				   int len, char *translate);
5439 #else /* not INSIDE_RECURSION */
5440 
/* re_match_2 matches the compiled pattern in BUFP against the
   (virtual) concatenation of STRING1 and STRING2 (of length SIZE1
   and SIZE2, respectively).  We start matching at POS, and stop
   matching at STOP.

   If REGS is non-null and the `no_sub' field of BUFP is zero, we
   store offsets for the substring each group matched in REGS.  See the
   documentation for exactly how many groups we fill.

   We return -1 if no match, -2 if an internal error (such as the
   failure stack overflowing).  Otherwise, we return the length of the
   matched substring.  */
5453 
int
re_match_2 (struct re_pattern_buffer *bufp,
	    const char *string1, int size1,
	    const char *string2, int size2,
	    int pos, struct re_registers *regs,
	    int stop)
{
  int match_status;

# ifdef MBS_SUPPORT
  /* When MB_CUR_MAX is not 1, use the wide-character matcher.  No
     wchar_t buffers are supplied, so it sets them up itself.  */
  if (MB_CUR_MAX != 1)
    match_status = wcs_re_match_2_internal (bufp, string1, size1,
					    string2, size2,
					    pos, regs, stop,
					    NULL, 0, NULL, 0, NULL, NULL);
  else
# endif
    match_status = byte_re_match_2_internal (bufp, string1, size1,
					     string2, size2,
					     pos, regs, stop);

#if !defined REGEX_MALLOC && defined C_ALLOCA
  /* Force an alloca cleanup now that the matcher has returned.  */
  alloca (0);
#endif

  return match_status;
}
#ifdef _LIBC
weak_alias (__re_match_2, re_match_2)
#endif
5482 
5483 #endif /* not INSIDE_RECURSION */
5484 
5485 #ifdef INSIDE_RECURSION
5486 
5487 #ifdef WCHAR
5488 
/* Count how many wchar_t characters the first LENGTH bytes of a
   multibyte string occupy.  OFFSET_BUFFER is the offset table that
   corresponds to that string: entry i holds the multibyte offset of
   wide character i (see convert_mbs_to_wcs), so the answer is the index
   whose entry equals LENGTH.

   Returns -1 if LENGTH is negative or if no entry equals LENGTH, and 0
   if OFFSET_BUFFER is NULL.  Entry LENGTH of the table is read, so the
   table must have at least LENGTH + 1 elements.  */

static int
count_mbs_length (int *offset_buffer, int length)
{
  int lower, upper;

  /* Check whether the size is valid.  */
  if (length < 0)
    return -1;

  if (offset_buffer == NULL)
    return 0;

  /* If there are no multibyte characters, offset_buffer[i] == i.
     Optimize for this case.  */
  if (offset_buffer[length] == length)
    return length;

  /* Bisect between 0 and LENGTH; LENGTH is a valid upper bound because
     for all i, offset_buffer[i] >= i.  The bisection relies on the
     entries being in increasing order.  Neither bound itself is ever
     probed inside the loop.  */
  lower = 0;
  upper = length;

  while (upper - lower > 1)
    {
      /* Written this way so the midpoint cannot overflow `int', which
	 `(lower + upper) / 2' could for very large LENGTH.  */
      int middle = lower + (upper - lower) / 2;
      int offset = offset_buffer[middle];

      if (offset == length)
	return middle;

      if (offset > length)
	upper = middle;
      else
	lower = middle;
    }

  /* No entry equals LENGTH.  */
  return -1;
}
5530 #endif /* WCHAR */
5531 
5532 /* This is a separate function so that we can force an alloca cleanup
5533    afterwards.  */
5534 #ifdef WCHAR
5535 static int
wcs_re_match_2_internal(struct re_pattern_buffer * bufp,const char * cstring1,int csize1,const char * cstring2,int csize2,int pos,struct re_registers * regs,int stop,wchar_t * string1,int size1,wchar_t * string2,int size2,int * mbs_offset1,int * mbs_offset2)5536 wcs_re_match_2_internal (struct re_pattern_buffer *bufp,
5537 			 const char *cstring1, int csize1,
5538 			 const char *cstring2, int csize2,
5539 			 int pos,
5540 			 struct re_registers *regs,
5541 			 int stop,
5542 			 /* string1 == string2 == NULL means
5543 			    string1/2, size1/2 and mbs_offset1/2 need
5544 			    setting up in this function.  */
5545 			 /* We need wchar_t * buffers corresponding to
5546 			    cstring1, cstring2.  */
5547 			 wchar_t *string1, int size1,
5548 			 wchar_t *string2, int size2,
5549 			 /* Offset buffer for optimization.  See
5550 			    convert_mbs_to_wc.  */
5551 			 int *mbs_offset1,
5552 			 int *mbs_offset2)
5553 #else /* BYTE */
5554 static int
5555 byte_re_match_2_internal (struct re_pattern_buffer *bufp,
5556 			  const char *string1, int size1,
5557 			  const char *string2, int size2,
5558 			  int pos,
5559 			  struct re_registers *regs,
5560 			  int stop)
5561 #endif /* BYTE */
5562 {
5563   /* General temporaries.  */
5564   int mcnt;
5565   UCHAR_T *p1;
5566 #ifdef WCHAR
5567   /* They hold whether each wchar_t is binary data or not.  */
5568   char *is_binary = NULL;
5569   /* If true, we can't free string1/2, mbs_offset1/2.  */
5570   int cant_free_wcs_buf = 1;
5571 #endif /* WCHAR */
5572 
5573   /* Just past the end of the corresponding string.  */
5574   const CHAR_T *end1, *end2;
5575 
5576   /* Pointers into string1 and string2, just past the last characters in
5577      each to consider matching.  */
5578   const CHAR_T *end_match_1, *end_match_2;
5579 
5580   /* Where we are in the data, and the end of the current string.  */
5581   const CHAR_T *d, *dend;
5582 
5583   /* Where we are in the pattern, and the end of the pattern.  */
5584 #ifdef WCHAR
5585   UCHAR_T *pattern, *p;
5586   register UCHAR_T *pend;
5587 #else /* BYTE */
5588   UCHAR_T *p = bufp->buffer;
5589   register UCHAR_T *pend = p + bufp->used;
5590 #endif /* WCHAR */
5591 
5592   /* Mark the opcode just after a start_memory, so we can test for an
5593      empty subpattern when we get to the stop_memory.  */
5594   UCHAR_T *just_past_start_mem = 0;
5595 
5596   /* We use this to map every character in the string.  */
5597   RE_TRANSLATE_TYPE translate = bufp->translate;
5598 
5599   /* Failure point stack.  Each place that can handle a failure further
5600      down the line pushes a failure point on this stack.  It consists of
5601      restart, regend, and reg_info for all registers corresponding to
5602      the subexpressions we're currently inside, plus the number of such
5603      registers, and, finally, two char *'s.  The first char * is where
5604      to resume scanning the pattern; the second one is where to resume
5605      scanning the strings.  If the latter is zero, the failure point is
5606      a ``dummy''; if a failure happens and the failure point is a dummy,
5607      it gets discarded and the next next one is tried.  */
5608 #ifdef MATCH_MAY_ALLOCATE /* otherwise, this is global.  */
5609   PREFIX(fail_stack_type) fail_stack;
5610 #endif
5611 #ifdef DEBUG
5612   static unsigned failure_id;
5613   unsigned nfailure_points_pushed = 0, nfailure_points_popped = 0;
5614 #endif
5615 
5616 #ifdef REL_ALLOC
5617   /* This holds the pointer to the failure stack, when
5618      it is allocated relocatably.  */
5619   fail_stack_elt_t *failure_stack_ptr;
5620 #endif
5621 
5622   /* We fill all the registers internally, independent of what we
5623      return, for use in backreferences.  The number here includes
5624      an element for register zero.  */
5625   size_t num_regs = bufp->re_nsub + 1;
5626 
5627   /* The currently active registers.  */
5628   active_reg_t lowest_active_reg = NO_LOWEST_ACTIVE_REG;
5629   active_reg_t highest_active_reg = NO_HIGHEST_ACTIVE_REG;
5630 
5631   /* Information on the contents of registers. These are pointers into
5632      the input strings; they record just what was matched (on this
5633      attempt) by a subexpression part of the pattern, that is, the
5634      regnum-th regstart pointer points to where in the pattern we began
5635      matching and the regnum-th regend points to right after where we
5636      stopped matching the regnum-th subexpression.  (The zeroth register
5637      keeps track of what the whole pattern matches.)  */
5638 #ifdef MATCH_MAY_ALLOCATE /* otherwise, these are global.  */
5639   const CHAR_T **regstart, **regend;
5640 #endif
5641 
5642   /* If a group that's operated upon by a repetition operator fails to
5643      match anything, then the register for its start will need to be
5644      restored because it will have been set to wherever in the string we
5645      are when we last see its open-group operator.  Similarly for a
5646      register's end.  */
5647 #ifdef MATCH_MAY_ALLOCATE /* otherwise, these are global.  */
5648   const CHAR_T **old_regstart, **old_regend;
5649 #endif
5650 
5651   /* The is_active field of reg_info helps us keep track of which (possibly
5652      nested) subexpressions we are currently in. The matched_something
5653      field of reg_info[reg_num] helps us tell whether or not we have
5654      matched any of the pattern so far this time through the reg_num-th
5655      subexpression.  These two fields get reset each time through any
5656      loop their register is in.  */
5657 #ifdef MATCH_MAY_ALLOCATE /* otherwise, this is global.  */
5658   PREFIX(register_info_type) *reg_info;
5659 #endif
5660 
5661   /* The following record the register info as found in the above
5662      variables when we find a match better than any we've seen before.
5663      This happens as we backtrack through the failure points, which in
5664      turn happens only if we have not yet matched the entire string. */
5665   unsigned best_regs_set = false;
5666 #ifdef MATCH_MAY_ALLOCATE /* otherwise, these are global.  */
5667   const CHAR_T **best_regstart, **best_regend;
5668 #endif
5669 
5670   /* Logically, this is `best_regend[0]'.  But we don't want to have to
5671      allocate space for that if we're not allocating space for anything
5672      else (see below).  Also, we never need info about register 0 for
5673      any of the other register vectors, and it seems rather a kludge to
5674      treat `best_regend' differently than the rest.  So we keep track of
5675      the end of the best match so far in a separate variable.  We
5676      initialize this to NULL so that when we backtrack the first time
5677      and need to test it, it's not garbage.  */
5678   const CHAR_T *match_end = NULL;
5679 
5680   /* This helps SET_REGS_MATCHED avoid doing redundant work.  */
5681   int set_regs_matched_done = 0;
5682 
5683   /* Used when we pop values we don't care about.  */
5684 #ifdef MATCH_MAY_ALLOCATE /* otherwise, these are global.  */
5685   const CHAR_T **reg_dummy;
5686   PREFIX(register_info_type) *reg_info_dummy;
5687 #endif
5688 
5689 #ifdef DEBUG
5690   /* Counts the total number of registers pushed.  */
5691   unsigned num_regs_pushed = 0;
5692 #endif
5693 
5694   /* Definitions for state transitions.  More efficiently for gcc.  */
5695 #ifdef __GNUC__
5696 # if defined HAVE_SUBTRACT_LOCAL_LABELS && defined SHARED
5697 #  define NEXT \
5698       do								      \
5699 	{								      \
5700 	  int offset;							      \
5701 	  const void *__unbounded ptr;					      \
5702 	  offset = (p == pend						      \
5703 		    ? 0 : jmptable[SWITCH_ENUM_CAST ((re_opcode_t) *p++)]);   \
5704 	  ptr = &&end_of_pattern + offset;				      \
5705 	  goto *ptr;							      \
5706 	}								      \
5707       while (0)
5708 #  define REF(x) \
5709   &&label_##x - &&end_of_pattern
5710 #  define JUMP_TABLE_TYPE const int
5711 # else
5712 #  define NEXT \
5713       do								      \
5714 	{								      \
5715 	  const void *__unbounded ptr;					      \
5716 	  ptr = (p == pend ? &&end_of_pattern				      \
5717 		 : jmptable[SWITCH_ENUM_CAST ((re_opcode_t) *p++)]);	      \
5718 	  goto *ptr;							      \
5719 	}								      \
5720       while (0)
5721 #  define REF(x) \
5722   &&label_##x
5723 #  define JUMP_TABLE_TYPE const void *const
5724 # endif
5725 # define CASE(x) label_##x
5726   static JUMP_TABLE_TYPE jmptable[] =
5727     {
5728     REF (no_op),
5729     REF (succeed),
5730     REF (exactn),
5731 # ifdef MBS_SUPPORT
5732     REF (exactn_bin),
5733 # endif
5734     REF (anychar),
5735     REF (charset),
5736     REF (charset_not),
5737     REF (start_memory),
5738     REF (stop_memory),
5739     REF (duplicate),
5740     REF (begline),
5741     REF (endline),
5742     REF (begbuf),
5743     REF (endbuf),
5744     REF (jump),
5745     REF (jump_past_alt),
5746     REF (on_failure_jump),
5747     REF (on_failure_keep_string_jump),
5748     REF (pop_failure_jump),
5749     REF (maybe_pop_jump),
5750     REF (dummy_failure_jump),
5751     REF (push_dummy_failure),
5752     REF (succeed_n),
5753     REF (jump_n),
5754     REF (set_number_at),
5755     REF (wordchar),
5756     REF (notwordchar),
5757     REF (wordbeg),
5758     REF (wordend),
5759     REF (wordbound),
5760     REF (notwordbound)
5761 # ifdef emacs
5762     ,REF (before_dot),
5763     REF (at_dot),
5764     REF (after_dot),
5765     REF (syntaxspec),
5766     REF (notsyntaxspec)
5767 # endif
5768     };
5769 #else
5770 # define NEXT \
5771   break
5772 # define CASE(x) \
5773   case x
5774 #endif
5775 
5776   DEBUG_PRINT1 ("\n\nEntering re_match_2.\n");
5777 
5778   INIT_FAIL_STACK ();
5779 
5780 #ifdef MATCH_MAY_ALLOCATE
5781   /* Do not bother to initialize all the register variables if there are
5782      no groups in the pattern, as it takes a fair amount of time.  If
5783      there are groups, we include space for register 0 (the whole
5784      pattern), even though we never use it, since it simplifies the
5785      array indexing.  We should fix this.  */
5786   if (bufp->re_nsub)
5787     {
5788       regstart = REGEX_TALLOC (num_regs, const CHAR_T *);
5789       regend = REGEX_TALLOC (num_regs, const CHAR_T *);
5790       old_regstart = REGEX_TALLOC (num_regs, const CHAR_T *);
5791       old_regend = REGEX_TALLOC (num_regs, const CHAR_T *);
5792       best_regstart = REGEX_TALLOC (num_regs, const CHAR_T *);
5793       best_regend = REGEX_TALLOC (num_regs, const CHAR_T *);
5794       reg_info = REGEX_TALLOC (num_regs, PREFIX(register_info_type));
5795       reg_dummy = REGEX_TALLOC (num_regs, const CHAR_T *);
5796       reg_info_dummy = REGEX_TALLOC (num_regs, PREFIX(register_info_type));
5797 
5798       if (!(regstart && regend && old_regstart && old_regend && reg_info
5799             && best_regstart && best_regend && reg_dummy && reg_info_dummy))
5800         {
5801           FREE_VARIABLES ();
5802           return -2;
5803         }
5804     }
5805   else
5806     {
5807       /* We must initialize all our variables to NULL, so that
5808          `FREE_VARIABLES' doesn't try to free them.  */
5809       regstart = regend = old_regstart = old_regend = best_regstart
5810         = best_regend = reg_dummy = NULL;
5811       reg_info = reg_info_dummy = (PREFIX(register_info_type) *) NULL;
5812     }
5813 #endif /* MATCH_MAY_ALLOCATE */
5814 
5815   /* The starting position is bogus.  */
5816 #ifdef WCHAR
5817   if (pos < 0 || pos > csize1 + csize2)
5818 #else /* BYTE */
5819   if (pos < 0 || pos > size1 + size2)
5820 #endif
5821     {
5822       FREE_VARIABLES ();
5823       return -1;
5824     }
5825 
5826 #ifdef WCHAR
5827   /* Allocate wchar_t array for string1 and string2 and
5828      fill them with converted string.  */
5829   if (string1 == NULL && string2 == NULL)
5830     {
5831       /* We need seting up buffers here.  */
5832 
5833       /* We must free wcs buffers in this function.  */
5834       cant_free_wcs_buf = 0;
5835 
5836       if (csize1 != 0)
5837 	{
5838 	  string1 = REGEX_TALLOC (csize1 + 1, CHAR_T);
5839 	  mbs_offset1 = REGEX_TALLOC (csize1 + 1, int);
5840 	  is_binary = REGEX_TALLOC (csize1 + 1, char);
5841 	  if (!string1 || !mbs_offset1 || !is_binary)
5842 	    {
5843 	      FREE_VAR (string1);
5844 	      FREE_VAR (mbs_offset1);
5845 	      FREE_VAR (is_binary);
5846 	      return -2;
5847 	    }
5848 	}
5849       if (csize2 != 0)
5850 	{
5851 	  string2 = REGEX_TALLOC (csize2 + 1, CHAR_T);
5852 	  mbs_offset2 = REGEX_TALLOC (csize2 + 1, int);
5853 	  is_binary = REGEX_TALLOC (csize2 + 1, char);
5854 	  if (!string2 || !mbs_offset2 || !is_binary)
5855 	    {
5856 	      FREE_VAR (string1);
5857 	      FREE_VAR (mbs_offset1);
5858 	      FREE_VAR (string2);
5859 	      FREE_VAR (mbs_offset2);
5860 	      FREE_VAR (is_binary);
5861 	      return -2;
5862 	    }
5863 	  size2 = convert_mbs_to_wcs(string2, cstring2, csize2,
5864 				     mbs_offset2, is_binary);
5865 	  string2[size2] = L'\0'; /* for a sentinel  */
5866 	  FREE_VAR (is_binary);
5867 	}
5868     }
5869 
5870   /* We need to cast pattern to (wchar_t*), because we casted this compiled
5871      pattern to (char*) in regex_compile.  */
5872   p = pattern = (CHAR_T*)bufp->buffer;
5873   pend = (CHAR_T*)(bufp->buffer + bufp->used);
5874 
5875 #endif /* WCHAR */
5876 
5877   /* Initialize subexpression text positions to -1 to mark ones that no
5878      start_memory/stop_memory has been seen for. Also initialize the
5879      register information struct.  */
5880   for (mcnt = 1; (unsigned) mcnt < num_regs; mcnt++)
5881     {
5882       regstart[mcnt] = regend[mcnt]
5883         = old_regstart[mcnt] = old_regend[mcnt] = REG_UNSET_VALUE;
5884 
5885       REG_MATCH_NULL_STRING_P (reg_info[mcnt]) = MATCH_NULL_UNSET_VALUE;
5886       IS_ACTIVE (reg_info[mcnt]) = 0;
5887       MATCHED_SOMETHING (reg_info[mcnt]) = 0;
5888       EVER_MATCHED_SOMETHING (reg_info[mcnt]) = 0;
5889     }
5890 
5891   /* We move `string1' into `string2' if the latter's empty -- but not if
5892      `string1' is null.  */
5893   if (size2 == 0 && string1 != NULL)
5894     {
5895       string2 = string1;
5896       size2 = size1;
5897       string1 = 0;
5898       size1 = 0;
5899 #ifdef WCHAR
5900       mbs_offset2 = mbs_offset1;
5901       csize2 = csize1;
5902       mbs_offset1 = NULL;
5903       csize1 = 0;
5904 #endif
5905     }
5906   end1 = string1 + size1;
5907   end2 = string2 + size2;
5908 
5909   /* Compute where to stop matching, within the two strings.  */
5910 #ifdef WCHAR
5911   if (stop <= csize1)
5912     {
5913       mcnt = count_mbs_length(mbs_offset1, stop);
5914       end_match_1 = string1 + mcnt;
5915       end_match_2 = string2;
5916     }
5917   else
5918     {
5919       if (stop > csize1 + csize2)
5920 	stop = csize1 + csize2;
5921       end_match_1 = end1;
5922       mcnt = count_mbs_length(mbs_offset2, stop-csize1);
5923       end_match_2 = string2 + mcnt;
5924     }
5925   if (mcnt < 0)
5926     { /* count_mbs_length return error.  */
5927       FREE_VARIABLES ();
5928       return -1;
5929     }
5930 #else
5931   if (stop <= size1)
5932     {
5933       end_match_1 = string1 + stop;
5934       end_match_2 = string2;
5935     }
5936   else
5937     {
5938       end_match_1 = end1;
5939       end_match_2 = string2 + stop - size1;
5940     }
5941 #endif /* WCHAR */
5942 
5943   /* `p' scans through the pattern as `d' scans through the data.
5944      `dend' is the end of the input string that `d' points within.  `d'
5945      is advanced into the following input string whenever necessary, but
5946      this happens before fetching; therefore, at the beginning of the
5947      loop, `d' can be pointing at the end of a string, but it cannot
5948      equal `string2'.  */
5949 #ifdef WCHAR
5950   if (size1 > 0 && pos <= csize1)
5951     {
5952       mcnt = count_mbs_length(mbs_offset1, pos);
5953       d = string1 + mcnt;
5954       dend = end_match_1;
5955     }
5956   else
5957     {
5958       mcnt = count_mbs_length(mbs_offset2, pos-csize1);
5959       d = string2 + mcnt;
5960       dend = end_match_2;
5961     }
5962 
5963   if (mcnt < 0)
5964     { /* count_mbs_length return error.  */
5965       FREE_VARIABLES ();
5966       return -1;
5967     }
5968 #else
5969   if (size1 > 0 && pos <= size1)
5970     {
5971       d = string1 + pos;
5972       dend = end_match_1;
5973     }
5974   else
5975     {
5976       d = string2 + pos - size1;
5977       dend = end_match_2;
5978     }
5979 #endif /* WCHAR */
5980 
5981   DEBUG_PRINT1 ("The compiled pattern is:\n");
5982   DEBUG_PRINT_COMPILED_PATTERN (bufp, p, pend);
5983   DEBUG_PRINT1 ("The string to match is: `");
5984   DEBUG_PRINT_DOUBLE_STRING (d, string1, size1, string2, size2);
5985   DEBUG_PRINT1 ("'\n");
5986 
5987   /* This loops over pattern commands.  It exits by returning from the
5988      function if the match is complete, or it drops through if the match
5989      fails at this starting point in the input data.  */
5990   for (;;)
5991     {
5992 #ifdef _LIBC
5993       DEBUG_PRINT2 ("\n%p: ", p);
5994 #else
5995       DEBUG_PRINT2 ("\n0x%x: ", p);
5996 #endif
5997 
5998 #ifdef __GNUC__
5999       NEXT;
6000 #else
6001       if (p == pend)
6002 #endif
6003 	{
6004 #ifdef __GNUC__
6005 	end_of_pattern:
6006 #endif
6007 	  /* End of pattern means we might have succeeded.  */
6008 	  DEBUG_PRINT1 ("end of pattern ... ");
6009 
6010 	  /* If we haven't matched the entire string, and we want the
6011 	     longest match, try backtracking.  */
6012 	  if (d != end_match_2)
6013 	    {
6014 	      /* 1 if this match ends in the same string (string1 or string2)
6015 		 as the best previous match.  */
6016 	      boolean same_str_p = (FIRST_STRING_P (match_end)
6017 				    == MATCHING_IN_FIRST_STRING);
6018 	      /* 1 if this match is the best seen so far.  */
6019 	      boolean best_match_p;
6020 
6021 	      /* AIX compiler got confused when this was combined
6022 		 with the previous declaration.  */
6023 	      if (same_str_p)
6024 		best_match_p = d > match_end;
6025 	      else
6026 		best_match_p = !MATCHING_IN_FIRST_STRING;
6027 
6028 	      DEBUG_PRINT1 ("backtracking.\n");
6029 
6030 	      if (!FAIL_STACK_EMPTY ())
6031 		{ /* More failure points to try.  */
6032 
6033 		  /* If exceeds best match so far, save it.  */
6034 		  if (!best_regs_set || best_match_p)
6035 		    {
6036 		      best_regs_set = true;
6037 		      match_end = d;
6038 
6039 		      DEBUG_PRINT1 ("\nSAVING match as best so far.\n");
6040 
6041 		      for (mcnt = 1; (unsigned) mcnt < num_regs; mcnt++)
6042 			{
6043 			  best_regstart[mcnt] = regstart[mcnt];
6044 			  best_regend[mcnt] = regend[mcnt];
6045 			}
6046 		    }
6047 		  goto fail;
6048 		}
6049 
6050 	      /* If no failure points, don't restore garbage.  And if
6051 		 last match is real best match, don't restore second
6052 		 best one. */
6053 	      else if (best_regs_set && !best_match_p)
6054 		{
6055 		restore_best_regs:
6056 		  /* Restore best match.  It may happen that `dend ==
6057 		     end_match_1' while the restored d is in string2.
6058 		     For example, the pattern `x.*y.*z' against the
6059 		     strings `x-' and `y-z-', if the two strings are
6060 		     not consecutive in memory.  */
6061 		  DEBUG_PRINT1 ("Restoring best registers.\n");
6062 
6063 		  d = match_end;
6064 		  dend = ((d >= string1 && d <= end1)
6065 			  ? end_match_1 : end_match_2);
6066 
6067 		  for (mcnt = 1; (unsigned) mcnt < num_regs; mcnt++)
6068 		    {
6069 		      regstart[mcnt] = best_regstart[mcnt];
6070 		      regend[mcnt] = best_regend[mcnt];
6071 		    }
6072 		}
6073 	    } /* d != end_match_2 */
6074 
6075 	succeed_label:
6076 	  DEBUG_PRINT1 ("Accepting match.\n");
6077 	  /* If caller wants register contents data back, do it.  */
6078 	  if (regs && !bufp->no_sub)
6079 	    {
6080 	      /* Have the register data arrays been allocated?  */
6081 	      if (bufp->regs_allocated == REGS_UNALLOCATED)
6082 		{ /* No.  So allocate them with malloc.  We need one
6083 		     extra element beyond `num_regs' for the `-1' marker
6084 		     GNU code uses.  */
6085 		  regs->num_regs = MAX (RE_NREGS, num_regs + 1);
6086 		  regs->start = TALLOC (regs->num_regs, regoff_t);
6087 		  regs->end = TALLOC (regs->num_regs, regoff_t);
6088 		  if (regs->start == NULL || regs->end == NULL)
6089 		    {
6090 		      FREE_VARIABLES ();
6091 		      return -2;
6092 		    }
6093 		  bufp->regs_allocated = REGS_REALLOCATE;
6094 		}
6095 	      else if (bufp->regs_allocated == REGS_REALLOCATE)
6096 		{ /* Yes.  If we need more elements than were already
6097 		     allocated, reallocate them.  If we need fewer, just
6098 		     leave it alone.  */
6099 		  if (regs->num_regs < num_regs + 1)
6100 		    {
6101 		      regs->num_regs = num_regs + 1;
6102 		      RETALLOC (regs->start, regs->num_regs, regoff_t);
6103 		      RETALLOC (regs->end, regs->num_regs, regoff_t);
6104 		      if (regs->start == NULL || regs->end == NULL)
6105 			{
6106 			  FREE_VARIABLES ();
6107 			  return -2;
6108 			}
6109 		    }
6110 		}
6111 	      else
6112 		{
6113 		  /* These braces fend off a "empty body in an else-statement"
6114 		     warning under GCC when assert expands to nothing.  */
6115 		  assert (bufp->regs_allocated == REGS_FIXED);
6116 		}
6117 
6118 	      /* Convert the pointer data in `regstart' and `regend' to
6119 		 indices.  Register zero has to be set differently,
6120 		 since we haven't kept track of any info for it.  */
6121 	      if (regs->num_regs > 0)
6122 		{
6123 		  regs->start[0] = pos;
6124 #ifdef WCHAR
6125 		  if (MATCHING_IN_FIRST_STRING)
6126 		    regs->end[0] = (mbs_offset1 != NULL ?
6127 				    mbs_offset1[d-string1] : 0);
6128 		  else
6129 		    regs->end[0] = csize1 + (mbs_offset2 != NULL
6130 					     ? mbs_offset2[d-string2] : 0);
6131 #else
6132 		  regs->end[0] = (MATCHING_IN_FIRST_STRING
6133 				  ? ((regoff_t) (d - string1))
6134 				  : ((regoff_t) (d - string2 + size1)));
6135 #endif /* WCHAR */
6136 		}
6137 
6138 	      /* Go through the first `min (num_regs, regs->num_regs)'
6139 		 registers, since that is all we initialized.  */
6140 	      for (mcnt = 1; (unsigned) mcnt < MIN (num_regs, regs->num_regs);
6141 		   mcnt++)
6142 		{
6143 		  if (REG_UNSET (regstart[mcnt]) || REG_UNSET (regend[mcnt]))
6144 		    regs->start[mcnt] = regs->end[mcnt] = -1;
6145 		  else
6146 		    {
6147 		      regs->start[mcnt]
6148 			= (regoff_t) POINTER_TO_OFFSET (regstart[mcnt]);
6149 		      regs->end[mcnt]
6150 			= (regoff_t) POINTER_TO_OFFSET (regend[mcnt]);
6151 		    }
6152 		}
6153 
6154 	      /* If the regs structure we return has more elements than
6155 		 were in the pattern, set the extra elements to -1.  If
6156 		 we (re)allocated the registers, this is the case,
6157 		 because we always allocate enough to have at least one
6158 		 -1 at the end.  */
6159 	      for (mcnt = num_regs; (unsigned) mcnt < regs->num_regs; mcnt++)
6160 		regs->start[mcnt] = regs->end[mcnt] = -1;
6161 	    } /* regs && !bufp->no_sub */
6162 
6163 	  DEBUG_PRINT4 ("%u failure points pushed, %u popped (%u remain).\n",
6164 			nfailure_points_pushed, nfailure_points_popped,
6165 			nfailure_points_pushed - nfailure_points_popped);
6166 	  DEBUG_PRINT2 ("%u registers pushed.\n", num_regs_pushed);
6167 
6168 #ifdef WCHAR
6169 	  if (MATCHING_IN_FIRST_STRING)
6170 	    mcnt = mbs_offset1 != NULL ? mbs_offset1[d-string1] : 0;
6171 	  else
6172 	    mcnt = (mbs_offset2 != NULL ? mbs_offset2[d-string2] : 0) +
6173 	      csize1;
6174 	  mcnt -= pos;
6175 #else
6176 	  mcnt = d - pos - (MATCHING_IN_FIRST_STRING
6177 			    ? string1 : string2 - size1);
6178 #endif /* WCHAR */
6179 
6180 	  DEBUG_PRINT2 ("Returning %d from re_match_2.\n", mcnt);
6181 
6182 	  FREE_VARIABLES ();
6183 	  return mcnt;
6184 	}
6185 
6186 #ifndef __GNUC__
6187       /* Otherwise match next pattern command.  */
6188       switch (SWITCH_ENUM_CAST ((re_opcode_t) *p++))
6189 	{
6190 #endif
6191         /* Ignore these.  Used to ignore the n of succeed_n's which
6192            currently have n == 0.  */
6193         CASE (no_op):
6194           DEBUG_PRINT1 ("EXECUTING no_op.\n");
6195           NEXT;
6196 
6197 	CASE (succeed):
6198           DEBUG_PRINT1 ("EXECUTING succeed.\n");
6199 	  goto succeed_label;
6200 
6201         /* Match the next n pattern characters exactly.  The following
6202            byte in the pattern defines n, and the n bytes after that
6203            are the characters to match.  */
6204 	CASE (exactn):
6205 #ifdef MBS_SUPPORT
6206 	CASE (exactn_bin):
6207 #endif
6208 	  mcnt = *p++;
6209           DEBUG_PRINT2 ("EXECUTING exactn %d.\n", mcnt);
6210 
6211           /* This is written out as an if-else so we don't waste time
6212              testing `translate' inside the loop.  */
6213           if (translate)
6214 	    {
6215 	      do
6216 		{
6217 		  PREFETCH ();
6218 #ifdef WCHAR
6219 		  if (*d <= 0xff)
6220 		    {
6221 		      if ((UCHAR_T) translate[(unsigned char) *d++]
6222 			  != (UCHAR_T) *p++)
6223 			goto fail;
6224 		    }
6225 		  else
6226 		    {
6227 		      if (*d++ != (CHAR_T) *p++)
6228 			goto fail;
6229 		    }
6230 #else
6231 		  if ((UCHAR_T) translate[(unsigned char) *d++]
6232 		      != (UCHAR_T) *p++)
6233                     goto fail;
6234 #endif /* WCHAR */
6235 		}
6236 	      while (--mcnt);
6237 	    }
6238 	  else
6239 	    {
6240 	      do
6241 		{
6242 		  PREFETCH ();
6243 		  if (*d++ != (CHAR_T) *p++) goto fail;
6244 		}
6245 	      while (--mcnt);
6246 	    }
6247 	  SET_REGS_MATCHED ();
6248           NEXT;
6249 
6250 
6251         /* Match any character except possibly a newline or a null.  */
6252 	CASE (anychar):
6253           DEBUG_PRINT1 ("EXECUTING anychar.\n");
6254 
6255           PREFETCH ();
6256 
6257           if ((!(bufp->syntax & RE_DOT_NEWLINE) && TRANSLATE (*d) == '\n')
6258               || (bufp->syntax & RE_DOT_NOT_NULL && TRANSLATE (*d) == '\000'))
6259 	    goto fail;
6260 
6261           SET_REGS_MATCHED ();
6262           DEBUG_PRINT2 ("  Matched `%ld'.\n", (long int) *d);
6263           d++;
6264 	  NEXT;
6265 
6266 
6267 	CASE (charset):
6268 	CASE (charset_not):
6269 	  {
6270 	    register UCHAR_T c;
6271 #ifdef WCHAR
6272 	    unsigned int i, char_class_length, coll_symbol_length,
6273               equiv_class_length, ranges_length, chars_length, length;
6274 	    CHAR_T *workp, *workp2, *charset_top;
6275 #define WORK_BUFFER_SIZE 128
6276             CHAR_T str_buf[WORK_BUFFER_SIZE];
6277 # ifdef _LIBC
6278 	    uint32_t nrules;
6279 # endif /* _LIBC */
6280 #endif /* WCHAR */
6281 	    boolean not = (re_opcode_t) *(p - 1) == charset_not;
6282 
6283             DEBUG_PRINT2 ("EXECUTING charset%s.\n", not ? "_not" : "");
6284 	    PREFETCH ();
6285 	    c = TRANSLATE (*d); /* The character to match.  */
6286 #ifdef WCHAR
6287 # ifdef _LIBC
6288 	    nrules = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
6289 # endif /* _LIBC */
6290 	    charset_top = p - 1;
6291 	    char_class_length = *p++;
6292 	    coll_symbol_length = *p++;
6293 	    equiv_class_length = *p++;
6294 	    ranges_length = *p++;
6295 	    chars_length = *p++;
6296 	    /* p points to charset[6], so the address of the next instruction
6297 	       (charset[l+m+n+2o+k+p']) equals p[l+m+n+2*o+p'],
6298 	       where l=length of char_classes, m=length of collating_symbol,
6299 	       n=equivalence_class, o=length of char_range,
6300 	       p'=length of character.  */
6301 	    workp = p;
6302 	    /* Update p to indicate the next instruction.  */
6303 	    p += char_class_length + coll_symbol_length+ equiv_class_length +
6304               2*ranges_length + chars_length;
6305 
6306             /* match with char_class?  */
6307 	    for (i = 0; i < char_class_length ; i += CHAR_CLASS_SIZE)
6308 	      {
6309 		wctype_t wctype;
6310 		uintptr_t alignedp = ((uintptr_t)workp
6311 				      + __alignof__(wctype_t) - 1)
6312 		  		      & ~(uintptr_t)(__alignof__(wctype_t) - 1);
6313 		wctype = *((wctype_t*)alignedp);
6314 		workp += CHAR_CLASS_SIZE;
6315 		if (iswctype((wint_t)c, wctype))
6316 		  goto char_set_matched;
6317 	      }
6318 
6319             /* match with collating_symbol?  */
6320 # ifdef _LIBC
6321 	    if (nrules != 0)
6322 	      {
6323 		const unsigned char *extra = (const unsigned char *)
6324 		  _NL_CURRENT (LC_COLLATE, _NL_COLLATE_SYMB_EXTRAMB);
6325 
6326 		for (workp2 = workp + coll_symbol_length ; workp < workp2 ;
6327 		     workp++)
6328 		  {
6329 		    int32_t *wextra;
6330 		    wextra = (int32_t*)(extra + *workp++);
6331 		    for (i = 0; i < *wextra; ++i)
6332 		      if (TRANSLATE(d[i]) != wextra[1 + i])
6333 			break;
6334 
6335 		    if (i == *wextra)
6336 		      {
6337 			/* Update d, however d will be incremented at
6338 			   char_set_matched:, we decrement d here.  */
6339 			d += i - 1;
6340 			goto char_set_matched;
6341 		      }
6342 		  }
6343 	      }
6344 	    else /* (nrules == 0) */
6345 # endif
6346 	      /* If we can't look up collation data, we use wcscoll
6347 		 instead.  */
6348 	      {
6349 		for (workp2 = workp + coll_symbol_length ; workp < workp2 ;)
6350 		  {
6351 		    const CHAR_T *backup_d = d, *backup_dend = dend;
6352 		    length = wcslen (workp);
6353 
6354 		    /* If wcscoll(the collating symbol, whole string) > 0,
6355 		       no substring of the string can ever match the
6356 		       collating symbol.  */
6357 		    if (wcscoll (workp, d) > 0)
6358 		      {
6359 			workp += length + 1;
6360 			continue;
6361 		      }
6362 
6363 		    /* First, we compare the collating symbol with
6364 		       the first character of the string.
6365 		       If it doesn't match, we add the next character to
6366 		       the compare buffer in turn.  */
6367 		    for (i = 0 ; i < WORK_BUFFER_SIZE-1 ; i++, d++)
6368 		      {
6369 			int match;
6370 			if (d == dend)
6371 			  {
6372 			    if (dend == end_match_2)
6373 			      break;
6374 			    d = string2;
6375 			    dend = end_match_2;
6376 			  }
6377 
6378 			/* add next character to the compare buffer.  */
6379 			str_buf[i] = TRANSLATE(*d);
6380 			str_buf[i+1] = '\0';
6381 
6382 			match = wcscoll (workp, str_buf);
6383 			if (match == 0)
6384 			  goto char_set_matched;
6385 
6386 			if (match < 0)
6387 			  /* (str_buf > workp) indicates (str_buf + X > workp),
6388 			     because for all X (str_buf + X > str_buf).
6389 			     So we don't need to continue this loop.  */
6390 			  break;
6391 
6392 			/* Otherwise(str_buf < workp),
6393 			   (str_buf+next_character) may equal (workp).
6394 			   So we continue this loop.  */
6395 		      }
6396 		    /* not matched */
6397 		    d = backup_d;
6398 		    dend = backup_dend;
6399 		    workp += length + 1;
6400 		  }
6401               }
6402             /* match with equivalence_class?  */
6403 # ifdef _LIBC
6404 	    if (nrules != 0)
6405 	      {
6406                 const CHAR_T *backup_d = d, *backup_dend = dend;
6407 		/* Try to match the equivalence class against
6408 		   those known to the collate implementation.  */
6409 		const int32_t *table;
6410 		const int32_t *weights;
6411 		const int32_t *extra;
6412 		const int32_t *indirect;
6413 		int32_t idx, idx2;
6414 		wint_t *cp;
6415 		size_t len;
6416 
6417 		/* This #include defines a local function!  */
6418 #  include <locale/weightwc.h>
6419 
6420 		table = (const int32_t *)
6421 		  _NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEWC);
6422 		weights = (const wint_t *)
6423 		  _NL_CURRENT (LC_COLLATE, _NL_COLLATE_WEIGHTWC);
6424 		extra = (const wint_t *)
6425 		  _NL_CURRENT (LC_COLLATE, _NL_COLLATE_EXTRAWC);
6426 		indirect = (const int32_t *)
6427 		  _NL_CURRENT (LC_COLLATE, _NL_COLLATE_INDIRECTWC);
6428 
6429 		/* Write 1 collating element to str_buf, and
6430 		   get its index.  */
6431 		idx2 = 0;
6432 
6433 		for (i = 0 ; idx2 == 0 && i < WORK_BUFFER_SIZE - 1; i++)
6434 		  {
6435 		    cp = (wint_t*)str_buf;
6436 		    if (d == dend)
6437 		      {
6438 			if (dend == end_match_2)
6439 			  break;
6440 			d = string2;
6441 			dend = end_match_2;
6442 		      }
6443 		    str_buf[i] = TRANSLATE(*(d+i));
6444 		    str_buf[i+1] = '\0'; /* sentinel */
6445 		    idx2 = findidx ((const wint_t**)&cp);
6446 		  }
6447 
6448 		/* Update d, however d will be incremented at
6449 		   char_set_matched:, we decrement d here.  */
6450 		d = backup_d + ((wchar_t*)cp - (wchar_t*)str_buf - 1);
6451 		if (d >= dend)
6452 		  {
6453 		    if (dend == end_match_2)
6454 			d = dend;
6455 		    else
6456 		      {
6457 			d = string2;
6458 			dend = end_match_2;
6459 		      }
6460 		  }
6461 
6462 		len = weights[idx2];
6463 
6464 		for (workp2 = workp + equiv_class_length ; workp < workp2 ;
6465 		     workp++)
6466 		  {
6467 		    idx = (int32_t)*workp;
6468 		    /* We already checked idx != 0 in regex_compile. */
6469 
6470 		    if (idx2 != 0 && len == weights[idx])
6471 		      {
6472 			int cnt = 0;
6473 			while (cnt < len && (weights[idx + 1 + cnt]
6474 					     == weights[idx2 + 1 + cnt]))
6475 			  ++cnt;
6476 
6477 			if (cnt == len)
6478 			  goto char_set_matched;
6479 		      }
6480 		  }
6481 		/* not matched */
6482                 d = backup_d;
6483                 dend = backup_dend;
6484 	      }
6485 	    else /* (nrules == 0) */
6486 # endif
6487 	      /* If we can't look up collation data, we use wcscoll
6488 		 instead.  */
6489 	      {
6490 		for (workp2 = workp + equiv_class_length ; workp < workp2 ;)
6491 		  {
6492 		    const CHAR_T *backup_d = d, *backup_dend = dend;
6493 		    length = wcslen (workp);
6494 
6495 		    /* If wcscoll(the equivalence class, whole string) > 0,
6496 		       no substring of the string can ever match the
6497 		       equivalence class.  */
6498 		    if (wcscoll (workp, d) > 0)
6499 		      {
6500 			workp += length + 1;
6501 			break;
6502 		      }
6503 
6504 		    /* First, we compare the equivalence class with
6505 		       the first character of the string.
6506 		       If it doesn't match, we add the next character to
6507 		       the compare buffer in turn.  */
6508 		    for (i = 0 ; i < WORK_BUFFER_SIZE - 1 ; i++, d++)
6509 		      {
6510 			int match;
6511 			if (d == dend)
6512 			  {
6513 			    if (dend == end_match_2)
6514 			      break;
6515 			    d = string2;
6516 			    dend = end_match_2;
6517 			  }
6518 
6519 			/* add next character to the compare buffer.  */
6520 			str_buf[i] = TRANSLATE(*d);
6521 			str_buf[i+1] = '\0';
6522 
6523 			match = wcscoll (workp, str_buf);
6524 
6525 			if (match == 0)
6526 			  goto char_set_matched;
6527 
6528 			if (match < 0)
6529 			/* (str_buf > workp) indicates (str_buf + X > workp),
6530 			   because for all X (str_buf + X > str_buf).
6531 			   So we don't need to continue this loop.  */
6532 			  break;
6533 
6534 			/* Otherwise(str_buf < workp),
6535 			   (str_buf+next_character) may equal (workp).
6536 			   So we continue this loop.  */
6537 		      }
6538 		    /* not matched */
6539 		    d = backup_d;
6540 		    dend = backup_dend;
6541 		    workp += length + 1;
6542 		  }
6543 	      }
6544 
6545             /* match with char_range?  */
6546 # ifdef _LIBC
6547 	    if (nrules != 0)
6548 	      {
6549 		uint32_t collseqval;
6550 		const char *collseq = (const char *)
6551 		  _NL_CURRENT(LC_COLLATE, _NL_COLLATE_COLLSEQWC);
6552 
6553 		collseqval = collseq_table_lookup (collseq, c);
6554 
6555 		for (; workp < p - chars_length ;)
6556 		  {
6557 		    uint32_t start_val, end_val;
6558 
6559 		    /* We already compute the collation sequence value
6560 		       of the characters (or collating symbols).  */
6561 		    start_val = (uint32_t) *workp++; /* range_start */
6562 		    end_val = (uint32_t) *workp++; /* range_end */
6563 
6564 		    if (start_val <= collseqval && collseqval <= end_val)
6565 		      goto char_set_matched;
6566 		  }
6567 	      }
6568 	    else
6569 # endif
6570 	      {
6571 		/* We set range_start_char at str_buf[0], range_end_char
6572 		   at str_buf[4], and compared char at str_buf[2].  */
6573 		str_buf[1] = 0;
6574 		str_buf[2] = c;
6575 		str_buf[3] = 0;
6576 		str_buf[5] = 0;
6577 		for (; workp < p - chars_length ;)
6578 		  {
6579 		    wchar_t *range_start_char, *range_end_char;
6580 
6581 		    /* match if (range_start_char <= c <= range_end_char).  */
6582 
6583 		    /* If range_start(or end) < 0, we assume -range_start(end)
6584 		       is the offset of the collating symbol which is specified
6585 		       as the character of the range start(end).  */
6586 
6587 		    /* range_start */
6588 		    if (*workp < 0)
6589 		      range_start_char = charset_top - (*workp++);
6590 		    else
6591 		      {
6592 			str_buf[0] = *workp++;
6593 			range_start_char = str_buf;
6594 		      }
6595 
6596 		    /* range_end */
6597 		    if (*workp < 0)
6598 		      range_end_char = charset_top - (*workp++);
6599 		    else
6600 		      {
6601 			str_buf[4] = *workp++;
6602 			range_end_char = str_buf + 4;
6603 		      }
6604 
6605 		    if (wcscoll (range_start_char, str_buf+2) <= 0
6606 			&& wcscoll (str_buf+2, range_end_char) <= 0)
6607 		      goto char_set_matched;
6608 		  }
6609 	      }
6610 
6611             /* match with char?  */
6612 	    for (; workp < p ; workp++)
6613 	      if (c == *workp)
6614 		goto char_set_matched;
6615 
6616 	    not = !not;
6617 
6618 	  char_set_matched:
6619 	    if (not) goto fail;
6620 #else
6621             /* Cast to `unsigned' instead of `unsigned char' in case the
6622                bit list is a full 32 bytes long.  */
6623 	    if (c < (unsigned) (*p * BYTEWIDTH)
6624 		&& p[1 + c / BYTEWIDTH] & (1 << (c % BYTEWIDTH)))
6625 	      not = !not;
6626 
6627 	    p += 1 + *p;
6628 
6629 	    if (!not) goto fail;
6630 #undef WORK_BUFFER_SIZE
6631 #endif /* WCHAR */
6632 	    SET_REGS_MATCHED ();
6633             d++;
6634 	    NEXT;
6635 	  }
6636 
6637 
6638         /* The beginning of a group is represented by start_memory.
6639            The arguments are the register number in the next byte, and the
6640            number of groups inner to this one in the next.  The text
6641            matched within the group is recorded (in the internal
6642            registers data structure) under the register number.  */
6643         CASE (start_memory):
6644 	  DEBUG_PRINT3 ("EXECUTING start_memory %ld (%ld):\n",
6645 			(long int) *p, (long int) p[1]);
6646 
6647           /* Find out if this group can match the empty string.  */
6648 	  p1 = p;		/* To send to group_match_null_string_p.  */
6649 
6650           if (REG_MATCH_NULL_STRING_P (reg_info[*p]) == MATCH_NULL_UNSET_VALUE)
6651             REG_MATCH_NULL_STRING_P (reg_info[*p])
6652               = PREFIX(group_match_null_string_p) (&p1, pend, reg_info);
6653 
6654           /* Save the position in the string where we were the last time
6655              we were at this open-group operator in case the group is
6656              operated upon by a repetition operator, e.g., with `(a*)*b'
6657              against `ab'; then we want to ignore where we are now in
6658              the string in case this attempt to match fails.  */
6659           old_regstart[*p] = REG_MATCH_NULL_STRING_P (reg_info[*p])
6660                              ? REG_UNSET (regstart[*p]) ? d : regstart[*p]
6661                              : regstart[*p];
6662 	  DEBUG_PRINT2 ("  old_regstart: %d\n",
6663 			 POINTER_TO_OFFSET (old_regstart[*p]));
6664 
6665           regstart[*p] = d;
6666 	  DEBUG_PRINT2 ("  regstart: %d\n", POINTER_TO_OFFSET (regstart[*p]));
6667 
6668           IS_ACTIVE (reg_info[*p]) = 1;
6669           MATCHED_SOMETHING (reg_info[*p]) = 0;
6670 
6671 	  /* Clear this whenever we change the register activity status.  */
6672 	  set_regs_matched_done = 0;
6673 
6674           /* This is the new highest active register.  */
6675           highest_active_reg = *p;
6676 
6677           /* If nothing was active before, this is the new lowest active
6678              register.  */
6679           if (lowest_active_reg == NO_LOWEST_ACTIVE_REG)
6680             lowest_active_reg = *p;
6681 
6682           /* Move past the register number and inner group count.  */
6683           p += 2;
6684 	  just_past_start_mem = p;
6685 
6686           NEXT;
6687 
6688 
6689         /* The stop_memory opcode represents the end of a group.  Its
6690            arguments are the same as start_memory's: the register
6691            number, and the number of inner groups.  */
6692 	CASE (stop_memory):
6693 	  DEBUG_PRINT3 ("EXECUTING stop_memory %ld (%ld):\n",
6694 			(long int) *p, (long int) p[1]);
6695 
6696           /* We need to save the string position the last time we were at
6697              this close-group operator in case the group is operated
6698              upon by a repetition operator, e.g., with `((a*)*(b*)*)*'
6699              against `aba'; then we want to ignore where we are now in
6700              the string in case this attempt to match fails.  */
6701           old_regend[*p] = REG_MATCH_NULL_STRING_P (reg_info[*p])
6702                            ? REG_UNSET (regend[*p]) ? d : regend[*p]
6703 			   : regend[*p];
6704 	  DEBUG_PRINT2 ("      old_regend: %d\n",
6705 			 POINTER_TO_OFFSET (old_regend[*p]));
6706 
6707           regend[*p] = d;
6708 	  DEBUG_PRINT2 ("      regend: %d\n", POINTER_TO_OFFSET (regend[*p]));
6709 
6710           /* This register isn't active anymore.  */
6711           IS_ACTIVE (reg_info[*p]) = 0;
6712 
6713 	  /* Clear this whenever we change the register activity status.  */
6714 	  set_regs_matched_done = 0;
6715 
6716           /* If this was the only register active, nothing is active
6717              anymore.  */
6718           if (lowest_active_reg == highest_active_reg)
6719             {
6720               lowest_active_reg = NO_LOWEST_ACTIVE_REG;
6721               highest_active_reg = NO_HIGHEST_ACTIVE_REG;
6722             }
6723           else
6724             { /* We must scan for the new highest active register, since
6725                  it isn't necessarily one less than now: consider
6726                  (a(b)c(d(e)f)g).  When group 3 ends, after the f), the
6727                  new highest active register is 1.  */
6728               UCHAR_T r = *p - 1;
6729               while (r > 0 && !IS_ACTIVE (reg_info[r]))
6730                 r--;
6731 
6732               /* If we end up at register zero, that means that we saved
6733                  the registers as the result of an `on_failure_jump', not
6734                  a `start_memory', and we jumped to past the innermost
6735                  `stop_memory'.  For example, in ((.)*) we save
6736                  registers 1 and 2 as a result of the *, but when we pop
6737                  back to the second ), we are at the stop_memory 1.
6738                  Thus, nothing is active.  */
6739 	      if (r == 0)
6740                 {
6741                   lowest_active_reg = NO_LOWEST_ACTIVE_REG;
6742                   highest_active_reg = NO_HIGHEST_ACTIVE_REG;
6743                 }
6744               else
6745                 highest_active_reg = r;
6746             }
6747 
6748           /* If just failed to match something this time around with a
6749              group that's operated on by a repetition operator, try to
6750              force exit from the ``loop'', and restore the register
6751              information for this group that we had before trying this
6752              last match.  */
6753           if ((!MATCHED_SOMETHING (reg_info[*p])
6754                || just_past_start_mem == p - 1)
6755 	      && (p + 2) < pend)
6756             {
6757               boolean is_a_jump_n = false;
6758 
6759               p1 = p + 2;
6760               mcnt = 0;
6761               switch ((re_opcode_t) *p1++)
6762                 {
6763                   case jump_n:
6764 		    is_a_jump_n = true;
6765                   case pop_failure_jump:
6766 		  case maybe_pop_jump:
6767 		  case jump:
6768 		  case dummy_failure_jump:
6769                     EXTRACT_NUMBER_AND_INCR (mcnt, p1);
6770 		    if (is_a_jump_n)
6771 		      p1 += OFFSET_ADDRESS_SIZE;
6772                     break;
6773 
6774                   default:
6775                     /* do nothing */ ;
6776                 }
6777 	      p1 += mcnt;
6778 
6779               /* If the next operation is a jump backwards in the pattern
6780 	         to an on_failure_jump right before the start_memory
6781                  corresponding to this stop_memory, exit from the loop
6782                  by forcing a failure after pushing on the stack the
6783                  on_failure_jump's jump in the pattern, and d.  */
6784               if (mcnt < 0 && (re_opcode_t) *p1 == on_failure_jump
6785                   && (re_opcode_t) p1[1+OFFSET_ADDRESS_SIZE] == start_memory
6786 		  && p1[2+OFFSET_ADDRESS_SIZE] == *p)
6787 		{
6788                   /* If this group ever matched anything, then restore
6789                      what its registers were before trying this last
6790                      failed match, e.g., with `(a*)*b' against `ab' for
6791                      regstart[1], and, e.g., with `((a*)*(b*)*)*'
6792                      against `aba' for regend[3].
6793 
6794                      Also restore the registers for inner groups for,
6795                      e.g., `((a*)(b*))*' against `aba' (register 3 would
6796                      otherwise get trashed).  */
6797 
6798                   if (EVER_MATCHED_SOMETHING (reg_info[*p]))
6799 		    {
6800 		      unsigned r;
6801 
6802                       EVER_MATCHED_SOMETHING (reg_info[*p]) = 0;
6803 
6804 		      /* Restore this and inner groups' (if any) registers.  */
6805                       for (r = *p; r < (unsigned) *p + (unsigned) *(p + 1);
6806 			   r++)
6807                         {
6808                           regstart[r] = old_regstart[r];
6809 
6810                           /* xx why this test?  */
6811                           if (old_regend[r] >= regstart[r])
6812                             regend[r] = old_regend[r];
6813                         }
6814                     }
6815 		  p1++;
6816                   EXTRACT_NUMBER_AND_INCR (mcnt, p1);
6817                   PUSH_FAILURE_POINT (p1 + mcnt, d, -2);
6818 
6819                   goto fail;
6820                 }
6821             }
6822 
6823           /* Move past the register number and the inner group count.  */
6824           p += 2;
6825           NEXT;
6826 
6827 
6828 	/* \<digit> has been turned into a `duplicate' command which is
6829            followed by the numeric value of <digit> as the register number.  */
6830         CASE (duplicate):
6831 	  {
6832 	    register const CHAR_T *d2, *dend2;
6833 	    int regno = *p++;   /* Get which register to match against.  */
6834 	    DEBUG_PRINT2 ("EXECUTING duplicate %d.\n", regno);
6835 
6836 	    /* Can't back reference a group which we've never matched.  */
6837             if (REG_UNSET (regstart[regno]) || REG_UNSET (regend[regno]))
6838               goto fail;
6839 
6840             /* Where in input to try to start matching.  */
6841             d2 = regstart[regno];
6842 
6843             /* Where to stop matching; if both the place to start and
6844                the place to stop matching are in the same string, then
6845                set to the place to stop, otherwise, for now have to use
6846                the end of the first string.  */
6847 
6848             dend2 = ((FIRST_STRING_P (regstart[regno])
6849 		      == FIRST_STRING_P (regend[regno]))
6850 		     ? regend[regno] : end_match_1);
6851 	    for (;;)
6852 	      {
6853 		/* If necessary, advance to next segment in register
6854                    contents.  */
6855 		while (d2 == dend2)
6856 		  {
6857 		    if (dend2 == end_match_2) break;
6858 		    if (dend2 == regend[regno]) break;
6859 
6860                     /* End of string1 => advance to string2. */
6861                     d2 = string2;
6862                     dend2 = regend[regno];
6863 		  }
6864 		/* At end of register contents => success */
6865 		if (d2 == dend2) break;
6866 
6867 		/* If necessary, advance to next segment in data.  */
6868 		PREFETCH ();
6869 
6870 		/* How many characters left in this segment to match.  */
6871 		mcnt = dend - d;
6872 
6873 		/* Want how many consecutive characters we can match in
6874                    one shot, so, if necessary, adjust the count.  */
6875                 if (mcnt > dend2 - d2)
6876 		  mcnt = dend2 - d2;
6877 
6878 		/* Compare that many; failure if mismatch, else move
6879                    past them.  */
6880 		if (translate
6881                     ? PREFIX(bcmp_translate) (d, d2, mcnt, translate)
6882                     : memcmp (d, d2, mcnt*sizeof(UCHAR_T)))
6883 		  goto fail;
6884 		d += mcnt, d2 += mcnt;
6885 
6886 		/* Do this because we've matched some characters.  */
6887 		SET_REGS_MATCHED ();
6888 	      }
6889 	  }
6890 	  NEXT;
6891 
6892 
6893         /* begline matches the empty string at the beginning of the string
6894            (unless `not_bol' is set in `bufp'), and, if
6895            `newline_anchor' is set, after newlines.  */
6896 	CASE (begline):
6897           DEBUG_PRINT1 ("EXECUTING begline.\n");
6898 
6899           if (AT_STRINGS_BEG (d))
6900             {
6901               if (!bufp->not_bol)
6902 		{
6903 		  NEXT;
6904 		}
6905             }
6906           else if (d[-1] == '\n' && bufp->newline_anchor)
6907             {
6908               NEXT;
6909             }
6910           /* In all other cases, we fail.  */
6911           goto fail;
6912 
6913 
6914         /* endline is the dual of begline.  */
6915 	CASE (endline):
6916           DEBUG_PRINT1 ("EXECUTING endline.\n");
6917 
6918           if (AT_STRINGS_END (d))
6919             {
6920               if (!bufp->not_eol)
6921 		{
6922 		  NEXT;
6923 		}
6924             }
6925 
6926           /* We have to ``prefetch'' the next character.  */
6927           else if ((d == end1 ? *string2 : *d) == '\n'
6928                    && bufp->newline_anchor)
6929             {
6930               NEXT;
6931             }
6932           goto fail;
6933 
6934 
6935 	/* Match at the very beginning of the data.  */
6936         CASE (begbuf):
6937           DEBUG_PRINT1 ("EXECUTING begbuf.\n");
6938           if (AT_STRINGS_BEG (d))
6939 	    {
6940 	      NEXT;
6941 	    }
6942           goto fail;
6943 
6944 
6945 	/* Match at the very end of the data.  */
6946         CASE (endbuf):
6947           DEBUG_PRINT1 ("EXECUTING endbuf.\n");
6948 	  if (AT_STRINGS_END (d))
6949 	    {
6950 	      NEXT;
6951 	    }
6952           goto fail;
6953 
6954 
6955         /* on_failure_keep_string_jump is used to optimize `.*\n'.  It
6956            pushes NULL as the value for the string on the stack.  Then
6957            `pop_failure_point' will keep the current value for the
6958            string, instead of restoring it.  To see why, consider
6959            matching `foo\nbar' against `.*\n'.  The .* matches the foo;
6960            then the . fails against the \n.  But the next thing we want
6961            to do is match the \n against the \n; if we restored the
6962            string value, we would be back at the foo.
6963 
6964            Because this is used only in specific cases, we don't need to
6965            check all the things that `on_failure_jump' does, to make
6966            sure the right things get saved on the stack.  Hence we don't
6967            share its code.  The only reason to push anything on the
6968            stack at all is that otherwise we would have to change
6969            `anychar's code to do something besides goto fail in this
6970            case; that seems worse than this.  */
6971         CASE (on_failure_keep_string_jump):
6972           DEBUG_PRINT1 ("EXECUTING on_failure_keep_string_jump");
6973 
6974           EXTRACT_NUMBER_AND_INCR (mcnt, p);
6975 #ifdef _LIBC
6976           DEBUG_PRINT3 (" %d (to %p):\n", mcnt, p + mcnt);
6977 #else
6978           DEBUG_PRINT3 (" %d (to 0x%x):\n", mcnt, p + mcnt);
6979 #endif
6980 
6981           PUSH_FAILURE_POINT (p + mcnt, NULL, -2);
6982           NEXT;
6983 
6984 
6985 	/* Uses of on_failure_jump:
6986 
6987            Each alternative starts with an on_failure_jump that points
6988            to the beginning of the next alternative.  Each alternative
6989            except the last ends with a jump that in effect jumps past
6990            the rest of the alternatives.  (They really jump to the
6991            ending jump of the following alternative, because tensioning
6992            these jumps is a hassle.)
6993 
6994            Repeats start with an on_failure_jump that points past both
6995            the repetition text and either the following jump or
6996            pop_failure_jump back to this on_failure_jump.  */
6997 	CASE (on_failure_jump):
6998         on_failure:
6999           DEBUG_PRINT1 ("EXECUTING on_failure_jump");
7000 
7001           EXTRACT_NUMBER_AND_INCR (mcnt, p);
7002 #ifdef _LIBC
7003           DEBUG_PRINT3 (" %d (to %p)", mcnt, p + mcnt);
7004 #else
7005           DEBUG_PRINT3 (" %d (to 0x%x)", mcnt, p + mcnt);
7006 #endif
7007 
7008           /* If this on_failure_jump comes right before a group (i.e.,
7009              the original * applied to a group), save the information
7010              for that group and all inner ones, so that if we fail back
7011              to this point, the group's information will be correct.
7012              For example, in \(a*\)*\1, we need the preceding group,
7013              and in \(zz\(a*\)b*\)\2, we need the inner group.  */
7014 
7015           /* We can't use `p' to check ahead because we push
7016              a failure point to `p + mcnt' after we do this.  */
7017           p1 = p;
7018 
7019           /* We need to skip no_op's before we look for the
7020              start_memory in case this on_failure_jump is happening as
7021              the result of a completed succeed_n, as in \(a\)\{1,3\}b\1
7022              against aba.  */
7023           while (p1 < pend && (re_opcode_t) *p1 == no_op)
7024             p1++;
7025 
7026           if (p1 < pend && (re_opcode_t) *p1 == start_memory)
7027             {
7028               /* We have a new highest active register now.  This will
7029                  get reset at the start_memory we are about to get to,
7030                  but we will have saved all the registers relevant to
7031                  this repetition op, as described above.  */
7032               highest_active_reg = *(p1 + 1) + *(p1 + 2);
7033               if (lowest_active_reg == NO_LOWEST_ACTIVE_REG)
7034                 lowest_active_reg = *(p1 + 1);
7035             }
7036 
7037           DEBUG_PRINT1 (":\n");
7038           PUSH_FAILURE_POINT (p + mcnt, d, -2);
7039           NEXT;
7040 
7041 
7042         /* A smart repeat ends with `maybe_pop_jump'.
7043 	   We change it to either `pop_failure_jump' or `jump'.  */
7044         CASE (maybe_pop_jump):
7045           EXTRACT_NUMBER_AND_INCR (mcnt, p);
7046           DEBUG_PRINT2 ("EXECUTING maybe_pop_jump %d.\n", mcnt);
7047           {
7048 	    register UCHAR_T *p2 = p;
7049 
7050             /* Compare the beginning of the repeat with what follows its
7051                end in the pattern.  If we can establish that there is
7052                nothing that they would both match (unlike, e.g., `a*a',
7053                where we would have to backtrack), then we can change to
7054                pop_failure_jump, because we'll never have to
7055                backtrack.
7056 
7057                This is not true in the case of alternatives: in
7058                `(a|ab)*' we do need to backtrack to the `ab' alternative
7059                (e.g., if the string was `ab').  But instead of trying to
7060                detect that here, the alternative has put on a dummy
7061                failure point which is what we will end up popping.  */
7062 
7063 	    /* Skip over open/close-group commands.
7064 	       If what follows this loop is a ...+ construct,
7065 	       look at what begins its body, since we will have to
7066 	       match at least one of that.  */
7067 	    while (1)
7068 	      {
7069 		if (p2 + 2 < pend
7070 		    && ((re_opcode_t) *p2 == stop_memory
7071 			|| (re_opcode_t) *p2 == start_memory))
7072 		  p2 += 3;
7073 		else if (p2 + 2 + 2 * OFFSET_ADDRESS_SIZE < pend
7074 			 && (re_opcode_t) *p2 == dummy_failure_jump)
7075 		  p2 += 2 + 2 * OFFSET_ADDRESS_SIZE;
7076 		else
7077 		  break;
7078 	      }
7079 
7080 	    p1 = p + mcnt;
7081 	    /* p1[0] ... p1[2] are the `on_failure_jump' corresponding
7082 	       to the `maybe_pop_jump' of this case.  Examine what
7083 	       follows.  */
7084 
7085             /* If we're at the end of the pattern, we can change.  */
7086             if (p2 == pend)
7087 	      {
7088 		/* Consider what happens when matching ":\(.*\)"
7089 		   against ":/".  I don't really understand this code
7090 		   yet.  */
7091   	        p[-(1+OFFSET_ADDRESS_SIZE)] = (UCHAR_T)
7092 		  pop_failure_jump;
7093                 DEBUG_PRINT1
7094                   ("  End of pattern: change to `pop_failure_jump'.\n");
7095               }
7096 
7097             else if ((re_opcode_t) *p2 == exactn
7098 #ifdef MBS_SUPPORT
7099 		     || (re_opcode_t) *p2 == exactn_bin
7100 #endif
7101 		     || (bufp->newline_anchor && (re_opcode_t) *p2 == endline))
7102 	      {
7103 		register UCHAR_T c
7104                   = *p2 == (UCHAR_T) endline ? '\n' : p2[2];
7105 
7106                 if (((re_opcode_t) p1[1+OFFSET_ADDRESS_SIZE] == exactn
7107 #ifdef MBS_SUPPORT
7108 		     || (re_opcode_t) p1[1+OFFSET_ADDRESS_SIZE] == exactn_bin
7109 #endif
7110 		    ) && p1[3+OFFSET_ADDRESS_SIZE] != c)
7111                   {
7112   		    p[-(1+OFFSET_ADDRESS_SIZE)] = (UCHAR_T)
7113 		      pop_failure_jump;
7114 #ifdef WCHAR
7115 		      DEBUG_PRINT3 ("  %C != %C => pop_failure_jump.\n",
7116 				    (wint_t) c,
7117 				    (wint_t) p1[3+OFFSET_ADDRESS_SIZE]);
7118 #else
7119 		      DEBUG_PRINT3 ("  %c != %c => pop_failure_jump.\n",
7120 				    (char) c,
7121 				    (char) p1[3+OFFSET_ADDRESS_SIZE]);
7122 #endif
7123                   }
7124 
7125 #ifndef WCHAR
7126 		else if ((re_opcode_t) p1[3] == charset
7127 			 || (re_opcode_t) p1[3] == charset_not)
7128 		  {
7129 		    int not = (re_opcode_t) p1[3] == charset_not;
7130 
7131 		    if (c < (unsigned) (p1[4] * BYTEWIDTH)
7132 			&& p1[5 + c / BYTEWIDTH] & (1 << (c % BYTEWIDTH)))
7133 		      not = !not;
7134 
7135                     /* `not' is equal to 1 if c would match, which means
7136                         that we can't change to pop_failure_jump.  */
7137 		    if (!not)
7138                       {
7139   		        p[-3] = (unsigned char) pop_failure_jump;
7140                         DEBUG_PRINT1 ("  No match => pop_failure_jump.\n");
7141                       }
7142 		  }
7143 #endif /* not WCHAR */
7144 	      }
7145 #ifndef WCHAR
7146             else if ((re_opcode_t) *p2 == charset)
7147 	      {
7148 		/* We win if the first character of the loop is not part
7149                    of the charset.  */
7150                 if ((re_opcode_t) p1[3] == exactn
7151  		    && ! ((int) p2[1] * BYTEWIDTH > (int) p1[5]
7152  			  && (p2[2 + p1[5] / BYTEWIDTH]
7153  			      & (1 << (p1[5] % BYTEWIDTH)))))
7154 		  {
7155 		    p[-3] = (unsigned char) pop_failure_jump;
7156 		    DEBUG_PRINT1 ("  No match => pop_failure_jump.\n");
7157                   }
7158 
7159 		else if ((re_opcode_t) p1[3] == charset_not)
7160 		  {
7161 		    int idx;
7162 		    /* We win if the charset_not inside the loop
7163 		       lists every character listed in the charset after.  */
7164 		    for (idx = 0; idx < (int) p2[1]; idx++)
7165 		      if (! (p2[2 + idx] == 0
7166 			     || (idx < (int) p1[4]
7167 				 && ((p2[2 + idx] & ~ p1[5 + idx]) == 0))))
7168 			break;
7169 
7170 		    if (idx == p2[1])
7171                       {
7172   		        p[-3] = (unsigned char) pop_failure_jump;
7173                         DEBUG_PRINT1 ("  No match => pop_failure_jump.\n");
7174                       }
7175 		  }
7176 		else if ((re_opcode_t) p1[3] == charset)
7177 		  {
7178 		    int idx;
7179 		    /* We win if the charset inside the loop
7180 		       has no overlap with the one after the loop.  */
7181 		    for (idx = 0;
7182 			 idx < (int) p2[1] && idx < (int) p1[4];
7183 			 idx++)
7184 		      if ((p2[2 + idx] & p1[5 + idx]) != 0)
7185 			break;
7186 
7187 		    if (idx == p2[1] || idx == p1[4])
7188                       {
7189   		        p[-3] = (unsigned char) pop_failure_jump;
7190                         DEBUG_PRINT1 ("  No match => pop_failure_jump.\n");
7191                       }
7192 		  }
7193 	      }
7194 #endif /* not WCHAR */
7195 	  }
7196 	  p -= OFFSET_ADDRESS_SIZE;	/* Point at relative address again.  */
7197 	  if ((re_opcode_t) p[-1] != pop_failure_jump)
7198 	    {
7199 	      p[-1] = (UCHAR_T) jump;
7200               DEBUG_PRINT1 ("  Match => jump.\n");
7201 	      goto unconditional_jump;
7202 	    }
7203         /* Note fall through.  */
7204 
7205 
7206 	/* The end of a simple repeat has a pop_failure_jump back to
7207            its matching on_failure_jump, where the latter will push a
7208            failure point.  The pop_failure_jump takes off failure
7209            points put on by this pop_failure_jump's matching
7210            on_failure_jump; we got through the pattern to here from the
7211            matching on_failure_jump, so didn't fail.  */
7212         CASE (pop_failure_jump):
7213           {
7214             /* We need to pass separate storage for the lowest and
7215                highest registers, even though we don't care about the
7216                actual values.  Otherwise, we will restore only one
7217                register from the stack, since lowest will == highest in
7218                `pop_failure_point'.  */
7219             active_reg_t dummy_low_reg, dummy_high_reg;
7220             UCHAR_T *pdummy = NULL;
7221             const CHAR_T *sdummy = NULL;
7222 
7223             DEBUG_PRINT1 ("EXECUTING pop_failure_jump.\n");
7224             POP_FAILURE_POINT (sdummy, pdummy,
7225                                dummy_low_reg, dummy_high_reg,
7226                                reg_dummy, reg_dummy, reg_info_dummy);
7227           }
7228 	  /* Note fall through.  */
7229 
7230 	unconditional_jump:
7231 #ifdef _LIBC
7232 	  DEBUG_PRINT2 ("\n%p: ", p);
7233 #else
7234 	  DEBUG_PRINT2 ("\n0x%x: ", p);
7235 #endif
7236           /* Note fall through.  */
7237 
7238         /* Unconditionally jump (without popping any failure points).  */
7239         CASE (jump):
7240 	  EXTRACT_NUMBER_AND_INCR (mcnt, p);	/* Get the amount to jump.  */
7241           DEBUG_PRINT2 ("EXECUTING jump %d ", mcnt);
7242 	  p += mcnt;				/* Do the jump.  */
7243 #ifdef _LIBC
7244           DEBUG_PRINT2 ("(to %p).\n", p);
7245 #else
7246           DEBUG_PRINT2 ("(to 0x%x).\n", p);
7247 #endif
7248 	  NEXT;
7249 
7250 
7251         /* We need this opcode so we can detect where alternatives end
7252            in `group_match_null_string_p' et al.  */
7253         CASE (jump_past_alt):
7254           DEBUG_PRINT1 ("EXECUTING jump_past_alt.\n");
7255           goto unconditional_jump;
7256 
7257 
7258         /* Normally, the on_failure_jump pushes a failure point, which
7259            then gets popped at pop_failure_jump.  We will end up at
7260            pop_failure_jump, also, and with a pattern of, say, `a+', we
7261            are skipping over the on_failure_jump, so we have to push
7262            something meaningless for pop_failure_jump to pop.  */
7263         CASE (dummy_failure_jump):
7264           DEBUG_PRINT1 ("EXECUTING dummy_failure_jump.\n");
7265           /* It doesn't matter what we push for the string here.  What
7266              the code at `fail' tests is the value for the pattern.  */
7267           PUSH_FAILURE_POINT (NULL, NULL, -2);
7268           goto unconditional_jump;
7269 
7270 
7271         /* At the end of an alternative, we need to push a dummy failure
7272            point in case we are followed by a `pop_failure_jump', because
7273            we don't want the failure point for the alternative to be
7274            popped.  For example, matching `(a|ab)*' against `aab'
7275            requires that we match the `ab' alternative.  */
7276         CASE (push_dummy_failure):
7277           DEBUG_PRINT1 ("EXECUTING push_dummy_failure.\n");
7278           /* See comments just above at `dummy_failure_jump' about the
7279              two null pointers.  */
7280           PUSH_FAILURE_POINT (NULL, NULL, -2);
7281           NEXT;
7282 
7283         /* Have to succeed matching what follows at least n times.
7284            After that, handle like `on_failure_jump'.  */
7285         CASE (succeed_n):
7286           EXTRACT_NUMBER (mcnt, p + OFFSET_ADDRESS_SIZE);
7287           DEBUG_PRINT2 ("EXECUTING succeed_n %d.\n", mcnt);
7288 
7289           assert (mcnt >= 0);
7290           /* Originally, this is how many times we HAVE to succeed.  */
7291           if (mcnt > 0)
7292             {
7293                mcnt--;
7294 	       p += OFFSET_ADDRESS_SIZE;
7295                STORE_NUMBER_AND_INCR (p, mcnt);
7296 #ifdef _LIBC
7297                DEBUG_PRINT3 ("  Setting %p to %d.\n", p - OFFSET_ADDRESS_SIZE
7298 			     , mcnt);
7299 #else
7300                DEBUG_PRINT3 ("  Setting 0x%x to %d.\n", p - OFFSET_ADDRESS_SIZE
7301 			     , mcnt);
7302 #endif
7303             }
7304 	  else if (mcnt == 0)
7305             {
7306 #ifdef _LIBC
7307               DEBUG_PRINT2 ("  Setting two bytes from %p to no_op.\n",
7308 			    p + OFFSET_ADDRESS_SIZE);
7309 #else
7310               DEBUG_PRINT2 ("  Setting two bytes from 0x%x to no_op.\n",
7311 			    p + OFFSET_ADDRESS_SIZE);
7312 #endif /* _LIBC */
7313 
7314 #ifdef WCHAR
7315 	      p[1] = (UCHAR_T) no_op;
7316 #else
7317 	      p[2] = (UCHAR_T) no_op;
7318               p[3] = (UCHAR_T) no_op;
7319 #endif /* WCHAR */
7320               goto on_failure;
7321             }
7322           NEXT;
7323 
7324         CASE (jump_n):
7325           EXTRACT_NUMBER (mcnt, p + OFFSET_ADDRESS_SIZE);
7326           DEBUG_PRINT2 ("EXECUTING jump_n %d.\n", mcnt);
7327 
7328           /* Originally, this is how many times we CAN jump.  */
7329           if (mcnt)
7330             {
7331                mcnt--;
7332                STORE_NUMBER (p + OFFSET_ADDRESS_SIZE, mcnt);
7333 
7334 #ifdef _LIBC
7335                DEBUG_PRINT3 ("  Setting %p to %d.\n", p + OFFSET_ADDRESS_SIZE,
7336 			     mcnt);
7337 #else
7338                DEBUG_PRINT3 ("  Setting 0x%x to %d.\n", p + OFFSET_ADDRESS_SIZE,
7339 			     mcnt);
7340 #endif /* _LIBC */
7341 	       goto unconditional_jump;
7342             }
7343           /* If we don't have to jump any more, skip over the rest of the command.  */
7344 	  else
7345 	    p += 2 * OFFSET_ADDRESS_SIZE;
7346           NEXT;
7347 
7348 	CASE (set_number_at):
7349 	  {
7350             DEBUG_PRINT1 ("EXECUTING set_number_at.\n");
7351 
7352             EXTRACT_NUMBER_AND_INCR (mcnt, p);
7353             p1 = p + mcnt;
7354             EXTRACT_NUMBER_AND_INCR (mcnt, p);
7355 #ifdef _LIBC
7356             DEBUG_PRINT3 ("  Setting %p to %d.\n", p1, mcnt);
7357 #else
7358             DEBUG_PRINT3 ("  Setting 0x%x to %d.\n", p1, mcnt);
7359 #endif
7360 	    STORE_NUMBER (p1, mcnt);
7361             NEXT;
7362           }
7363 
7364 #if 0
7365 	/* The DEC Alpha C compiler 3.x generates incorrect code for the
7366 	   test  WORDCHAR_P (d - 1) != WORDCHAR_P (d)  in the expansion of
7367 	   AT_WORD_BOUNDARY, so this code is disabled.  Expanding the
7368 	   macro and introducing temporary variables works around the bug.  */
7369 
7370 	CASE (wordbound):
7371 	  DEBUG_PRINT1 ("EXECUTING wordbound.\n");
7372 	  if (AT_WORD_BOUNDARY (d))
7373 	    {
7374 	      NEXT;
7375 	    }
7376 	  goto fail;
7377 
7378 	CASE (notwordbound):
7379 	  DEBUG_PRINT1 ("EXECUTING notwordbound.\n");
7380 	  if (AT_WORD_BOUNDARY (d))
7381 	    goto fail;
7382 	  NEXT;
7383 #else
7384 	CASE (wordbound):
7385 	{
7386 	  boolean prevchar, thischar;
7387 
7388 	  DEBUG_PRINT1 ("EXECUTING wordbound.\n");
7389 	  if (AT_STRINGS_BEG (d) || AT_STRINGS_END (d))
7390 	    {
7391 	      NEXT;
7392 	    }
7393 
7394 	  prevchar = WORDCHAR_P (d - 1);
7395 	  thischar = WORDCHAR_P (d);
7396 	  if (prevchar != thischar)
7397 	    {
7398 	      NEXT;
7399 	    }
7400 	  goto fail;
7401 	}
7402 
7403       CASE (notwordbound):
7404 	{
7405 	  boolean prevchar, thischar;
7406 
7407 	  DEBUG_PRINT1 ("EXECUTING notwordbound.\n");
7408 	  if (AT_STRINGS_BEG (d) || AT_STRINGS_END (d))
7409 	    goto fail;
7410 
7411 	  prevchar = WORDCHAR_P (d - 1);
7412 	  thischar = WORDCHAR_P (d);
7413 	  if (prevchar != thischar)
7414 	    goto fail;
7415 	  NEXT;
7416 	}
7417 #endif
7418 
7419 	CASE (wordbeg):
7420           DEBUG_PRINT1 ("EXECUTING wordbeg.\n");
7421 	  if (!AT_STRINGS_END (d) && WORDCHAR_P (d)
7422 	      && (AT_STRINGS_BEG (d) || !WORDCHAR_P (d - 1)))
7423 	    {
7424 	      NEXT;
7425 	    }
7426           goto fail;
7427 
7428 	CASE (wordend):
7429           DEBUG_PRINT1 ("EXECUTING wordend.\n");
7430 	  if (!AT_STRINGS_BEG (d) && WORDCHAR_P (d - 1)
7431               && (AT_STRINGS_END (d) || !WORDCHAR_P (d)))
7432 	    {
7433 	      NEXT;
7434 	    }
7435           goto fail;
7436 
7437 #ifdef emacs
7438   	CASE (before_dot):
7439           DEBUG_PRINT1 ("EXECUTING before_dot.\n");
7440  	  if (PTR_CHAR_POS ((unsigned char *) d) >= point)
7441   	    goto fail;
7442   	  NEXT;
7443 
7444   	CASE (at_dot):
7445           DEBUG_PRINT1 ("EXECUTING at_dot.\n");
7446  	  if (PTR_CHAR_POS ((unsigned char *) d) != point)
7447   	    goto fail;
7448   	  NEXT;
7449 
7450   	CASE (after_dot):
7451           DEBUG_PRINT1 ("EXECUTING after_dot.\n");
7452           if (PTR_CHAR_POS ((unsigned char *) d) <= point)
7453   	    goto fail;
7454   	  NEXT;
7455 
7456 	CASE (syntaxspec):
7457           DEBUG_PRINT2 ("EXECUTING syntaxspec %d.\n", mcnt);
7458 	  mcnt = *p++;
7459 	  goto matchsyntax;
7460 
7461         CASE (wordchar):
7462           DEBUG_PRINT1 ("EXECUTING Emacs wordchar.\n");
7463 	  mcnt = (int) Sword;
7464         matchsyntax:
7465 	  PREFETCH ();
7466 	  /* Can't use *d++ here; SYNTAX may be an unsafe macro.  */
7467 	  d++;
7468 	  if (SYNTAX (d[-1]) != (enum syntaxcode) mcnt)
7469 	    goto fail;
7470           SET_REGS_MATCHED ();
7471 	  NEXT;
7472 
7473 	CASE (notsyntaxspec):
7474           DEBUG_PRINT2 ("EXECUTING notsyntaxspec %d.\n", mcnt);
7475 	  mcnt = *p++;
7476 	  goto matchnotsyntax;
7477 
7478         CASE (notwordchar):
7479           DEBUG_PRINT1 ("EXECUTING Emacs notwordchar.\n");
7480 	  mcnt = (int) Sword;
7481         matchnotsyntax:
7482 	  PREFETCH ();
7483 	  /* Can't use *d++ here; SYNTAX may be an unsafe macro.  */
7484 	  d++;
7485 	  if (SYNTAX (d[-1]) == (enum syntaxcode) mcnt)
7486 	    goto fail;
7487 	  SET_REGS_MATCHED ();
7488           NEXT;
7489 
7490 #else /* not emacs */
7491 	CASE (wordchar):
7492           DEBUG_PRINT1 ("EXECUTING non-Emacs wordchar.\n");
7493 	  PREFETCH ();
7494           if (!WORDCHAR_P (d))
7495             goto fail;
7496 	  SET_REGS_MATCHED ();
7497           d++;
7498 	  NEXT;
7499 
7500 	CASE (notwordchar):
7501           DEBUG_PRINT1 ("EXECUTING non-Emacs notwordchar.\n");
7502 	  PREFETCH ();
7503 	  if (WORDCHAR_P (d))
7504             goto fail;
7505           SET_REGS_MATCHED ();
7506           d++;
7507 	  NEXT;
7508 #endif /* not emacs */
7509 
7510 #ifndef __GNUC__
7511         default:
7512           abort ();
7513 	}
7514       continue;  /* Successfully executed one pattern command; keep going.  */
7515 #endif
7516 
7517 
7518     /* We goto here if a matching operation fails. */
7519     fail:
7520       if (!FAIL_STACK_EMPTY ())
7521 	{ /* A restart point is known.  Restore to that state.  */
7522           DEBUG_PRINT1 ("\nFAIL:\n");
7523           POP_FAILURE_POINT (d, p,
7524                              lowest_active_reg, highest_active_reg,
7525                              regstart, regend, reg_info);
7526 
7527           /* If this failure point is a dummy, try the next one.  */
7528           if (!p)
7529 	    goto fail;
7530 
7531           /* If we failed to the end of the pattern, don't examine *p.  */
7532 	  assert (p <= pend);
7533           if (p < pend)
7534             {
7535               boolean is_a_jump_n = false;
7536 
7537               /* If failed to a backwards jump that's part of a repetition
7538                  loop, need to pop this failure point and use the next one.  */
7539               switch ((re_opcode_t) *p)
7540                 {
7541                 case jump_n:
7542                   is_a_jump_n = true;
7543                 case maybe_pop_jump:
7544                 case pop_failure_jump:
7545                 case jump:
7546                   p1 = p + 1;
7547                   EXTRACT_NUMBER_AND_INCR (mcnt, p1);
7548                   p1 += mcnt;
7549 
7550                   if ((is_a_jump_n && (re_opcode_t) *p1 == succeed_n)
7551                       || (!is_a_jump_n
7552                           && (re_opcode_t) *p1 == on_failure_jump))
7553                     goto fail;
7554                   break;
7555                 default:
7556                   /* do nothing */ ;
7557                 }
7558             }
7559 
7560           if (d >= string1 && d <= end1)
7561 	    dend = end_match_1;
7562         }
7563       else
7564         break;   /* Matching at this starting point really fails.  */
7565     } /* for (;;) */
7566 
7567   if (best_regs_set)
7568     goto restore_best_regs;
7569 
7570   FREE_VARIABLES ();
7571 
7572   return -1;         			/* Failure to match.  */
7573 } /* re_match_2 */
7574 
7575 /* Subroutine definitions for re_match_2.  */
7576 
7577 
7578 /* We are passed P pointing to a register number after a start_memory.
7579 
7580    Return true if the pattern up to the corresponding stop_memory can
7581    match the empty string, and false otherwise.
7582 
7583    If we find the matching stop_memory, sets P to point to one past its number.
7584    Otherwise, sets P to an undefined byte less than or equal to END.
7585 
7586    We don't handle duplicates properly (yet).  */
7587 
7588 static boolean
PREFIX(group_match_null_string_p)7589 PREFIX(group_match_null_string_p) (UCHAR_T **p, UCHAR_T *end,
7590 				   PREFIX(register_info_type) *reg_info)
7591 {
7592   int mcnt;
7593   /* Point to after the args to the start_memory.  */
7594   UCHAR_T *p1 = *p + 2;
7595 
7596   while (p1 < end)
7597     {
7598       /* Skip over opcodes that can match nothing, and return true or
7599 	 false, as appropriate, when we get to one that can't, or to the
7600          matching stop_memory.  */
7601 
7602       switch ((re_opcode_t) *p1)
7603         {
7604         /* Could be either a loop or a series of alternatives.  */
7605         case on_failure_jump:
7606           p1++;
7607           EXTRACT_NUMBER_AND_INCR (mcnt, p1);
7608 
7609           /* If the next operation is not a jump backwards in the
7610 	     pattern.  */
7611 
7612 	  if (mcnt >= 0)
7613 	    {
7614               /* Go through the on_failure_jumps of the alternatives,
7615                  seeing if any of the alternatives cannot match nothing.
7616                  The last alternative starts with only a jump,
7617                  whereas the rest start with on_failure_jump and end
7618                  with a jump, e.g., here is the pattern for `a|b|c':
7619 
7620                  /on_failure_jump/0/6/exactn/1/a/jump_past_alt/0/6
7621                  /on_failure_jump/0/6/exactn/1/b/jump_past_alt/0/3
7622                  /exactn/1/c
7623 
7624                  So, we have to first go through the first (n-1)
7625                  alternatives and then deal with the last one separately.  */
7626 
7627 
7628               /* Deal with the first (n-1) alternatives, which start
7629                  with an on_failure_jump (see above) that jumps to right
7630                  past a jump_past_alt.  */
7631 
7632               while ((re_opcode_t) p1[mcnt-(1+OFFSET_ADDRESS_SIZE)] ==
7633 		     jump_past_alt)
7634                 {
7635                   /* `mcnt' holds how many bytes long the alternative
7636                      is, including the ending `jump_past_alt' and
7637                      its number.  */
7638 
7639                   if (!PREFIX(alt_match_null_string_p) (p1, p1 + mcnt -
7640 						(1 + OFFSET_ADDRESS_SIZE),
7641 						reg_info))
7642                     return false;
7643 
7644                   /* Move to right after this alternative, including the
7645 		     jump_past_alt.  */
7646                   p1 += mcnt;
7647 
7648                   /* Break if it's the beginning of an n-th alternative
7649                      that doesn't begin with an on_failure_jump.  */
7650                   if ((re_opcode_t) *p1 != on_failure_jump)
7651                     break;
7652 
7653 		  /* Still have to check that it's not an n-th
7654 		     alternative that starts with an on_failure_jump.  */
7655 		  p1++;
7656                   EXTRACT_NUMBER_AND_INCR (mcnt, p1);
7657                   if ((re_opcode_t) p1[mcnt-(1+OFFSET_ADDRESS_SIZE)] !=
7658 		      jump_past_alt)
7659                     {
7660 		      /* Get to the beginning of the n-th alternative.  */
7661                       p1 -= 1 + OFFSET_ADDRESS_SIZE;
7662                       break;
7663                     }
7664                 }
7665 
7666               /* Deal with the last alternative: go back and get number
7667                  of the `jump_past_alt' just before it.  `mcnt' contains
7668                  the length of the alternative.  */
7669               EXTRACT_NUMBER (mcnt, p1 - OFFSET_ADDRESS_SIZE);
7670 
7671               if (!PREFIX(alt_match_null_string_p) (p1, p1 + mcnt, reg_info))
7672                 return false;
7673 
7674               p1 += mcnt;	/* Get past the n-th alternative.  */
7675             } /* if mcnt > 0 */
7676           break;
7677 
7678 
7679         case stop_memory:
7680 	  assert (p1[1] == **p);
7681           *p = p1 + 2;
7682           return true;
7683 
7684 
7685         default:
7686           if (!PREFIX(common_op_match_null_string_p) (&p1, end, reg_info))
7687             return false;
7688         }
7689     } /* while p1 < end */
7690 
7691   return false;
7692 } /* group_match_null_string_p */
7693 
7694 
7695 /* Similar to group_match_null_string_p, but doesn't deal with alternatives:
7696    It expects P to be the first byte of a single alternative and END one
7697    byte past the last. The alternative can contain groups.  */
7698 
7699 static boolean
PREFIX(alt_match_null_string_p)7700 PREFIX(alt_match_null_string_p) (UCHAR_T *p, UCHAR_T *end,
7701 				 PREFIX(register_info_type) *reg_info)
7702 {
7703   int mcnt;
7704   UCHAR_T *p1 = p;
7705 
7706   while (p1 < end)
7707     {
7708       /* Skip over opcodes that can match nothing, and break when we get
7709          to one that can't.  */
7710 
7711       switch ((re_opcode_t) *p1)
7712         {
7713 	/* It's a loop.  */
7714         case on_failure_jump:
7715           p1++;
7716           EXTRACT_NUMBER_AND_INCR (mcnt, p1);
7717           p1 += mcnt;
7718           break;
7719 
7720 	default:
7721           if (!PREFIX(common_op_match_null_string_p) (&p1, end, reg_info))
7722             return false;
7723         }
7724     }  /* while p1 < end */
7725 
7726   return true;
7727 } /* alt_match_null_string_p */
7728 
7729 
7730 /* Deals with the ops common to group_match_null_string_p and
7731    alt_match_null_string_p.
7732 
7733    Sets P to one after the op and its arguments, if any.  */
7734 
7735 static boolean
PREFIX(common_op_match_null_string_p)7736 PREFIX(common_op_match_null_string_p) (UCHAR_T **p, UCHAR_T *end,
7737 				       PREFIX(register_info_type) *reg_info)
7738 {
7739   int mcnt;
7740   boolean ret;
7741   int reg_no;
7742   UCHAR_T *p1 = *p;
7743 
7744   switch ((re_opcode_t) *p1++)
7745     {
7746     case no_op:
7747     case begline:
7748     case endline:
7749     case begbuf:
7750     case endbuf:
7751     case wordbeg:
7752     case wordend:
7753     case wordbound:
7754     case notwordbound:
7755 #ifdef emacs
7756     case before_dot:
7757     case at_dot:
7758     case after_dot:
7759 #endif
7760       break;
7761 
7762     case start_memory:
7763       reg_no = *p1;
7764       assert (reg_no > 0 && reg_no <= MAX_REGNUM);
7765       ret = PREFIX(group_match_null_string_p) (&p1, end, reg_info);
7766 
7767       /* Have to set this here in case we're checking a group which
7768          contains a group and a back reference to it.  */
7769 
7770       if (REG_MATCH_NULL_STRING_P (reg_info[reg_no]) == MATCH_NULL_UNSET_VALUE)
7771         REG_MATCH_NULL_STRING_P (reg_info[reg_no]) = ret;
7772 
7773       if (!ret)
7774         return false;
7775       break;
7776 
7777     /* If this is an optimized succeed_n for zero times, make the jump.  */
7778     case jump:
7779       EXTRACT_NUMBER_AND_INCR (mcnt, p1);
7780       if (mcnt >= 0)
7781         p1 += mcnt;
7782       else
7783         return false;
7784       break;
7785 
7786     case succeed_n:
7787       /* Get to the number of times to succeed.  */
7788       p1 += OFFSET_ADDRESS_SIZE;
7789       EXTRACT_NUMBER_AND_INCR (mcnt, p1);
7790 
7791       if (mcnt == 0)
7792         {
7793           p1 -= 2 * OFFSET_ADDRESS_SIZE;
7794           EXTRACT_NUMBER_AND_INCR (mcnt, p1);
7795           p1 += mcnt;
7796         }
7797       else
7798         return false;
7799       break;
7800 
7801     case duplicate:
7802       if (!REG_MATCH_NULL_STRING_P (reg_info[*p1]))
7803         return false;
7804       break;
7805 
7806     case set_number_at:
7807       p1 += 2 * OFFSET_ADDRESS_SIZE;
7808 
7809     default:
7810       /* All other opcodes mean we cannot match the empty string.  */
7811       return false;
7812   }
7813 
7814   *p = p1;
7815   return true;
7816 } /* common_op_match_null_string_p */
7817 
7818 
7819 /* Return zero if TRANSLATE[S1] and TRANSLATE[S2] are identical for LEN
7820    bytes; nonzero otherwise.  */
7821 
7822 static int
PREFIX(bcmp_translate)7823 PREFIX(bcmp_translate) (const CHAR_T *s1, const CHAR_T *s2,
7824 			register int len,
7825 			RE_TRANSLATE_TYPE translate)
7826 {
7827   register const UCHAR_T *p1 = (const UCHAR_T *) s1;
7828   register const UCHAR_T *p2 = (const UCHAR_T *) s2;
7829   while (len)
7830     {
7831 #ifdef WCHAR
7832       if (((*p1<=0xff)?translate[*p1++]:*p1++)
7833 	  != ((*p2<=0xff)?translate[*p2++]:*p2++))
7834 	return 1;
7835 #else /* BYTE */
7836       if (translate[*p1++] != translate[*p2++]) return 1;
7837 #endif /* WCHAR */
7838       len--;
7839     }
7840   return 0;
7841 }
7842 
7843 
7844 #else /* not INSIDE_RECURSION */
7845 
7846 /* Entry points for GNU code.  */
7847 
7848 /* re_compile_pattern is the GNU regular expression compiler: it
7849    compiles PATTERN (of length SIZE) and puts the result in BUFP.
7850    Returns 0 if the pattern was valid, otherwise an error string.
7851 
7852    Assumes the `allocated' (and perhaps `buffer') and `translate' fields
7853    are set in BUFP on entry.
7854 
7855    We call regex_compile to do the actual compilation.  */
7856 
7857 const char *
7858 re_compile_pattern (const char *pattern,
7859 		    size_t length,
7860 		    struct re_pattern_buffer *bufp)
7861 {
7862   reg_errcode_t ret;
7863 
7864   /* GNU code is written to assume at least RE_NREGS registers will be set
7865      (and at least one extra will be -1).  */
7866   bufp->regs_allocated = REGS_UNALLOCATED;
7867 
7868   /* And GNU code determines whether or not to get register information
7869      by passing null for the REGS argument to re_match, etc., not by
7870      setting no_sub.  */
7871   bufp->no_sub = 0;
7872 
7873   /* Match anchors at newline.  */
7874   bufp->newline_anchor = 1;
7875 
7876 # ifdef MBS_SUPPORT
7877   if (MB_CUR_MAX != 1)
7878     ret = wcs_regex_compile (pattern, length, re_syntax_options, bufp);
7879   else
7880 # endif
7881     ret = byte_regex_compile (pattern, length, re_syntax_options, bufp);
7882 
7883   if (!ret)
7884     return NULL;
7885   return gettext (re_error_msgid + re_error_msgid_idx[(int) ret]);
7886 }
7887 #ifdef _LIBC
7888 weak_alias (__re_compile_pattern, re_compile_pattern)
7889 #endif
7890 
7891 /* Entry points compatible with 4.2 BSD regex library.  We don't define
7892    them unless specifically requested.  */
7893 
7894 #if defined _REGEX_RE_COMP || defined _LIBC
7895 
7896 /* BSD has one and only one pattern buffer.  */
7897 static struct re_pattern_buffer re_comp_buf;
7898 
7899 char *
7900 #ifdef _LIBC
7901 /* Make these definitions weak in libc, so POSIX programs can redefine
7902    these names if they don't use our functions, and still use
7903    regcomp/regexec below without link errors.  */
7904 weak_function
7905 #endif
7906 re_comp (const char *s)
7907 {
7908   reg_errcode_t ret;
7909 
7910   if (!s)
7911     {
7912       if (!re_comp_buf.buffer)
7913 	return (char *) gettext ("No previous regular expression");
7914       return 0;
7915     }
7916 
7917   if (!re_comp_buf.buffer)
7918     {
7919       re_comp_buf.buffer = malloc (200);
7920       if (re_comp_buf.buffer == NULL)
7921         return (char *) gettext (re_error_msgid
7922 				 + re_error_msgid_idx[(int) REG_ESPACE]);
7923       re_comp_buf.allocated = 200;
7924 
7925       re_comp_buf.fastmap = malloc (1 << BYTEWIDTH);
7926       if (re_comp_buf.fastmap == NULL)
7927 	return (char *) gettext (re_error_msgid
7928 				 + re_error_msgid_idx[(int) REG_ESPACE]);
7929     }
7930 
7931   /* Since `re_exec' always passes NULL for the `regs' argument, we
7932      don't need to initialize the pattern buffer fields which affect it.  */
7933 
7934   /* Match anchors at newlines.  */
7935   re_comp_buf.newline_anchor = 1;
7936 
7937 # ifdef MBS_SUPPORT
7938   if (MB_CUR_MAX != 1)
7939     ret = wcs_regex_compile (s, strlen (s), re_syntax_options, &re_comp_buf);
7940   else
7941 # endif
7942     ret = byte_regex_compile (s, strlen (s), re_syntax_options, &re_comp_buf);
7943 
7944   if (!ret)
7945     return NULL;
7946 
7947   /* Yes, we're discarding `const' here if !HAVE_LIBINTL.  */
7948   return (char *) gettext (re_error_msgid + re_error_msgid_idx[(int) ret]);
7949 }
7950 
7951 
7952 int
7953 #ifdef _LIBC
7954 weak_function
7955 #endif
7956 re_exec (const char *s)
7957 {
7958   const int len = strlen (s);
7959   return
7960     0 <= re_search (&re_comp_buf, s, len, 0, len, 0);
7961 }
7962 
7963 #endif /* _REGEX_RE_COMP */
7964 
7965 /* POSIX.2 functions.  Don't define these for Emacs.  */
7966 
7967 #ifndef emacs
7968 
7969 /* regcomp takes a regular expression as a string and compiles it.
7970 
7971    PREG is a regex_t *.  We do not expect any fields to be initialized,
7972    since POSIX says we shouldn't.  Thus, we set
7973 
7974      `buffer' to the compiled pattern;
7975      `used' to the length of the compiled pattern;
7976      `syntax' to RE_SYNTAX_POSIX_EXTENDED if the
7977        REG_EXTENDED bit in CFLAGS is set; otherwise, to
7978        RE_SYNTAX_POSIX_BASIC;
7979      `newline_anchor' to REG_NEWLINE being set in CFLAGS;
7980      `fastmap' to an allocated space for the fastmap;
7981      `fastmap_accurate' to zero;
7982      `re_nsub' to the number of subexpressions in PATTERN.
7983 
7984    PATTERN is the address of the pattern string.
7985 
7986    CFLAGS is a series of bits which affect compilation.
7987 
7988      If REG_EXTENDED is set, we use POSIX extended syntax; otherwise, we
7989      use POSIX basic syntax.
7990 
7991      If REG_NEWLINE is set, then . and [^...] don't match newline.
7992      Also, regexec will try a match beginning after every newline.
7993 
7994      If REG_ICASE is set, then we considers upper- and lowercase
7995      versions of letters to be equivalent when matching.
7996 
7997      If REG_NOSUB is set, then when PREG is passed to regexec, that
7998      routine will report only success or failure, and nothing about the
7999      registers.
8000 
8001    It returns 0 if it succeeds, nonzero if it doesn't.  (See regex.h for
8002    the return codes and their meanings.)  */
8003 
8004 int
8005 regcomp (regex_t *preg, const char *pattern, int cflags)
8006 {
8007   reg_errcode_t ret;
8008   reg_syntax_t syntax
8009     = (cflags & REG_EXTENDED) ?
8010       RE_SYNTAX_POSIX_EXTENDED : RE_SYNTAX_POSIX_BASIC;
8011 
8012   /* regex_compile will allocate the space for the compiled pattern.  */
8013   preg->buffer = 0;
8014   preg->allocated = 0;
8015   preg->used = 0;
8016 
8017   /* Try to allocate space for the fastmap.  */
8018   preg->fastmap = malloc (1 << BYTEWIDTH);
8019 
8020   if (cflags & REG_ICASE)
8021     {
8022       unsigned i;
8023 
8024       preg->translate = malloc (CHAR_SET_SIZE
8025 				* sizeof (*(RE_TRANSLATE_TYPE)0));
8026       if (preg->translate == NULL)
8027         return (int) REG_ESPACE;
8028 
8029       /* Map uppercase characters to corresponding lowercase ones.  */
8030       for (i = 0; i < CHAR_SET_SIZE; i++)
8031         preg->translate[i] = ISUPPER (i) ? TOLOWER (i) : i;
8032     }
8033   else
8034     preg->translate = NULL;
8035 
8036   /* If REG_NEWLINE is set, newlines are treated differently.  */
8037   if (cflags & REG_NEWLINE)
8038     { /* REG_NEWLINE implies neither . nor [^...] match newline.  */
8039       syntax &= ~RE_DOT_NEWLINE;
8040       syntax |= RE_HAT_LISTS_NOT_NEWLINE;
8041       /* It also changes the matching behavior.  */
8042       preg->newline_anchor = 1;
8043     }
8044   else
8045     preg->newline_anchor = 0;
8046 
8047   preg->no_sub = !!(cflags & REG_NOSUB);
8048 
8049   /* POSIX says a null character in the pattern terminates it, so we
8050      can use strlen here in compiling the pattern.  */
8051 # ifdef MBS_SUPPORT
8052   if (MB_CUR_MAX != 1)
8053     ret = wcs_regex_compile (pattern, strlen (pattern), syntax, preg);
8054   else
8055 # endif
8056     ret = byte_regex_compile (pattern, strlen (pattern), syntax, preg);
8057 
8058   /* POSIX doesn't distinguish between an unmatched open-group and an
8059      unmatched close-group: both are REG_EPAREN.  */
8060   if (ret == REG_ERPAREN) ret = REG_EPAREN;
8061 
8062   if (ret == REG_NOERROR && preg->fastmap)
8063     {
8064       /* Compute the fastmap now, since regexec cannot modify the pattern
8065 	 buffer.  */
8066       if (re_compile_fastmap (preg) == -2)
8067 	{
8068 	  /* Some error occurred while computing the fastmap, just forget
8069 	     about it.  */
8070 	  free (preg->fastmap);
8071 	  preg->fastmap = NULL;
8072 	}
8073     }
8074 
8075   return (int) ret;
8076 }
8077 #ifdef _LIBC
8078 weak_alias (__regcomp, regcomp)
8079 #endif
8080 
8081 
8082 /* regexec searches for a given pattern, specified by PREG, in the
8083    string STRING.
8084 
8085    If NMATCH is zero or REG_NOSUB was set in the cflags argument to
8086    `regcomp', we ignore PMATCH.  Otherwise, we assume PMATCH has at
8087    least NMATCH elements, and we set them to the offsets of the
8088    corresponding matched substrings.
8089 
8090    EFLAGS specifies `execution flags' which affect matching: if
8091    REG_NOTBOL is set, then ^ does not match at the beginning of the
8092    string; if REG_NOTEOL is set, then $ does not match at the end.
8093 
8094    We return 0 if we find a match and REG_NOMATCH if not.  */
8095 
8096 int
8097 regexec (const regex_t *preg, const char *string,
8098 	 size_t nmatch, regmatch_t pmatch[], int eflags)
8099 {
8100   int ret;
8101   struct re_registers regs;
8102   regex_t private_preg;
8103   int len = strlen (string);
8104   boolean want_reg_info = !preg->no_sub && nmatch > 0;
8105 
8106   private_preg = *preg;
8107 
8108   private_preg.not_bol = !!(eflags & REG_NOTBOL);
8109   private_preg.not_eol = !!(eflags & REG_NOTEOL);
8110 
8111   /* The user has told us exactly how many registers to return
8112      information about, via `nmatch'.  We have to pass that on to the
8113      matching routines.  */
8114   private_preg.regs_allocated = REGS_FIXED;
8115 
8116   if (want_reg_info)
8117     {
8118       regs.num_regs = nmatch;
8119       regs.start = TALLOC (nmatch * 2, regoff_t);
8120       if (regs.start == NULL)
8121         return (int) REG_NOMATCH;
8122       regs.end = regs.start + nmatch;
8123     }
8124 
8125   /* Perform the searching operation.  */
8126   ret = re_search (&private_preg, string, len,
8127                    /* start: */ 0, /* range: */ len,
8128                    want_reg_info ? &regs : 0);
8129 
8130   /* Copy the register information to the POSIX structure.  */
8131   if (want_reg_info)
8132     {
8133       if (ret >= 0)
8134         {
8135           unsigned r;
8136 
8137           for (r = 0; r < nmatch; r++)
8138             {
8139               pmatch[r].rm_so = regs.start[r];
8140               pmatch[r].rm_eo = regs.end[r];
8141             }
8142         }
8143 
8144       /* If we needed the temporary register info, free the space now.  */
8145       free (regs.start);
8146     }
8147 
8148   /* We want zero return to mean success, unlike `re_search'.  */
8149   return ret >= 0 ? (int) REG_NOERROR : (int) REG_NOMATCH;
8150 }
8151 #ifdef _LIBC
8152 weak_alias (__regexec, regexec)
8153 #endif
8154 
8155 
8156 /* Returns a message corresponding to an error code, ERRCODE, returned
8157    from either regcomp or regexec.   We don't use PREG here.  */
8158 
8159 size_t
8160 regerror (int errcode, const regex_t *preg, char *errbuf, size_t errbuf_size)
8161 {
8162   const char *msg;
8163   size_t msg_size;
8164 
8165   if (errcode < 0
8166       || errcode >= (int) (sizeof (re_error_msgid_idx)
8167 			   / sizeof (re_error_msgid_idx[0])))
8168     /* Only error codes returned by the rest of the code should be passed
8169        to this routine.  If we are given anything else, or if other regex
8170        code generates an invalid error code, then the program has a bug.
8171        Dump core so we can fix it.  */
8172     abort ();
8173 
8174   msg = gettext (re_error_msgid + re_error_msgid_idx[errcode]);
8175 
8176   msg_size = strlen (msg) + 1; /* Includes the null.  */
8177 
8178   if (errbuf_size != 0)
8179     {
8180       if (msg_size > errbuf_size)
8181         {
8182 #if defined HAVE_MEMPCPY || defined _LIBC
8183 	  *((char *) mempcpy (errbuf, msg, errbuf_size - 1)) = '\0';
8184 #else
8185           memcpy (errbuf, msg, errbuf_size - 1);
8186           errbuf[errbuf_size - 1] = 0;
8187 #endif
8188         }
8189       else
8190         memcpy (errbuf, msg, msg_size);
8191     }
8192 
8193   return msg_size;
8194 }
8195 #ifdef _LIBC
8196 weak_alias (__regerror, regerror)
8197 #endif
8198 
8199 
8200 /* Free dynamically allocated space used by PREG.  */
8201 
8202 void
8203 regfree (regex_t *preg)
8204 {
8205   if (preg->buffer != NULL)
8206     free (preg->buffer);
8207   preg->buffer = NULL;
8208 
8209   preg->allocated = 0;
8210   preg->used = 0;
8211 
8212   if (preg->fastmap != NULL)
8213     free (preg->fastmap);
8214   preg->fastmap = NULL;
8215   preg->fastmap_accurate = 0;
8216 
8217   if (preg->translate != NULL)
8218     free (preg->translate);
8219   preg->translate = NULL;
8220 }
8221 #ifdef _LIBC
8222 weak_alias (__regfree, regfree)
8223 #endif
8224 
8225 #endif /* not emacs  */
8226 
8227 #endif /* not INSIDE_RECURSION */
8228 
8229 
8230 #undef STORE_NUMBER
8231 #undef STORE_NUMBER_AND_INCR
8232 #undef EXTRACT_NUMBER
8233 #undef EXTRACT_NUMBER_AND_INCR
8234 
8235 #undef DEBUG_PRINT_COMPILED_PATTERN
8236 #undef DEBUG_PRINT_DOUBLE_STRING
8237 
8238 #undef INIT_FAIL_STACK
8239 #undef RESET_FAIL_STACK
8240 #undef DOUBLE_FAIL_STACK
8241 #undef PUSH_PATTERN_OP
8242 #undef PUSH_FAILURE_POINTER
8243 #undef PUSH_FAILURE_INT
8244 #undef PUSH_FAILURE_ELT
8245 #undef POP_FAILURE_POINTER
8246 #undef POP_FAILURE_INT
8247 #undef POP_FAILURE_ELT
8248 #undef DEBUG_PUSH
8249 #undef DEBUG_POP
8250 #undef PUSH_FAILURE_POINT
8251 #undef POP_FAILURE_POINT
8252 
8253 #undef REG_UNSET_VALUE
8254 #undef REG_UNSET
8255 
8256 #undef PATFETCH
8257 #undef PATFETCH_RAW
8258 #undef PATUNFETCH
8259 #undef TRANSLATE
8260 
8261 #undef INIT_BUF_SIZE
8262 #undef GET_BUFFER_SPACE
8263 #undef BUF_PUSH
8264 #undef BUF_PUSH_2
8265 #undef BUF_PUSH_3
8266 #undef STORE_JUMP
8267 #undef STORE_JUMP2
8268 #undef INSERT_JUMP
8269 #undef INSERT_JUMP2
8270 #undef EXTEND_BUFFER
8271 #undef GET_UNSIGNED_NUMBER
8272 #undef FREE_STACK_RETURN
8273 
8274 # undef POINTER_TO_OFFSET
8275 # undef MATCHING_IN_FRST_STRING
8276 # undef PREFETCH
8277 # undef AT_STRINGS_BEG
8278 # undef AT_STRINGS_END
8279 # undef WORDCHAR_P
8280 # undef FREE_VAR
8281 # undef FREE_VARIABLES
8282 # undef NO_HIGHEST_ACTIVE_REG
8283 # undef NO_LOWEST_ACTIVE_REG
8284 
8285 # undef CHAR_T
8286 # undef UCHAR_T
8287 # undef COMPILED_BUFFER_VAR
8288 # undef OFFSET_ADDRESS_SIZE
8289 # undef CHAR_CLASS_SIZE
8290 # undef PREFIX
8291 # undef ARG_PREFIX
8292 # undef PUT_CHAR
8293 # undef BYTE
8294 # undef WCHAR
8295 
8296 # define DEFINED_ONCE
8297