xref: /plan9/sys/src/ape/cmd/diff/regex.c (revision 0b459c2cb92b7c9d88818e9a2f72e678e5bc4553)
1 /* Extended regular expression matching and search library, version
2    0.12.  (Implements POSIX draft P10003.2/D11.2, except for
3    internationalization features.)
4 
5    Copyright (C) 1993, 1994, 1995, 1996, 1997, 1998 Free Software Foundation, Inc.
6 
7    This program is free software; you can redistribute it and/or modify
8    it under the terms of the GNU General Public License as published by
9    the Free Software Foundation; either version 2, or (at your option)
10    any later version.
11 
12    This program is distributed in the hope that it will be useful,
13    but WITHOUT ANY WARRANTY; without even the implied warranty of
14    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.	 See the
15    GNU General Public License for more details.
16 
17    You should have received a copy of the GNU General Public License
18    along with this program; if not, write to the Free Software
19    Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
20    USA.	 */
21 
22 /* AIX requires this to be the first thing in the file. */
23 #if defined (_AIX) && !defined (REGEX_MALLOC)
24   #pragma alloca
25 #endif
26 
27 #undef	_GNU_SOURCE
28 #define _GNU_SOURCE
29 
30 #ifdef emacs
31 /* Converts the pointer to the char to BEG-based offset from the start.	 */
32 #define PTR_TO_OFFSET(d)						\
33 	POS_AS_IN_BUFFER (MATCHING_IN_FIRST_STRING			\
34 			  ? (d) - string1 : (d) - (string2 - size1))
35 #define POS_AS_IN_BUFFER(p) ((p) + (NILP (re_match_object) || BUFFERP (re_match_object)))
36 #else
37 #define PTR_TO_OFFSET(d) 0
38 #endif
39 
40 #include "config.h"
41 
42 /* We need this for `regex.h', and perhaps for the Emacs include files.	 */
43 #include <sys/types.h>
44 
45 /* This is for other GNU distributions with internationalized messages.	 */
46 #if HAVE_LIBINTL_H || defined (_LIBC)
47 # include <libintl.h>
48 #else
49 # define gettext(msgid) (msgid)
50 #endif
51 
52 #ifndef gettext_noop
53 /* This define is so xgettext can find the internationalizable
54    strings.  */
55 #define gettext_noop(String) String
56 #endif
57 
58 /* The `emacs' switch turns on certain matching commands
59    that make sense only in Emacs. */
60 #ifdef emacs
61 
62 #include "lisp.h"
63 #include "buffer.h"
64 
65 /* Make syntax table lookup grant data in gl_state.  */
66 #define SYNTAX_ENTRY_VIA_PROPERTY
67 
68 #include "syntax.h"
69 #include "charset.h"
70 #include "category.h"
71 
72 #define malloc xmalloc
73 #define realloc xrealloc
74 #define free xfree
75 
76 #else  /* not emacs */
77 
78 /* If we are not linking with Emacs proper,
79    we can't use the relocating allocator
80    even if config.h says that we can.  */
81 #undef REL_ALLOC
82 
83 #if defined (STDC_HEADERS) || defined (_LIBC)
84 #include <stdlib.h>
85 #else
86 char *malloc ();
87 char *realloc ();
88 #endif
89 
90 /* When used in Emacs's lib-src, we need to get bzero and bcopy somehow.
91    If nothing else has been done, use the method below.	 */
92 #ifdef INHIBIT_STRING_HEADER
93 #if !(defined (HAVE_BZERO) && defined (HAVE_BCOPY))
94 #if !defined (bzero) && !defined (bcopy)
95 #undef INHIBIT_STRING_HEADER
96 #endif
97 #endif
98 #endif
99 
100 /* This is the normal way of making sure we have a bcopy and a bzero.
101    This is used in most programs--a few other programs avoid this
102    by defining INHIBIT_STRING_HEADER.  */
103 #ifndef INHIBIT_STRING_HEADER
104 #if defined (HAVE_STRING_H) || defined (STDC_HEADERS) || defined (_LIBC)
105 #include <string.h>
106 #ifndef bcmp
107 #define bcmp(s1, s2, n)	memcmp ((s1), (s2), (n))
108 #endif
109 #ifndef bcopy
110 #define bcopy(s, d, n)	memcpy ((d), (s), (n))
111 #endif
112 #ifndef bzero
113 #define bzero(s, n)	memset ((s), 0, (n))
114 #endif
115 #else
116 #include <strings.h>
117 #endif
118 #endif
119 
120 /* Define the syntax stuff for \<, \>, etc.  */
121 
122 /* This must be nonzero for the wordchar and notwordchar pattern
123    commands in re_match_2.  */
124 #ifndef Sword
125 #define Sword 1
126 #endif
127 
128 #ifdef SWITCH_ENUM_BUG
129 #define SWITCH_ENUM_CAST(x) ((int)(x))
130 #else
131 #define SWITCH_ENUM_CAST(x) (x)
132 #endif
133 
134 #ifdef SYNTAX_TABLE
135 
136 extern char *re_syntax_table;
137 
138 #else /* not SYNTAX_TABLE */
139 
140 /* How many characters in the character set.  */
141 #define CHAR_SET_SIZE 256
142 
143 static char re_syntax_table[CHAR_SET_SIZE];
144 
145 static void
init_syntax_once()146 init_syntax_once ()
147 {
148    register int c;
149    static int done = 0;
150 
151    if (done)
152      return;
153 
154    bzero (re_syntax_table, sizeof re_syntax_table);
155 
156    for (c = 'a'; c <= 'z'; c++)
157      re_syntax_table[c] = Sword;
158 
159    for (c = 'A'; c <= 'Z'; c++)
160      re_syntax_table[c] = Sword;
161 
162    for (c = '0'; c <= '9'; c++)
163      re_syntax_table[c] = Sword;
164 
165    re_syntax_table['_'] = Sword;
166 
167    done = 1;
168 }
169 
170 #endif /* not SYNTAX_TABLE */
171 
172 #define SYNTAX(c) re_syntax_table[c]
173 
174 /* Dummy macros for non-Emacs environments.  */
175 #define BASE_LEADING_CODE_P(c) (0)
176 #define WORD_BOUNDARY_P(c1, c2) (0)
177 #define CHAR_HEAD_P(p) (1)
178 #define SINGLE_BYTE_CHAR_P(c) (1)
179 #define SAME_CHARSET_P(c1, c2) (1)
180 #define MULTIBYTE_FORM_LENGTH(p, s) (1)
181 #define STRING_CHAR(p, s) (*(p))
182 #define STRING_CHAR_AND_LENGTH(p, s, actual_len) ((actual_len) = 1, *(p))
183 #define GET_CHAR_AFTER_2(c, p, str1, end1, str2, end2) \
184   (c = ((p) == (end1) ? *(str2) : *(p)))
185 #define GET_CHAR_BEFORE_2(c, p, str1, end1, str2, end2) \
186   (c = ((p) == (str2) ? *((end1) - 1) : *((p) - 1)))
187 #endif /* not emacs */
188 
189 /* Get the interface, including the syntax bits.  */
190 #include "regex.h"
191 
192 /* isalpha etc. are used for the character classes.  */
193 #include <ctype.h>
194 
195 /* Jim Meyering writes:
196 
197    "... Some ctype macros are valid only for character codes that
198    isascii says are ASCII (SGI's IRIX-4.0.5 is one such system --when
199    using /bin/cc or gcc but without giving an ansi option).  So, all
200    ctype uses should be through macros like ISPRINT...	If
201    STDC_HEADERS is defined, then autoconf has verified that the ctype
202    macros don't need to be guarded with references to isascii. ...
203    Defining isascii to 1 should let any compiler worth its salt
204    eliminate the && through constant folding."	*/
205 
206 #if defined (STDC_HEADERS) || (!defined (isascii) && !defined (HAVE_ISASCII))
207 #define ISASCII(c) 1
208 #else
209 #define ISASCII(c) isascii(c)
210 #endif
211 
212 #ifdef isblank
213 #define ISBLANK(c) (ISASCII (c) && isblank (c))
214 #else
215 #define ISBLANK(c) ((c) == ' ' || (c) == '\t')
216 #endif
217 #ifdef isgraph
218 #define ISGRAPH(c) (ISASCII (c) && isgraph (c))
219 #else
220 #define ISGRAPH(c) (ISASCII (c) && isprint (c) && !isspace (c))
221 #endif
222 
223 #define ISPRINT(c) (ISASCII (c) && isprint (c))
224 #define ISDIGIT(c) (ISASCII (c) && isdigit (c))
225 #define ISALNUM(c) (ISASCII (c) && isalnum (c))
226 #define ISALPHA(c) (ISASCII (c) && isalpha (c))
227 #define ISCNTRL(c) (ISASCII (c) && iscntrl (c))
228 #define ISLOWER(c) (ISASCII (c) && islower (c))
229 #define ISPUNCT(c) (ISASCII (c) && ispunct (c))
230 #define ISSPACE(c) (ISASCII (c) && isspace (c))
231 #define ISUPPER(c) (ISASCII (c) && isupper (c))
232 #define ISXDIGIT(c) (ISASCII (c) && isxdigit (c))
233 
234 #ifndef NULL
235 #define NULL (void *)0
236 #endif
237 
238 /* We remove any previous definition of `SIGN_EXTEND_CHAR',
239    since ours (we hope) works properly with all combinations of
240    machines, compilers, `char' and `unsigned char' argument types.
241    (Per Bothner suggested the basic approach.)	*/
242 #undef SIGN_EXTEND_CHAR
243 #if __STDC__
244 #define SIGN_EXTEND_CHAR(c) ((signed char) (c))
245 #else  /* not __STDC__ */
246 /* As in Harbison and Steele.  */
247 #define SIGN_EXTEND_CHAR(c) ((((unsigned char) (c)) ^ 128) - 128)
248 #endif
249 
250 /* Should we use malloc or alloca?  If REGEX_MALLOC is not defined, we
251    use `alloca' instead of `malloc'.  This is because using malloc in
252    re_search* or re_match* could cause memory leaks when C-g is used in
253    Emacs; also, malloc is slower and causes storage fragmentation.  On
254    the other hand, malloc is more portable, and easier to debug.
255 
256    Because we sometimes use alloca, some routines have to be macros,
257    not functions -- `alloca'-allocated space disappears at the end of the
258    function it is called in.  */
259 
260 #ifdef REGEX_MALLOC
261 
262 #define REGEX_ALLOCATE malloc
263 #define REGEX_REALLOCATE(source, osize, nsize) realloc (source, nsize)
264 #define REGEX_FREE free
265 
266 #else /* not REGEX_MALLOC  */
267 
268 /* Emacs already defines alloca, sometimes.  */
269 #ifndef alloca
270 
271 /* Make alloca work the best possible way.  */
272 #ifdef __GNUC__
273 #define alloca __builtin_alloca
274 #else /* not __GNUC__ */
275 #if HAVE_ALLOCA_H
276 #include <alloca.h>
277 #else /* not __GNUC__ or HAVE_ALLOCA_H */
278 #if 0 /* It is a bad idea to declare alloca.  We always cast the result.  */
279 #ifndef _AIX /* Already did AIX, up at the top.	 */
280 char *alloca ();
281 #endif /* not _AIX */
282 #endif
283 #endif /* not HAVE_ALLOCA_H */
284 #endif /* not __GNUC__ */
285 
286 #endif /* not alloca */
287 
288 #define REGEX_ALLOCATE alloca
289 
290 /* Assumes a `char *destination' variable.  */
291 #define REGEX_REALLOCATE(source, osize, nsize)				\
292   (destination = (char *) alloca (nsize),				\
293    bcopy (source, destination, osize),					\
294    destination)
295 
296 /* No need to do anything to free, after alloca.  */
297 #define REGEX_FREE(arg) ((void)0) /* Do nothing!  But inhibit gcc warning.  */
298 
299 #endif /* not REGEX_MALLOC */
300 
301 /* Define how to allocate the failure stack.  */
302 
303 #if defined (REL_ALLOC) && defined (REGEX_MALLOC)
304 
305 #define REGEX_ALLOCATE_STACK(size)				\
306   r_alloc (&failure_stack_ptr, (size))
307 #define REGEX_REALLOCATE_STACK(source, osize, nsize)		\
308   r_re_alloc (&failure_stack_ptr, (nsize))
309 #define REGEX_FREE_STACK(ptr)					\
310   r_alloc_free (&failure_stack_ptr)
311 
312 #else /* not using relocating allocator */
313 
314 #ifdef REGEX_MALLOC
315 
316 #define REGEX_ALLOCATE_STACK malloc
317 #define REGEX_REALLOCATE_STACK(source, osize, nsize) realloc (source, nsize)
318 #define REGEX_FREE_STACK free
319 
320 #else /* not REGEX_MALLOC */
321 
322 #define REGEX_ALLOCATE_STACK alloca
323 
324 #define REGEX_REALLOCATE_STACK(source, osize, nsize)			\
325    REGEX_REALLOCATE (source, osize, nsize)
326 /* No need to explicitly free anything.	 */
327 #define REGEX_FREE_STACK(arg)
328 
329 #endif /* not REGEX_MALLOC */
330 #endif /* not using relocating allocator */
331 
332 
333 /* True if `size1' is non-NULL and PTR is pointing anywhere inside
334    `string1' or just past its end.  This works if PTR is NULL, which is
335    a good thing.  */
336 #define FIRST_STRING_P(ptr)					\
337   (size1 && string1 <= (ptr) && (ptr) <= string1 + size1)
338 
339 /* (Re)Allocate N items of type T using malloc, or fail.  */
340 #define TALLOC(n, t) ((t *) malloc ((n) * sizeof (t)))
341 #define RETALLOC(addr, n, t) ((addr) = (t *) realloc (addr, (n) * sizeof (t)))
342 #define RETALLOC_IF(addr, n, t) \
343   if (addr) RETALLOC((addr), (n), t); else (addr) = TALLOC ((n), t)
344 #define REGEX_TALLOC(n, t) ((t *) REGEX_ALLOCATE ((n) * sizeof (t)))
345 
346 #define BYTEWIDTH 8 /* In bits.	 */
347 
348 #define STREQ(s1, s2) ((strcmp (s1, s2) == 0))
349 
350 #undef MAX
351 #undef MIN
352 #define MAX(a, b) ((a) > (b) ? (a) : (b))
353 #define MIN(a, b) ((a) < (b) ? (a) : (b))
354 
355 typedef char boolean;
356 #define false 0
357 #define true 1
358 
359 static int re_match_2_internal ();
360 
361 /* These are the command codes that appear in compiled regular
362    expressions.	 Some opcodes are followed by argument bytes.  A
363    command code can specify any interpretation whatsoever for its
364    arguments.  Zero bytes may appear in the compiled regular expression.  */
365 
366 typedef enum
367 {
368   no_op = 0,
369 
370   /* Succeed right away--no more backtracking.	*/
371   succeed,
372 
373 	/* Followed by one byte giving n, then by n literal bytes.  */
374   exactn,
375 
376 	/* Matches any (more or less) character.  */
377   anychar,
378 
379 	/* Matches any one char belonging to specified set.  First
380 	   following byte is number of bitmap bytes.  Then come bytes
381 	   for a bitmap saying which chars are in.  Bits in each byte
382 	   are ordered low-bit-first.  A character is in the set if its
383 	   bit is 1.  A character too large to have a bit in the map is
384 	   automatically not in the set.  */
385   charset,
386 
387 	/* Same parameters as charset, but match any character that is
388 	   not one of those specified.	*/
389   charset_not,
390 
391 	/* Start remembering the text that is matched, for storing in a
392 	   register.  Followed by one byte with the register number, in
393 	   the range 0 to one less than the pattern buffer's re_nsub
394 	   field.  Then followed by one byte with the number of groups
395 	   inner to this one.  (This last has to be part of the
396 	   start_memory only because we need it in the on_failure_jump
397 	   of re_match_2.)  */
398   start_memory,
399 
400 	/* Stop remembering the text that is matched and store it in a
401 	   memory register.  Followed by one byte with the register
402 	   number, in the range 0 to one less than `re_nsub' in the
403 	   pattern buffer, and one byte with the number of inner groups,
404 	   just like `start_memory'.  (We need the number of inner
405 	   groups here because we don't have any easy way of finding the
406 	   corresponding start_memory when we're at a stop_memory.)  */
407   stop_memory,
408 
409 	/* Match a duplicate of something remembered. Followed by one
410 	   byte containing the register number.	 */
411   duplicate,
412 
413 	/* Fail unless at beginning of line.  */
414   begline,
415 
416 	/* Fail unless at end of line.	*/
417   endline,
418 
419 	/* Succeeds if at beginning of buffer (if emacs) or at beginning
420 	   of string to be matched (if not).  */
421   begbuf,
422 
423 	/* Analogously, for end of buffer/string.  */
424   endbuf,
425 
426 	/* Followed by two byte relative address to which to jump.  */
427   jump,
428 
429 	/* Same as jump, but marks the end of an alternative.  */
430   jump_past_alt,
431 
432 	/* Followed by two-byte relative address of place to resume at
433 	   in case of failure.	*/
434   on_failure_jump,
435 
436 	/* Like on_failure_jump, but pushes a placeholder instead of the
437 	   current string position when executed.  */
438   on_failure_keep_string_jump,
439 
440 	/* Throw away latest failure point and then jump to following
441 	   two-byte relative address.  */
442   pop_failure_jump,
443 
444 	/* Change to pop_failure_jump if know won't have to backtrack to
445 	   match; otherwise change to jump.  This is used to jump
446 	   back to the beginning of a repeat.  If what follows this jump
447 	   clearly won't match what the repeat does, such that we can be
448 	   sure that there is no use backtracking out of repetitions
449 	   already matched, then we change it to a pop_failure_jump.
450 	   Followed by two-byte address.  */
451   maybe_pop_jump,
452 
453 	/* Jump to following two-byte address, and push a dummy failure
454 	   point. This failure point will be thrown away if an attempt
455 	   is made to use it for a failure.  A `+' construct makes this
456 	   before the first repeat.  Also used as an intermediary kind
457 	   of jump when compiling an alternative.  */
458   dummy_failure_jump,
459 
460 	/* Push a dummy failure point and continue.  Used at the end of
461 	   alternatives.  */
462   push_dummy_failure,
463 
464 	/* Followed by two-byte relative address and two-byte number n.
465 	   After matching N times, jump to the address upon failure.  */
466   succeed_n,
467 
468 	/* Followed by two-byte relative address, and two-byte number n.
469 	   Jump to the address N times, then fail.  */
470   jump_n,
471 
472 	/* Set the following two-byte relative address to the
473 	   subsequent two-byte number.	The address *includes* the two
474 	   bytes of number.  */
475   set_number_at,
476 
477   wordchar,	/* Matches any word-constituent character.  */
478   notwordchar,	/* Matches any char that is not a word-constituent.  */
479 
480   wordbeg,	/* Succeeds if at word beginning.  */
481   wordend,	/* Succeeds if at word end.  */
482 
483   wordbound,	/* Succeeds if at a word boundary.  */
484   notwordbound	/* Succeeds if not at a word boundary.	*/
485 
486 #ifdef emacs
487   ,before_dot,	/* Succeeds if before point.  */
488   at_dot,	/* Succeeds if at point.  */
489   after_dot,	/* Succeeds if after point.  */
490 
491 	/* Matches any character whose syntax is specified.  Followed by
492 	   a byte which contains a syntax code, e.g., Sword.  */
493   syntaxspec,
494 
495 	/* Matches any character whose syntax is not that specified.  */
496   notsyntaxspec,
497 
498   /* Matches any character whose category-set contains the specified
499      category.	The operator is followed by a byte which contains a
500      category code (mnemonic ASCII character).	*/
501   categoryspec,
502 
503   /* Matches any character whose category-set does not contain the
504      specified category.  The operator is followed by a byte which
505      contains the category code (mnemonic ASCII character).  */
506   notcategoryspec
507 #endif /* emacs */
508 } re_opcode_t;
509 
510 /* Common operations on the compiled pattern.  */
511 
512 /* Store NUMBER in two contiguous bytes starting at DESTINATION.  */
513 
514 #define STORE_NUMBER(destination, number)				\
515   do {									\
516     (destination)[0] = (number) & 0377;					\
517     (destination)[1] = (number) >> 8;					\
518   } while (0)
519 
520 /* Same as STORE_NUMBER, except increment DESTINATION to
521    the byte after where the number is stored.  Therefore, DESTINATION
522    must be an lvalue.  */
523 
524 #define STORE_NUMBER_AND_INCR(destination, number)			\
525   do {									\
526     STORE_NUMBER (destination, number);					\
527     (destination) += 2;							\
528   } while (0)
529 
530 /* Put into DESTINATION a number stored in two contiguous bytes starting
531    at SOURCE.  */
532 
533 #define EXTRACT_NUMBER(destination, source)				\
534   do {									\
535     (destination) = *(source) & 0377;					\
536     (destination) += SIGN_EXTEND_CHAR (*((source) + 1)) << 8;		\
537   } while (0)
538 
539 #ifdef DEBUG
540 static void
extract_number(dest,source)541 extract_number (dest, source)
542     int *dest;
543     unsigned char *source;
544 {
545   int temp = SIGN_EXTEND_CHAR (*(source + 1));
546   *dest = *source & 0377;
547   *dest += temp << 8;
548 }
549 
550 #ifndef EXTRACT_MACROS /* To debug the macros.	*/
551 #undef EXTRACT_NUMBER
552 #define EXTRACT_NUMBER(dest, src) extract_number (&dest, src)
553 #endif /* not EXTRACT_MACROS */
554 
555 #endif /* DEBUG */
556 
557 /* Same as EXTRACT_NUMBER, except increment SOURCE to after the number.
558    SOURCE must be an lvalue.  */
559 
560 #define EXTRACT_NUMBER_AND_INCR(destination, source)			\
561   do {									\
562     EXTRACT_NUMBER (destination, source);				\
563     (source) += 2;							\
564   } while (0)
565 
566 #ifdef DEBUG
567 static void
extract_number_and_incr(destination,source)568 extract_number_and_incr (destination, source)
569     int *destination;
570     unsigned char **source;
571 {
572   extract_number (destination, *source);
573   *source += 2;
574 }
575 
576 #ifndef EXTRACT_MACROS
577 #undef EXTRACT_NUMBER_AND_INCR
578 #define EXTRACT_NUMBER_AND_INCR(dest, src) \
579   extract_number_and_incr (&dest, &src)
580 #endif /* not EXTRACT_MACROS */
581 
582 #endif /* DEBUG */
583 
584 /* Store a multibyte character in three contiguous bytes starting
585    DESTINATION, and increment DESTINATION to the byte after where the
586    character is stored.	 Therefore, DESTINATION must be an lvalue.  */
587 
588 #define STORE_CHARACTER_AND_INCR(destination, character)	\
589   do {								\
590     (destination)[0] = (character) & 0377;			\
591     (destination)[1] = ((character) >> 8) & 0377;		\
592     (destination)[2] = (character) >> 16;			\
593     (destination) += 3;						\
594   } while (0)
595 
596 /* Put into DESTINATION a character stored in three contiguous bytes
597    starting at SOURCE.	*/
598 
599 #define EXTRACT_CHARACTER(destination, source)	\
600   do {						\
601     (destination) = ((source)[0]		\
602 		     | ((source)[1] << 8)	\
603 		     | ((source)[2] << 16));	\
604   } while (0)
605 
606 
607 /* Macros for charset. */
608 
609 /* Size of bitmap of charset P in bytes.  P is a start of charset,
610    i.e. *P is (re_opcode_t) charset or (re_opcode_t) charset_not.  */
611 #define CHARSET_BITMAP_SIZE(p) ((p)[1] & 0x7F)
612 
613 /* Nonzero if charset P has range table.  */
614 #define CHARSET_RANGE_TABLE_EXISTS_P(p)	 ((p)[1] & 0x80)
615 
616 /* Return the address of range table of charset P.  But not the start
617    of table itself, but the before where the number of ranges is
618    stored.  `2 +' means to skip re_opcode_t and size of bitmap.	 */
619 #define CHARSET_RANGE_TABLE(p) (&(p)[2 + CHARSET_BITMAP_SIZE (p)])
620 
621 /* Test if C is listed in the bitmap of charset P.  */
622 #define CHARSET_LOOKUP_BITMAP(p, c)				\
623   ((c) < CHARSET_BITMAP_SIZE (p) * BYTEWIDTH			\
624    && (p)[2 + (c) / BYTEWIDTH] & (1 << ((c) % BYTEWIDTH)))
625 
626 /* Return the address of end of RANGE_TABLE.  COUNT is number of
627    ranges (which is a pair of (start, end)) in the RANGE_TABLE.	 `* 2'
628    is start of range and end of range.	`* 3' is size of each start
629    and end.  */
630 #define CHARSET_RANGE_TABLE_END(range_table, count)	\
631   ((range_table) + (count) * 2 * 3)
632 
633 /* Test if C is in RANGE_TABLE.	 A flag NOT is negated if C is in.
634    COUNT is number of ranges in RANGE_TABLE.  */
635 #define CHARSET_LOOKUP_RANGE_TABLE_RAW(not, c, range_table, count)	\
636   do									\
637     {									\
638       int range_start, range_end;					\
639       unsigned char *p;							\
640       unsigned char *range_table_end					\
641 	= CHARSET_RANGE_TABLE_END ((range_table), (count));		\
642 									\
643       for (p = (range_table); p < range_table_end; p += 2 * 3)		\
644 	{								\
645 	  EXTRACT_CHARACTER (range_start, p);				\
646 	  EXTRACT_CHARACTER (range_end, p + 3);				\
647 									\
648 	  if (range_start <= (c) && (c) <= range_end)			\
649 	    {								\
650 	      (not) = !(not);						\
651 	      break;							\
652 	    }								\
653 	}								\
654     }									\
655   while (0)
656 
657 /* Test if C is in range table of CHARSET.  The flag NOT is negated if
658    C is listed in it.  */
659 #define CHARSET_LOOKUP_RANGE_TABLE(not, c, charset)			\
660   do									\
661     {									\
662       /* Number of ranges in range table. */				\
663       int count;							\
664       unsigned char *range_table = CHARSET_RANGE_TABLE (charset);	\
665 									\
666       EXTRACT_NUMBER_AND_INCR (count, range_table);			\
667       CHARSET_LOOKUP_RANGE_TABLE_RAW ((not), (c), range_table, count);	\
668     }									\
669   while (0)
670 
671 /* If DEBUG is defined, Regex prints many voluminous messages about what
672    it is doing (if the variable `debug' is nonzero).  If linked with the
673    main program in `iregex.c', you can enter patterns and strings
674    interactively.  And if linked with the main program in `main.c' and
675    the other test files, you can run the already-written tests.	 */
676 
677 #ifdef DEBUG
678 
679 /* We use standard I/O for debugging.  */
680 #include <stdio.h>
681 
682 /* It is useful to test things that ``must'' be true when debugging.  */
683 #include <assert.h>
684 
685 static int debug = 0;
686 
687 #define DEBUG_STATEMENT(e) e
688 #define DEBUG_PRINT1(x) if (debug) printf (x)
689 #define DEBUG_PRINT2(x1, x2) if (debug) printf (x1, x2)
690 #define DEBUG_PRINT3(x1, x2, x3) if (debug) printf (x1, x2, x3)
691 #define DEBUG_PRINT4(x1, x2, x3, x4) if (debug) printf (x1, x2, x3, x4)
692 #define DEBUG_PRINT_COMPILED_PATTERN(p, s, e)				\
693   if (debug) print_partial_compiled_pattern (s, e)
694 #define DEBUG_PRINT_DOUBLE_STRING(w, s1, sz1, s2, sz2)			\
695   if (debug) print_double_string (w, s1, sz1, s2, sz2)
696 
697 
698 /* Print the fastmap in human-readable form.  */
699 
700 void
print_fastmap(fastmap)701 print_fastmap (fastmap)
702     char *fastmap;
703 {
704   unsigned was_a_range = 0;
705   unsigned i = 0;
706 
707   while (i < (1 << BYTEWIDTH))
708     {
709       if (fastmap[i++])
710 	{
711 	  was_a_range = 0;
712 	  putchar (i - 1);
713 	  while (i < (1 << BYTEWIDTH)  &&  fastmap[i])
714 	    {
715 	      was_a_range = 1;
716 	      i++;
717 	    }
718 	  if (was_a_range)
719 	    {
720 	      printf ("-");
721 	      putchar (i - 1);
722 	    }
723 	}
724     }
725   putchar ('\n');
726 }
727 
728 
729 /* Print a compiled pattern string in human-readable form, starting at
730    the START pointer into it and ending just before the pointer END.  */
731 
732 void
print_partial_compiled_pattern(start,end)733 print_partial_compiled_pattern (start, end)
734     unsigned char *start;
735     unsigned char *end;
736 {
737   int mcnt, mcnt2;
738   unsigned char *p = start;
739   unsigned char *pend = end;
740 
741   if (start == NULL)
742     {
743       printf ("(null)\n");
744       return;
745     }
746 
747   /* Loop over pattern commands.  */
748   while (p < pend)
749     {
750       printf ("%d:\t", p - start);
751 
752       switch ((re_opcode_t) *p++)
753 	{
754 	case no_op:
755 	  printf ("/no_op");
756 	  break;
757 
758 	case exactn:
759 	  mcnt = *p++;
760 	  printf ("/exactn/%d", mcnt);
761 	  do
762 	    {
763 	      putchar ('/');
764 	      putchar (*p++);
765 	    }
766 	  while (--mcnt);
767 	  break;
768 
769 	case start_memory:
770 	  mcnt = *p++;
771 	  printf ("/start_memory/%d/%d", mcnt, *p++);
772 	  break;
773 
774 	case stop_memory:
775 	  mcnt = *p++;
776 	  printf ("/stop_memory/%d/%d", mcnt, *p++);
777 	  break;
778 
779 	case duplicate:
780 	  printf ("/duplicate/%d", *p++);
781 	  break;
782 
783 	case anychar:
784 	  printf ("/anychar");
785 	  break;
786 
787 	case charset:
788 	case charset_not:
789 	  {
790 	    register int c, last = -100;
791 	    register int in_range = 0;
792 
793 	    printf ("/charset [%s",
794 		    (re_opcode_t) *(p - 1) == charset_not ? "^" : "");
795 
796 	    assert (p + *p < pend);
797 
798 	    for (c = 0; c < 256; c++)
799 	      if (c / 8 < *p
800 		  && (p[1 + (c/8)] & (1 << (c % 8))))
801 		{
802 		  /* Are we starting a range?  */
803 		  if (last + 1 == c && ! in_range)
804 		    {
805 		      putchar ('-');
806 		      in_range = 1;
807 		    }
808 		  /* Have we broken a range?  */
809 		  else if (last + 1 != c && in_range)
810 	      {
811 		      putchar (last);
812 		      in_range = 0;
813 		    }
814 
815 		  if (! in_range)
816 		    putchar (c);
817 
818 		  last = c;
819 	      }
820 
821 	    if (in_range)
822 	      putchar (last);
823 
824 	    putchar (']');
825 
826 	    p += 1 + *p;
827 	  }
828 	  break;
829 
830 	case begline:
831 	  printf ("/begline");
832 	  break;
833 
834 	case endline:
835 	  printf ("/endline");
836 	  break;
837 
838 	case on_failure_jump:
839 	  extract_number_and_incr (&mcnt, &p);
840 	  printf ("/on_failure_jump to %d", p + mcnt - start);
841 	  break;
842 
843 	case on_failure_keep_string_jump:
844 	  extract_number_and_incr (&mcnt, &p);
845 	  printf ("/on_failure_keep_string_jump to %d", p + mcnt - start);
846 	  break;
847 
848 	case dummy_failure_jump:
849 	  extract_number_and_incr (&mcnt, &p);
850 	  printf ("/dummy_failure_jump to %d", p + mcnt - start);
851 	  break;
852 
853 	case push_dummy_failure:
854 	  printf ("/push_dummy_failure");
855 	  break;
856 
857 	case maybe_pop_jump:
858 	  extract_number_and_incr (&mcnt, &p);
859 	  printf ("/maybe_pop_jump to %d", p + mcnt - start);
860 	  break;
861 
862 	case pop_failure_jump:
863 	  extract_number_and_incr (&mcnt, &p);
864 	  printf ("/pop_failure_jump to %d", p + mcnt - start);
865 	  break;
866 
867 	case jump_past_alt:
868 	  extract_number_and_incr (&mcnt, &p);
869 	  printf ("/jump_past_alt to %d", p + mcnt - start);
870 	  break;
871 
872 	case jump:
873 	  extract_number_and_incr (&mcnt, &p);
874 	  printf ("/jump to %d", p + mcnt - start);
875 	  break;
876 
877 	case succeed_n:
878 	  extract_number_and_incr (&mcnt, &p);
879 	  extract_number_and_incr (&mcnt2, &p);
880 	  printf ("/succeed_n to %d, %d times", p + mcnt - start, mcnt2);
881 	  break;
882 
883 	case jump_n:
884 	  extract_number_and_incr (&mcnt, &p);
885 	  extract_number_and_incr (&mcnt2, &p);
886 	  printf ("/jump_n to %d, %d times", p + mcnt - start, mcnt2);
887 	  break;
888 
889 	case set_number_at:
890 	  extract_number_and_incr (&mcnt, &p);
891 	  extract_number_and_incr (&mcnt2, &p);
892 	  printf ("/set_number_at location %d to %d", p + mcnt - start, mcnt2);
893 	  break;
894 
895 	case wordbound:
896 	  printf ("/wordbound");
897 	  break;
898 
899 	case notwordbound:
900 	  printf ("/notwordbound");
901 	  break;
902 
903 	case wordbeg:
904 	  printf ("/wordbeg");
905 	  break;
906 
907 	case wordend:
908 	  printf ("/wordend");
909 
910 #ifdef emacs
911 	case before_dot:
912 	  printf ("/before_dot");
913 	  break;
914 
915 	case at_dot:
916 	  printf ("/at_dot");
917 	  break;
918 
919 	case after_dot:
920 	  printf ("/after_dot");
921 	  break;
922 
923 	case syntaxspec:
924 	  printf ("/syntaxspec");
925 	  mcnt = *p++;
926 	  printf ("/%d", mcnt);
927 	  break;
928 
929 	case notsyntaxspec:
930 	  printf ("/notsyntaxspec");
931 	  mcnt = *p++;
932 	  printf ("/%d", mcnt);
933 	  break;
934 #endif /* emacs */
935 
936 	case wordchar:
937 	  printf ("/wordchar");
938 	  break;
939 
940 	case notwordchar:
941 	  printf ("/notwordchar");
942 	  break;
943 
944 	case begbuf:
945 	  printf ("/begbuf");
946 	  break;
947 
948 	case endbuf:
949 	  printf ("/endbuf");
950 	  break;
951 
952 	default:
953 	  printf ("?%d", *(p-1));
954 	}
955 
956       putchar ('\n');
957     }
958 
959   printf ("%d:\tend of pattern.\n", p - start);
960 }
961 
962 
963 void
print_compiled_pattern(bufp)964 print_compiled_pattern (bufp)
965     struct re_pattern_buffer *bufp;
966 {
967   unsigned char *buffer = bufp->buffer;
968 
969   print_partial_compiled_pattern (buffer, buffer + bufp->used);
970   printf ("%d bytes used/%d bytes allocated.\n", bufp->used, bufp->allocated);
971 
972   if (bufp->fastmap_accurate && bufp->fastmap)
973     {
974       printf ("fastmap: ");
975       print_fastmap (bufp->fastmap);
976     }
977 
978   printf ("re_nsub: %d\t", bufp->re_nsub);
979   printf ("regs_alloc: %d\t", bufp->regs_allocated);
980   printf ("can_be_null: %d\t", bufp->can_be_null);
981   printf ("newline_anchor: %d\n", bufp->newline_anchor);
982   printf ("no_sub: %d\t", bufp->no_sub);
983   printf ("not_bol: %d\t", bufp->not_bol);
984   printf ("not_eol: %d\t", bufp->not_eol);
985   printf ("syntax: %d\n", bufp->syntax);
986   /* Perhaps we should print the translate table?  */
987 }
988 
989 
990 void
print_double_string(where,string1,size1,string2,size2)991 print_double_string (where, string1, size1, string2, size2)
992     const char *where;
993     const char *string1;
994     const char *string2;
995     int size1;
996     int size2;
997 {
998   unsigned this_char;
999 
1000   if (where == NULL)
1001     printf ("(null)");
1002   else
1003     {
1004       if (FIRST_STRING_P (where))
1005 	{
1006 	  for (this_char = where - string1; this_char < size1; this_char++)
1007 	    putchar (string1[this_char]);
1008 
1009 	  where = string2;
1010 	}
1011 
1012       for (this_char = where - string2; this_char < size2; this_char++)
1013 	putchar (string2[this_char]);
1014     }
1015 }
1016 
1017 #else /* not DEBUG */
1018 
1019 #undef assert
1020 #define assert(e)
1021 
1022 #define DEBUG_STATEMENT(e)
1023 #define DEBUG_PRINT1(x)
1024 #define DEBUG_PRINT2(x1, x2)
1025 #define DEBUG_PRINT3(x1, x2, x3)
1026 #define DEBUG_PRINT4(x1, x2, x3, x4)
1027 #define DEBUG_PRINT_COMPILED_PATTERN(p, s, e)
1028 #define DEBUG_PRINT_DOUBLE_STRING(w, s1, sz1, s2, sz2)
1029 
1030 #endif /* not DEBUG */
1031 
1032 /* Set by `re_set_syntax' to the current regexp syntax to recognize.  Can
1033    also be assigned to arbitrarily: each pattern buffer stores its own
1034    syntax, so it can be changed between regex compilations.  */
1035 /* This has no initializer because initialized variables in Emacs
1036    become read-only after dumping.  */
1037 reg_syntax_t re_syntax_options;
1038 
1039 
1040 /* Specify the precise syntax of regexps for compilation.  This provides
1041    for compatibility for various utilities which historically have
1042    different, incompatible syntaxes.
1043 
1044    The argument SYNTAX is a bit mask comprised of the various bits
1045    defined in regex.h.	We return the old syntax.  */
1046 
1047 reg_syntax_t
re_set_syntax(syntax)1048 re_set_syntax (syntax)
1049     reg_syntax_t syntax;
1050 {
1051   reg_syntax_t ret = re_syntax_options;
1052 
1053   re_syntax_options = syntax;
1054   return ret;
1055 }
1056 
1057 /* This table gives an error message for each of the error codes listed
1058    in regex.h.	Obviously the order here has to be same as there.
1059    POSIX doesn't require that we do anything for REG_NOERROR,
1060    but why not be nice?	 */
1061 
1062 static const char *re_error_msgid[] =
1063   {
1064     gettext_noop ("Success"),	/* REG_NOERROR */
1065     gettext_noop ("No match"),	/* REG_NOMATCH */
1066     gettext_noop ("Invalid regular expression"), /* REG_BADPAT */
1067     gettext_noop ("Invalid collation character"), /* REG_ECOLLATE */
1068     gettext_noop ("Invalid character class name"), /* REG_ECTYPE */
1069     gettext_noop ("Trailing backslash"), /* REG_EESCAPE */
1070     gettext_noop ("Invalid back reference"), /* REG_ESUBREG */
1071     gettext_noop ("Unmatched [ or [^"),	/* REG_EBRACK */
1072     gettext_noop ("Unmatched ( or \\("), /* REG_EPAREN */
1073     gettext_noop ("Unmatched \\{"), /* REG_EBRACE */
1074     gettext_noop ("Invalid content of \\{\\}"), /* REG_BADBR */
1075     gettext_noop ("Invalid range end"),	/* REG_ERANGE */
1076     gettext_noop ("Memory exhausted"), /* REG_ESPACE */
1077     gettext_noop ("Invalid preceding regular expression"), /* REG_BADRPT */
1078     gettext_noop ("Premature end of regular expression"), /* REG_EEND */
1079     gettext_noop ("Regular expression too big"), /* REG_ESIZE */
1080     gettext_noop ("Unmatched ) or \\)"), /* REG_ERPAREN */
1081   };
1082 
1083 /* Avoiding alloca during matching, to placate r_alloc.	 */
1084 
1085 /* Define MATCH_MAY_ALLOCATE unless we need to make sure that the
1086    searching and matching functions should not call alloca.  On some
1087    systems, alloca is implemented in terms of malloc, and if we're
1088    using the relocating allocator routines, then malloc could cause a
1089    relocation, which might (if the strings being searched are in the
1090    ralloc heap) shift the data out from underneath the regexp
1091    routines.
1092 
1093    Here's another reason to avoid allocation: Emacs
1094    processes input from X in a signal handler; processing X input may
1095    call malloc; if input arrives while a matching routine is calling
1096    malloc, then we're scrod.  But Emacs can't just block input while
1097    calling matching routines; then we don't notice interrupts when
1098    they come in.  So, Emacs blocks input around all regexp calls
1099    except the matching calls, which it leaves unprotected, in the
1100    faith that they will not malloc.  */
1101 
1102 /* Normally, this is fine.  */
1103 #define MATCH_MAY_ALLOCATE
1104 
1105 /* When using GNU C, we are not REALLY using the C alloca, no matter
1106    what config.h may say.  So don't take precautions for it.  */
1107 #ifdef __GNUC__
1108 #undef C_ALLOCA
1109 #endif
1110 
1111 /* The match routines may not allocate if (1) they would do it with malloc
1112    and (2) it's not safe for them to use malloc.
1113    Note that if REL_ALLOC is defined, matching would not use malloc for the
1114    failure stack, but we would still use it for the register vectors;
1115    so REL_ALLOC should not affect this.	 */
1116 #if (defined (C_ALLOCA) || defined (REGEX_MALLOC)) && defined (emacs)
1117 #undef MATCH_MAY_ALLOCATE
1118 #endif
1119 
1120 
1121 /* Failure stack declarations and macros; both re_compile_fastmap and
1122    re_match_2 use a failure stack.  These have to be macros because of
1123    REGEX_ALLOCATE_STACK.  */
1124 
1125 
1126 /* Approximate number of failure points for which to initially allocate space
1127    when matching.  If this number is exceeded, we allocate more
1128    space, so it is not a hard limit.  */
1129 #ifndef INIT_FAILURE_ALLOC
1130 #define INIT_FAILURE_ALLOC 20
1131 #endif
1132 
1133 /* Roughly the maximum number of failure points on the stack.  Would be
1134    exactly that if always used TYPICAL_FAILURE_SIZE items each time we failed.
1135    This is a variable only so users of regex can assign to it; we never
1136    change it ourselves.	 */
1137 #if defined (MATCH_MAY_ALLOCATE)
1138 /* Note that 4400 is enough to cause a crash on Alpha OSF/1,
1139    whose default stack limit is 2mb.  In order for a larger
1140    value to work reliably, you have to try to make it accord
1141    with the process stack limit.  */
1142 int re_max_failures = 40000;
1143 #else
1144 int re_max_failures = 4000;
1145 #endif
1146 
1147 union fail_stack_elt
1148 {
1149   unsigned char *pointer;
1150   int integer;
1151 };
1152 
1153 typedef union fail_stack_elt fail_stack_elt_t;
1154 
1155 typedef struct
1156 {
1157   fail_stack_elt_t *stack;
1158   unsigned size;
1159   unsigned avail;			/* Offset of next open position.  */
1160 } fail_stack_type;
1161 
1162 #define FAIL_STACK_EMPTY()     (fail_stack.avail == 0)
1163 #define FAIL_STACK_PTR_EMPTY() (fail_stack_ptr->avail == 0)
1164 #define FAIL_STACK_FULL()      (fail_stack.avail == fail_stack.size)
1165 
1166 
1167 /* Define macros to initialize and free the failure stack.
1168    Do `return -2' if the alloc fails.  */
1169 
1170 #ifdef MATCH_MAY_ALLOCATE
1171 #define INIT_FAIL_STACK()						\
1172   do {									\
1173     fail_stack.stack = (fail_stack_elt_t *)				\
1174       REGEX_ALLOCATE_STACK (INIT_FAILURE_ALLOC * TYPICAL_FAILURE_SIZE	\
1175 			    * sizeof (fail_stack_elt_t));		\
1176 									\
1177     if (fail_stack.stack == NULL)					\
1178       return -2;							\
1179 									\
1180     fail_stack.size = INIT_FAILURE_ALLOC;				\
1181     fail_stack.avail = 0;						\
1182   } while (0)
1183 
1184 #define RESET_FAIL_STACK()  REGEX_FREE_STACK (fail_stack.stack)
1185 #else
1186 #define INIT_FAIL_STACK()						\
1187   do {									\
1188     fail_stack.avail = 0;						\
1189   } while (0)
1190 
1191 #define RESET_FAIL_STACK()
1192 #endif
1193 
1194 
1195 /* Double the size of FAIL_STACK, up to a limit
1196    which allows approximately `re_max_failures' items.
1197 
1198    Return 1 if succeeds, and 0 if either ran out of memory
1199    allocating space for it or it was already too large.
1200 
1201    REGEX_REALLOCATE_STACK requires `destination' be declared.	*/
1202 
1203 /* Factor to increase the failure stack size by
1204    when we increase it.
1205    This used to be 2, but 2 was too wasteful
1206    because the old discarded stacks added up to as much space
1207    were as ultimate, maximum-size stack.  */
1208 #define FAIL_STACK_GROWTH_FACTOR 4
1209 
1210 #define GROW_FAIL_STACK(fail_stack)					\
1211   (((fail_stack).size * sizeof (fail_stack_elt_t)			\
1212     >= re_max_failures * TYPICAL_FAILURE_SIZE)				\
1213    ? 0									\
1214    : ((fail_stack).stack						\
1215       = (fail_stack_elt_t *)						\
1216 	REGEX_REALLOCATE_STACK ((fail_stack).stack,			\
1217 	  (fail_stack).size * sizeof (fail_stack_elt_t),		\
1218 	  MIN (re_max_failures * TYPICAL_FAILURE_SIZE,			\
1219 	       ((fail_stack).size * sizeof (fail_stack_elt_t)		\
1220 		* FAIL_STACK_GROWTH_FACTOR))),				\
1221 									\
1222       (fail_stack).stack == NULL					\
1223       ? 0								\
1224       : ((fail_stack).size						\
1225 	 = (MIN (re_max_failures * TYPICAL_FAILURE_SIZE,		\
1226 		 ((fail_stack).size * sizeof (fail_stack_elt_t)		\
1227 		  * FAIL_STACK_GROWTH_FACTOR))				\
1228 	    / sizeof (fail_stack_elt_t)),				\
1229 	 1)))
1230 
1231 
1232 /* Push pointer POINTER on FAIL_STACK.
1233    Return 1 if was able to do so and 0 if ran out of memory allocating
1234    space to do so.  */
1235 #define PUSH_PATTERN_OP(POINTER, FAIL_STACK)				\
1236   ((FAIL_STACK_FULL ()							\
1237     && !GROW_FAIL_STACK (FAIL_STACK))					\
1238    ? 0									\
1239    : ((FAIL_STACK).stack[(FAIL_STACK).avail++].pointer = POINTER,	\
1240       1))
1241 
1242 /* Push a pointer value onto the failure stack.
1243    Assumes the variable `fail_stack'.  Probably should only
1244    be called from within `PUSH_FAILURE_POINT'.	*/
1245 #define PUSH_FAILURE_POINTER(item)					\
1246   fail_stack.stack[fail_stack.avail++].pointer = (unsigned char *) (item)
1247 
1248 /* This pushes an integer-valued item onto the failure stack.
1249    Assumes the variable `fail_stack'.  Probably should only
1250    be called from within `PUSH_FAILURE_POINT'.	*/
1251 #define PUSH_FAILURE_INT(item)					\
1252   fail_stack.stack[fail_stack.avail++].integer = (item)
1253 
1254 /* Push a fail_stack_elt_t value onto the failure stack.
1255    Assumes the variable `fail_stack'.  Probably should only
1256    be called from within `PUSH_FAILURE_POINT'.	*/
1257 #define PUSH_FAILURE_ELT(item)					\
1258   fail_stack.stack[fail_stack.avail++] =  (item)
1259 
1260 /* These three POP... operations complement the three PUSH... operations.
1261    All assume that `fail_stack' is nonempty.  */
1262 #define POP_FAILURE_POINTER() fail_stack.stack[--fail_stack.avail].pointer
1263 #define POP_FAILURE_INT() fail_stack.stack[--fail_stack.avail].integer
1264 #define POP_FAILURE_ELT() fail_stack.stack[--fail_stack.avail]
1265 
1266 /* Used to omit pushing failure point id's when we're not debugging.  */
1267 #ifdef DEBUG
1268 #define DEBUG_PUSH PUSH_FAILURE_INT
1269 #define DEBUG_POP(item_addr) *(item_addr) = POP_FAILURE_INT ()
1270 #else
1271 #define DEBUG_PUSH(item)
1272 #define DEBUG_POP(item_addr)
1273 #endif
1274 
1275 
1276 /* Push the information about the state we will need
1277    if we ever fail back to it.
1278 
1279    Requires variables fail_stack, regstart, regend, reg_info, and
1280    num_regs be declared.  GROW_FAIL_STACK requires `destination' be
1281    declared.
1282 
1283    Does `return FAILURE_CODE' if runs out of memory.  */
1284 
1285 #define PUSH_FAILURE_POINT(pattern_place, string_place, failure_code)	\
1286   do {									\
1287     char *destination;							\
1288     /* Must be int, so when we don't save any registers, the arithmetic	\
1289        of 0 + -1 isn't done as unsigned.  */				\
1290     int this_reg;							\
1291 									\
1292     DEBUG_STATEMENT (failure_id++);					\
1293     DEBUG_STATEMENT (nfailure_points_pushed++);				\
1294     DEBUG_PRINT2 ("\nPUSH_FAILURE_POINT #%u:\n", failure_id);		\
1295     DEBUG_PRINT2 ("  Before push, next avail: %d\n", (fail_stack).avail);\
1296     DEBUG_PRINT2 ("			size: %d\n", (fail_stack).size);\
1297 									\
1298     DEBUG_PRINT2 ("  slots needed: %d\n", NUM_FAILURE_ITEMS);		\
1299     DEBUG_PRINT2 ("	available: %d\n", REMAINING_AVAIL_SLOTS);	\
1300 									\
1301     /* Ensure we have enough space allocated for what we will push.  */	\
1302     while (REMAINING_AVAIL_SLOTS < NUM_FAILURE_ITEMS)			\
1303       {									\
1304 	if (!GROW_FAIL_STACK (fail_stack))				\
1305 	  return failure_code;						\
1306 									\
1307 	DEBUG_PRINT2 ("\n  Doubled stack; size now: %d\n",		\
1308 		       (fail_stack).size);				\
1309 	DEBUG_PRINT2 ("	 slots available: %d\n", REMAINING_AVAIL_SLOTS);\
1310       }									\
1311 									\
1312     /* Push the info, starting with the registers.  */			\
1313     DEBUG_PRINT1 ("\n");						\
1314 									\
1315     if (1)								\
1316       for (this_reg = lowest_active_reg; this_reg <= highest_active_reg; \
1317 	   this_reg++)							\
1318 	{								\
1319 	  DEBUG_PRINT2 ("  Pushing reg: %d\n", this_reg);		\
1320 	  DEBUG_STATEMENT (num_regs_pushed++);				\
1321 									\
1322 	  DEBUG_PRINT2 ("    start: 0x%x\n", regstart[this_reg]);	\
1323 	  PUSH_FAILURE_POINTER (regstart[this_reg]);			\
1324 									\
1325 	  DEBUG_PRINT2 ("    end: 0x%x\n", regend[this_reg]);		\
1326 	  PUSH_FAILURE_POINTER (regend[this_reg]);			\
1327 									\
1328 	  DEBUG_PRINT2 ("    info: 0x%x\n      ", reg_info[this_reg]);	\
1329 	  DEBUG_PRINT2 (" match_null=%d",				\
1330 			REG_MATCH_NULL_STRING_P (reg_info[this_reg]));	\
1331 	  DEBUG_PRINT2 (" active=%d", IS_ACTIVE (reg_info[this_reg]));	\
1332 	  DEBUG_PRINT2 (" matched_something=%d",			\
1333 			MATCHED_SOMETHING (reg_info[this_reg]));	\
1334 	  DEBUG_PRINT2 (" ever_matched=%d",				\
1335 			EVER_MATCHED_SOMETHING (reg_info[this_reg]));	\
1336 	  DEBUG_PRINT1 ("\n");						\
1337 	  PUSH_FAILURE_ELT (reg_info[this_reg].word);			\
1338 	}								\
1339 									\
1340     DEBUG_PRINT2 ("  Pushing  low active reg: %d\n", lowest_active_reg);\
1341     PUSH_FAILURE_INT (lowest_active_reg);				\
1342 									\
1343     DEBUG_PRINT2 ("  Pushing high active reg: %d\n", highest_active_reg);\
1344     PUSH_FAILURE_INT (highest_active_reg);				\
1345 									\
1346     DEBUG_PRINT2 ("  Pushing pattern 0x%x: ", pattern_place);		\
1347     DEBUG_PRINT_COMPILED_PATTERN (bufp, pattern_place, pend);		\
1348     PUSH_FAILURE_POINTER (pattern_place);				\
1349 									\
1350     DEBUG_PRINT2 ("  Pushing string 0x%x: `", string_place);		\
1351     DEBUG_PRINT_DOUBLE_STRING (string_place, string1, size1, string2,	\
1352 				 size2);				\
1353     DEBUG_PRINT1 ("'\n");						\
1354     PUSH_FAILURE_POINTER (string_place);				\
1355 									\
1356     DEBUG_PRINT2 ("  Pushing failure id: %u\n", failure_id);		\
1357     DEBUG_PUSH (failure_id);						\
1358   } while (0)
1359 
1360 /* This is the number of items that are pushed and popped on the stack
1361    for each register.  */
1362 #define NUM_REG_ITEMS  3
1363 
1364 /* Individual items aside from the registers.  */
1365 #ifdef DEBUG
1366 #define NUM_NONREG_ITEMS 5 /* Includes failure point id.  */
1367 #else
1368 #define NUM_NONREG_ITEMS 4
1369 #endif
1370 
1371 /* Estimate the size of data pushed by a typical failure stack entry.
1372    An estimate is all we need, because all we use this for
1373    is to choose a limit for how big to make the failure stack.  */
1374 
1375 #define TYPICAL_FAILURE_SIZE 20
1376 
1377 /* This is how many items we actually use for a failure point.
1378    It depends on the regexp.  */
1379 #define NUM_FAILURE_ITEMS				\
1380   (((0							\
1381      ? 0 : highest_active_reg - lowest_active_reg + 1)	\
1382     * NUM_REG_ITEMS)					\
1383    + NUM_NONREG_ITEMS)
1384 
1385 /* How many items can still be added to the stack without overflowing it.  */
1386 #define REMAINING_AVAIL_SLOTS ((fail_stack).size - (fail_stack).avail)
1387 
1388 
1389 /* Pops what PUSH_FAIL_STACK pushes.
1390 
1391    We restore into the parameters, all of which should be lvalues:
1392      STR -- the saved data position.
1393      PAT -- the saved pattern position.
1394      LOW_REG, HIGH_REG -- the highest and lowest active registers.
1395      REGSTART, REGEND -- arrays of string positions.
1396      REG_INFO -- array of information about each subexpression.
1397 
1398    Also assumes the variables `fail_stack' and (if debugging), `bufp',
1399    `pend', `string1', `size1', `string2', and `size2'.	*/
1400 
1401 #define POP_FAILURE_POINT(str, pat, low_reg, high_reg, regstart, regend, reg_info)\
1402 {									\
1403   DEBUG_STATEMENT (fail_stack_elt_t failure_id;)			\
1404   int this_reg;								\
1405   const unsigned char *string_temp;					\
1406 									\
1407   assert (!FAIL_STACK_EMPTY ());					\
1408 									\
1409   /* Remove failure points and point to how many regs pushed.  */	\
1410   DEBUG_PRINT1 ("POP_FAILURE_POINT:\n");				\
1411   DEBUG_PRINT2 ("  Before pop, next avail: %d\n", fail_stack.avail);	\
1412   DEBUG_PRINT2 ("		     size: %d\n", fail_stack.size);	\
1413 									\
1414   assert (fail_stack.avail >= NUM_NONREG_ITEMS);			\
1415 									\
1416   DEBUG_POP (&failure_id);						\
1417   DEBUG_PRINT2 ("  Popping failure id: %u\n", failure_id);		\
1418 									\
1419   /* If the saved string location is NULL, it came from an		\
1420      on_failure_keep_string_jump opcode, and we want to throw away the	\
1421      saved NULL, thus retaining our current position in the string.  */	\
1422   string_temp = POP_FAILURE_POINTER ();					\
1423   if (string_temp != NULL)						\
1424     str = (const char *) string_temp;					\
1425 									\
1426   DEBUG_PRINT2 ("  Popping string 0x%x: `", str);			\
1427   DEBUG_PRINT_DOUBLE_STRING (str, string1, size1, string2, size2);	\
1428   DEBUG_PRINT1 ("'\n");							\
1429 									\
1430   pat = (unsigned char *) POP_FAILURE_POINTER ();			\
1431   DEBUG_PRINT2 ("  Popping pattern 0x%x: ", pat);			\
1432   DEBUG_PRINT_COMPILED_PATTERN (bufp, pat, pend);			\
1433 									\
1434   /* Restore register info.  */						\
1435   high_reg = (unsigned) POP_FAILURE_INT ();				\
1436   DEBUG_PRINT2 ("  Popping high active reg: %d\n", high_reg);		\
1437 									\
1438   low_reg = (unsigned) POP_FAILURE_INT ();				\
1439   DEBUG_PRINT2 ("  Popping  low active reg: %d\n", low_reg);		\
1440 									\
1441   if (1)								\
1442     for (this_reg = high_reg; this_reg >= low_reg; this_reg--)		\
1443       {									\
1444 	DEBUG_PRINT2 ("	   Popping reg: %d\n", this_reg);		\
1445 									\
1446 	reg_info[this_reg].word = POP_FAILURE_ELT ();			\
1447 	DEBUG_PRINT2 ("	     info: 0x%x\n", reg_info[this_reg]);	\
1448 									\
1449 	regend[this_reg] = (const char *) POP_FAILURE_POINTER ();	\
1450 	DEBUG_PRINT2 ("	     end: 0x%x\n", regend[this_reg]);		\
1451 									\
1452 	regstart[this_reg] = (const char *) POP_FAILURE_POINTER ();	\
1453 	DEBUG_PRINT2 ("	     start: 0x%x\n", regstart[this_reg]);	\
1454       }									\
1455   else									\
1456     {									\
1457       for (this_reg = highest_active_reg; this_reg > high_reg; this_reg--) \
1458 	{								\
1459 	  reg_info[this_reg].word.integer = 0;				\
1460 	  regend[this_reg] = 0;						\
1461 	  regstart[this_reg] = 0;					\
1462 	}								\
1463       highest_active_reg = high_reg;					\
1464     }									\
1465 									\
1466   set_regs_matched_done = 0;						\
1467   DEBUG_STATEMENT (nfailure_points_popped++);				\
1468 } /* POP_FAILURE_POINT */
1469 
1470 
1471 
1472 /* Structure for per-register (a.k.a. per-group) information.
1473    Other register information, such as the
1474    starting and ending positions (which are addresses), and the list of
1475    inner groups (which is a bits list) are maintained in separate
1476    variables.
1477 
1478    We are making a (strictly speaking) nonportable assumption here: that
1479    the compiler will pack our bit fields into something that fits into
1480    the type of `word', i.e., is something that fits into one item on the
1481    failure stack.  */
1482 
1483 typedef union
1484 {
1485   fail_stack_elt_t word;
1486   struct
1487   {
1488       /* This field is one if this group can match the empty string,
1489 	 zero if not.  If not yet determined,  `MATCH_NULL_UNSET_VALUE'.  */
1490 #define MATCH_NULL_UNSET_VALUE 3
1491     unsigned match_null_string_p : 2;
1492     unsigned is_active : 1;
1493     unsigned matched_something : 1;
1494     unsigned ever_matched_something : 1;
1495   } bits;
1496 } register_info_type;
1497 
1498 #define REG_MATCH_NULL_STRING_P(R)  ((R).bits.match_null_string_p)
1499 #define IS_ACTIVE(R)  ((R).bits.is_active)
1500 #define MATCHED_SOMETHING(R)  ((R).bits.matched_something)
1501 #define EVER_MATCHED_SOMETHING(R)  ((R).bits.ever_matched_something)
1502 
1503 
1504 /* Call this when have matched a real character; it sets `matched' flags
1505    for the subexpressions which we are currently inside.  Also records
1506    that those subexprs have matched.  */
1507 #define SET_REGS_MATCHED()						\
1508   do									\
1509     {									\
1510       if (!set_regs_matched_done)					\
1511 	{								\
1512 	  unsigned r;							\
1513 	  set_regs_matched_done = 1;					\
1514 	  for (r = lowest_active_reg; r <= highest_active_reg; r++)	\
1515 	    {								\
1516 	      MATCHED_SOMETHING (reg_info[r])				\
1517 		= EVER_MATCHED_SOMETHING (reg_info[r])			\
1518 		= 1;							\
1519 	    }								\
1520 	}								\
1521     }									\
1522   while (0)
1523 
1524 /* Registers are set to a sentinel when they haven't yet matched.  */
1525 static char reg_unset_dummy;
1526 #define REG_UNSET_VALUE (&reg_unset_dummy)
1527 #define REG_UNSET(e) ((e) == REG_UNSET_VALUE)
1528 
1529 /* Subroutine declarations and macros for regex_compile.  */
1530 
1531 static void store_op1 (), store_op2 ();
1532 static void insert_op1 (), insert_op2 ();
1533 static boolean at_begline_loc_p (), at_endline_loc_p ();
1534 static boolean group_in_compile_stack ();
1535 static reg_errcode_t compile_range ();
1536 
1537 /* Fetch the next character in the uncompiled pattern---translating it
1538    if necessary.  Also cast from a signed character in the constant
1539    string passed to us by the user to an unsigned char that we can use
1540    as an array index (in, e.g., `translate').  */
1541 #ifndef PATFETCH
1542 #define PATFETCH(c)							\
1543   do {if (p == pend) return REG_EEND;					\
1544     c = (unsigned char) *p++;						\
1545     if (RE_TRANSLATE_P (translate)) c = RE_TRANSLATE (translate, c);	\
1546   } while (0)
1547 #endif
1548 
1549 /* Fetch the next character in the uncompiled pattern, with no
1550    translation.	 */
1551 #define PATFETCH_RAW(c)							\
1552   do {if (p == pend) return REG_EEND;					\
1553     c = (unsigned char) *p++;						\
1554   } while (0)
1555 
1556 /* Go backwards one character in the pattern.  */
1557 #define PATUNFETCH p--
1558 
1559 
1560 /* If `translate' is non-null, return translate[D], else just D.  We
1561    cast the subscript to translate because some data is declared as
1562    `char *', to avoid warnings when a string constant is passed.  But
1563    when we use a character as a subscript we must make it unsigned.  */
1564 #ifndef TRANSLATE
1565 #define TRANSLATE(d) \
1566   (RE_TRANSLATE_P (translate) \
1567    ? (unsigned) RE_TRANSLATE (translate, (unsigned) (d)) : (d))
1568 #endif
1569 
1570 
1571 /* Macros for outputting the compiled pattern into `buffer'.  */
1572 
1573 /* If the buffer isn't allocated when it comes in, use this.  */
1574 #define INIT_BUF_SIZE  32
1575 
1576 /* Make sure we have at least N more bytes of space in buffer.	*/
1577 #define GET_BUFFER_SPACE(n)						\
1578     while (b - bufp->buffer + (n) > bufp->allocated)			\
1579       EXTEND_BUFFER ()
1580 
1581 /* Make sure we have one more byte of buffer space and then add C to it.  */
1582 #define BUF_PUSH(c)							\
1583   do {									\
1584     GET_BUFFER_SPACE (1);						\
1585     *b++ = (unsigned char) (c);						\
1586   } while (0)
1587 
1588 
1589 /* Ensure we have two more bytes of buffer space and then append C1 and C2.  */
1590 #define BUF_PUSH_2(c1, c2)						\
1591   do {									\
1592     GET_BUFFER_SPACE (2);						\
1593     *b++ = (unsigned char) (c1);					\
1594     *b++ = (unsigned char) (c2);					\
1595   } while (0)
1596 
1597 
1598 /* As with BUF_PUSH_2, except for three bytes.	*/
1599 #define BUF_PUSH_3(c1, c2, c3)						\
1600   do {									\
1601     GET_BUFFER_SPACE (3);						\
1602     *b++ = (unsigned char) (c1);					\
1603     *b++ = (unsigned char) (c2);					\
1604     *b++ = (unsigned char) (c3);					\
1605   } while (0)
1606 
1607 
1608 /* Store a jump with opcode OP at LOC to location TO.  We store a
1609    relative address offset by the three bytes the jump itself occupies.	 */
1610 #define STORE_JUMP(op, loc, to) \
1611   store_op1 (op, loc, (to) - (loc) - 3)
1612 
1613 /* Likewise, for a two-argument jump.  */
1614 #define STORE_JUMP2(op, loc, to, arg) \
1615   store_op2 (op, loc, (to) - (loc) - 3, arg)
1616 
1617 /* Like `STORE_JUMP', but for inserting.  Assume `b' is the buffer end.	 */
1618 #define INSERT_JUMP(op, loc, to) \
1619   insert_op1 (op, loc, (to) - (loc) - 3, b)
1620 
1621 /* Like `STORE_JUMP2', but for inserting.  Assume `b' is the buffer end.  */
1622 #define INSERT_JUMP2(op, loc, to, arg) \
1623   insert_op2 (op, loc, (to) - (loc) - 3, arg, b)
1624 
1625 
1626 /* This is not an arbitrary limit: the arguments which represent offsets
1627    into the pattern are two bytes long.	 So if 2^16 bytes turns out to
1628    be too small, many things would have to change.  */
1629 #define MAX_BUF_SIZE (1L << 16)
1630 
1631 
1632 /* Extend the buffer by twice its current size via realloc and
1633    reset the pointers that pointed into the old block to point to the
1634    correct places in the new one.  If extending the buffer results in it
1635    being larger than MAX_BUF_SIZE, then flag memory exhausted.	*/
1636 #define EXTEND_BUFFER()							\
1637   do {									\
1638     unsigned char *old_buffer = bufp->buffer;				\
1639     if (bufp->allocated == MAX_BUF_SIZE)				\
1640       return REG_ESIZE;							\
1641     bufp->allocated <<= 1;						\
1642     if (bufp->allocated > MAX_BUF_SIZE)					\
1643       bufp->allocated = MAX_BUF_SIZE;					\
1644     bufp->buffer = (unsigned char *) realloc (bufp->buffer, bufp->allocated);\
1645     if (bufp->buffer == NULL)						\
1646       return REG_ESPACE;						\
1647     /* If the buffer moved, move all the pointers into it.  */		\
1648     if (old_buffer != bufp->buffer)					\
1649       {									\
1650 	b = (b - old_buffer) + bufp->buffer;				\
1651 	begalt = (begalt - old_buffer) + bufp->buffer;			\
1652 	if (fixup_alt_jump)						\
1653 	  fixup_alt_jump = (fixup_alt_jump - old_buffer) + bufp->buffer;\
1654 	if (laststart)							\
1655 	  laststart = (laststart - old_buffer) + bufp->buffer;		\
1656 	if (pending_exact)						\
1657 	  pending_exact = (pending_exact - old_buffer) + bufp->buffer;	\
1658       }									\
1659   } while (0)
1660 
1661 
1662 /* Since we have one byte reserved for the register number argument to
1663    {start,stop}_memory, the maximum number of groups we can report
1664    things about is what fits in that byte.  */
1665 #define MAX_REGNUM 255
1666 
1667 /* But patterns can have more than `MAX_REGNUM' registers.  We just
1668    ignore the excess.  */
1669 typedef unsigned regnum_t;
1670 
1671 
1672 /* Macros for the compile stack.  */
1673 
1674 /* Since offsets can go either forwards or backwards, this type needs to
1675    be able to hold values from -(MAX_BUF_SIZE - 1) to MAX_BUF_SIZE - 1.	 */
1676 typedef int pattern_offset_t;
1677 
1678 typedef struct
1679 {
1680   pattern_offset_t begalt_offset;
1681   pattern_offset_t fixup_alt_jump;
1682   pattern_offset_t inner_group_offset;
1683   pattern_offset_t laststart_offset;
1684   regnum_t regnum;
1685 } compile_stack_elt_t;
1686 
1687 
1688 typedef struct
1689 {
1690   compile_stack_elt_t *stack;
1691   unsigned size;
1692   unsigned avail;			/* Offset of next open position.  */
1693 } compile_stack_type;
1694 
1695 
1696 #define INIT_COMPILE_STACK_SIZE 32
1697 
1698 #define COMPILE_STACK_EMPTY  (compile_stack.avail == 0)
1699 #define COMPILE_STACK_FULL  (compile_stack.avail == compile_stack.size)
1700 
1701 /* The next available element.	*/
1702 #define COMPILE_STACK_TOP (compile_stack.stack[compile_stack.avail])
1703 
1704 
1705 /* Structure to manage work area for range table.  */
1706 struct range_table_work_area
1707 {
1708   int *table;			/* actual work area.  */
1709   int allocated;		/* allocated size for work area in bytes.  */
1710   int used;			/* actually used size in words.	 */
1711 };
1712 
1713 /* Make sure that WORK_AREA can hold more N multibyte characters.  */
1714 #define EXTEND_RANGE_TABLE_WORK_AREA(work_area, n)			  \
1715   do {									  \
1716     if (((work_area).used + (n)) * sizeof (int) > (work_area).allocated)  \
1717       {									  \
1718 	(work_area).allocated += 16 * sizeof (int);			  \
1719 	if ((work_area).table)						  \
1720 	  (work_area).table						  \
1721 	    = (int *) realloc ((work_area).table, (work_area).allocated); \
1722 	else								  \
1723 	  (work_area).table						  \
1724 	    = (int *) malloc ((work_area).allocated);			  \
1725 	if ((work_area).table == 0)					  \
1726 	  FREE_STACK_RETURN (REG_ESPACE);				  \
1727       }									  \
1728   } while (0)
1729 
1730 /* Set a range (RANGE_START, RANGE_END) to WORK_AREA.  */
1731 #define SET_RANGE_TABLE_WORK_AREA(work_area, range_start, range_end)	\
1732   do {									\
1733     EXTEND_RANGE_TABLE_WORK_AREA ((work_area), 2);			\
1734     (work_area).table[(work_area).used++] = (range_start);		\
1735     (work_area).table[(work_area).used++] = (range_end);		\
1736   } while (0)
1737 
1738 /* Free allocated memory for WORK_AREA.	 */
1739 #define FREE_RANGE_TABLE_WORK_AREA(work_area)	\
1740   do {						\
1741     if ((work_area).table)			\
1742       free ((work_area).table);			\
1743   } while (0)
1744 
1745 #define CLEAR_RANGE_TABLE_WORK_USED(work_area) ((work_area).used = 0)
1746 #define RANGE_TABLE_WORK_USED(work_area) ((work_area).used)
1747 #define RANGE_TABLE_WORK_ELT(work_area, i) ((work_area).table[i])
1748 
1749 
1750 /* Set the bit for character C in a list.  */
1751 #define SET_LIST_BIT(c)				      \
1752   (b[((unsigned char) (c)) / BYTEWIDTH]		      \
1753    |= 1 << (((unsigned char) c) % BYTEWIDTH))
1754 
1755 
1756 /* Get the next unsigned number in the uncompiled pattern.  */
1757 #define GET_UNSIGNED_NUMBER(num)					\
1758   { if (p != pend)							\
1759      {									\
1760        PATFETCH (c);							\
1761        while (ISDIGIT (c))						\
1762 	 {								\
1763 	   if (num < 0)							\
1764 	      num = 0;							\
1765 	   num = num * 10 + c - '0';					\
1766 	   if (p == pend)						\
1767 	      break;							\
1768 	   PATFETCH (c);						\
1769 	 }								\
1770        }								\
1771     }
1772 
1773 #define CHAR_CLASS_MAX_LENGTH  6 /* Namely, `xdigit'.  */
1774 
1775 #define IS_CHAR_CLASS(string)						\
1776    (STREQ (string, "alpha") || STREQ (string, "upper")			\
1777     || STREQ (string, "lower") || STREQ (string, "digit")		\
1778     || STREQ (string, "alnum") || STREQ (string, "xdigit")		\
1779     || STREQ (string, "space") || STREQ (string, "print")		\
1780     || STREQ (string, "punct") || STREQ (string, "graph")		\
1781     || STREQ (string, "cntrl") || STREQ (string, "blank"))
1782 
1783 #ifndef MATCH_MAY_ALLOCATE
1784 
1785 /* If we cannot allocate large objects within re_match_2_internal,
1786    we make the fail stack and register vectors global.
1787    The fail stack, we grow to the maximum size when a regexp
1788    is compiled.
1789    The register vectors, we adjust in size each time we
1790    compile a regexp, according to the number of registers it needs.  */
1791 
1792 static fail_stack_type fail_stack;
1793 
1794 /* Size with which the following vectors are currently allocated.
1795    That is so we can make them bigger as needed,
1796    but never make them smaller.	 */
1797 static int regs_allocated_size;
1798 
1799 static const char **	 regstart, **	  regend;
1800 static const char ** old_regstart, ** old_regend;
1801 static const char **best_regstart, **best_regend;
1802 static register_info_type *reg_info;
1803 static const char **reg_dummy;
1804 static register_info_type *reg_info_dummy;
1805 
1806 /* Make the register vectors big enough for NUM_REGS registers,
1807    but don't make them smaller.	 */
1808 
1809 static
regex_grow_registers(num_regs)1810 regex_grow_registers (num_regs)
1811      int num_regs;
1812 {
1813   if (num_regs > regs_allocated_size)
1814     {
1815       RETALLOC_IF (regstart,	 num_regs, const char *);
1816       RETALLOC_IF (regend,	 num_regs, const char *);
1817       RETALLOC_IF (old_regstart, num_regs, const char *);
1818       RETALLOC_IF (old_regend,	 num_regs, const char *);
1819       RETALLOC_IF (best_regstart, num_regs, const char *);
1820       RETALLOC_IF (best_regend,	 num_regs, const char *);
1821       RETALLOC_IF (reg_info,	 num_regs, register_info_type);
1822       RETALLOC_IF (reg_dummy,	 num_regs, const char *);
1823       RETALLOC_IF (reg_info_dummy, num_regs, register_info_type);
1824 
1825       regs_allocated_size = num_regs;
1826     }
1827 }
1828 
1829 #endif /* not MATCH_MAY_ALLOCATE */
1830 
1831 /* `regex_compile' compiles PATTERN (of length SIZE) according to SYNTAX.
1832    Returns one of error codes defined in `regex.h', or zero for success.
1833 
1834    Assumes the `allocated' (and perhaps `buffer') and `translate'
1835    fields are set in BUFP on entry.
1836 
1837    If it succeeds, results are put in BUFP (if it returns an error, the
1838    contents of BUFP are undefined):
1839      `buffer' is the compiled pattern;
1840      `syntax' is set to SYNTAX;
1841      `used' is set to the length of the compiled pattern;
1842      `fastmap_accurate' is zero;
1843      `re_nsub' is the number of subexpressions in PATTERN;
1844      `not_bol' and `not_eol' are zero;
1845 
1846    The `fastmap' and `newline_anchor' fields are neither
1847    examined nor set.  */
1848 
1849 /* Return, freeing storage we allocated.  */
1850 #define FREE_STACK_RETURN(value)		\
1851   do {							\
1852     FREE_RANGE_TABLE_WORK_AREA (range_table_work);	\
1853     free (compile_stack.stack);				\
1854     return value;					\
1855   } while (0)
1856 
1857 static reg_errcode_t
regex_compile(pattern,size,syntax,bufp)1858 regex_compile (pattern, size, syntax, bufp)
1859      const char *pattern;
1860      int size;
1861      reg_syntax_t syntax;
1862      struct re_pattern_buffer *bufp;
1863 {
1864   /* We fetch characters from PATTERN here.  Even though PATTERN is
1865      `char *' (i.e., signed), we declare these variables as unsigned, so
1866      they can be reliably used as array indices.  */
1867   register unsigned int c, c1;
1868 
1869   /* A random temporary spot in PATTERN.  */
1870   const char *p1;
1871 
1872   /* Points to the end of the buffer, where we should append.  */
1873   register unsigned char *b;
1874 
1875   /* Keeps track of unclosed groups.  */
1876   compile_stack_type compile_stack;
1877 
1878   /* Points to the current (ending) position in the pattern.  */
1879 #ifdef AIX
1880   /* `const' makes AIX compiler fail.  */
1881   char *p = pattern;
1882 #else
1883   const char *p = pattern;
1884 #endif
1885   const char *pend = pattern + size;
1886 
1887   /* How to translate the characters in the pattern.  */
1888   RE_TRANSLATE_TYPE translate = bufp->translate;
1889 
1890   /* Address of the count-byte of the most recently inserted `exactn'
1891      command.  This makes it possible to tell if a new exact-match
1892      character can be added to that command or if the character requires
1893      a new `exactn' command.  */
1894   unsigned char *pending_exact = 0;
1895 
1896   /* Address of start of the most recently finished expression.
1897      This tells, e.g., postfix * where to find the start of its
1898      operand.  Reset at the beginning of groups and alternatives.  */
1899   unsigned char *laststart = 0;
1900 
1901   /* Address of beginning of regexp, or inside of last group.  */
1902   unsigned char *begalt;
1903 
1904   /* Place in the uncompiled pattern (i.e., the {) to
1905      which to go back if the interval is invalid.  */
1906   const char *beg_interval;
1907 
1908   /* Address of the place where a forward jump should go to the end of
1909      the containing expression.	 Each alternative of an `or' -- except the
1910      last -- ends with a forward jump of this sort.  */
1911   unsigned char *fixup_alt_jump = 0;
1912 
1913   /* Counts open-groups as they are encountered.  Remembered for the
1914      matching close-group on the compile stack, so the same register
1915      number is put in the stop_memory as the start_memory.  */
1916   regnum_t regnum = 0;
1917 
1918   /* Work area for range table of charset.  */
1919   struct range_table_work_area range_table_work;
1920 
1921 #ifdef DEBUG
1922   DEBUG_PRINT1 ("\nCompiling pattern: ");
1923   if (debug)
1924     {
1925       unsigned debug_count;
1926 
1927       for (debug_count = 0; debug_count < size; debug_count++)
1928 	putchar (pattern[debug_count]);
1929       putchar ('\n');
1930     }
1931 #endif /* DEBUG */
1932 
1933   /* Initialize the compile stack.  */
1934   compile_stack.stack = TALLOC (INIT_COMPILE_STACK_SIZE, compile_stack_elt_t);
1935   if (compile_stack.stack == NULL)
1936     return REG_ESPACE;
1937 
1938   compile_stack.size = INIT_COMPILE_STACK_SIZE;
1939   compile_stack.avail = 0;
1940 
1941   range_table_work.table = 0;
1942   range_table_work.allocated = 0;
1943 
1944   /* Initialize the pattern buffer.  */
1945   bufp->syntax = syntax;
1946   bufp->fastmap_accurate = 0;
1947   bufp->not_bol = bufp->not_eol = 0;
1948 
1949   /* Set `used' to zero, so that if we return an error, the pattern
1950      printer (for debugging) will think there's no pattern.  We reset it
1951      at the end.  */
1952   bufp->used = 0;
1953 
1954   /* Always count groups, whether or not bufp->no_sub is set.  */
1955   bufp->re_nsub = 0;
1956 
1957 #ifdef emacs
1958   /* bufp->multibyte is set before regex_compile is called, so don't alter
1959      it. */
1960 #else  /* not emacs */
1961   /* Nothing is recognized as a multibyte character.  */
1962   bufp->multibyte = 0;
1963 #endif
1964 
1965 #if !defined (emacs) && !defined (SYNTAX_TABLE)
1966   /* Initialize the syntax table.  */
1967    init_syntax_once ();
1968 #endif
1969 
1970   if (bufp->allocated == 0)
1971     {
1972       if (bufp->buffer)
1973 	{ /* If zero allocated, but buffer is non-null, try to realloc
1974 	     enough space.  This loses if buffer's address is bogus, but
1975 	     that is the user's responsibility.	 */
1976 	  RETALLOC (bufp->buffer, INIT_BUF_SIZE, unsigned char);
1977 	}
1978       else
1979 	{ /* Caller did not allocate a buffer.	Do it for them.	 */
1980 	  bufp->buffer = TALLOC (INIT_BUF_SIZE, unsigned char);
1981 	}
1982       if (!bufp->buffer) FREE_STACK_RETURN (REG_ESPACE);
1983 
1984       bufp->allocated = INIT_BUF_SIZE;
1985     }
1986 
1987   begalt = b = bufp->buffer;
1988 
1989   /* Loop through the uncompiled pattern until we're at the end.  */
1990   while (p != pend)
1991     {
1992       PATFETCH (c);
1993 
1994       switch (c)
1995 	{
1996 	case '^':
1997 	  {
1998 	    if (   /* If at start of pattern, it's an operator.	 */
1999 		   p == pattern + 1
2000 		   /* If context independent, it's an operator.	 */
2001 		|| syntax & RE_CONTEXT_INDEP_ANCHORS
2002 		   /* Otherwise, depends on what's come before.	 */
2003 		|| at_begline_loc_p (pattern, p, syntax))
2004 	      BUF_PUSH (begline);
2005 	    else
2006 	      goto normal_char;
2007 	  }
2008 	  break;
2009 
2010 
2011 	case '$':
2012 	  {
2013 	    if (   /* If at end of pattern, it's an operator.  */
2014 		   p == pend
2015 		   /* If context independent, it's an operator.	 */
2016 		|| syntax & RE_CONTEXT_INDEP_ANCHORS
2017 		   /* Otherwise, depends on what's next.  */
2018 		|| at_endline_loc_p (p, pend, syntax))
2019 	       BUF_PUSH (endline);
2020 	     else
2021 	       goto normal_char;
2022 	   }
2023 	   break;
2024 
2025 
2026 	case '+':
2027 	case '?':
2028 	  if ((syntax & RE_BK_PLUS_QM)
2029 	      || (syntax & RE_LIMITED_OPS))
2030 	    goto normal_char;
2031 	handle_plus:
2032 	case '*':
2033 	  /* If there is no previous pattern... */
2034 	  if (!laststart)
2035 	    {
2036 	      if (syntax & RE_CONTEXT_INVALID_OPS)
2037 		FREE_STACK_RETURN (REG_BADRPT);
2038 	      else if (!(syntax & RE_CONTEXT_INDEP_OPS))
2039 		goto normal_char;
2040 	    }
2041 
2042 	  {
2043 	    /* Are we optimizing this jump?  */
2044 	    boolean keep_string_p = false;
2045 
2046 	    /* 1 means zero (many) matches is allowed.	*/
2047 	    char zero_times_ok = 0, many_times_ok = 0;
2048 
2049 	    /* If there is a sequence of repetition chars, collapse it
2050 	       down to just one (the right one).  We can't combine
2051 	       interval operators with these because of, e.g., `a{2}*',
2052 	       which should only match an even number of `a's.	*/
2053 
2054 	    for (;;)
2055 	      {
2056 		zero_times_ok |= c != '+';
2057 		many_times_ok |= c != '?';
2058 
2059 		if (p == pend)
2060 		  break;
2061 
2062 		PATFETCH (c);
2063 
2064 		if (c == '*'
2065 		    || (!(syntax & RE_BK_PLUS_QM) && (c == '+' || c == '?')))
2066 		  ;
2067 
2068 		else if (syntax & RE_BK_PLUS_QM	 &&  c == '\\')
2069 		  {
2070 		    if (p == pend) FREE_STACK_RETURN (REG_EESCAPE);
2071 
2072 		    PATFETCH (c1);
2073 		    if (!(c1 == '+' || c1 == '?'))
2074 		      {
2075 			PATUNFETCH;
2076 			PATUNFETCH;
2077 			break;
2078 		      }
2079 
2080 		    c = c1;
2081 		  }
2082 		else
2083 		  {
2084 		    PATUNFETCH;
2085 		    break;
2086 		  }
2087 
2088 		/* If we get here, we found another repeat character.  */
2089 	       }
2090 
2091 	    /* Star, etc. applied to an empty pattern is equivalent
2092 	       to an empty pattern.  */
2093 	    if (!laststart)
2094 	      break;
2095 
2096 	    /* Now we know whether or not zero matches is allowed
2097 	       and also whether or not two or more matches is allowed.	*/
2098 	    if (many_times_ok)
2099 	      { /* More than one repetition is allowed, so put in at the
2100 		   end a backward relative jump from `b' to before the next
2101 		   jump we're going to put in below (which jumps from
2102 		   laststart to after this jump).
2103 
2104 		   But if we are at the `*' in the exact sequence `.*\n',
2105 		   insert an unconditional jump backwards to the .,
2106 		   instead of the beginning of the loop.  This way we only
2107 		   push a failure point once, instead of every time
2108 		   through the loop.  */
2109 		assert (p - 1 > pattern);
2110 
2111 		/* Allocate the space for the jump.  */
2112 		GET_BUFFER_SPACE (3);
2113 
2114 		/* We know we are not at the first character of the pattern,
2115 		   because laststart was nonzero.  And we've already
2116 		   incremented `p', by the way, to be the character after
2117 		   the `*'.  Do we have to do something analogous here
2118 		   for null bytes, because of RE_DOT_NOT_NULL?	*/
2119 		if (TRANSLATE ((unsigned char)*(p - 2)) == TRANSLATE ('.')
2120 		    && zero_times_ok
2121 		    && p < pend
2122 		    && TRANSLATE ((unsigned char)*p) == TRANSLATE ('\n')
2123 		    && !(syntax & RE_DOT_NEWLINE))
2124 		  { /* We have .*\n.  */
2125 		    STORE_JUMP (jump, b, laststart);
2126 		    keep_string_p = true;
2127 		  }
2128 		else
2129 		  /* Anything else.  */
2130 		  STORE_JUMP (maybe_pop_jump, b, laststart - 3);
2131 
2132 		/* We've added more stuff to the buffer.  */
2133 		b += 3;
2134 	      }
2135 
2136 	    /* On failure, jump from laststart to b + 3, which will be the
2137 	       end of the buffer after this jump is inserted.  */
2138 	    GET_BUFFER_SPACE (3);
2139 	    INSERT_JUMP (keep_string_p ? on_failure_keep_string_jump
2140 				       : on_failure_jump,
2141 			 laststart, b + 3);
2142 	    pending_exact = 0;
2143 	    b += 3;
2144 
2145 	    if (!zero_times_ok)
2146 	      {
2147 		/* At least one repetition is required, so insert a
2148 		   `dummy_failure_jump' before the initial
2149 		   `on_failure_jump' instruction of the loop. This
2150 		   effects a skip over that instruction the first time
2151 		   we hit that loop.  */
2152 		GET_BUFFER_SPACE (3);
2153 		INSERT_JUMP (dummy_failure_jump, laststart, laststart + 6);
2154 		b += 3;
2155 	      }
2156 	    }
2157 	  break;
2158 
2159 
2160 	case '.':
2161 	  laststart = b;
2162 	  BUF_PUSH (anychar);
2163 	  break;
2164 
2165 
2166 	case '[':
2167 	  {
2168 	    CLEAR_RANGE_TABLE_WORK_USED (range_table_work);
2169 
2170 	    if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
2171 
2172 	    /* Ensure that we have enough space to push a charset: the
2173 	       opcode, the length count, and the bitset; 34 bytes in all.  */
2174 	    GET_BUFFER_SPACE (34);
2175 
2176 	    laststart = b;
2177 
2178 	    /* We test `*p == '^' twice, instead of using an if
2179 	       statement, so we only need one BUF_PUSH.	 */
2180 	    BUF_PUSH (*p == '^' ? charset_not : charset);
2181 	    if (*p == '^')
2182 	      p++;
2183 
2184 	    /* Remember the first position in the bracket expression.  */
2185 	    p1 = p;
2186 
2187 	    /* Push the number of bytes in the bitmap.	*/
2188 	    BUF_PUSH ((1 << BYTEWIDTH) / BYTEWIDTH);
2189 
2190 	    /* Clear the whole map.  */
2191 	    bzero (b, (1 << BYTEWIDTH) / BYTEWIDTH);
2192 
2193 	    /* charset_not matches newline according to a syntax bit.  */
2194 	    if ((re_opcode_t) b[-2] == charset_not
2195 		&& (syntax & RE_HAT_LISTS_NOT_NEWLINE))
2196 	      SET_LIST_BIT ('\n');
2197 
2198 	    /* Read in characters and ranges, setting map bits.	 */
2199 	    for (;;)
2200 	      {
2201 		int len;
2202 		boolean escaped_char = false;
2203 
2204 		if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
2205 
2206 		PATFETCH (c);
2207 
2208 		/* \ might escape characters inside [...] and [^...].  */
2209 		if ((syntax & RE_BACKSLASH_ESCAPE_IN_LISTS) && c == '\\')
2210 		  {
2211 		    if (p == pend) FREE_STACK_RETURN (REG_EESCAPE);
2212 
2213 		    PATFETCH (c);
2214 		    escaped_char = true;
2215 		  }
2216 		else
2217 		  {
2218 		    /* Could be the end of the bracket expression.	If it's
2219 		       not (i.e., when the bracket expression is `[]' so
2220 		       far), the ']' character bit gets set way below.  */
2221 		    if (c == ']' && p != p1 + 1)
2222 		      break;
2223 		  }
2224 
2225 		/* If C indicates start of multibyte char, get the
2226 		   actual character code in C, and set the pattern
2227 		   pointer P to the next character boundary.  */
2228 		if (bufp->multibyte && BASE_LEADING_CODE_P (c))
2229 		  {
2230 		    PATUNFETCH;
2231 		    c = STRING_CHAR_AND_LENGTH (p, pend - p, len);
2232 		    p += len;
2233 		  }
2234 		/* What should we do for the character which is
2235 		   greater than 0x7F, but not BASE_LEADING_CODE_P?
2236 		   XXX */
2237 
2238 		/* See if we're at the beginning of a possible character
2239 		   class.  */
2240 
2241 		else if (!escaped_char &&
2242 			 syntax & RE_CHAR_CLASSES && c == '[' && *p == ':')
2243 		  {
2244 		    /* Leave room for the null.	 */
2245 		    char str[CHAR_CLASS_MAX_LENGTH + 1];
2246 
2247 		    PATFETCH (c);
2248 		    c1 = 0;
2249 
2250 		    /* If pattern is `[[:'.  */
2251 		    if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
2252 
2253 		    for (;;)
2254 		      {
2255 			PATFETCH (c);
2256 			if (c == ':' || c == ']' || p == pend
2257 			    || c1 == CHAR_CLASS_MAX_LENGTH)
2258 			  break;
2259 			str[c1++] = c;
2260 		      }
2261 		    str[c1] = '\0';
2262 
2263 		    /* If isn't a word bracketed by `[:' and `:]':
2264 		       undo the ending character, the letters, and
2265 		       leave the leading `:' and `[' (but set bits for
2266 		       them).  */
2267 		    if (c == ':' && *p == ']')
2268 		      {
2269 			int ch;
2270 			boolean is_alnum = STREQ (str, "alnum");
2271 			boolean is_alpha = STREQ (str, "alpha");
2272 			boolean is_blank = STREQ (str, "blank");
2273 			boolean is_cntrl = STREQ (str, "cntrl");
2274 			boolean is_digit = STREQ (str, "digit");
2275 			boolean is_graph = STREQ (str, "graph");
2276 			boolean is_lower = STREQ (str, "lower");
2277 			boolean is_print = STREQ (str, "print");
2278 			boolean is_punct = STREQ (str, "punct");
2279 			boolean is_space = STREQ (str, "space");
2280 			boolean is_upper = STREQ (str, "upper");
2281 			boolean is_xdigit = STREQ (str, "xdigit");
2282 
2283 			if (!IS_CHAR_CLASS (str))
2284 			  FREE_STACK_RETURN (REG_ECTYPE);
2285 
2286 			/* Throw away the ] at the end of the character
2287 			   class.  */
2288 			PATFETCH (c);
2289 
2290 			if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
2291 
2292 			for (ch = 0; ch < 1 << BYTEWIDTH; ch++)
2293 			  {
2294 			    int translated = TRANSLATE (ch);
2295 			    /* This was split into 3 if's to
2296 			       avoid an arbitrary limit in some compiler.  */
2297 			    if (   (is_alnum  && ISALNUM (ch))
2298 				|| (is_alpha  && ISALPHA (ch))
2299 				|| (is_blank  && ISBLANK (ch))
2300 				|| (is_cntrl  && ISCNTRL (ch)))
2301 			      SET_LIST_BIT (translated);
2302 			    if (   (is_digit  && ISDIGIT (ch))
2303 				|| (is_graph  && ISGRAPH (ch))
2304 				|| (is_lower  && ISLOWER (ch))
2305 				|| (is_print  && ISPRINT (ch)))
2306 			      SET_LIST_BIT (translated);
2307 			    if (   (is_punct  && ISPUNCT (ch))
2308 				|| (is_space  && ISSPACE (ch))
2309 				|| (is_upper  && ISUPPER (ch))
2310 				|| (is_xdigit && ISXDIGIT (ch)))
2311 			      SET_LIST_BIT (translated);
2312 			  }
2313 
2314 			/* Repeat the loop. */
2315 			continue;
2316 		      }
2317 		    else
2318 		      {
2319 			c1++;
2320 			while (c1--)
2321 			  PATUNFETCH;
2322 			SET_LIST_BIT ('[');
2323 
2324 			/* Because the `:' may starts the range, we
2325 			   can't simply set bit and repeat the loop.
2326 			   Instead, just set it to C and handle below.	*/
2327 			c = ':';
2328 		      }
2329 		  }
2330 
2331 		if (p < pend && p[0] == '-' && p[1] != ']')
2332 		  {
2333 
2334 		    /* Discard the `-'. */
2335 		    PATFETCH (c1);
2336 
2337 		    /* Fetch the character which ends the range. */
2338 		    PATFETCH (c1);
2339 		    if (bufp->multibyte && BASE_LEADING_CODE_P (c1))
2340 		      {
2341 			PATUNFETCH;
2342 			c1 = STRING_CHAR_AND_LENGTH (p, pend - p, len);
2343 			p += len;
2344 		      }
2345 
2346 		    if (SINGLE_BYTE_CHAR_P (c)
2347 			&& ! SINGLE_BYTE_CHAR_P (c1))
2348 		      {
2349 			/* Handle a range such as \177-\377 in multibyte mode.
2350 			   Split that into two ranges,,
2351 			   the low one ending at 0237, and the high one
2352 			   starting at ...040.  */
2353 			int c1_base = (c1 & ~0177) | 040;
2354 			SET_RANGE_TABLE_WORK_AREA (range_table_work, c, c1);
2355 			c1 = 0237;
2356 		      }
2357 		    else if (!SAME_CHARSET_P (c, c1))
2358 		      FREE_STACK_RETURN (REG_ERANGE);
2359 		  }
2360 		else
2361 		  /* Range from C to C. */
2362 		  c1 = c;
2363 
2364 		/* Set the range ... */
2365 		if (SINGLE_BYTE_CHAR_P (c))
2366 		  /* ... into bitmap.  */
2367 		  {
2368 		    unsigned this_char;
2369 		    int range_start = c, range_end = c1;
2370 
2371 		    /* If the start is after the end, the range is empty.  */
2372 		    if (range_start > range_end)
2373 		      {
2374 			if (syntax & RE_NO_EMPTY_RANGES)
2375 			  FREE_STACK_RETURN (REG_ERANGE);
2376 			/* Else, repeat the loop.  */
2377 		      }
2378 		    else
2379 		      {
2380 			for (this_char = range_start; this_char <= range_end;
2381 			     this_char++)
2382 			  SET_LIST_BIT (TRANSLATE (this_char));
2383 		      }
2384 		  }
2385 		else
2386 		  /* ... into range table.  */
2387 		  SET_RANGE_TABLE_WORK_AREA (range_table_work, c, c1);
2388 	      }
2389 
2390 	    /* Discard any (non)matching list bytes that are all 0 at the
2391 	       end of the map.	Decrease the map-length byte too.  */
2392 	    while ((int) b[-1] > 0 && b[b[-1] - 1] == 0)
2393 	      b[-1]--;
2394 	    b += b[-1];
2395 
2396 	    /* Build real range table from work area. */
2397 	    if (RANGE_TABLE_WORK_USED (range_table_work))
2398 	      {
2399 		int i;
2400 		int used = RANGE_TABLE_WORK_USED (range_table_work);
2401 
2402 		/* Allocate space for COUNT + RANGE_TABLE.  Needs two
2403 		   bytes for COUNT and three bytes for each character.	*/
2404 		GET_BUFFER_SPACE (2 + used * 3);
2405 
2406 		/* Indicate the existence of range table.  */
2407 		laststart[1] |= 0x80;
2408 
2409 		STORE_NUMBER_AND_INCR (b, used / 2);
2410 		for (i = 0; i < used; i++)
2411 		  STORE_CHARACTER_AND_INCR
2412 		    (b, RANGE_TABLE_WORK_ELT (range_table_work, i));
2413 	      }
2414 	  }
2415 	  break;
2416 
2417 
2418 	case '(':
2419 	  if (syntax & RE_NO_BK_PARENS)
2420 	    goto handle_open;
2421 	  else
2422 	    goto normal_char;
2423 
2424 
2425 	case ')':
2426 	  if (syntax & RE_NO_BK_PARENS)
2427 	    goto handle_close;
2428 	  else
2429 	    goto normal_char;
2430 
2431 
2432 	case '\n':
2433 	  if (syntax & RE_NEWLINE_ALT)
2434 	    goto handle_alt;
2435 	  else
2436 	    goto normal_char;
2437 
2438 
2439 	case '|':
2440 	  if (syntax & RE_NO_BK_VBAR)
2441 	    goto handle_alt;
2442 	  else
2443 	    goto normal_char;
2444 
2445 
2446 	case '{':
2447 	   if (syntax & RE_INTERVALS && syntax & RE_NO_BK_BRACES)
2448 	     goto handle_interval;
2449 	   else
2450 	     goto normal_char;
2451 
2452 
2453 	case '\\':
2454 	  if (p == pend) FREE_STACK_RETURN (REG_EESCAPE);
2455 
2456 	  /* Do not translate the character after the \, so that we can
2457 	     distinguish, e.g., \B from \b, even if we normally would
2458 	     translate, e.g., B to b.  */
2459 	  PATFETCH_RAW (c);
2460 
2461 	  switch (c)
2462 	    {
2463 	    case '(':
2464 	      if (syntax & RE_NO_BK_PARENS)
2465 		goto normal_backslash;
2466 
2467 	    handle_open:
2468 	      bufp->re_nsub++;
2469 	      regnum++;
2470 
2471 	      if (COMPILE_STACK_FULL)
2472 		{
2473 		  RETALLOC (compile_stack.stack, compile_stack.size << 1,
2474 			    compile_stack_elt_t);
2475 		  if (compile_stack.stack == NULL) return REG_ESPACE;
2476 
2477 		  compile_stack.size <<= 1;
2478 		}
2479 
2480 	      /* These are the values to restore when we hit end of this
2481 		 group.	 They are all relative offsets, so that if the
2482 		 whole pattern moves because of realloc, they will still
2483 		 be valid.  */
2484 	      COMPILE_STACK_TOP.begalt_offset = begalt - bufp->buffer;
2485 	      COMPILE_STACK_TOP.fixup_alt_jump
2486 		= fixup_alt_jump ? fixup_alt_jump - bufp->buffer + 1 : 0;
2487 	      COMPILE_STACK_TOP.laststart_offset = b - bufp->buffer;
2488 	      COMPILE_STACK_TOP.regnum = regnum;
2489 
2490 	      /* We will eventually replace the 0 with the number of
2491 		 groups inner to this one.  But do not push a
2492 		 start_memory for groups beyond the last one we can
2493 		 represent in the compiled pattern.  */
2494 	      if (regnum <= MAX_REGNUM)
2495 		{
2496 		  COMPILE_STACK_TOP.inner_group_offset = b - bufp->buffer + 2;
2497 		  BUF_PUSH_3 (start_memory, regnum, 0);
2498 		}
2499 
2500 	      compile_stack.avail++;
2501 
2502 	      fixup_alt_jump = 0;
2503 	      laststart = 0;
2504 	      begalt = b;
2505 	      /* If we've reached MAX_REGNUM groups, then this open
2506 		 won't actually generate any code, so we'll have to
2507 		 clear pending_exact explicitly.  */
2508 	      pending_exact = 0;
2509 	      break;
2510 
2511 
2512 	    case ')':
2513 	      if (syntax & RE_NO_BK_PARENS) goto normal_backslash;
2514 
2515 	      if (COMPILE_STACK_EMPTY)
2516 		if (syntax & RE_UNMATCHED_RIGHT_PAREN_ORD)
2517 		  goto normal_backslash;
2518 		else
2519 		  FREE_STACK_RETURN (REG_ERPAREN);
2520 
2521 	    handle_close:
2522 	      if (fixup_alt_jump)
2523 		{ /* Push a dummy failure point at the end of the
2524 		     alternative for a possible future
2525 		     `pop_failure_jump' to pop.	 See comments at
2526 		     `push_dummy_failure' in `re_match_2'.  */
2527 		  BUF_PUSH (push_dummy_failure);
2528 
2529 		  /* We allocated space for this jump when we assigned
2530 		     to `fixup_alt_jump', in the `handle_alt' case below.  */
2531 		  STORE_JUMP (jump_past_alt, fixup_alt_jump, b - 1);
2532 		}
2533 
2534 	      /* See similar code for backslashed left paren above.  */
2535 	      if (COMPILE_STACK_EMPTY)
2536 		if (syntax & RE_UNMATCHED_RIGHT_PAREN_ORD)
2537 		  goto normal_char;
2538 		else
2539 		  FREE_STACK_RETURN (REG_ERPAREN);
2540 
2541 	      /* Since we just checked for an empty stack above, this
2542 		 ``can't happen''.  */
2543 	      assert (compile_stack.avail != 0);
2544 	      {
2545 		/* We don't just want to restore into `regnum', because
2546 		   later groups should continue to be numbered higher,
2547 		   as in `(ab)c(de)' -- the second group is #2.	 */
2548 		regnum_t this_group_regnum;
2549 
2550 		compile_stack.avail--;
2551 		begalt = bufp->buffer + COMPILE_STACK_TOP.begalt_offset;
2552 		fixup_alt_jump
2553 		  = COMPILE_STACK_TOP.fixup_alt_jump
2554 		    ? bufp->buffer + COMPILE_STACK_TOP.fixup_alt_jump - 1
2555 		    : 0;
2556 		laststart = bufp->buffer + COMPILE_STACK_TOP.laststart_offset;
2557 		this_group_regnum = COMPILE_STACK_TOP.regnum;
2558 		/* If we've reached MAX_REGNUM groups, then this open
2559 		   won't actually generate any code, so we'll have to
2560 		   clear pending_exact explicitly.  */
2561 		pending_exact = 0;
2562 
2563 		/* We're at the end of the group, so now we know how many
2564 		   groups were inside this one.	 */
2565 		if (this_group_regnum <= MAX_REGNUM)
2566 		  {
2567 		    unsigned char *inner_group_loc
2568 		      = bufp->buffer + COMPILE_STACK_TOP.inner_group_offset;
2569 
2570 		    *inner_group_loc = regnum - this_group_regnum;
2571 		    BUF_PUSH_3 (stop_memory, this_group_regnum,
2572 				regnum - this_group_regnum);
2573 		  }
2574 	      }
2575 	      break;
2576 
2577 
2578 	    case '|':					/* `\|'.  */
2579 	      if (syntax & RE_LIMITED_OPS || syntax & RE_NO_BK_VBAR)
2580 		goto normal_backslash;
2581 	    handle_alt:
2582 	      if (syntax & RE_LIMITED_OPS)
2583 		goto normal_char;
2584 
2585 	      /* Insert before the previous alternative a jump which
2586 		 jumps to this alternative if the former fails.	 */
2587 	      GET_BUFFER_SPACE (3);
2588 	      INSERT_JUMP (on_failure_jump, begalt, b + 6);
2589 	      pending_exact = 0;
2590 	      b += 3;
2591 
2592 	      /* The alternative before this one has a jump after it
2593 		 which gets executed if it gets matched.  Adjust that
2594 		 jump so it will jump to this alternative's analogous
2595 		 jump (put in below, which in turn will jump to the next
2596 		 (if any) alternative's such jump, etc.).  The last such
2597 		 jump jumps to the correct final destination.  A picture:
2598 			  _____ _____
2599 			  |   | |   |
2600 			  |   v |   v
2601 			 a | b	 | c
2602 
2603 		 If we are at `b', then fixup_alt_jump right now points to a
2604 		 three-byte space after `a'.  We'll put in the jump, set
2605 		 fixup_alt_jump to right after `b', and leave behind three
2606 		 bytes which we'll fill in when we get to after `c'.  */
2607 
2608 	      if (fixup_alt_jump)
2609 		STORE_JUMP (jump_past_alt, fixup_alt_jump, b);
2610 
2611 	      /* Mark and leave space for a jump after this alternative,
2612 		 to be filled in later either by next alternative or
2613 		 when know we're at the end of a series of alternatives.  */
2614 	      fixup_alt_jump = b;
2615 	      GET_BUFFER_SPACE (3);
2616 	      b += 3;
2617 
2618 	      laststart = 0;
2619 	      begalt = b;
2620 	      break;
2621 
2622 
2623 	    case '{':
2624 	      /* If \{ is a literal.  */
2625 	      if (!(syntax & RE_INTERVALS)
2626 		     /* If we're at `\{' and it's not the open-interval
2627 			operator.  */
2628 		  || ((syntax & RE_INTERVALS) && (syntax & RE_NO_BK_BRACES))
2629 		  || (p - 2 == pattern	&&  p == pend))
2630 		goto normal_backslash;
2631 
2632 	    handle_interval:
2633 	      {
2634 		/* If got here, then the syntax allows intervals.  */
2635 
2636 		/* At least (most) this many matches must be made.  */
2637 		int lower_bound = -1, upper_bound = -1;
2638 
2639 		beg_interval = p - 1;
2640 
2641 		if (p == pend)
2642 		  {
2643 		    if (syntax & RE_NO_BK_BRACES)
2644 		      goto unfetch_interval;
2645 		    else
2646 		      FREE_STACK_RETURN (REG_EBRACE);
2647 		  }
2648 
2649 		GET_UNSIGNED_NUMBER (lower_bound);
2650 
2651 		if (c == ',')
2652 		  {
2653 		    GET_UNSIGNED_NUMBER (upper_bound);
2654 		    if (upper_bound < 0) upper_bound = RE_DUP_MAX;
2655 		  }
2656 		else
2657 		  /* Interval such as `{1}' => match exactly once. */
2658 		  upper_bound = lower_bound;
2659 
2660 		if (lower_bound < 0 || upper_bound > RE_DUP_MAX
2661 		    || lower_bound > upper_bound)
2662 		  {
2663 		    if (syntax & RE_NO_BK_BRACES)
2664 		      goto unfetch_interval;
2665 		    else
2666 		      FREE_STACK_RETURN (REG_BADBR);
2667 		  }
2668 
2669 		if (!(syntax & RE_NO_BK_BRACES))
2670 		  {
2671 		    if (c != '\\') FREE_STACK_RETURN (REG_EBRACE);
2672 
2673 		    PATFETCH (c);
2674 		  }
2675 
2676 		if (c != '}')
2677 		  {
2678 		    if (syntax & RE_NO_BK_BRACES)
2679 		      goto unfetch_interval;
2680 		    else
2681 		      FREE_STACK_RETURN (REG_BADBR);
2682 		  }
2683 
2684 		/* We just parsed a valid interval.  */
2685 
2686 		/* If it's invalid to have no preceding re.  */
2687 		if (!laststart)
2688 		  {
2689 		    if (syntax & RE_CONTEXT_INVALID_OPS)
2690 		      FREE_STACK_RETURN (REG_BADRPT);
2691 		    else if (syntax & RE_CONTEXT_INDEP_OPS)
2692 		      laststart = b;
2693 		    else
2694 		      goto unfetch_interval;
2695 		  }
2696 
2697 		/* If the upper bound is zero, don't want to succeed at
2698 		   all; jump from `laststart' to `b + 3', which will be
2699 		   the end of the buffer after we insert the jump.  */
2700 		 if (upper_bound == 0)
2701 		   {
2702 		     GET_BUFFER_SPACE (3);
2703 		     INSERT_JUMP (jump, laststart, b + 3);
2704 		     b += 3;
2705 		   }
2706 
2707 		 /* Otherwise, we have a nontrivial interval.  When
2708 		    we're all done, the pattern will look like:
2709 		      set_number_at <jump count> <upper bound>
2710 		      set_number_at <succeed_n count> <lower bound>
2711 		      succeed_n <after jump addr> <succeed_n count>
2712 		      <body of loop>
2713 		      jump_n <succeed_n addr> <jump count>
2714 		    (The upper bound and `jump_n' are omitted if
2715 		    `upper_bound' is 1, though.)  */
2716 		 else
2717 		   { /* If the upper bound is > 1, we need to insert
2718 			more at the end of the loop.  */
2719 		     unsigned nbytes = 10 + (upper_bound > 1) * 10;
2720 
2721 		     GET_BUFFER_SPACE (nbytes);
2722 
2723 		     /* Initialize lower bound of the `succeed_n', even
2724 			though it will be set during matching by its
2725 			attendant `set_number_at' (inserted next),
2726 			because `re_compile_fastmap' needs to know.
2727 			Jump to the `jump_n' we might insert below.  */
2728 		     INSERT_JUMP2 (succeed_n, laststart,
2729 				   b + 5 + (upper_bound > 1) * 5,
2730 				   lower_bound);
2731 		     b += 5;
2732 
2733 		     /* Code to initialize the lower bound.  Insert
2734 			before the `succeed_n'.	 The `5' is the last two
2735 			bytes of this `set_number_at', plus 3 bytes of
2736 			the following `succeed_n'.  */
2737 		     insert_op2 (set_number_at, laststart, 5, lower_bound, b);
2738 		     b += 5;
2739 
2740 		     if (upper_bound > 1)
2741 		       { /* More than one repetition is allowed, so
2742 			    append a backward jump to the `succeed_n'
2743 			    that starts this interval.
2744 
2745 			    When we've reached this during matching,
2746 			    we'll have matched the interval once, so
2747 			    jump back only `upper_bound - 1' times.  */
2748 			 STORE_JUMP2 (jump_n, b, laststart + 5,
2749 				      upper_bound - 1);
2750 			 b += 5;
2751 
2752 			 /* The location we want to set is the second
2753 			    parameter of the `jump_n'; that is `b-2' as
2754 			    an absolute address.  `laststart' will be
2755 			    the `set_number_at' we're about to insert;
2756 			    `laststart+3' the number to set, the source
2757 			    for the relative address.  But we are
2758 			    inserting into the middle of the pattern --
2759 			    so everything is getting moved up by 5.
2760 			    Conclusion: (b - 2) - (laststart + 3) + 5,
2761 			    i.e., b - laststart.
2762 
2763 			    We insert this at the beginning of the loop
2764 			    so that if we fail during matching, we'll
2765 			    reinitialize the bounds.  */
2766 			 insert_op2 (set_number_at, laststart, b - laststart,
2767 				     upper_bound - 1, b);
2768 			 b += 5;
2769 		       }
2770 		   }
2771 		pending_exact = 0;
2772 		beg_interval = NULL;
2773 	      }
2774 	      break;
2775 
2776 	    unfetch_interval:
2777 	      /* If an invalid interval, match the characters as literals.  */
2778 	       assert (beg_interval);
2779 	       p = beg_interval;
2780 	       beg_interval = NULL;
2781 
2782 	       /* normal_char and normal_backslash need `c'.  */
2783 	       PATFETCH (c);
2784 
2785 	       if (!(syntax & RE_NO_BK_BRACES))
2786 		 {
2787 		   if (p > pattern  &&	p[-1] == '\\')
2788 		     goto normal_backslash;
2789 		 }
2790 	       goto normal_char;
2791 
2792 #ifdef emacs
2793 	    /* There is no way to specify the before_dot and after_dot
2794 	       operators.  rms says this is ok.	 --karl	 */
2795 	    case '=':
2796 	      BUF_PUSH (at_dot);
2797 	      break;
2798 
2799 	    case 's':
2800 	      laststart = b;
2801 	      PATFETCH (c);
2802 	      BUF_PUSH_2 (syntaxspec, syntax_spec_code[c]);
2803 	      break;
2804 
2805 	    case 'S':
2806 	      laststart = b;
2807 	      PATFETCH (c);
2808 	      BUF_PUSH_2 (notsyntaxspec, syntax_spec_code[c]);
2809 	      break;
2810 
2811 	    case 'c':
2812 	      laststart = b;
2813 	      PATFETCH_RAW (c);
2814 	      BUF_PUSH_2 (categoryspec, c);
2815 	      break;
2816 
2817 	    case 'C':
2818 	      laststart = b;
2819 	      PATFETCH_RAW (c);
2820 	      BUF_PUSH_2 (notcategoryspec, c);
2821 	      break;
2822 #endif /* emacs */
2823 
2824 
2825 	    case 'w':
2826 	      laststart = b;
2827 	      BUF_PUSH (wordchar);
2828 	      break;
2829 
2830 
2831 	    case 'W':
2832 	      laststart = b;
2833 	      BUF_PUSH (notwordchar);
2834 	      break;
2835 
2836 
2837 	    case '<':
2838 	      BUF_PUSH (wordbeg);
2839 	      break;
2840 
2841 	    case '>':
2842 	      BUF_PUSH (wordend);
2843 	      break;
2844 
2845 	    case 'b':
2846 	      BUF_PUSH (wordbound);
2847 	      break;
2848 
2849 	    case 'B':
2850 	      BUF_PUSH (notwordbound);
2851 	      break;
2852 
2853 	    case '`':
2854 	      BUF_PUSH (begbuf);
2855 	      break;
2856 
2857 	    case '\'':
2858 	      BUF_PUSH (endbuf);
2859 	      break;
2860 
2861 	    case '1': case '2': case '3': case '4': case '5':
2862 	    case '6': case '7': case '8': case '9':
2863 	      if (syntax & RE_NO_BK_REFS)
2864 		goto normal_char;
2865 
2866 	      c1 = c - '0';
2867 
2868 	      if (c1 > regnum)
2869 		FREE_STACK_RETURN (REG_ESUBREG);
2870 
2871 	      /* Can't back reference to a subexpression if inside of it.  */
2872 	      if (group_in_compile_stack (compile_stack, c1))
2873 		goto normal_char;
2874 
2875 	      laststart = b;
2876 	      BUF_PUSH_2 (duplicate, c1);
2877 	      break;
2878 
2879 
2880 	    case '+':
2881 	    case '?':
2882 	      if (syntax & RE_BK_PLUS_QM)
2883 		goto handle_plus;
2884 	      else
2885 		goto normal_backslash;
2886 
2887 	    default:
2888 	    normal_backslash:
2889 	      /* You might think it would be useful for \ to mean
2890 		 not to translate; but if we don't translate it
2891 		 it will never match anything.	*/
2892 	      c = TRANSLATE (c);
2893 	      goto normal_char;
2894 	    }
2895 	  break;
2896 
2897 
2898 	default:
2899 	/* Expects the character in `c'.  */
2900 	normal_char:
2901 	  p1 = p - 1;		/* P1 points the head of C.  */
2902 #ifdef emacs
2903 	  if (bufp->multibyte)
2904 	    {
2905 	      c = STRING_CHAR (p1, pend - p1);
2906 	      c = TRANSLATE (c);
2907 	      /* Set P to the next character boundary.  */
2908 	      p += MULTIBYTE_FORM_LENGTH (p1, pend - p1) - 1;
2909 	    }
2910 #endif
2911 	      /* If no exactn currently being built.  */
2912 	  if (!pending_exact
2913 
2914 	      /* If last exactn not at current position.  */
2915 	      || pending_exact + *pending_exact + 1 != b
2916 
2917 	      /* We have only one byte following the exactn for the count.  */
2918 	      || *pending_exact >= (1 << BYTEWIDTH) - (p - p1)
2919 
2920 	      /* If followed by a repetition operator.	*/
2921 	      || (p != pend && (*p == '*' || *p == '^'))
2922 	      || ((syntax & RE_BK_PLUS_QM)
2923 		  ? p + 1 < pend && *p == '\\' && (p[1] == '+' || p[1] == '?')
2924 		  : p != pend && (*p == '+' || *p == '?'))
2925 	      || ((syntax & RE_INTERVALS)
2926 		  && ((syntax & RE_NO_BK_BRACES)
2927 		      ? p != pend && *p == '{'
2928 		      : p + 1 < pend && p[0] == '\\' && p[1] == '{')))
2929 	    {
2930 	      /* Start building a new exactn.  */
2931 
2932 	      laststart = b;
2933 
2934 	      BUF_PUSH_2 (exactn, 0);
2935 	      pending_exact = b - 1;
2936 	    }
2937 
2938 #ifdef emacs
2939 	  if (! SINGLE_BYTE_CHAR_P (c))
2940 	    {
2941 	      unsigned char work[4], *str;
2942 	      int i = CHAR_STRING (c, work, str);
2943 	      int j;
2944 	      for (j = 0; j < i; j++)
2945 		{
2946 		  BUF_PUSH (str[j]);
2947 		  (*pending_exact)++;
2948 		}
2949 	    }
2950 	  else
2951 #endif
2952 	    {
2953 	      BUF_PUSH (c);
2954 	      (*pending_exact)++;
2955 	    }
2956 	  break;
2957 	} /* switch (c) */
2958     } /* while p != pend */
2959 
2960 
2961   /* Through the pattern now.  */
2962 
2963   if (fixup_alt_jump)
2964     STORE_JUMP (jump_past_alt, fixup_alt_jump, b);
2965 
2966   if (!COMPILE_STACK_EMPTY)
2967     FREE_STACK_RETURN (REG_EPAREN);
2968 
2969   /* If we don't want backtracking, force success
2970      the first time we reach the end of the compiled pattern.  */
2971   if (syntax & RE_NO_POSIX_BACKTRACKING)
2972     BUF_PUSH (succeed);
2973 
2974   free (compile_stack.stack);
2975 
2976   /* We have succeeded; set the length of the buffer.  */
2977   bufp->used = b - bufp->buffer;
2978 
2979 #ifdef DEBUG
2980   if (debug)
2981     {
2982       DEBUG_PRINT1 ("\nCompiled pattern: \n");
2983       print_compiled_pattern (bufp);
2984     }
2985 #endif /* DEBUG */
2986 
2987 #ifndef MATCH_MAY_ALLOCATE
2988   /* Initialize the failure stack to the largest possible stack.  This
2989      isn't necessary unless we're trying to avoid calling alloca in
2990      the search and match routines.  */
2991   {
2992     int num_regs = bufp->re_nsub + 1;
2993 
2994     if (fail_stack.size < re_max_failures * TYPICAL_FAILURE_SIZE)
2995       {
2996 	fail_stack.size = re_max_failures * TYPICAL_FAILURE_SIZE;
2997 
2998 #ifdef emacs
2999 	if (! fail_stack.stack)
3000 	  fail_stack.stack
3001 	    = (fail_stack_elt_t *) xmalloc (fail_stack.size
3002 					    * sizeof (fail_stack_elt_t));
3003 	else
3004 	  fail_stack.stack
3005 	    = (fail_stack_elt_t *) xrealloc (fail_stack.stack,
3006 					     (fail_stack.size
3007 					      * sizeof (fail_stack_elt_t)));
3008 #else /* not emacs */
3009 	if (! fail_stack.stack)
3010 	  fail_stack.stack
3011 	    = (fail_stack_elt_t *) malloc (fail_stack.size
3012 					   * sizeof (fail_stack_elt_t));
3013 	else
3014 	  fail_stack.stack
3015 	    = (fail_stack_elt_t *) realloc (fail_stack.stack,
3016 					    (fail_stack.size
3017 					     * sizeof (fail_stack_elt_t)));
3018 #endif /* not emacs */
3019       }
3020 
3021     regex_grow_registers (num_regs);
3022   }
3023 #endif /* not MATCH_MAY_ALLOCATE */
3024 
3025   return REG_NOERROR;
3026 } /* regex_compile */
3027 
3028 /* Subroutines for `regex_compile'.  */
3029 
3030 /* Store OP at LOC followed by two-byte integer parameter ARG.	*/
3031 
3032 static void
store_op1(op,loc,arg)3033 store_op1 (op, loc, arg)
3034     re_opcode_t op;
3035     unsigned char *loc;
3036     int arg;
3037 {
3038   *loc = (unsigned char) op;
3039   STORE_NUMBER (loc + 1, arg);
3040 }
3041 
3042 
3043 /* Like `store_op1', but for two two-byte parameters ARG1 and ARG2.  */
3044 
3045 static void
store_op2(op,loc,arg1,arg2)3046 store_op2 (op, loc, arg1, arg2)
3047     re_opcode_t op;
3048     unsigned char *loc;
3049     int arg1, arg2;
3050 {
3051   *loc = (unsigned char) op;
3052   STORE_NUMBER (loc + 1, arg1);
3053   STORE_NUMBER (loc + 3, arg2);
3054 }
3055 
3056 
3057 /* Copy the bytes from LOC to END to open up three bytes of space at LOC
3058    for OP followed by two-byte integer parameter ARG.  */
3059 
3060 static void
insert_op1(op,loc,arg,end)3061 insert_op1 (op, loc, arg, end)
3062     re_opcode_t op;
3063     unsigned char *loc;
3064     int arg;
3065     unsigned char *end;
3066 {
3067   register unsigned char *pfrom = end;
3068   register unsigned char *pto = end + 3;
3069 
3070   while (pfrom != loc)
3071     *--pto = *--pfrom;
3072 
3073   store_op1 (op, loc, arg);
3074 }
3075 
3076 
3077 /* Like `insert_op1', but for two two-byte parameters ARG1 and ARG2.  */
3078 
3079 static void
insert_op2(op,loc,arg1,arg2,end)3080 insert_op2 (op, loc, arg1, arg2, end)
3081     re_opcode_t op;
3082     unsigned char *loc;
3083     int arg1, arg2;
3084     unsigned char *end;
3085 {
3086   register unsigned char *pfrom = end;
3087   register unsigned char *pto = end + 5;
3088 
3089   while (pfrom != loc)
3090     *--pto = *--pfrom;
3091 
3092   store_op2 (op, loc, arg1, arg2);
3093 }
3094 
3095 
3096 /* P points to just after a ^ in PATTERN.  Return true if that ^ comes
3097    after an alternative or a begin-subexpression.  We assume there is at
3098    least one character before the ^.  */
3099 
3100 static boolean
at_begline_loc_p(pattern,p,syntax)3101 at_begline_loc_p (pattern, p, syntax)
3102     const char *pattern, *p;
3103     reg_syntax_t syntax;
3104 {
3105   const char *prev = p - 2;
3106   boolean prev_prev_backslash = prev > pattern && prev[-1] == '\\';
3107 
3108   return
3109        /* After a subexpression?  */
3110        (*prev == '(' && (syntax & RE_NO_BK_PARENS || prev_prev_backslash))
3111        /* After an alternative?	 */
3112     || (*prev == '|' && (syntax & RE_NO_BK_VBAR || prev_prev_backslash));
3113 }
3114 
3115 
3116 /* The dual of at_begline_loc_p.  This one is for $.  We assume there is
3117    at least one character after the $, i.e., `P < PEND'.  */
3118 
3119 static boolean
at_endline_loc_p(p,pend,syntax)3120 at_endline_loc_p (p, pend, syntax)
3121     const char *p, *pend;
3122     int syntax;
3123 {
3124   const char *next = p;
3125   boolean next_backslash = *next == '\\';
3126   const char *next_next = p + 1 < pend ? p + 1 : 0;
3127 
3128   return
3129        /* Before a subexpression?  */
3130        (syntax & RE_NO_BK_PARENS ? *next == ')'
3131 	: next_backslash && next_next && *next_next == ')')
3132        /* Before an alternative?  */
3133     || (syntax & RE_NO_BK_VBAR ? *next == '|'
3134 	: next_backslash && next_next && *next_next == '|');
3135 }
3136 
3137 
3138 /* Returns true if REGNUM is in one of COMPILE_STACK's elements and
3139    false if it's not.  */
3140 
3141 static boolean
group_in_compile_stack(compile_stack,regnum)3142 group_in_compile_stack (compile_stack, regnum)
3143     compile_stack_type compile_stack;
3144     regnum_t regnum;
3145 {
3146   int this_element;
3147 
3148   for (this_element = compile_stack.avail - 1;
3149        this_element >= 0;
3150        this_element--)
3151     if (compile_stack.stack[this_element].regnum == regnum)
3152       return true;
3153 
3154   return false;
3155 }
3156 
3157 /* re_compile_fastmap computes a ``fastmap'' for the compiled pattern in
3158    BUFP.  A fastmap records which of the (1 << BYTEWIDTH) possible
3159    characters can start a string that matches the pattern.  This fastmap
3160    is used by re_search to skip quickly over impossible starting points.
3161 
3162    The caller must supply the address of a (1 << BYTEWIDTH)-byte data
3163    area as BUFP->fastmap.
3164 
3165    We set the `fastmap', `fastmap_accurate', and `can_be_null' fields in
3166    the pattern buffer.
3167 
3168    Returns 0 if we succeed, -2 if an internal error.   */
3169 
3170 int
re_compile_fastmap(bufp)3171 re_compile_fastmap (bufp)
3172      struct re_pattern_buffer *bufp;
3173 {
3174   int i, j, k;
3175 #ifdef MATCH_MAY_ALLOCATE
3176   fail_stack_type fail_stack;
3177 #endif
3178 #ifndef REGEX_MALLOC
3179   char *destination;
3180 #endif
3181   /* We don't push any register information onto the failure stack.  */
3182   unsigned num_regs = 0;
3183 
3184   register char *fastmap = bufp->fastmap;
3185   unsigned char *pattern = bufp->buffer;
3186   unsigned long size = bufp->used;
3187   unsigned char *p = pattern;
3188   register unsigned char *pend = pattern + size;
3189 
3190   /* This holds the pointer to the failure stack, when
3191      it is allocated relocatably.  */
3192   fail_stack_elt_t *failure_stack_ptr;
3193 
3194   /* Assume that each path through the pattern can be null until
3195      proven otherwise.	We set this false at the bottom of switch
3196      statement, to which we get only if a particular path doesn't
3197      match the empty string.  */
3198   boolean path_can_be_null = true;
3199 
3200   /* We aren't doing a `succeed_n' to begin with.  */
3201   boolean succeed_n_p = false;
3202 
3203   /* If all elements for base leading-codes in fastmap is set, this
3204      flag is set true.	*/
3205   boolean match_any_multibyte_characters = false;
3206 
3207   /* Maximum code of simple (single byte) character. */
3208   int simple_char_max;
3209 
3210   assert (fastmap != NULL && p != NULL);
3211 
3212   INIT_FAIL_STACK ();
3213   bzero (fastmap, 1 << BYTEWIDTH);  /* Assume nothing's valid.	*/
3214   bufp->fastmap_accurate = 1;	    /* It will be when we're done.  */
3215   bufp->can_be_null = 0;
3216 
3217   while (1)
3218     {
3219       if (p == pend || *p == succeed)
3220 	{
3221 	  /* We have reached the (effective) end of pattern.  */
3222 	  if (!FAIL_STACK_EMPTY ())
3223 	    {
3224 	      bufp->can_be_null |= path_can_be_null;
3225 
3226 	      /* Reset for next path.  */
3227 	      path_can_be_null = true;
3228 
3229 	      p = fail_stack.stack[--fail_stack.avail].pointer;
3230 
3231 	      continue;
3232 	    }
3233 	  else
3234 	    break;
3235 	}
3236 
3237       /* We should never be about to go beyond the end of the pattern.	*/
3238       assert (p < pend);
3239 
3240       switch (SWITCH_ENUM_CAST ((re_opcode_t) *p++))
3241 	{
3242 
3243 	/* I guess the idea here is to simply not bother with a fastmap
3244 	   if a backreference is used, since it's too hard to figure out
3245 	   the fastmap for the corresponding group.  Setting
3246 	   `can_be_null' stops `re_search_2' from using the fastmap, so
3247 	   that is all we do.  */
3248 	case duplicate:
3249 	  bufp->can_be_null = 1;
3250 	  goto done;
3251 
3252 
3253       /* Following are the cases which match a character.  These end
3254 	 with `break'.	*/
3255 
3256 	case exactn:
3257 	  fastmap[p[1]] = 1;
3258 	  break;
3259 
3260 
3261 #ifndef emacs
3262 	case charset:
3263 	  for (j = *p++ * BYTEWIDTH - 1; j >= 0; j--)
3264 	    if (p[j / BYTEWIDTH] & (1 << (j % BYTEWIDTH)))
3265 	      fastmap[j] = 1;
3266 	  break;
3267 
3268 
3269 	case charset_not:
3270 	  /* Chars beyond end of map must be allowed.  */
3271 	  for (j = *p * BYTEWIDTH; j < (1 << BYTEWIDTH); j++)
3272 	    fastmap[j] = 1;
3273 
3274 	  for (j = *p++ * BYTEWIDTH - 1; j >= 0; j--)
3275 	    if (!(p[j / BYTEWIDTH] & (1 << (j % BYTEWIDTH))))
3276 	      fastmap[j] = 1;
3277 	  break;
3278 
3279 
3280 	case wordchar:
3281 	  for (j = 0; j < (1 << BYTEWIDTH); j++)
3282 	    if (SYNTAX (j) == Sword)
3283 	      fastmap[j] = 1;
3284 	  break;
3285 
3286 
3287 	case notwordchar:
3288 	  for (j = 0; j < (1 << BYTEWIDTH); j++)
3289 	    if (SYNTAX (j) != Sword)
3290 	      fastmap[j] = 1;
3291 	  break;
3292 #else  /* emacs */
3293 	case charset:
3294 	  for (j = CHARSET_BITMAP_SIZE (&p[-1]) * BYTEWIDTH - 1, p++;
3295 	       j >= 0; j--)
3296 	    if (p[j / BYTEWIDTH] & (1 << (j % BYTEWIDTH)))
3297 	      fastmap[j] = 1;
3298 
3299 	  if (CHARSET_RANGE_TABLE_EXISTS_P (&p[-2])
3300 	      && match_any_multibyte_characters == false)
3301 	    {
3302 	      /* Set fastmap[I] 1 where I is a base leading code of each
3303 		 multibyte character in the range table. */
3304 	      int c, count;
3305 
3306 	      /* Make P points the range table. */
3307 	      p += CHARSET_BITMAP_SIZE (&p[-2]);
3308 
3309 	      /* Extract the number of ranges in range table into
3310 		 COUNT.	 */
3311 	      EXTRACT_NUMBER_AND_INCR (count, p);
3312 	      for (; count > 0; count--, p += 2 * 3) /* XXX */
3313 		{
3314 		  /* Extract the start of each range.  */
3315 		  EXTRACT_CHARACTER (c, p);
3316 		  j = CHAR_CHARSET (c);
3317 		  fastmap[CHARSET_LEADING_CODE_BASE (j)] = 1;
3318 		}
3319 	    }
3320 	  break;
3321 
3322 
3323 	case charset_not:
3324 	  /* Chars beyond end of bitmap are possible matches.
3325 	     All the single-byte codes can occur in multibyte buffers.
3326 	     So any that are not listed in the charset
3327 	     are possible matches, even in multibyte buffers.  */
3328 	  simple_char_max = (1 << BYTEWIDTH);
3329 	  for (j = CHARSET_BITMAP_SIZE (&p[-1]) * BYTEWIDTH;
3330 	       j < simple_char_max; j++)
3331 	    fastmap[j] = 1;
3332 
3333 	  for (j = CHARSET_BITMAP_SIZE (&p[-1]) * BYTEWIDTH - 1, p++;
3334 	       j >= 0; j--)
3335 	    if (!(p[j / BYTEWIDTH] & (1 << (j % BYTEWIDTH))))
3336 	      fastmap[j] = 1;
3337 
3338 	  if (bufp->multibyte)
3339 	    /* Any character set can possibly contain a character
3340 	       which doesn't match the specified set of characters.  */
3341 	    {
3342 	    set_fastmap_for_multibyte_characters:
3343 	      if (match_any_multibyte_characters == false)
3344 		{
3345 		  for (j = 0x80; j < 0xA0; j++)	/* XXX */
3346 		    if (BASE_LEADING_CODE_P (j))
3347 		      fastmap[j] = 1;
3348 		  match_any_multibyte_characters = true;
3349 		}
3350 	    }
3351 	  break;
3352 
3353 
3354 	case wordchar:
3355 	  /* All the single-byte codes can occur in multibyte buffers,
3356 	     and they may have word syntax.  So do consider them.  */
3357 	  simple_char_max = (1 << BYTEWIDTH);
3358 	  for (j = 0; j < simple_char_max; j++)
3359 	    if (SYNTAX (j) == Sword)
3360 	      fastmap[j] = 1;
3361 
3362 	  if (bufp->multibyte)
3363 	    /* Any character set can possibly contain a character
3364 	       whose syntax is `Sword'.	 */
3365 	    goto set_fastmap_for_multibyte_characters;
3366 	  break;
3367 
3368 
3369 	case notwordchar:
3370 	  /* All the single-byte codes can occur in multibyte buffers,
3371 	     and they may not have word syntax.  So do consider them.  */
3372 	  simple_char_max = (1 << BYTEWIDTH);
3373 	  for (j = 0; j < simple_char_max; j++)
3374 	    if (SYNTAX (j) != Sword)
3375 	      fastmap[j] = 1;
3376 
3377 	  if (bufp->multibyte)
3378 	    /* Any character set can possibly contain a character
3379 	       whose syntax is not `Sword'.  */
3380 	    goto set_fastmap_for_multibyte_characters;
3381 	  break;
3382 #endif
3383 
3384 	case anychar:
3385 	  {
3386 	    int fastmap_newline = fastmap['\n'];
3387 
3388 	    /* `.' matches anything, except perhaps newline.
3389 	       Even in a multibyte buffer, it should match any
3390 	       conceivable byte value for the fastmap.  */
3391 	    if (bufp->multibyte)
3392 	      match_any_multibyte_characters = true;
3393 
3394 	    simple_char_max = (1 << BYTEWIDTH);
3395 	    for (j = 0; j < simple_char_max; j++)
3396 	      fastmap[j] = 1;
3397 
3398 	    /* ... except perhaps newline.  */
3399 	    if (!(bufp->syntax & RE_DOT_NEWLINE))
3400 	      fastmap['\n'] = fastmap_newline;
3401 
3402 	    /* Return if we have already set `can_be_null'; if we have,
3403 	       then the fastmap is irrelevant.	Something's wrong here.	 */
3404 	    else if (bufp->can_be_null)
3405 	      goto done;
3406 
3407 	    /* Otherwise, have to check alternative paths.  */
3408 	    break;
3409 	  }
3410 
3411 #ifdef emacs
3412 	case wordbound:
3413 	case notwordbound:
3414 	case wordbeg:
3415 	case wordend:
3416 	case notsyntaxspec:
3417 	case syntaxspec:
3418 	  /* This match depends on text properties.  These end with
3419 	     aborting optimizations.  */
3420 	  bufp->can_be_null = 1;
3421 	  goto done;
3422 #if 0
3423 	  k = *p++;
3424 	  simple_char_max = bufp->multibyte ? 0x80 : (1 << BYTEWIDTH);
3425 	  for (j = 0; j < simple_char_max; j++)
3426 	    if (SYNTAX (j) == (enum syntaxcode) k)
3427 	      fastmap[j] = 1;
3428 
3429 	  if (bufp->multibyte)
3430 	    /* Any character set can possibly contain a character
3431 	       whose syntax is K.  */
3432 	    goto set_fastmap_for_multibyte_characters;
3433 	  break;
3434 
3435 	case notsyntaxspec:
3436 	  k = *p++;
3437 	  simple_char_max = bufp->multibyte ? 0x80 : (1 << BYTEWIDTH);
3438 	  for (j = 0; j < simple_char_max; j++)
3439 	    if (SYNTAX (j) != (enum syntaxcode) k)
3440 	      fastmap[j] = 1;
3441 
3442 	  if (bufp->multibyte)
3443 	    /* Any character set can possibly contain a character
3444 	       whose syntax is not K.  */
3445 	    goto set_fastmap_for_multibyte_characters;
3446 	  break;
3447 #endif
3448 
3449 
3450 	case categoryspec:
3451 	  k = *p++;
3452 	  simple_char_max = (1 << BYTEWIDTH);
3453 	  for (j = 0; j < simple_char_max; j++)
3454 	    if (CHAR_HAS_CATEGORY (j, k))
3455 	      fastmap[j] = 1;
3456 
3457 	  if (bufp->multibyte)
3458 	    /* Any character set can possibly contain a character
3459 	       whose category is K.  */
3460 	    goto set_fastmap_for_multibyte_characters;
3461 	  break;
3462 
3463 
3464 	case notcategoryspec:
3465 	  k = *p++;
3466 	  simple_char_max = (1 << BYTEWIDTH);
3467 	  for (j = 0; j < simple_char_max; j++)
3468 	    if (!CHAR_HAS_CATEGORY (j, k))
3469 	      fastmap[j] = 1;
3470 
3471 	  if (bufp->multibyte)
3472 	    /* Any character set can possibly contain a character
3473 	       whose category is not K.	 */
3474 	    goto set_fastmap_for_multibyte_characters;
3475 	  break;
3476 
3477       /* All cases after this match the empty string.  These end with
3478 	 `continue'.  */
3479 
3480 
3481 	case before_dot:
3482 	case at_dot:
3483 	case after_dot:
3484 	  continue;
3485 #endif /* emacs */
3486 
3487 
3488 	case no_op:
3489 	case begline:
3490 	case endline:
3491 	case begbuf:
3492 	case endbuf:
3493 #ifndef emacs
3494 	case wordbound:
3495 	case notwordbound:
3496 	case wordbeg:
3497 	case wordend:
3498 #endif
3499 	case push_dummy_failure:
3500 	  continue;
3501 
3502 
3503 	case jump_n:
3504 	case pop_failure_jump:
3505 	case maybe_pop_jump:
3506 	case jump:
3507 	case jump_past_alt:
3508 	case dummy_failure_jump:
3509 	  EXTRACT_NUMBER_AND_INCR (j, p);
3510 	  p += j;
3511 	  if (j > 0)
3512 	    continue;
3513 
3514 	  /* Jump backward implies we just went through the body of a
3515 	     loop and matched nothing.	Opcode jumped to should be
3516 	     `on_failure_jump' or `succeed_n'.	Just treat it like an
3517 	     ordinary jump.  For a * loop, it has pushed its failure
3518 	     point already; if so, discard that as redundant.  */
3519 	  if ((re_opcode_t) *p != on_failure_jump
3520 	      && (re_opcode_t) *p != succeed_n)
3521 	    continue;
3522 
3523 	  p++;
3524 	  EXTRACT_NUMBER_AND_INCR (j, p);
3525 	  p += j;
3526 
3527 	  /* If what's on the stack is where we are now, pop it.  */
3528 	  if (!FAIL_STACK_EMPTY ()
3529 	      && fail_stack.stack[fail_stack.avail - 1].pointer == p)
3530 	    fail_stack.avail--;
3531 
3532 	  continue;
3533 
3534 
3535 	case on_failure_jump:
3536 	case on_failure_keep_string_jump:
3537 	handle_on_failure_jump:
3538 	  EXTRACT_NUMBER_AND_INCR (j, p);
3539 
3540 	  /* For some patterns, e.g., `(a?)?', `p+j' here points to the
3541 	     end of the pattern.  We don't want to push such a point,
3542 	     since when we restore it above, entering the switch will
3543 	     increment `p' past the end of the pattern.	 We don't need
3544 	     to push such a point since we obviously won't find any more
3545 	     fastmap entries beyond `pend'.  Such a pattern can match
3546 	     the null string, though.  */
3547 	  if (p + j < pend)
3548 	    {
3549 	      if (!PUSH_PATTERN_OP (p + j, fail_stack))
3550 		{
3551 		  RESET_FAIL_STACK ();
3552 		  return -2;
3553 		}
3554 	    }
3555 	  else
3556 	    bufp->can_be_null = 1;
3557 
3558 	  if (succeed_n_p)
3559 	    {
3560 	      EXTRACT_NUMBER_AND_INCR (k, p);	/* Skip the n.	*/
3561 	      succeed_n_p = false;
3562 	    }
3563 
3564 	  continue;
3565 
3566 
3567 	case succeed_n:
3568 	  /* Get to the number of times to succeed.  */
3569 	  p += 2;
3570 
3571 	  /* Increment p past the n for when k != 0.  */
3572 	  EXTRACT_NUMBER_AND_INCR (k, p);
3573 	  if (k == 0)
3574 	    {
3575 	      p -= 4;
3576 	      succeed_n_p = true;  /* Spaghetti code alert.  */
3577 	      goto handle_on_failure_jump;
3578 	    }
3579 	  continue;
3580 
3581 
3582 	case set_number_at:
3583 	  p += 4;
3584 	  continue;
3585 
3586 
3587 	case start_memory:
3588 	case stop_memory:
3589 	  p += 2;
3590 	  continue;
3591 
3592 
3593 	default:
3594 	  abort (); /* We have listed all the cases.  */
3595 	} /* switch *p++ */
3596 
3597       /* Getting here means we have found the possible starting
3598 	 characters for one path of the pattern -- and that the empty
3599 	 string does not match.	 We need not follow this path further.
3600 	 Instead, look at the next alternative (remembered on the
3601 	 stack), or quit if no more.  The test at the top of the loop
3602 	 does these things.  */
3603       path_can_be_null = false;
3604       p = pend;
3605     } /* while p */
3606 
3607   /* Set `can_be_null' for the last path (also the first path, if the
3608      pattern is empty).	 */
3609   bufp->can_be_null |= path_can_be_null;
3610 
3611  done:
3612   RESET_FAIL_STACK ();
3613   return 0;
3614 } /* re_compile_fastmap */
3615 
3616 /* Set REGS to hold NUM_REGS registers, storing them in STARTS and
3617    ENDS.  Subsequent matches using PATTERN_BUFFER and REGS will use
3618    this memory for recording register information.  STARTS and ENDS
3619    must be allocated using the malloc library routine, and must each
3620    be at least NUM_REGS * sizeof (regoff_t) bytes long.
3621 
3622    If NUM_REGS == 0, then subsequent matches should allocate their own
3623    register data.
3624 
3625    Unless this function is called, the first search or match using
3626    PATTERN_BUFFER will allocate its own register data, without
3627    freeing the old data.  */
3628 
3629 void
re_set_registers(bufp,regs,num_regs,starts,ends)3630 re_set_registers (bufp, regs, num_regs, starts, ends)
3631     struct re_pattern_buffer *bufp;
3632     struct re_registers *regs;
3633     unsigned num_regs;
3634     regoff_t *starts, *ends;
3635 {
3636   if (num_regs)
3637     {
3638       bufp->regs_allocated = REGS_REALLOCATE;
3639       regs->num_regs = num_regs;
3640       regs->start = starts;
3641       regs->end = ends;
3642     }
3643   else
3644     {
3645       bufp->regs_allocated = REGS_UNALLOCATED;
3646       regs->num_regs = 0;
3647       regs->start = regs->end = (regoff_t *) 0;
3648     }
3649 }
3650 
3651 /* Searching routines.	*/
3652 
3653 /* Like re_search_2, below, but only one string is specified, and
3654    doesn't let you say where to stop matching. */
3655 
3656 int
re_search(bufp,string,size,startpos,range,regs)3657 re_search (bufp, string, size, startpos, range, regs)
3658      struct re_pattern_buffer *bufp;
3659      const char *string;
3660      int size, startpos, range;
3661      struct re_registers *regs;
3662 {
3663   return re_search_2 (bufp, NULL, 0, string, size, startpos, range,
3664 		      regs, size);
3665 }
3666 
3667 /* End address of virtual concatenation of string.  */
3668 #define STOP_ADDR_VSTRING(P)				\
3669   (((P) >= size1 ? string2 + size2 : string1 + size1))
3670 
3671 /* Address of POS in the concatenation of virtual string. */
3672 #define POS_ADDR_VSTRING(POS)					\
3673   (((POS) >= size1 ? string2 - size1 : string1) + (POS))
3674 
3675 /* Using the compiled pattern in BUFP->buffer, first tries to match the
3676    virtual concatenation of STRING1 and STRING2, starting first at index
3677    STARTPOS, then at STARTPOS + 1, and so on.
3678 
3679    STRING1 and STRING2 have length SIZE1 and SIZE2, respectively.
3680 
3681    RANGE is how far to scan while trying to match.  RANGE = 0 means try
3682    only at STARTPOS; in general, the last start tried is STARTPOS +
3683    RANGE.
3684 
3685    In REGS, return the indices of the virtual concatenation of STRING1
3686    and STRING2 that matched the entire BUFP->buffer and its contained
3687    subexpressions.
3688 
3689    Do not consider matching one past the index STOP in the virtual
3690    concatenation of STRING1 and STRING2.
3691 
3692    We return either the position in the strings at which the match was
3693    found, -1 if no match, or -2 if error (such as failure
3694    stack overflow).  */
3695 
3696 int
re_search_2(bufp,string1,size1,string2,size2,startpos,range,regs,stop)3697 re_search_2 (bufp, string1, size1, string2, size2, startpos, range, regs, stop)
3698      struct re_pattern_buffer *bufp;
3699      const char *string1, *string2;
3700      int size1, size2;
3701      int startpos;
3702      int range;
3703      struct re_registers *regs;
3704      int stop;
3705 {
3706   int val;
3707   register char *fastmap = bufp->fastmap;
3708   register RE_TRANSLATE_TYPE translate = bufp->translate;
3709   int total_size = size1 + size2;
3710   int endpos = startpos + range;
3711   int anchored_start = 0;
3712 
3713   /* Nonzero if we have to concern multibyte character.	 */
3714   int multibyte = bufp->multibyte;
3715 
3716   /* Check for out-of-range STARTPOS.  */
3717   if (startpos < 0 || startpos > total_size)
3718     return -1;
3719 
3720   /* Fix up RANGE if it might eventually take us outside
3721      the virtual concatenation of STRING1 and STRING2.
3722      Make sure we won't move STARTPOS below 0 or above TOTAL_SIZE.  */
3723   if (endpos < 0)
3724     range = 0 - startpos;
3725   else if (endpos > total_size)
3726     range = total_size - startpos;
3727 
3728   /* If the search isn't to be a backwards one, don't waste time in a
3729      search for a pattern anchored at beginning of buffer.  */
3730   if (bufp->used > 0 && (re_opcode_t) bufp->buffer[0] == begbuf && range > 0)
3731     {
3732       if (startpos > 0)
3733 	return -1;
3734       else
3735 	range = 0;
3736     }
3737 
3738 #ifdef emacs
3739   /* In a forward search for something that starts with \=.
3740      don't keep searching past point.  */
3741   if (bufp->used > 0 && (re_opcode_t) bufp->buffer[0] == at_dot && range > 0)
3742     {
3743       range = PT_BYTE - BEGV_BYTE - startpos;
3744       if (range < 0)
3745 	return -1;
3746     }
3747 #endif /* emacs */
3748 
3749   /* Update the fastmap now if not correct already.  */
3750   if (fastmap && !bufp->fastmap_accurate)
3751     if (re_compile_fastmap (bufp) == -2)
3752       return -2;
3753 
3754   /* See whether the pattern is anchored.  */
3755   if (bufp->buffer[0] == begline)
3756     anchored_start = 1;
3757 
3758 #ifdef emacs
3759   gl_state.object = re_match_object;
3760   {
3761     int adjpos = NILP (re_match_object) || BUFFERP (re_match_object);
3762     int charpos = SYNTAX_TABLE_BYTE_TO_CHAR (startpos + adjpos);
3763 
3764     SETUP_SYNTAX_TABLE_FOR_OBJECT (re_match_object, charpos, 1);
3765   }
3766 #endif
3767 
3768   /* Loop through the string, looking for a place to start matching.  */
3769   for (;;)
3770     {
3771       /* If the pattern is anchored,
3772 	 skip quickly past places we cannot match.
3773 	 We don't bother to treat startpos == 0 specially
3774 	 because that case doesn't repeat.  */
3775       if (anchored_start && startpos > 0)
3776 	{
3777 	  if (! (bufp->newline_anchor
3778 		 && ((startpos <= size1 ? string1[startpos - 1]
3779 		      : string2[startpos - size1 - 1])
3780 		     == '\n')))
3781 	    goto advance;
3782 	}
3783 
3784       /* If a fastmap is supplied, skip quickly over characters that
3785 	 cannot be the start of a match.  If the pattern can match the
3786 	 null string, however, we don't need to skip characters; we want
3787 	 the first null string.	 */
3788       if (fastmap && startpos < total_size && !bufp->can_be_null)
3789 	{
3790 	  register const char *d;
3791 	  register unsigned int buf_ch;
3792 
3793 	  d = POS_ADDR_VSTRING (startpos);
3794 
3795 	  if (range > 0)	/* Searching forwards.	*/
3796 	    {
3797 	      register int lim = 0;
3798 	      int irange = range;
3799 
3800 	      if (startpos < size1 && startpos + range >= size1)
3801 		lim = range - (size1 - startpos);
3802 
3803 	      /* Written out as an if-else to avoid testing `translate'
3804 		 inside the loop.  */
3805 	      if (RE_TRANSLATE_P (translate))
3806 		{
3807 		  if (multibyte)
3808 		    while (range > lim)
3809 		      {
3810 			int buf_charlen;
3811 
3812 			buf_ch = STRING_CHAR_AND_LENGTH (d, range - lim,
3813 							 buf_charlen);
3814 
3815 			buf_ch = RE_TRANSLATE (translate, buf_ch);
3816 			if (buf_ch >= 0400
3817 			    || fastmap[buf_ch])
3818 			  break;
3819 
3820 			range -= buf_charlen;
3821 			d += buf_charlen;
3822 		      }
3823 		  else
3824 		    while (range > lim
3825 			   && !fastmap[(unsigned char)
3826 				       RE_TRANSLATE (translate, (unsigned char) *d)])
3827 		      {
3828 			d++;
3829 			range--;
3830 		      }
3831 		}
3832 	      else
3833 		while (range > lim && !fastmap[(unsigned char) *d])
3834 		  {
3835 		    d++;
3836 		    range--;
3837 		  }
3838 
3839 	      startpos += irange - range;
3840 	    }
3841 	  else				/* Searching backwards.	 */
3842 	    {
3843 	      int room = (size1 == 0 || startpos >= size1
3844 			  ? size2 + size1 - startpos
3845 			  : size1 - startpos);
3846 
3847 	      buf_ch = STRING_CHAR (d, room);
3848 	      if (RE_TRANSLATE_P (translate))
3849 		buf_ch = RE_TRANSLATE (translate, buf_ch);
3850 
3851 	      if (! (buf_ch >= 0400
3852 		     || fastmap[buf_ch]))
3853 		goto advance;
3854 	    }
3855 	}
3856 
3857       /* If can't match the null string, and that's all we have left, fail.  */
3858       if (range >= 0 && startpos == total_size && fastmap
3859 	  && !bufp->can_be_null)
3860 	return -1;
3861 
3862       val = re_match_2_internal (bufp, string1, size1, string2, size2,
3863 				 startpos, regs, stop);
3864 #ifndef REGEX_MALLOC
3865 #ifdef C_ALLOCA
3866       alloca (0);
3867 #endif
3868 #endif
3869 
3870       if (val >= 0)
3871 	return startpos;
3872 
3873       if (val == -2)
3874 	return -2;
3875 
3876     advance:
3877       if (!range)
3878 	break;
3879       else if (range > 0)
3880 	{
3881 	  /* Update STARTPOS to the next character boundary.  */
3882 	  if (multibyte)
3883 	    {
3884 	      const unsigned char *p
3885 		= (const unsigned char *) POS_ADDR_VSTRING (startpos);
3886 	      const unsigned char *pend
3887 		= (const unsigned char *) STOP_ADDR_VSTRING (startpos);
3888 	      int len = MULTIBYTE_FORM_LENGTH (p, pend - p);
3889 
3890 	      range -= len;
3891 	      if (range < 0)
3892 		break;
3893 	      startpos += len;
3894 	    }
3895 	  else
3896 	    {
3897 	      range--;
3898 	      startpos++;
3899 	    }
3900 	}
3901       else
3902 	{
3903 	  range++;
3904 	  startpos--;
3905 
3906 	  /* Update STARTPOS to the previous character boundary.  */
3907 	  if (multibyte)
3908 	    {
3909 	      const unsigned char *p
3910 		= (const unsigned char *) POS_ADDR_VSTRING (startpos);
3911 	      int len = 0;
3912 
3913 	      /* Find the head of multibyte form.  */
3914 	      while (!CHAR_HEAD_P (*p))
3915 		p--, len++;
3916 
3917 	      /* Adjust it. */
3918 #if 0				/* XXX */
3919 	      if (MULTIBYTE_FORM_LENGTH (p, len + 1) != (len + 1))
3920 		;
3921 	      else
3922 #endif
3923 		{
3924 		  range += len;
3925 		  if (range > 0)
3926 		    break;
3927 
3928 		  startpos -= len;
3929 		}
3930 	    }
3931 	}
3932     }
3933   return -1;
3934 } /* re_search_2 */
3935 
3936 /* Declarations and macros for re_match_2.  */
3937 
3938 static int bcmp_translate ();
3939 static boolean alt_match_null_string_p (),
3940 	       common_op_match_null_string_p (),
3941 	       group_match_null_string_p ();
3942 
3943 /* This converts PTR, a pointer into one of the search strings `string1'
3944    and `string2' into an offset from the beginning of that string.  */
3945 #define POINTER_TO_OFFSET(ptr)			\
3946   (FIRST_STRING_P (ptr)				\
3947    ? ((regoff_t) ((ptr) - string1))		\
3948    : ((regoff_t) ((ptr) - string2 + size1)))
3949 
3950 /* Macros for dealing with the split strings in re_match_2.  */
3951 
3952 #define MATCHING_IN_FIRST_STRING  (dend == end_match_1)
3953 
3954 /* Call before fetching a character with *d.  This switches over to
3955    string2 if necessary.  */
3956 #define PREFETCH()							\
3957   while (d == dend)							\
3958     {									\
3959       /* End of string2 => fail.  */					\
3960       if (dend == end_match_2)						\
3961 	goto fail;							\
3962       /* End of string1 => advance to string2.	*/			\
3963       d = string2;							\
3964       dend = end_match_2;						\
3965     }
3966 
3967 
3968 /* Test if at very beginning or at very end of the virtual concatenation
3969    of `string1' and `string2'.	If only one string, it's `string2'.  */
3970 #define AT_STRINGS_BEG(d) ((d) == (size1 ? string1 : string2) || !size2)
3971 #define AT_STRINGS_END(d) ((d) == end2)
3972 
3973 
3974 /* Test if D points to a character which is word-constituent.  We have
3975    two special cases to check for: if past the end of string1, look at
3976    the first character in string2; and if before the beginning of
3977    string2, look at the last character in string1.  */
3978 #define WORDCHAR_P(d)							\
3979   (SYNTAX ((d) == end1 ? *string2					\
3980 	   : (d) == string2 - 1 ? *(end1 - 1) : *(d))			\
3981    == Sword)
3982 
3983 /* Disabled due to a compiler bug -- see comment at case wordbound */
3984 
3985 /* The comment at case wordbound is following one, but we don't use
3986    AT_WORD_BOUNDARY anymore to support multibyte form.
3987 
3988    The DEC Alpha C compiler 3.x generates incorrect code for the
3989    test	 WORDCHAR_P (d - 1) != WORDCHAR_P (d)  in the expansion of
3990    AT_WORD_BOUNDARY, so this code is disabled.	Expanding the
3991    macro and introducing temporary variables works around the bug.  */
3992 
3993 #if 0
3994 /* Test if the character before D and the one at D differ with respect
3995    to being word-constituent.  */
3996 #define AT_WORD_BOUNDARY(d)						\
3997   (AT_STRINGS_BEG (d) || AT_STRINGS_END (d)				\
3998    || WORDCHAR_P (d - 1) != WORDCHAR_P (d))
3999 #endif
4000 
4001 /* Free everything we malloc.  */
4002 #ifdef MATCH_MAY_ALLOCATE
4003 #define FREE_VAR(var) if (var) { REGEX_FREE (var); var = NULL; } else
4004 #define FREE_VARIABLES()						\
4005   do {									\
4006     REGEX_FREE_STACK (fail_stack.stack);				\
4007     FREE_VAR (regstart);						\
4008     FREE_VAR (regend);							\
4009     FREE_VAR (old_regstart);						\
4010     FREE_VAR (old_regend);						\
4011     FREE_VAR (best_regstart);						\
4012     FREE_VAR (best_regend);						\
4013     FREE_VAR (reg_info);						\
4014     FREE_VAR (reg_dummy);						\
4015     FREE_VAR (reg_info_dummy);						\
4016   } while (0)
4017 #else
4018 #define FREE_VARIABLES() ((void)0) /* Do nothing!  But inhibit gcc warning.  */
4019 #endif /* not MATCH_MAY_ALLOCATE */
4020 
4021 /* These values must meet several constraints.	They must not be valid
4022    register values; since we have a limit of 255 registers (because
4023    we use only one byte in the pattern for the register number), we can
4024    use numbers larger than 255.	 They must differ by 1, because of
4025    NUM_FAILURE_ITEMS above.  And the value for the lowest register must
4026    be larger than the value for the highest register, so we do not try
4027    to actually save any registers when none are active.	 */
4028 #define NO_HIGHEST_ACTIVE_REG (1 << BYTEWIDTH)
4029 #define NO_LOWEST_ACTIVE_REG (NO_HIGHEST_ACTIVE_REG + 1)
4030 
4031 /* Matching routines.  */
4032 
4033 #ifndef emacs	/* Emacs never uses this.  */
4034 /* re_match is like re_match_2 except it takes only a single string.  */
4035 
4036 int
re_match(bufp,string,size,pos,regs)4037 re_match (bufp, string, size, pos, regs)
4038      struct re_pattern_buffer *bufp;
4039      const char *string;
4040      int size, pos;
4041      struct re_registers *regs;
4042 {
4043   int result = re_match_2_internal (bufp, NULL, 0, string, size,
4044 				    pos, regs, size);
4045 #ifndef REGEX_MALLOC	/* CVS */
4046 #ifdef C_ALLOCA		/* CVS */
4047   alloca (0);
4048 #endif			/* CVS */
4049 #endif			/* CVS */
4050   return result;
4051 }
4052 #endif /* not emacs */
4053 
4054 #ifdef emacs
4055 /* In Emacs, this is the string or buffer in which we
4056    are matching.  It is used for looking up syntax properties.	*/
4057 Lisp_Object re_match_object;
4058 #endif
4059 
4060 /* re_match_2 matches the compiled pattern in BUFP against the
4061    the (virtual) concatenation of STRING1 and STRING2 (of length SIZE1
4062    and SIZE2, respectively).  We start matching at POS, and stop
4063    matching at STOP.
4064 
4065    If REGS is non-null and the `no_sub' field of BUFP is nonzero, we
4066    store offsets for the substring each group matched in REGS.	See the
4067    documentation for exactly how many groups we fill.
4068 
4069    We return -1 if no match, -2 if an internal error (such as the
4070    failure stack overflowing).	Otherwise, we return the length of the
4071    matched substring.  */
4072 
4073 int
re_match_2(bufp,string1,size1,string2,size2,pos,regs,stop)4074 re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop)
4075      struct re_pattern_buffer *bufp;
4076      const char *string1, *string2;
4077      int size1, size2;
4078      int pos;
4079      struct re_registers *regs;
4080      int stop;
4081 {
4082   int result;
4083 
4084 #ifdef emacs
4085   int charpos;
4086   int adjpos = NILP (re_match_object) || BUFFERP (re_match_object);
4087   gl_state.object = re_match_object;
4088   charpos = SYNTAX_TABLE_BYTE_TO_CHAR (pos + adjpos);
4089   SETUP_SYNTAX_TABLE_FOR_OBJECT (re_match_object, charpos, 1);
4090 #endif
4091 
4092   result = re_match_2_internal (bufp, string1, size1, string2, size2,
4093 				pos, regs, stop);
4094 #ifndef REGEX_MALLOC	/* CVS */
4095 #ifdef C_ALLOCA		/* CVS */
4096   alloca (0);
4097 #endif			/* CVS */
4098 #endif			/* CVS */
4099   return result;
4100 }
4101 
4102 /* This is a separate function so that we can force an alloca cleanup
4103    afterwards.	*/
4104 static int
re_match_2_internal(bufp,string1,size1,string2,size2,pos,regs,stop)4105 re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop)
4106      struct re_pattern_buffer *bufp;
4107      const char *string1, *string2;
4108      int size1, size2;
4109      int pos;
4110      struct re_registers *regs;
4111      int stop;
4112 {
4113   /* General temporaries.  */
4114   int mcnt;
4115   unsigned char *p1;
4116 
4117   /* Just past the end of the corresponding string.  */
4118   const char *end1, *end2;
4119 
4120   /* Pointers into string1 and string2, just past the last characters in
4121      each to consider matching.	 */
4122   const char *end_match_1, *end_match_2;
4123 
4124   /* Where we are in the data, and the end of the current string.  */
4125   const char *d, *dend;
4126 
4127   /* Where we are in the pattern, and the end of the pattern.  */
4128   unsigned char *p = bufp->buffer;
4129   register unsigned char *pend = p + bufp->used;
4130 
4131   /* Mark the opcode just after a start_memory, so we can test for an
4132      empty subpattern when we get to the stop_memory.  */
4133   unsigned char *just_past_start_mem = 0;
4134 
4135   /* We use this to map every character in the string.	*/
4136   RE_TRANSLATE_TYPE translate = bufp->translate;
4137 
4138   /* Nonzero if we have to concern multibyte character.	 */
4139   int multibyte = bufp->multibyte;
4140 
4141   /* Failure point stack.  Each place that can handle a failure further
4142      down the line pushes a failure point on this stack.  It consists of
4143      restart, regend, and reg_info for all registers corresponding to
4144      the subexpressions we're currently inside, plus the number of such
4145      registers, and, finally, two char *'s.  The first char * is where
4146      to resume scanning the pattern; the second one is where to resume
4147      scanning the strings.  If the latter is zero, the failure point is
4148      a ``dummy''; if a failure happens and the failure point is a dummy,
4149      it gets discarded and the next next one is tried.	*/
4150 #ifdef MATCH_MAY_ALLOCATE /* otherwise, this is global.	 */
4151   fail_stack_type fail_stack;
4152 #endif
4153 #ifdef DEBUG
4154   static unsigned failure_id = 0;
4155   unsigned nfailure_points_pushed = 0, nfailure_points_popped = 0;
4156 #endif
4157 
4158   /* This holds the pointer to the failure stack, when
4159      it is allocated relocatably.  */
4160   fail_stack_elt_t *failure_stack_ptr;
4161 
4162   /* We fill all the registers internally, independent of what we
4163      return, for use in backreferences.	 The number here includes
4164      an element for register zero.  */
4165   unsigned num_regs = bufp->re_nsub + 1;
4166 
4167   /* The currently active registers.  */
4168   unsigned lowest_active_reg = NO_LOWEST_ACTIVE_REG;
4169   unsigned highest_active_reg = NO_HIGHEST_ACTIVE_REG;
4170 
4171   /* Information on the contents of registers. These are pointers into
4172      the input strings; they record just what was matched (on this
4173      attempt) by a subexpression part of the pattern, that is, the
4174      regnum-th regstart pointer points to where in the pattern we began
4175      matching and the regnum-th regend points to right after where we
4176      stopped matching the regnum-th subexpression.  (The zeroth register
4177      keeps track of what the whole pattern matches.)  */
4178 #ifdef MATCH_MAY_ALLOCATE /* otherwise, these are global.  */
4179   const char **regstart, **regend;
4180 #endif
4181 
4182   /* If a group that's operated upon by a repetition operator fails to
4183      match anything, then the register for its start will need to be
4184      restored because it will have been set to wherever in the string we
4185      are when we last see its open-group operator.  Similarly for a
4186      register's end.  */
4187 #ifdef MATCH_MAY_ALLOCATE /* otherwise, these are global.  */
4188   const char **old_regstart, **old_regend;
4189 #endif
4190 
4191   /* The is_active field of reg_info helps us keep track of which (possibly
4192      nested) subexpressions we are currently in. The matched_something
4193      field of reg_info[reg_num] helps us tell whether or not we have
4194      matched any of the pattern so far this time through the reg_num-th
4195      subexpression.  These two fields get reset each time through any
4196      loop their register is in.	 */
4197 #ifdef MATCH_MAY_ALLOCATE /* otherwise, this is global.	 */
4198   register_info_type *reg_info;
4199 #endif
4200 
4201   /* The following record the register info as found in the above
4202      variables when we find a match better than any we've seen before.
4203      This happens as we backtrack through the failure points, which in
4204      turn happens only if we have not yet matched the entire string. */
4205   unsigned best_regs_set = false;
4206 #ifdef MATCH_MAY_ALLOCATE /* otherwise, these are global.  */
4207   const char **best_regstart, **best_regend;
4208 #endif
4209 
4210   /* Logically, this is `best_regend[0]'.  But we don't want to have to
4211      allocate space for that if we're not allocating space for anything
4212      else (see below).	Also, we never need info about register 0 for
4213      any of the other register vectors, and it seems rather a kludge to
4214      treat `best_regend' differently than the rest.  So we keep track of
4215      the end of the best match so far in a separate variable.  We
4216      initialize this to NULL so that when we backtrack the first time
4217      and need to test it, it's not garbage.  */
4218   const char *match_end = NULL;
4219 
4220   /* This helps SET_REGS_MATCHED avoid doing redundant work.  */
4221   int set_regs_matched_done = 0;
4222 
4223   /* Used when we pop values we don't care about.  */
4224 #ifdef MATCH_MAY_ALLOCATE /* otherwise, these are global.  */
4225   const char **reg_dummy;
4226   register_info_type *reg_info_dummy;
4227 #endif
4228 
4229 #ifdef DEBUG
4230   /* Counts the total number of registers pushed.  */
4231   unsigned num_regs_pushed = 0;
4232 #endif
4233 
4234   DEBUG_PRINT1 ("\n\nEntering re_match_2.\n");
4235 
4236   INIT_FAIL_STACK ();
4237 
4238 #ifdef MATCH_MAY_ALLOCATE
4239   /* Do not bother to initialize all the register variables if there are
4240      no groups in the pattern, as it takes a fair amount of time.  If
4241      there are groups, we include space for register 0 (the whole
4242      pattern), even though we never use it, since it simplifies the
4243      array indexing.  We should fix this.  */
4244   if (bufp->re_nsub)
4245     {
4246       regstart = REGEX_TALLOC (num_regs, const char *);
4247       regend = REGEX_TALLOC (num_regs, const char *);
4248       old_regstart = REGEX_TALLOC (num_regs, const char *);
4249       old_regend = REGEX_TALLOC (num_regs, const char *);
4250       best_regstart = REGEX_TALLOC (num_regs, const char *);
4251       best_regend = REGEX_TALLOC (num_regs, const char *);
4252       reg_info = REGEX_TALLOC (num_regs, register_info_type);
4253       reg_dummy = REGEX_TALLOC (num_regs, const char *);
4254       reg_info_dummy = REGEX_TALLOC (num_regs, register_info_type);
4255 
4256       if (!(regstart && regend && old_regstart && old_regend && reg_info
4257 	    && best_regstart && best_regend && reg_dummy && reg_info_dummy))
4258 	{
4259 	  FREE_VARIABLES ();
4260 	  return -2;
4261 	}
4262     }
4263   else
4264     {
4265       /* We must initialize all our variables to NULL, so that
4266 	 `FREE_VARIABLES' doesn't try to free them.  */
4267       regstart = regend = old_regstart = old_regend = best_regstart
4268 	= best_regend = reg_dummy = NULL;
4269       reg_info = reg_info_dummy = (register_info_type *) NULL;
4270     }
4271 #endif /* MATCH_MAY_ALLOCATE */
4272 
4273   /* The starting position is bogus.  */
4274   if (pos < 0 || pos > size1 + size2)
4275     {
4276       FREE_VARIABLES ();
4277       return -1;
4278     }
4279 
4280   /* Initialize subexpression text positions to -1 to mark ones that no
4281      start_memory/stop_memory has been seen for. Also initialize the
4282      register information struct.  */
4283   for (mcnt = 1; mcnt < num_regs; mcnt++)
4284     {
4285       regstart[mcnt] = regend[mcnt]
4286 	= old_regstart[mcnt] = old_regend[mcnt] = REG_UNSET_VALUE;
4287 
4288       REG_MATCH_NULL_STRING_P (reg_info[mcnt]) = MATCH_NULL_UNSET_VALUE;
4289       IS_ACTIVE (reg_info[mcnt]) = 0;
4290       MATCHED_SOMETHING (reg_info[mcnt]) = 0;
4291       EVER_MATCHED_SOMETHING (reg_info[mcnt]) = 0;
4292     }
4293 
4294   /* We move `string1' into `string2' if the latter's empty -- but not if
4295      `string1' is null.	 */
4296   if (size2 == 0 && string1 != NULL)
4297     {
4298       string2 = string1;
4299       size2 = size1;
4300       string1 = 0;
4301       size1 = 0;
4302     }
4303   end1 = string1 + size1;
4304   end2 = string2 + size2;
4305 
4306   /* Compute where to stop matching, within the two strings.  */
4307   if (stop <= size1)
4308     {
4309       end_match_1 = string1 + stop;
4310       end_match_2 = string2;
4311     }
4312   else
4313     {
4314       end_match_1 = end1;
4315       end_match_2 = string2 + stop - size1;
4316     }
4317 
4318   /* `p' scans through the pattern as `d' scans through the data.
4319      `dend' is the end of the input string that `d' points within.  `d'
4320      is advanced into the following input string whenever necessary, but
4321      this happens before fetching; therefore, at the beginning of the
4322      loop, `d' can be pointing at the end of a string, but it cannot
4323      equal `string2'.  */
4324   if (size1 > 0 && pos <= size1)
4325     {
4326       d = string1 + pos;
4327       dend = end_match_1;
4328     }
4329   else
4330     {
4331       d = string2 + pos - size1;
4332       dend = end_match_2;
4333     }
4334 
4335   DEBUG_PRINT1 ("The compiled pattern is: ");
4336   DEBUG_PRINT_COMPILED_PATTERN (bufp, p, pend);
4337   DEBUG_PRINT1 ("The string to match is: `");
4338   DEBUG_PRINT_DOUBLE_STRING (d, string1, size1, string2, size2);
4339   DEBUG_PRINT1 ("'\n");
4340 
4341   /* This loops over pattern commands.	It exits by returning from the
4342      function if the match is complete, or it drops through if the match
4343      fails at this starting point in the input data.  */
4344   for (;;)
4345     {
4346       DEBUG_PRINT2 ("\n0x%x: ", p);
4347 
4348       if (p == pend)
4349 	{ /* End of pattern means we might have succeeded.  */
4350 	  DEBUG_PRINT1 ("end of pattern ... ");
4351 
4352 	  /* If we haven't matched the entire string, and we want the
4353 	     longest match, try backtracking.  */
4354 	  if (d != end_match_2)
4355 	    {
4356 	      /* 1 if this match ends in the same string (string1 or string2)
4357 		 as the best previous match.  */
4358 	      boolean same_str_p = (FIRST_STRING_P (match_end)
4359 				    == MATCHING_IN_FIRST_STRING);
4360 	      /* 1 if this match is the best seen so far.  */
4361 	      boolean best_match_p;
4362 
4363 	      /* AIX compiler got confused when this was combined
4364 		 with the previous declaration.	 */
4365 	      if (same_str_p)
4366 		best_match_p = d > match_end;
4367 	      else
4368 		best_match_p = !MATCHING_IN_FIRST_STRING;
4369 
4370 	      DEBUG_PRINT1 ("backtracking.\n");
4371 
4372 	      if (!FAIL_STACK_EMPTY ())
4373 		{ /* More failure points to try.  */
4374 
4375 		  /* If exceeds best match so far, save it.  */
4376 		  if (!best_regs_set || best_match_p)
4377 		    {
4378 		      best_regs_set = true;
4379 		      match_end = d;
4380 
4381 		      DEBUG_PRINT1 ("\nSAVING match as best so far.\n");
4382 
4383 		      for (mcnt = 1; mcnt < num_regs; mcnt++)
4384 			{
4385 			  best_regstart[mcnt] = regstart[mcnt];
4386 			  best_regend[mcnt] = regend[mcnt];
4387 			}
4388 		    }
4389 		  goto fail;
4390 		}
4391 
4392 	      /* If no failure points, don't restore garbage.  And if
4393 		 last match is real best match, don't restore second
4394 		 best one. */
4395 	      else if (best_regs_set && !best_match_p)
4396 		{
4397 		restore_best_regs:
4398 		  /* Restore best match.  It may happen that `dend ==
4399 		     end_match_1' while the restored d is in string2.
4400 		     For example, the pattern `x.*y.*z' against the
4401 		     strings `x-' and `y-z-', if the two strings are
4402 		     not consecutive in memory.	 */
4403 		  DEBUG_PRINT1 ("Restoring best registers.\n");
4404 
4405 		  d = match_end;
4406 		  dend = ((d >= string1 && d <= end1)
4407 			   ? end_match_1 : end_match_2);
4408 
4409 		  for (mcnt = 1; mcnt < num_regs; mcnt++)
4410 		    {
4411 		      regstart[mcnt] = best_regstart[mcnt];
4412 		      regend[mcnt] = best_regend[mcnt];
4413 		    }
4414 		}
4415 	    } /* d != end_match_2 */
4416 
4417 	succeed_label:
4418 	  DEBUG_PRINT1 ("Accepting match.\n");
4419 
4420 	  /* If caller wants register contents data back, do it.  */
4421 	  if (regs && !bufp->no_sub)
4422 	    {
4423 	      /* Have the register data arrays been allocated?	*/
4424 	      if (bufp->regs_allocated == REGS_UNALLOCATED)
4425 		{ /* No.  So allocate them with malloc.	 We need one
4426 		     extra element beyond `num_regs' for the `-1' marker
4427 		     GNU code uses.  */
4428 		  regs->num_regs = MAX (RE_NREGS, num_regs + 1);
4429 		  regs->start = TALLOC (regs->num_regs, regoff_t);
4430 		  regs->end = TALLOC (regs->num_regs, regoff_t);
4431 		  if (regs->start == NULL || regs->end == NULL)
4432 		    {
4433 		      FREE_VARIABLES ();
4434 		      return -2;
4435 		    }
4436 		  bufp->regs_allocated = REGS_REALLOCATE;
4437 		}
4438 	      else if (bufp->regs_allocated == REGS_REALLOCATE)
4439 		{ /* Yes.  If we need more elements than were already
4440 		     allocated, reallocate them.  If we need fewer, just
4441 		     leave it alone.  */
4442 		  if (regs->num_regs < num_regs + 1)
4443 		    {
4444 		      regs->num_regs = num_regs + 1;
4445 		      RETALLOC (regs->start, regs->num_regs, regoff_t);
4446 		      RETALLOC (regs->end, regs->num_regs, regoff_t);
4447 		      if (regs->start == NULL || regs->end == NULL)
4448 			{
4449 			  FREE_VARIABLES ();
4450 			  return -2;
4451 			}
4452 		    }
4453 		}
4454 	      else
4455 		{
4456 		  /* These braces fend off a "empty body in an else-statement"
4457 		     warning under GCC when assert expands to nothing.	*/
4458 		  assert (bufp->regs_allocated == REGS_FIXED);
4459 		}
4460 
4461 	      /* Convert the pointer data in `regstart' and `regend' to
4462 		 indices.  Register zero has to be set differently,
4463 		 since we haven't kept track of any info for it.  */
4464 	      if (regs->num_regs > 0)
4465 		{
4466 		  regs->start[0] = pos;
4467 		  regs->end[0] = (MATCHING_IN_FIRST_STRING
4468 				  ? ((regoff_t) (d - string1))
4469 				  : ((regoff_t) (d - string2 + size1)));
4470 		}
4471 
4472 	      /* Go through the first `min (num_regs, regs->num_regs)'
4473 		 registers, since that is all we initialized.  */
4474 	      for (mcnt = 1; mcnt < MIN (num_regs, regs->num_regs); mcnt++)
4475 		{
4476 		  if (REG_UNSET (regstart[mcnt]) || REG_UNSET (regend[mcnt]))
4477 		    regs->start[mcnt] = regs->end[mcnt] = -1;
4478 		  else
4479 		    {
4480 		      regs->start[mcnt]
4481 			= (regoff_t) POINTER_TO_OFFSET (regstart[mcnt]);
4482 		      regs->end[mcnt]
4483 			= (regoff_t) POINTER_TO_OFFSET (regend[mcnt]);
4484 		    }
4485 		}
4486 
4487 	      /* If the regs structure we return has more elements than
4488 		 were in the pattern, set the extra elements to -1.  If
4489 		 we (re)allocated the registers, this is the case,
4490 		 because we always allocate enough to have at least one
4491 		 -1 at the end.	 */
4492 	      for (mcnt = num_regs; mcnt < regs->num_regs; mcnt++)
4493 		regs->start[mcnt] = regs->end[mcnt] = -1;
4494 	    } /* regs && !bufp->no_sub */
4495 
4496 	  DEBUG_PRINT4 ("%u failure points pushed, %u popped (%u remain).\n",
4497 			nfailure_points_pushed, nfailure_points_popped,
4498 			nfailure_points_pushed - nfailure_points_popped);
4499 	  DEBUG_PRINT2 ("%u registers pushed.\n", num_regs_pushed);
4500 
4501 	  mcnt = d - pos - (MATCHING_IN_FIRST_STRING
4502 			    ? string1
4503 			    : string2 - size1);
4504 
4505 	  DEBUG_PRINT2 ("Returning %d from re_match_2.\n", mcnt);
4506 
4507 	  FREE_VARIABLES ();
4508 	  return mcnt;
4509 	}
4510 
4511       /* Otherwise match next pattern command.	*/
4512       switch (SWITCH_ENUM_CAST ((re_opcode_t) *p++))
4513 	{
4514 	/* Ignore these.  Used to ignore the n of succeed_n's which
4515 	   currently have n == 0.  */
4516 	case no_op:
4517 	  DEBUG_PRINT1 ("EXECUTING no_op.\n");
4518 	  break;
4519 
4520 	case succeed:
4521 	  DEBUG_PRINT1 ("EXECUTING succeed.\n");
4522 	  goto succeed_label;
4523 
4524 	/* Match the next n pattern characters exactly.	 The following
4525 	   byte in the pattern defines n, and the n bytes after that
4526 	   are the characters to match.	 */
4527 	case exactn:
4528 	  mcnt = *p++;
4529 	  DEBUG_PRINT2 ("EXECUTING exactn %d.\n", mcnt);
4530 
4531 	  /* This is written out as an if-else so we don't waste time
4532 	     testing `translate' inside the loop.  */
4533 	  if (RE_TRANSLATE_P (translate))
4534 	    {
4535 #ifdef emacs
4536 	      if (multibyte)
4537 		do
4538 		  {
4539 		    int pat_charlen, buf_charlen;
4540 		    unsigned int pat_ch, buf_ch;
4541 
4542 		    PREFETCH ();
4543 		    pat_ch = STRING_CHAR_AND_LENGTH (p, pend - p, pat_charlen);
4544 		    buf_ch = STRING_CHAR_AND_LENGTH (d, dend - d, buf_charlen);
4545 
4546 		    if (RE_TRANSLATE (translate, buf_ch)
4547 			!= pat_ch)
4548 		      goto fail;
4549 
4550 		    p += pat_charlen;
4551 		    d += buf_charlen;
4552 		    mcnt -= pat_charlen;
4553 		  }
4554 		while (mcnt > 0);
4555 	      else
4556 #endif /* not emacs */
4557 		do
4558 		  {
4559 		    PREFETCH ();
4560 		    if ((unsigned char) RE_TRANSLATE (translate, (unsigned char) *d)
4561 			!= (unsigned char) *p++)
4562 		      goto fail;
4563 		    d++;
4564 		  }
4565 		while (--mcnt);
4566 	    }
4567 	  else
4568 	    {
4569 	      do
4570 		{
4571 		  PREFETCH ();
4572 		  if (*d++ != (char) *p++) goto fail;
4573 		}
4574 	      while (--mcnt);
4575 	    }
4576 	  SET_REGS_MATCHED ();
4577 	  break;
4578 
4579 
4580 	/* Match any character except possibly a newline or a null.  */
4581 	case anychar:
4582 	  {
4583 	    int buf_charlen;
4584 	    unsigned int buf_ch;
4585 
4586 	    DEBUG_PRINT1 ("EXECUTING anychar.\n");
4587 
4588 	    PREFETCH ();
4589 
4590 #ifdef emacs
4591 	    if (multibyte)
4592 	      buf_ch = STRING_CHAR_AND_LENGTH (d, dend - d, buf_charlen);
4593 	    else
4594 #endif /* not emacs */
4595 	      {
4596 		buf_ch = (unsigned char) *d;
4597 		buf_charlen = 1;
4598 	      }
4599 
4600 	    buf_ch = TRANSLATE (buf_ch);
4601 
4602 	    if ((!(bufp->syntax & RE_DOT_NEWLINE)
4603 		 && buf_ch == '\n')
4604 		|| ((bufp->syntax & RE_DOT_NOT_NULL)
4605 		    && buf_ch == '\000'))
4606 	      goto fail;
4607 
4608 	    SET_REGS_MATCHED ();
4609 	    DEBUG_PRINT2 ("  Matched `%d'.\n", *d);
4610 	    d += buf_charlen;
4611 	  }
4612 	  break;
4613 
4614 
4615 	case charset:
4616 	case charset_not:
4617 	  {
4618 	    register unsigned int c;
4619 	    boolean not = (re_opcode_t) *(p - 1) == charset_not;
4620 	    int len;
4621 
4622 	    /* Start of actual range_table, or end of bitmap if there is no
4623 	       range table.  */
4624 	    unsigned char *range_table;
4625 
4626 	    /* Nonzero if there is range table.	 */
4627 	    int range_table_exists;
4628 
4629 	    /* Number of ranges of range table.	 Not in bytes.	*/
4630 	    int count;
4631 
4632 	    DEBUG_PRINT2 ("EXECUTING charset%s.\n", not ? "_not" : "");
4633 
4634 	    PREFETCH ();
4635 	    c = (unsigned char) *d;
4636 
4637 	    range_table = CHARSET_RANGE_TABLE (&p[-1]); /* Past the bitmap.  */
4638 	    range_table_exists = CHARSET_RANGE_TABLE_EXISTS_P (&p[-1]);
4639 	    if (range_table_exists)
4640 	      EXTRACT_NUMBER_AND_INCR (count, range_table);
4641 	    else
4642 	      count = 0;
4643 
4644 	    if (multibyte && BASE_LEADING_CODE_P (c))
4645 	      c = STRING_CHAR_AND_LENGTH (d, dend - d, len);
4646 
4647 	    if (SINGLE_BYTE_CHAR_P (c))
4648 	      {			/* Lookup bitmap.  */
4649 		c = TRANSLATE (c); /* The character to match.  */
4650 		len = 1;
4651 
4652 		/* Cast to `unsigned' instead of `unsigned char' in
4653 		   case the bit list is a full 32 bytes long.  */
4654 		if (c < (unsigned) (CHARSET_BITMAP_SIZE (&p[-1]) * BYTEWIDTH)
4655 		&& p[1 + c / BYTEWIDTH] & (1 << (c % BYTEWIDTH)))
4656 	      not = !not;
4657 	      }
4658 	    else if (range_table_exists)
4659 	      CHARSET_LOOKUP_RANGE_TABLE_RAW (not, c, range_table, count);
4660 
4661 	    p = CHARSET_RANGE_TABLE_END (range_table, count);
4662 
4663 	    if (!not) goto fail;
4664 
4665 	    SET_REGS_MATCHED ();
4666 	    d += len;
4667 	    break;
4668 	  }
4669 
4670 
4671 	/* The beginning of a group is represented by start_memory.
4672 	   The arguments are the register number in the next byte, and the
4673 	   number of groups inner to this one in the next.  The text
4674 	   matched within the group is recorded (in the internal
4675 	   registers data structure) under the register number.	 */
4676 	case start_memory:
4677 	  DEBUG_PRINT3 ("EXECUTING start_memory %d (%d):\n", *p, p[1]);
4678 
4679 	  /* Find out if this group can match the empty string.	 */
4680 	  p1 = p;		/* To send to group_match_null_string_p.  */
4681 
4682 	  if (REG_MATCH_NULL_STRING_P (reg_info[*p]) == MATCH_NULL_UNSET_VALUE)
4683 	    REG_MATCH_NULL_STRING_P (reg_info[*p])
4684 	      = group_match_null_string_p (&p1, pend, reg_info);
4685 
4686 	  /* Save the position in the string where we were the last time
4687 	     we were at this open-group operator in case the group is
4688 	     operated upon by a repetition operator, e.g., with `(a*)*b'
4689 	     against `ab'; then we want to ignore where we are now in
4690 	     the string in case this attempt to match fails.  */
4691 	  old_regstart[*p] = REG_MATCH_NULL_STRING_P (reg_info[*p])
4692 			     ? REG_UNSET (regstart[*p]) ? d : regstart[*p]
4693 			     : regstart[*p];
4694 	  DEBUG_PRINT2 ("  old_regstart: %d\n",
4695 			 POINTER_TO_OFFSET (old_regstart[*p]));
4696 
4697 	  regstart[*p] = d;
4698 	  DEBUG_PRINT2 ("  regstart: %d\n", POINTER_TO_OFFSET (regstart[*p]));
4699 
4700 	  IS_ACTIVE (reg_info[*p]) = 1;
4701 	  MATCHED_SOMETHING (reg_info[*p]) = 0;
4702 
4703 	  /* Clear this whenever we change the register activity status.  */
4704 	  set_regs_matched_done = 0;
4705 
4706 	  /* This is the new highest active register.  */
4707 	  highest_active_reg = *p;
4708 
4709 	  /* If nothing was active before, this is the new lowest active
4710 	     register.	*/
4711 	  if (lowest_active_reg == NO_LOWEST_ACTIVE_REG)
4712 	    lowest_active_reg = *p;
4713 
4714 	  /* Move past the register number and inner group count.  */
4715 	  p += 2;
4716 	  just_past_start_mem = p;
4717 
4718 	  break;
4719 
4720 
4721 	/* The stop_memory opcode represents the end of a group.  Its
4722 	   arguments are the same as start_memory's: the register
4723 	   number, and the number of inner groups.  */
4724 	case stop_memory:
4725 	  DEBUG_PRINT3 ("EXECUTING stop_memory %d (%d):\n", *p, p[1]);
4726 
4727 	  /* We need to save the string position the last time we were at
4728 	     this close-group operator in case the group is operated
4729 	     upon by a repetition operator, e.g., with `((a*)*(b*)*)*'
4730 	     against `aba'; then we want to ignore where we are now in
4731 	     the string in case this attempt to match fails.  */
4732 	  old_regend[*p] = REG_MATCH_NULL_STRING_P (reg_info[*p])
4733 			   ? REG_UNSET (regend[*p]) ? d : regend[*p]
4734 			   : regend[*p];
4735 	  DEBUG_PRINT2 ("      old_regend: %d\n",
4736 			 POINTER_TO_OFFSET (old_regend[*p]));
4737 
4738 	  regend[*p] = d;
4739 	  DEBUG_PRINT2 ("      regend: %d\n", POINTER_TO_OFFSET (regend[*p]));
4740 
4741 	  /* This register isn't active anymore.  */
4742 	  IS_ACTIVE (reg_info[*p]) = 0;
4743 
4744 	  /* Clear this whenever we change the register activity status.  */
4745 	  set_regs_matched_done = 0;
4746 
4747 	  /* If this was the only register active, nothing is active
4748 	     anymore.  */
4749 	  if (lowest_active_reg == highest_active_reg)
4750 	    {
4751 	      lowest_active_reg = NO_LOWEST_ACTIVE_REG;
4752 	      highest_active_reg = NO_HIGHEST_ACTIVE_REG;
4753 	    }
4754 	  else
4755 	    { /* We must scan for the new highest active register, since
4756 		 it isn't necessarily one less than now: consider
4757 		 (a(b)c(d(e)f)g).  When group 3 ends, after the f), the
4758 		 new highest active register is 1.  */
4759 	      unsigned char r = *p - 1;
4760 	      while (r > 0 && !IS_ACTIVE (reg_info[r]))
4761 		r--;
4762 
4763 	      /* If we end up at register zero, that means that we saved
4764 		 the registers as the result of an `on_failure_jump', not
4765 		 a `start_memory', and we jumped to past the innermost
4766 		 `stop_memory'.	 For example, in ((.)*) we save
4767 		 registers 1 and 2 as a result of the *, but when we pop
4768 		 back to the second ), we are at the stop_memory 1.
4769 		 Thus, nothing is active.  */
4770 	      if (r == 0)
4771 		{
4772 		  lowest_active_reg = NO_LOWEST_ACTIVE_REG;
4773 		  highest_active_reg = NO_HIGHEST_ACTIVE_REG;
4774 		}
4775 	      else
4776 		highest_active_reg = r;
4777 	    }
4778 
4779 	  /* If just failed to match something this time around with a
4780 	     group that's operated on by a repetition operator, try to
4781 	     force exit from the ``loop'', and restore the register
4782 	     information for this group that we had before trying this
4783 	     last match.  */
4784 	  if ((!MATCHED_SOMETHING (reg_info[*p])
4785 	       || just_past_start_mem == p - 1)
4786 	      && (p + 2) < pend)
4787 	    {
4788 	      boolean is_a_jump_n = false;
4789 
4790 	      p1 = p + 2;
4791 	      mcnt = 0;
4792 	      switch ((re_opcode_t) *p1++)
4793 		{
4794 		  case jump_n:
4795 		    is_a_jump_n = true;
4796 		  case pop_failure_jump:
4797 		  case maybe_pop_jump:
4798 		  case jump:
4799 		  case dummy_failure_jump:
4800 		    EXTRACT_NUMBER_AND_INCR (mcnt, p1);
4801 		    if (is_a_jump_n)
4802 		      p1 += 2;
4803 		    break;
4804 
4805 		  default:
4806 		    /* do nothing */ ;
4807 		}
4808 	      p1 += mcnt;
4809 
4810 	      /* If the next operation is a jump backwards in the pattern
4811 		 to an on_failure_jump right before the start_memory
4812 		 corresponding to this stop_memory, exit from the loop
4813 		 by forcing a failure after pushing on the stack the
4814 		 on_failure_jump's jump in the pattern, and d.	*/
4815 	      if (mcnt < 0 && (re_opcode_t) *p1 == on_failure_jump
4816 		  && (re_opcode_t) p1[3] == start_memory && p1[4] == *p)
4817 		{
4818 		  /* If this group ever matched anything, then restore
4819 		     what its registers were before trying this last
4820 		     failed match, e.g., with `(a*)*b' against `ab' for
4821 		     regstart[1], and, e.g., with `((a*)*(b*)*)*'
4822 		     against `aba' for regend[3].
4823 
4824 		     Also restore the registers for inner groups for,
4825 		     e.g., `((a*)(b*))*' against `aba' (register 3 would
4826 		     otherwise get trashed).  */
4827 
4828 		  if (EVER_MATCHED_SOMETHING (reg_info[*p]))
4829 		    {
4830 		      unsigned r;
4831 
4832 		      EVER_MATCHED_SOMETHING (reg_info[*p]) = 0;
4833 
4834 		      /* Restore this and inner groups' (if any) registers.  */
4835 		      for (r = *p; r < *p + *(p + 1); r++)
4836 			{
4837 			  regstart[r] = old_regstart[r];
4838 
4839 			  /* xx why this test?	*/
4840 			  if (old_regend[r] >= regstart[r])
4841 			    regend[r] = old_regend[r];
4842 			}
4843 		    }
4844 		  p1++;
4845 		  EXTRACT_NUMBER_AND_INCR (mcnt, p1);
4846 		  PUSH_FAILURE_POINT (p1 + mcnt, d, -2);
4847 
4848 		  goto fail;
4849 		}
4850 	    }
4851 
4852 	  /* Move past the register number and the inner group count.  */
4853 	  p += 2;
4854 	  break;
4855 
4856 
4857 	/* \<digit> has been turned into a `duplicate' command which is
4858 	   followed by the numeric value of <digit> as the register number.  */
4859 	case duplicate:
4860 	  {
4861 	    register const char *d2, *dend2;
4862 	    int regno = *p++;	/* Get which register to match against.	 */
4863 	    DEBUG_PRINT2 ("EXECUTING duplicate %d.\n", regno);
4864 
4865 	    /* Can't back reference a group which we've never matched.	*/
4866 	    if (REG_UNSET (regstart[regno]) || REG_UNSET (regend[regno]))
4867 	      goto fail;
4868 
4869 	    /* Where in input to try to start matching.	 */
4870 	    d2 = regstart[regno];
4871 
4872 	    /* Where to stop matching; if both the place to start and
4873 	       the place to stop matching are in the same string, then
4874 	       set to the place to stop, otherwise, for now have to use
4875 	       the end of the first string.  */
4876 
4877 	    dend2 = ((FIRST_STRING_P (regstart[regno])
4878 		      == FIRST_STRING_P (regend[regno]))
4879 		     ? regend[regno] : end_match_1);
4880 	    for (;;)
4881 	      {
4882 		/* If necessary, advance to next segment in register
4883 		   contents.  */
4884 		while (d2 == dend2)
4885 		  {
4886 		    if (dend2 == end_match_2) break;
4887 		    if (dend2 == regend[regno]) break;
4888 
4889 		    /* End of string1 => advance to string2. */
4890 		    d2 = string2;
4891 		    dend2 = regend[regno];
4892 		  }
4893 		/* At end of register contents => success */
4894 		if (d2 == dend2) break;
4895 
4896 		/* If necessary, advance to next segment in data.  */
4897 		PREFETCH ();
4898 
4899 		/* How many characters left in this segment to match.  */
4900 		mcnt = dend - d;
4901 
4902 		/* Want how many consecutive characters we can match in
4903 		   one shot, so, if necessary, adjust the count.  */
4904 		if (mcnt > dend2 - d2)
4905 		  mcnt = dend2 - d2;
4906 
4907 		/* Compare that many; failure if mismatch, else move
4908 		   past them.  */
4909 		if (RE_TRANSLATE_P (translate)
4910 		    ? bcmp_translate (d, d2, mcnt, translate)
4911 		    : bcmp (d, d2, mcnt))
4912 		  goto fail;
4913 		d += mcnt, d2 += mcnt;
4914 
4915 		/* Do this because we've match some characters.	 */
4916 		SET_REGS_MATCHED ();
4917 	      }
4918 	  }
4919 	  break;
4920 
4921 
4922 	/* begline matches the empty string at the beginning of the string
4923 	   (unless `not_bol' is set in `bufp'), and, if
4924 	   `newline_anchor' is set, after newlines.  */
4925 	case begline:
4926 	  DEBUG_PRINT1 ("EXECUTING begline.\n");
4927 
4928 	  if (AT_STRINGS_BEG (d))
4929 	    {
4930 	      if (!bufp->not_bol) break;
4931 	    }
4932 	  else if (d[-1] == '\n' && bufp->newline_anchor)
4933 	    {
4934 	      break;
4935 	    }
4936 	  /* In all other cases, we fail.  */
4937 	  goto fail;
4938 
4939 
4940 	/* endline is the dual of begline.  */
4941 	case endline:
4942 	  DEBUG_PRINT1 ("EXECUTING endline.\n");
4943 
4944 	  if (AT_STRINGS_END (d))
4945 	    {
4946 	      if (!bufp->not_eol) break;
4947 	    }
4948 
4949 	  /* We have to ``prefetch'' the next character.  */
4950 	  else if ((d == end1 ? *string2 : *d) == '\n'
4951 		   && bufp->newline_anchor)
4952 	    {
4953 	      break;
4954 	    }
4955 	  goto fail;
4956 
4957 
4958 	/* Match at the very beginning of the data.  */
4959 	case begbuf:
4960 	  DEBUG_PRINT1 ("EXECUTING begbuf.\n");
4961 	  if (AT_STRINGS_BEG (d))
4962 	    break;
4963 	  goto fail;
4964 
4965 
4966 	/* Match at the very end of the data.  */
4967 	case endbuf:
4968 	  DEBUG_PRINT1 ("EXECUTING endbuf.\n");
4969 	  if (AT_STRINGS_END (d))
4970 	    break;
4971 	  goto fail;
4972 
4973 
4974 	/* on_failure_keep_string_jump is used to optimize `.*\n'.  It
4975 	   pushes NULL as the value for the string on the stack.  Then
4976 	   `pop_failure_point' will keep the current value for the
4977 	   string, instead of restoring it.  To see why, consider
4978 	   matching `foo\nbar' against `.*\n'.	The .* matches the foo;
4979 	   then the . fails against the \n.  But the next thing we want
4980 	   to do is match the \n against the \n; if we restored the
4981 	   string value, we would be back at the foo.
4982 
4983 	   Because this is used only in specific cases, we don't need to
4984 	   check all the things that `on_failure_jump' does, to make
4985 	   sure the right things get saved on the stack.  Hence we don't
4986 	   share its code.  The only reason to push anything on the
4987 	   stack at all is that otherwise we would have to change
4988 	   `anychar's code to do something besides goto fail in this
4989 	   case; that seems worse than this.  */
4990 	case on_failure_keep_string_jump:
4991 	  DEBUG_PRINT1 ("EXECUTING on_failure_keep_string_jump");
4992 
4993 	  EXTRACT_NUMBER_AND_INCR (mcnt, p);
4994 	  DEBUG_PRINT3 (" %d (to 0x%x):\n", mcnt, p + mcnt);
4995 
4996 	  PUSH_FAILURE_POINT (p + mcnt, NULL, -2);
4997 	  break;
4998 
4999 
5000 	/* Uses of on_failure_jump:
5001 
5002 	   Each alternative starts with an on_failure_jump that points
5003 	   to the beginning of the next alternative.  Each alternative
5004 	   except the last ends with a jump that in effect jumps past
5005 	   the rest of the alternatives.  (They really jump to the
5006 	   ending jump of the following alternative, because tensioning
5007 	   these jumps is a hassle.)
5008 
5009 	   Repeats start with an on_failure_jump that points past both
5010 	   the repetition text and either the following jump or
5011 	   pop_failure_jump back to this on_failure_jump.  */
5012 	case on_failure_jump:
5013 	on_failure:
5014 	  DEBUG_PRINT1 ("EXECUTING on_failure_jump");
5015 
5016 #if defined (WINDOWSNT) && defined (emacs)
5017 	  QUIT;
5018 #endif
5019 
5020 	  EXTRACT_NUMBER_AND_INCR (mcnt, p);
5021 	  DEBUG_PRINT3 (" %d (to 0x%x)", mcnt, p + mcnt);
5022 
5023 	  /* If this on_failure_jump comes right before a group (i.e.,
5024 	     the original * applied to a group), save the information
5025 	     for that group and all inner ones, so that if we fail back
5026 	     to this point, the group's information will be correct.
5027 	     For example, in \(a*\)*\1, we need the preceding group,
5028 	     and in \(zz\(a*\)b*\)\2, we need the inner group.	*/
5029 
5030 	  /* We can't use `p' to check ahead because we push
5031 	     a failure point to `p + mcnt' after we do this.  */
5032 	  p1 = p;
5033 
5034 	  /* We need to skip no_op's before we look for the
5035 	     start_memory in case this on_failure_jump is happening as
5036 	     the result of a completed succeed_n, as in \(a\)\{1,3\}b\1
5037 	     against aba.  */
5038 	  while (p1 < pend && (re_opcode_t) *p1 == no_op)
5039 	    p1++;
5040 
5041 	  if (p1 < pend && (re_opcode_t) *p1 == start_memory)
5042 	    {
5043 	      /* We have a new highest active register now.  This will
5044 		 get reset at the start_memory we are about to get to,
5045 		 but we will have saved all the registers relevant to
5046 		 this repetition op, as described above.  */
5047 	      highest_active_reg = *(p1 + 1) + *(p1 + 2);
5048 	      if (lowest_active_reg == NO_LOWEST_ACTIVE_REG)
5049 		lowest_active_reg = *(p1 + 1);
5050 	    }
5051 
5052 	  DEBUG_PRINT1 (":\n");
5053 	  PUSH_FAILURE_POINT (p + mcnt, d, -2);
5054 	  break;
5055 
5056 
5057 	/* A smart repeat ends with `maybe_pop_jump'.
5058 	   We change it to either `pop_failure_jump' or `jump'.	 */
5059 	case maybe_pop_jump:
5060 #if defined (WINDOWSNT) && defined (emacs)
5061 	  QUIT;
5062 #endif
5063 	  EXTRACT_NUMBER_AND_INCR (mcnt, p);
5064 	  DEBUG_PRINT2 ("EXECUTING maybe_pop_jump %d.\n", mcnt);
5065 	  {
5066 	    register unsigned char *p2 = p;
5067 
5068 	    /* Compare the beginning of the repeat with what in the
5069 	       pattern follows its end. If we can establish that there
5070 	       is nothing that they would both match, i.e., that we
5071 	       would have to backtrack because of (as in, e.g., `a*a')
5072 	       then we can change to pop_failure_jump, because we'll
5073 	       never have to backtrack.
5074 
5075 	       This is not true in the case of alternatives: in
5076 	       `(a|ab)*' we do need to backtrack to the `ab' alternative
5077 	       (e.g., if the string was `ab').	But instead of trying to
5078 	       detect that here, the alternative has put on a dummy
5079 	       failure point which is what we will end up popping.  */
5080 
5081 	    /* Skip over open/close-group commands.
5082 	       If what follows this loop is a ...+ construct,
5083 	       look at what begins its body, since we will have to
5084 	       match at least one of that.  */
5085 	    while (1)
5086 	      {
5087 		if (p2 + 2 < pend
5088 		    && ((re_opcode_t) *p2 == stop_memory
5089 			|| (re_opcode_t) *p2 == start_memory))
5090 		  p2 += 3;
5091 		else if (p2 + 6 < pend
5092 			 && (re_opcode_t) *p2 == dummy_failure_jump)
5093 		  p2 += 6;
5094 		else
5095 		  break;
5096 	      }
5097 
5098 	    p1 = p + mcnt;
5099 	    /* p1[0] ... p1[2] are the `on_failure_jump' corresponding
5100 	       to the `maybe_finalize_jump' of this case.  Examine what
5101 	       follows.	 */
5102 
5103 	    /* If we're at the end of the pattern, we can change.  */
5104 	    if (p2 == pend)
5105 	      {
5106 		/* Consider what happens when matching ":\(.*\)"
5107 		   against ":/".  I don't really understand this code
5108 		   yet.	 */
5109 		p[-3] = (unsigned char) pop_failure_jump;
5110 		DEBUG_PRINT1
5111 		  ("  End of pattern: change to `pop_failure_jump'.\n");
5112 	      }
5113 
5114 	    else if ((re_opcode_t) *p2 == exactn
5115 		     || (bufp->newline_anchor && (re_opcode_t) *p2 == endline))
5116 	      {
5117 		register unsigned int c
5118 		  = *p2 == (unsigned char) endline ? '\n' : p2[2];
5119 
5120 		if ((re_opcode_t) p1[3] == exactn)
5121 		  {
5122 		    if (!(multibyte /* && (c != '\n') */
5123 			  && BASE_LEADING_CODE_P (c))
5124 			? c != p1[5]
5125 			: (STRING_CHAR (&p2[2], pend - &p2[2])
5126 			   != STRING_CHAR (&p1[5], pend - &p1[5])))
5127 		  {
5128 		    p[-3] = (unsigned char) pop_failure_jump;
5129 		    DEBUG_PRINT3 ("  %c != %c => pop_failure_jump.\n",
5130 				  c, p1[5]);
5131 		  }
5132 		  }
5133 
5134 		else if ((re_opcode_t) p1[3] == charset
5135 			 || (re_opcode_t) p1[3] == charset_not)
5136 		  {
5137 		    int not = (re_opcode_t) p1[3] == charset_not;
5138 
5139 		    if (multibyte /* && (c != '\n') */
5140 			&& BASE_LEADING_CODE_P (c))
5141 		      c = STRING_CHAR (&p2[2], pend - &p2[2]);
5142 
5143 		    /* Test if C is listed in charset (or charset_not)
5144 		       at `&p1[3]'.  */
5145 		    if (SINGLE_BYTE_CHAR_P (c))
5146 		      {
5147 			if (c < CHARSET_BITMAP_SIZE (&p1[3]) * BYTEWIDTH
5148 			&& p1[5 + c / BYTEWIDTH] & (1 << (c % BYTEWIDTH)))
5149 		      not = !not;
5150 		      }
5151 		    else if (CHARSET_RANGE_TABLE_EXISTS_P (&p1[3]))
5152 		      CHARSET_LOOKUP_RANGE_TABLE (not, c, &p1[3]);
5153 
5154 		    /* `not' is equal to 1 if c would match, which means
5155 			that we can't change to pop_failure_jump.  */
5156 		    if (!not)
5157 		      {
5158 			p[-3] = (unsigned char) pop_failure_jump;
5159 			DEBUG_PRINT1 ("	 No match => pop_failure_jump.\n");
5160 		      }
5161 		  }
5162 	      }
5163 	    else if ((re_opcode_t) *p2 == charset)
5164 	      {
5165 		if ((re_opcode_t) p1[3] == exactn)
5166 		  {
5167 		    register unsigned int c = p1[5];
5168 		    int not = 0;
5169 
5170 		    if (multibyte && BASE_LEADING_CODE_P (c))
5171 		      c = STRING_CHAR (&p1[5], pend - &p1[5]);
5172 
5173 		    /* Test if C is listed in charset at `p2'.	*/
5174 		    if (SINGLE_BYTE_CHAR_P (c))
5175 		      {
5176 			if (c < CHARSET_BITMAP_SIZE (p2) * BYTEWIDTH
5177 			    && (p2[2 + c / BYTEWIDTH]
5178 				& (1 << (c % BYTEWIDTH))))
5179 			  not = !not;
5180 		      }
5181 		    else if (CHARSET_RANGE_TABLE_EXISTS_P (p2))
5182 		      CHARSET_LOOKUP_RANGE_TABLE (not, c, p2);
5183 
5184 		    if (!not)
5185 		  {
5186 		    p[-3] = (unsigned char) pop_failure_jump;
5187 			DEBUG_PRINT1 ("	 No match => pop_failure_jump.\n");
5188 		      }
5189 		  }
5190 
5191 		/* It is hard to list up all the character in charset
5192 		   P2 if it includes multibyte character.  Give up in
5193 		   such case.  */
5194 		else if (!multibyte || !CHARSET_RANGE_TABLE_EXISTS_P (p2))
5195 		  {
5196 		    /* Now, we are sure that P2 has no range table.
5197 		       So, for the size of bitmap in P2, `p2[1]' is
5198 		       enough.	But P1 may have range table, so the
5199 		       size of bitmap table of P1 is extracted by
5200 		       using macro `CHARSET_BITMAP_SIZE'.
5201 
5202 		       Since we know that all the character listed in
5203 		       P2 is ASCII, it is enough to test only bitmap
5204 		       table of P1.  */
5205 
5206 		    if ((re_opcode_t) p1[3] == charset_not)
5207 		  {
5208 		    int idx;
5209 			/* We win if the charset_not inside the loop lists
5210 			   every character listed in the charset after.	 */
5211 		    for (idx = 0; idx < (int) p2[1]; idx++)
5212 		      if (! (p2[2 + idx] == 0
5213 				 || (idx < CHARSET_BITMAP_SIZE (&p1[3])
5214 				 && ((p2[2 + idx] & ~ p1[5 + idx]) == 0))))
5215 			break;
5216 
5217 		    if (idx == p2[1])
5218 		      {
5219 			p[-3] = (unsigned char) pop_failure_jump;
5220 			DEBUG_PRINT1 ("	 No match => pop_failure_jump.\n");
5221 		      }
5222 		  }
5223 		else if ((re_opcode_t) p1[3] == charset)
5224 		  {
5225 		    int idx;
5226 		    /* We win if the charset inside the loop
5227 		       has no overlap with the one after the loop.  */
5228 		    for (idx = 0;
5229 			     (idx < (int) p2[1]
5230 			      && idx < CHARSET_BITMAP_SIZE (&p1[3]));
5231 			 idx++)
5232 		      if ((p2[2 + idx] & p1[5 + idx]) != 0)
5233 			break;
5234 
5235 			if (idx == p2[1]
5236 			    || idx == CHARSET_BITMAP_SIZE (&p1[3]))
5237 		      {
5238 			p[-3] = (unsigned char) pop_failure_jump;
5239 			DEBUG_PRINT1 ("	 No match => pop_failure_jump.\n");
5240 		      }
5241 		  }
5242 	      }
5243 	  }
5244 	  }
5245 	  p -= 2;		/* Point at relative address again.  */
5246 	  if ((re_opcode_t) p[-1] != pop_failure_jump)
5247 	    {
5248 	      p[-1] = (unsigned char) jump;
5249 	      DEBUG_PRINT1 ("  Match => jump.\n");
5250 	      goto unconditional_jump;
5251 	    }
5252 	/* Note fall through.  */
5253 
5254 
5255 	/* The end of a simple repeat has a pop_failure_jump back to
5256 	   its matching on_failure_jump, where the latter will push a
5257 	   failure point.  The pop_failure_jump takes off failure
5258 	   points put on by this pop_failure_jump's matching
5259 	   on_failure_jump; we got through the pattern to here from the
5260 	   matching on_failure_jump, so didn't fail.  */
5261 	case pop_failure_jump:
5262 	  {
5263 	    /* We need to pass separate storage for the lowest and
5264 	       highest registers, even though we don't care about the
5265 	       actual values.  Otherwise, we will restore only one
5266 	       register from the stack, since lowest will == highest in
5267 	       `pop_failure_point'.  */
5268 	    unsigned dummy_low_reg, dummy_high_reg;
5269 	    unsigned char *pdummy;
5270 	    const char *sdummy;
5271 
5272 	    DEBUG_PRINT1 ("EXECUTING pop_failure_jump.\n");
5273 	    POP_FAILURE_POINT (sdummy, pdummy,
5274 			       dummy_low_reg, dummy_high_reg,
5275 			       reg_dummy, reg_dummy, reg_info_dummy);
5276 	  }
5277 	  /* Note fall through.	 */
5278 
5279 
5280 	/* Unconditionally jump (without popping any failure points).  */
5281 	case jump:
5282 	unconditional_jump:
5283 #if defined (WINDOWSNT) && defined (emacs)
5284 	  QUIT;
5285 #endif
5286 	  EXTRACT_NUMBER_AND_INCR (mcnt, p);	/* Get the amount to jump.  */
5287 	  DEBUG_PRINT2 ("EXECUTING jump %d ", mcnt);
5288 	  p += mcnt;				/* Do the jump.	 */
5289 	  DEBUG_PRINT2 ("(to 0x%x).\n", p);
5290 	  break;
5291 
5292 
5293 	/* We need this opcode so we can detect where alternatives end
5294 	   in `group_match_null_string_p' et al.  */
5295 	case jump_past_alt:
5296 	  DEBUG_PRINT1 ("EXECUTING jump_past_alt.\n");
5297 	  goto unconditional_jump;
5298 
5299 
5300 	/* Normally, the on_failure_jump pushes a failure point, which
5301 	   then gets popped at pop_failure_jump.  We will end up at
5302 	   pop_failure_jump, also, and with a pattern of, say, `a+', we
5303 	   are skipping over the on_failure_jump, so we have to push
5304 	   something meaningless for pop_failure_jump to pop.  */
5305 	case dummy_failure_jump:
5306 	  DEBUG_PRINT1 ("EXECUTING dummy_failure_jump.\n");
5307 	  /* It doesn't matter what we push for the string here.  What
5308 	     the code at `fail' tests is the value for the pattern.  */
5309 	  PUSH_FAILURE_POINT (0, 0, -2);
5310 	  goto unconditional_jump;
5311 
5312 
5313 	/* At the end of an alternative, we need to push a dummy failure
5314 	   point in case we are followed by a `pop_failure_jump', because
5315 	   we don't want the failure point for the alternative to be
5316 	   popped.  For example, matching `(a|ab)*' against `aab'
5317 	   requires that we match the `ab' alternative.	 */
5318 	case push_dummy_failure:
5319 	  DEBUG_PRINT1 ("EXECUTING push_dummy_failure.\n");
5320 	  /* See comments just above at `dummy_failure_jump' about the
5321 	     two zeroes.  */
5322 	  PUSH_FAILURE_POINT (0, 0, -2);
5323 	  break;
5324 
5325 	/* Have to succeed matching what follows at least n times.
5326 	   After that, handle like `on_failure_jump'.  */
5327 	case succeed_n:
5328 	  EXTRACT_NUMBER (mcnt, p + 2);
5329 	  DEBUG_PRINT2 ("EXECUTING succeed_n %d.\n", mcnt);
5330 
5331 	  assert (mcnt >= 0);
5332 	  /* Originally, this is how many times we HAVE to succeed.  */
5333 	  if (mcnt > 0)
5334 	    {
5335 	       mcnt--;
5336 	       p += 2;
5337 	       STORE_NUMBER_AND_INCR (p, mcnt);
5338 	       DEBUG_PRINT3 ("	Setting 0x%x to %d.\n", p, mcnt);
5339 	    }
5340 	  else if (mcnt == 0)
5341 	    {
5342 	      DEBUG_PRINT2 ("  Setting two bytes from 0x%x to no_op.\n", p+2);
5343 	      p[2] = (unsigned char) no_op;
5344 	      p[3] = (unsigned char) no_op;
5345 	      goto on_failure;
5346 	    }
5347 	  break;
5348 
5349 	case jump_n:
5350 	  EXTRACT_NUMBER (mcnt, p + 2);
5351 	  DEBUG_PRINT2 ("EXECUTING jump_n %d.\n", mcnt);
5352 
5353 	  /* Originally, this is how many times we CAN jump.  */
5354 	  if (mcnt)
5355 	    {
5356 	       mcnt--;
5357 	       STORE_NUMBER (p + 2, mcnt);
5358 	       goto unconditional_jump;
5359 	    }
5360 	  /* If don't have to jump any more, skip over the rest of command.  */
5361 	  else
5362 	    p += 4;
5363 	  break;
5364 
5365 	case set_number_at:
5366 	  {
5367 	    DEBUG_PRINT1 ("EXECUTING set_number_at.\n");
5368 
5369 	    EXTRACT_NUMBER_AND_INCR (mcnt, p);
5370 	    p1 = p + mcnt;
5371 	    EXTRACT_NUMBER_AND_INCR (mcnt, p);
5372 	    DEBUG_PRINT3 ("  Setting 0x%x to %d.\n", p1, mcnt);
5373 	    STORE_NUMBER (p1, mcnt);
5374 	    break;
5375 	  }
5376 
5377 	case wordbound:
5378 	  DEBUG_PRINT1 ("EXECUTING wordbound.\n");
5379 
5380 	  /* We SUCCEED in one of the following cases: */
5381 
5382 	  /* Case 1: D is at the beginning or the end of string.  */
5383 	  if (AT_STRINGS_BEG (d) || AT_STRINGS_END (d))
5384 	    break;
5385 	  else
5386 	    {
5387 	      /* C1 is the character before D, S1 is the syntax of C1, C2
5388 		 is the character at D, and S2 is the syntax of C2.  */
5389 	      int c1, c2, s1, s2;
5390 	      int pos1 = PTR_TO_OFFSET (d - 1);
5391 	      int charpos;
5392 
5393 	      GET_CHAR_BEFORE_2 (c1, d, string1, end1, string2, end2);
5394 	      GET_CHAR_AFTER_2 (c2, d, string1, end1, string2, end2);
5395 #ifdef emacs
5396 	      charpos = SYNTAX_TABLE_BYTE_TO_CHAR (pos1);
5397 	      UPDATE_SYNTAX_TABLE (charpos);
5398 #endif
5399 	      s1 = SYNTAX (c1);
5400 #ifdef emacs
5401 	      UPDATE_SYNTAX_TABLE_FORWARD (charpos + 1);
5402 #endif
5403 	      s2 = SYNTAX (c2);
5404 
5405 	      if (/* Case 2: Only one of S1 and S2 is Sword.  */
5406 		  ((s1 == Sword) != (s2 == Sword))
5407 		  /* Case 3: Both of S1 and S2 are Sword, and macro
5408 		     WORD_BOUNDARY_P (C1, C2) returns nonzero.	*/
5409 		  || ((s1 == Sword) && WORD_BOUNDARY_P (c1, c2)))
5410 	    break;
5411 	}
5412 	  goto fail;
5413 
5414       case notwordbound:
5415 	  DEBUG_PRINT1 ("EXECUTING notwordbound.\n");
5416 
5417 	  /* We FAIL in one of the following cases: */
5418 
5419 	  /* Case 1: D is at the beginning or the end of string.  */
5420 	  if (AT_STRINGS_BEG (d) || AT_STRINGS_END (d))
5421 	    goto fail;
5422 	  else
5423 	    {
5424 	      /* C1 is the character before D, S1 is the syntax of C1, C2
5425 		 is the character at D, and S2 is the syntax of C2.  */
5426 	      int c1, c2, s1, s2;
5427 	      int pos1 = PTR_TO_OFFSET (d - 1);
5428 	      int charpos;
5429 
5430 	      GET_CHAR_BEFORE_2 (c1, d, string1, end1, string2, end2);
5431 	      GET_CHAR_AFTER_2 (c2, d, string1, end1, string2, end2);
5432 #ifdef emacs
5433 	      charpos = SYNTAX_TABLE_BYTE_TO_CHAR (pos1);
5434 	      UPDATE_SYNTAX_TABLE (charpos);
5435 #endif
5436 	      s1 = SYNTAX (c1);
5437 #ifdef emacs
5438 	      UPDATE_SYNTAX_TABLE_FORWARD (charpos + 1);
5439 #endif
5440 	      s2 = SYNTAX (c2);
5441 
5442 	      if (/* Case 2: Only one of S1 and S2 is Sword.  */
5443 		  ((s1 == Sword) != (s2 == Sword))
5444 		  /* Case 3: Both of S1 and S2 are Sword, and macro
5445 		     WORD_BOUNDARY_P (C1, C2) returns nonzero.	*/
5446 		  || ((s1 == Sword) && WORD_BOUNDARY_P (c1, c2)))
5447 	    goto fail;
5448 	}
5449 	  break;
5450 
5451 	case wordbeg:
5452 	  DEBUG_PRINT1 ("EXECUTING wordbeg.\n");
5453 
5454 	  /* We FAIL in one of the following cases: */
5455 
5456 	  /* Case 1: D is at the end of string.	 */
5457 	  if (AT_STRINGS_END (d))
5458 	  goto fail;
5459 	  else
5460 	    {
5461 	      /* C1 is the character before D, S1 is the syntax of C1, C2
5462 		 is the character at D, and S2 is the syntax of C2.  */
5463 	      int c1, c2, s1, s2;
5464 	      int pos1 = PTR_TO_OFFSET (d);
5465 	      int charpos;
5466 
5467 	      GET_CHAR_AFTER_2 (c2, d, string1, end1, string2, end2);
5468 #ifdef emacs
5469 	      charpos = SYNTAX_TABLE_BYTE_TO_CHAR (pos1);
5470 	      UPDATE_SYNTAX_TABLE (charpos);
5471 #endif
5472 	      s2 = SYNTAX (c2);
5473 
5474 	      /* Case 2: S2 is not Sword. */
5475 	      if (s2 != Sword)
5476 		goto fail;
5477 
5478 	      /* Case 3: D is not at the beginning of string ... */
5479 	      if (!AT_STRINGS_BEG (d))
5480 		{
5481 		  GET_CHAR_BEFORE_2 (c1, d, string1, end1, string2, end2);
5482 #ifdef emacs
5483 		  UPDATE_SYNTAX_TABLE_BACKWARD (charpos - 1);
5484 #endif
5485 		  s1 = SYNTAX (c1);
5486 
5487 		  /* ... and S1 is Sword, and WORD_BOUNDARY_P (C1, C2)
5488 		     returns 0.	 */
5489 		  if ((s1 == Sword) && !WORD_BOUNDARY_P (c1, c2))
5490 		    goto fail;
5491 		}
5492 	    }
5493 	  break;
5494 
5495 	case wordend:
5496 	  DEBUG_PRINT1 ("EXECUTING wordend.\n");
5497 
5498 	  /* We FAIL in one of the following cases: */
5499 
5500 	  /* Case 1: D is at the beginning of string.  */
5501 	  if (AT_STRINGS_BEG (d))
5502 	    goto fail;
5503 	  else
5504 	    {
5505 	      /* C1 is the character before D, S1 is the syntax of C1, C2
5506 		 is the character at D, and S2 is the syntax of C2.  */
5507 	      int c1, c2, s1, s2;
5508 	      int pos1 = PTR_TO_OFFSET (d);
5509 	      int charpos;
5510 
5511 	      GET_CHAR_BEFORE_2 (c1, d, string1, end1, string2, end2);
5512 #ifdef emacs
5513 	      charpos = SYNTAX_TABLE_BYTE_TO_CHAR (pos1 - 1);
5514 	      UPDATE_SYNTAX_TABLE (charpos);
5515 #endif
5516 	      s1 = SYNTAX (c1);
5517 
5518 	      /* Case 2: S1 is not Sword.  */
5519 	      if (s1 != Sword)
5520 		goto fail;
5521 
5522 	      /* Case 3: D is not at the end of string ... */
5523 	      if (!AT_STRINGS_END (d))
5524 		{
5525 		  GET_CHAR_AFTER_2 (c2, d, string1, end1, string2, end2);
5526 #ifdef emacs
5527 		  UPDATE_SYNTAX_TABLE_FORWARD (charpos);
5528 #endif
5529 		  s2 = SYNTAX (c2);
5530 
5531 		  /* ... and S2 is Sword, and WORD_BOUNDARY_P (C1, C2)
5532 		     returns 0.	 */
5533 		  if ((s2 == Sword) && !WORD_BOUNDARY_P (c1, c2))
5534 	  goto fail;
5535 		}
5536 	    }
5537 	  break;
5538 
5539 #ifdef emacs
5540 	case before_dot:
5541 	  DEBUG_PRINT1 ("EXECUTING before_dot.\n");
5542 	  if (PTR_BYTE_POS ((unsigned char *) d) >= PT_BYTE)
5543 	    goto fail;
5544 	  break;
5545 
5546 	case at_dot:
5547 	  DEBUG_PRINT1 ("EXECUTING at_dot.\n");
5548 	  if (PTR_BYTE_POS ((unsigned char *) d) != PT_BYTE)
5549 	    goto fail;
5550 	  break;
5551 
5552 	case after_dot:
5553 	  DEBUG_PRINT1 ("EXECUTING after_dot.\n");
5554 	  if (PTR_BYTE_POS ((unsigned char *) d) <= PT_BYTE)
5555 	    goto fail;
5556 	  break;
5557 
5558 	case syntaxspec:
5559 	  DEBUG_PRINT2 ("EXECUTING syntaxspec %d.\n", mcnt);
5560 	  mcnt = *p++;
5561 	  goto matchsyntax;
5562 
5563 	case wordchar:
5564 	  DEBUG_PRINT1 ("EXECUTING Emacs wordchar.\n");
5565 	  mcnt = (int) Sword;
5566 	matchsyntax:
5567 	  PREFETCH ();
5568 #ifdef emacs
5569 	  {
5570 	    int pos1 = SYNTAX_TABLE_BYTE_TO_CHAR (PTR_TO_OFFSET (d));
5571 	    UPDATE_SYNTAX_TABLE (pos1);
5572 	  }
5573 #endif
5574 	  {
5575 	    int c, len;
5576 
5577 	    if (multibyte)
5578 	      /* we must concern about multibyte form, ... */
5579 	      c = STRING_CHAR_AND_LENGTH (d, dend - d, len);
5580 	    else
5581 	      /* everything should be handled as ASCII, even though it
5582 		 looks like multibyte form.  */
5583 	      c = *d, len = 1;
5584 
5585 	    if (SYNTAX (c) != (enum syntaxcode) mcnt)
5586 	    goto fail;
5587 	    d += len;
5588 	  }
5589 	  SET_REGS_MATCHED ();
5590 	  break;
5591 
5592 	case notsyntaxspec:
5593 	  DEBUG_PRINT2 ("EXECUTING notsyntaxspec %d.\n", mcnt);
5594 	  mcnt = *p++;
5595 	  goto matchnotsyntax;
5596 
5597 	case notwordchar:
5598 	  DEBUG_PRINT1 ("EXECUTING Emacs notwordchar.\n");
5599 	  mcnt = (int) Sword;
5600 	matchnotsyntax:
5601 	  PREFETCH ();
5602 #ifdef emacs
5603 	  {
5604 	    int pos1 = SYNTAX_TABLE_BYTE_TO_CHAR (PTR_TO_OFFSET (d));
5605 	    UPDATE_SYNTAX_TABLE (pos1);
5606 	  }
5607 #endif
5608 	  {
5609 	    int c, len;
5610 
5611 	    if (multibyte)
5612 	      c = STRING_CHAR_AND_LENGTH (d, dend - d, len);
5613 	    else
5614 	      c = *d, len = 1;
5615 
5616 	    if (SYNTAX (c) == (enum syntaxcode) mcnt)
5617 	    goto fail;
5618 	    d += len;
5619 	  }
5620 	  SET_REGS_MATCHED ();
5621 	  break;
5622 
5623 	case categoryspec:
5624 	  DEBUG_PRINT2 ("EXECUTING categoryspec %d.\n", *p);
5625 	  mcnt = *p++;
5626 	  PREFETCH ();
5627 	  {
5628 	    int c, len;
5629 
5630 	    if (multibyte)
5631 	      c = STRING_CHAR_AND_LENGTH (d, dend - d, len);
5632 	    else
5633 	      c = *d, len = 1;
5634 
5635 	    if (!CHAR_HAS_CATEGORY (c, mcnt))
5636 	      goto fail;
5637 	    d += len;
5638 	  }
5639 	  SET_REGS_MATCHED ();
5640 	  break;
5641 
5642 	case notcategoryspec:
5643 	  DEBUG_PRINT2 ("EXECUTING notcategoryspec %d.\n", *p);
5644 	  mcnt = *p++;
5645 	  PREFETCH ();
5646 	  {
5647 	    int c, len;
5648 
5649 	    if (multibyte)
5650 	      c = STRING_CHAR_AND_LENGTH (d, dend - d, len);
5651 	    else
5652 	      c = *d, len = 1;
5653 
5654 	    if (CHAR_HAS_CATEGORY (c, mcnt))
5655 	      goto fail;
5656 	    d += len;
5657 	  }
5658 	  SET_REGS_MATCHED ();
5659           break;
5660 
5661 #else /* not emacs */
5662 	case wordchar:
5663           DEBUG_PRINT1 ("EXECUTING non-Emacs wordchar.\n");
5664 	  PREFETCH ();
5665           if (!WORDCHAR_P (d))
5666             goto fail;
5667 	  SET_REGS_MATCHED ();
5668           d++;
5669 	  break;
5670 
5671 	case notwordchar:
5672           DEBUG_PRINT1 ("EXECUTING non-Emacs notwordchar.\n");
5673 	  PREFETCH ();
5674 	  if (WORDCHAR_P (d))
5675             goto fail;
5676           SET_REGS_MATCHED ();
5677           d++;
5678 	  break;
5679 #endif /* not emacs */
5680 
5681         default:
5682           abort ();
5683 	}
5684       continue;  /* Successfully executed one pattern command; keep going.  */
5685 
5686 
5687     /* We goto here if a matching operation fails. */
5688     fail:
5689 #if defined (WINDOWSNT) && defined (emacs)
5690       QUIT;
5691 #endif
5692       if (!FAIL_STACK_EMPTY ())
5693 	{ /* A restart point is known.  Restore to that state.  */
5694           DEBUG_PRINT1 ("\nFAIL:\n");
5695           POP_FAILURE_POINT (d, p,
5696                              lowest_active_reg, highest_active_reg,
5697                              regstart, regend, reg_info);
5698 
5699           /* If this failure point is a dummy, try the next one.  */
5700           if (!p)
5701 	    goto fail;
5702 
5703           /* If we failed to the end of the pattern, don't examine *p.  */
5704 	  assert (p <= pend);
5705           if (p < pend)
5706             {
5707               boolean is_a_jump_n = false;
5708 
5709               /* If failed to a backwards jump that's part of a repetition
5710                  loop, need to pop this failure point and use the next one.  */
5711               switch ((re_opcode_t) *p)
5712                 {
5713                 case jump_n:
5714                   is_a_jump_n = true;
5715                 case maybe_pop_jump:
5716                 case pop_failure_jump:
5717                 case jump:
5718                   p1 = p + 1;
5719                   EXTRACT_NUMBER_AND_INCR (mcnt, p1);
5720                   p1 += mcnt;
5721 
5722                   if ((is_a_jump_n && (re_opcode_t) *p1 == succeed_n)
5723                       || (!is_a_jump_n
5724                           && (re_opcode_t) *p1 == on_failure_jump))
5725                     goto fail;
5726                   break;
5727                 default:
5728                   /* do nothing */ ;
5729                 }
5730             }
5731 
5732           if (d >= string1 && d <= end1)
5733 	    dend = end_match_1;
5734         }
5735       else
5736         break;   /* Matching at this starting point really fails.  */
5737     } /* for (;;) */
5738 
5739   if (best_regs_set)
5740     goto restore_best_regs;
5741 
5742   FREE_VARIABLES ();
5743 
5744   return -1;         			/* Failure to match.  */
5745 } /* re_match_2 */
5746 
5747 /* Subroutine definitions for re_match_2.  */
5748 
5749 
5750 /* We are passed P pointing to a register number after a start_memory.
5751 
5752    Return true if the pattern up to the corresponding stop_memory can
5753    match the empty string, and false otherwise.
5754 
5755    If we find the matching stop_memory, sets P to point to one past its number.
5756    Otherwise, sets P to an undefined byte less than or equal to END.
5757 
5758    We don't handle duplicates properly (yet).  */
5759 
5760 static boolean
group_match_null_string_p(p,end,reg_info)5761 group_match_null_string_p (p, end, reg_info)
5762     unsigned char **p, *end;
5763     register_info_type *reg_info;
5764 {
5765   int mcnt;
5766   /* Point to after the args to the start_memory.  */
5767   unsigned char *p1 = *p + 2;
5768 
5769   while (p1 < end)
5770     {
5771       /* Skip over opcodes that can match nothing, and return true or
5772 	 false, as appropriate, when we get to one that can't, or to the
5773          matching stop_memory.  */
5774 
5775       switch ((re_opcode_t) *p1)
5776         {
5777         /* Could be either a loop or a series of alternatives.  */
5778         case on_failure_jump:
5779           p1++;
5780           EXTRACT_NUMBER_AND_INCR (mcnt, p1);
5781 
5782           /* If the next operation is not a jump backwards in the
5783 	     pattern.  */
5784 
5785 	  if (mcnt >= 0)
5786 	    {
5787               /* Go through the on_failure_jumps of the alternatives,
5788                  seeing if any of the alternatives cannot match nothing.
5789                  The last alternative starts with only a jump,
5790                  whereas the rest start with on_failure_jump and end
5791                  with a jump, e.g., here is the pattern for `a|b|c':
5792 
5793                  /on_failure_jump/0/6/exactn/1/a/jump_past_alt/0/6
5794                  /on_failure_jump/0/6/exactn/1/b/jump_past_alt/0/3
5795                  /exactn/1/c
5796 
5797                  So, we have to first go through the first (n-1)
5798                  alternatives and then deal with the last one separately.  */
5799 
5800 
5801               /* Deal with the first (n-1) alternatives, which start
5802                  with an on_failure_jump (see above) that jumps to right
5803                  past a jump_past_alt.  */
5804 
5805               while ((re_opcode_t) p1[mcnt-3] == jump_past_alt)
5806                 {
5807                   /* `mcnt' holds how many bytes long the alternative
5808                      is, including the ending `jump_past_alt' and
5809                      its number.  */
5810 
5811                   if (!alt_match_null_string_p (p1, p1 + mcnt - 3,
5812 				                      reg_info))
5813                     return false;
5814 
5815                   /* Move to right after this alternative, including the
5816 		     jump_past_alt.  */
5817                   p1 += mcnt;
5818 
5819                   /* Break if it's the beginning of an n-th alternative
5820                      that doesn't begin with an on_failure_jump.  */
5821                   if ((re_opcode_t) *p1 != on_failure_jump)
5822                     break;
5823 
5824 		  /* Still have to check that it's not an n-th
5825 		     alternative that starts with an on_failure_jump.  */
5826 		  p1++;
5827                   EXTRACT_NUMBER_AND_INCR (mcnt, p1);
5828                   if ((re_opcode_t) p1[mcnt-3] != jump_past_alt)
5829                     {
5830 		      /* Get to the beginning of the n-th alternative.  */
5831                       p1 -= 3;
5832                       break;
5833                     }
5834                 }
5835 
5836               /* Deal with the last alternative: go back and get number
5837                  of the `jump_past_alt' just before it.  `mcnt' contains
5838                  the length of the alternative.  */
5839               EXTRACT_NUMBER (mcnt, p1 - 2);
5840 
5841               if (!alt_match_null_string_p (p1, p1 + mcnt, reg_info))
5842                 return false;
5843 
5844               p1 += mcnt;	/* Get past the n-th alternative.  */
5845             } /* if mcnt > 0 */
5846           break;
5847 
5848 
5849         case stop_memory:
5850 	  assert (p1[1] == **p);
5851           *p = p1 + 2;
5852           return true;
5853 
5854 
5855         default:
5856           if (!common_op_match_null_string_p (&p1, end, reg_info))
5857             return false;
5858         }
5859     } /* while p1 < end */
5860 
5861   return false;
5862 } /* group_match_null_string_p */
5863 
5864 
5865 /* Similar to group_match_null_string_p, but doesn't deal with alternatives:
5866    It expects P to be the first byte of a single alternative and END one
5867    byte past the last. The alternative can contain groups.  */
5868 
5869 static boolean
alt_match_null_string_p(p,end,reg_info)5870 alt_match_null_string_p (p, end, reg_info)
5871     unsigned char *p, *end;
5872     register_info_type *reg_info;
5873 {
5874   int mcnt;
5875   unsigned char *p1 = p;
5876 
5877   while (p1 < end)
5878     {
5879       /* Skip over opcodes that can match nothing, and break when we get
5880          to one that can't.  */
5881 
5882       switch ((re_opcode_t) *p1)
5883         {
5884 	/* It's a loop.  */
5885         case on_failure_jump:
5886           p1++;
5887           EXTRACT_NUMBER_AND_INCR (mcnt, p1);
5888           p1 += mcnt;
5889           break;
5890 
5891 	default:
5892           if (!common_op_match_null_string_p (&p1, end, reg_info))
5893             return false;
5894         }
5895     }  /* while p1 < end */
5896 
5897   return true;
5898 } /* alt_match_null_string_p */
5899 
5900 
5901 /* Deals with the ops common to group_match_null_string_p and
5902    alt_match_null_string_p.
5903 
5904    Sets P to one after the op and its arguments, if any.  */
5905 
5906 static boolean
common_op_match_null_string_p(p,end,reg_info)5907 common_op_match_null_string_p (p, end, reg_info)
5908     unsigned char **p, *end;
5909     register_info_type *reg_info;
5910 {
5911   int mcnt;
5912   boolean ret;
5913   int reg_no;
5914   unsigned char *p1 = *p;
5915 
5916   switch ((re_opcode_t) *p1++)
5917     {
5918     case no_op:
5919     case begline:
5920     case endline:
5921     case begbuf:
5922     case endbuf:
5923     case wordbeg:
5924     case wordend:
5925     case wordbound:
5926     case notwordbound:
5927 #ifdef emacs
5928     case before_dot:
5929     case at_dot:
5930     case after_dot:
5931 #endif
5932       break;
5933 
5934     case start_memory:
5935       reg_no = *p1;
5936       assert (reg_no > 0 && reg_no <= MAX_REGNUM);
5937       ret = group_match_null_string_p (&p1, end, reg_info);
5938 
5939       /* Have to set this here in case we're checking a group which
5940          contains a group and a back reference to it.  */
5941 
5942       if (REG_MATCH_NULL_STRING_P (reg_info[reg_no]) == MATCH_NULL_UNSET_VALUE)
5943         REG_MATCH_NULL_STRING_P (reg_info[reg_no]) = ret;
5944 
5945       if (!ret)
5946         return false;
5947       break;
5948 
5949     /* If this is an optimized succeed_n for zero times, make the jump.  */
5950     case jump:
5951       EXTRACT_NUMBER_AND_INCR (mcnt, p1);
5952       if (mcnt >= 0)
5953         p1 += mcnt;
5954       else
5955         return false;
5956       break;
5957 
5958     case succeed_n:
5959       /* Get to the number of times to succeed.  */
5960       p1 += 2;
5961       EXTRACT_NUMBER_AND_INCR (mcnt, p1);
5962 
5963       if (mcnt == 0)
5964         {
5965           p1 -= 4;
5966           EXTRACT_NUMBER_AND_INCR (mcnt, p1);
5967           p1 += mcnt;
5968         }
5969       else
5970         return false;
5971       break;
5972 
5973     case duplicate:
5974       if (!REG_MATCH_NULL_STRING_P (reg_info[*p1]))
5975         return false;
5976       break;
5977 
5978     case set_number_at:
5979       p1 += 4;
5980 
5981     default:
5982       /* All other opcodes mean we cannot match the empty string.  */
5983       return false;
5984   }
5985 
5986   *p = p1;
5987   return true;
5988 } /* common_op_match_null_string_p */
5989 
5990 
5991 /* Return zero if TRANSLATE[S1] and TRANSLATE[S2] are identical for LEN
5992    bytes; nonzero otherwise.  */
5993 
5994 static int
bcmp_translate(s1,s2,len,translate)5995 bcmp_translate (s1, s2, len, translate)
5996      unsigned char *s1, *s2;
5997      register int len;
5998      RE_TRANSLATE_TYPE translate;
5999 {
6000   register unsigned char *p1 = s1, *p2 = s2;
6001   unsigned char *p1_end = s1 + len;
6002   unsigned char *p2_end = s2 + len;
6003 
6004   while (p1 != p1_end && p2 != p2_end)
6005     {
6006       int p1_charlen, p2_charlen;
6007       int p1_ch, p2_ch;
6008 
6009       p1_ch = STRING_CHAR_AND_LENGTH (p1, p1_end - p1, p1_charlen);
6010       p2_ch = STRING_CHAR_AND_LENGTH (p2, p2_end - p2, p2_charlen);
6011 
6012       if (RE_TRANSLATE (translate, p1_ch)
6013 	  != RE_TRANSLATE (translate, p2_ch))
6014 	return 1;
6015 
6016       p1 += p1_charlen, p2 += p2_charlen;
6017     }
6018 
6019   if (p1 != p1_end || p2 != p2_end)
6020     return 1;
6021 
6022   return 0;
6023 }
6024 
6025 /* Entry points for GNU code.  */
6026 
6027 /* re_compile_pattern is the GNU regular expression compiler: it
6028    compiles PATTERN (of length SIZE) and puts the result in BUFP.
6029    Returns 0 if the pattern was valid, otherwise an error string.
6030 
6031    Assumes the `allocated' (and perhaps `buffer') and `translate' fields
6032    are set in BUFP on entry.
6033 
6034    We call regex_compile to do the actual compilation.  */
6035 
6036 const char *
re_compile_pattern(pattern,length,bufp)6037 re_compile_pattern (pattern, length, bufp)
6038      const char *pattern;
6039      int length;
6040      struct re_pattern_buffer *bufp;
6041 {
6042   reg_errcode_t ret;
6043 
6044   /* GNU code is written to assume at least RE_NREGS registers will be set
6045      (and at least one extra will be -1).  */
6046   bufp->regs_allocated = REGS_UNALLOCATED;
6047 
6048   /* And GNU code determines whether or not to get register information
6049      by passing null for the REGS argument to re_match, etc., not by
6050      setting no_sub.  */
6051   bufp->no_sub = 0;
6052 
6053   /* Match anchors at newline.  */
6054   bufp->newline_anchor = 1;
6055 
6056   ret = regex_compile (pattern, length, re_syntax_options, bufp);
6057 
6058   if (!ret)
6059     return NULL;
6060   return gettext (re_error_msgid[(int) ret]);
6061 }
6062 
6063 /* Entry points compatible with 4.2 BSD regex library.  We don't define
6064    them unless specifically requested.  */
6065 
6066 #if defined (_REGEX_RE_COMP) || defined (_LIBC)
6067 
6068 /* BSD has one and only one pattern buffer.  */
6069 static struct re_pattern_buffer re_comp_buf;
6070 
6071 char *
6072 #ifdef _LIBC
6073 /* Make these definitions weak in libc, so POSIX programs can redefine
6074    these names if they don't use our functions, and still use
6075    regcomp/regexec below without link errors.  */
6076 weak_function
6077 #endif
re_comp(s)6078 re_comp (s)
6079     const char *s;
6080 {
6081   reg_errcode_t ret;
6082 
6083   if (!s)
6084     {
6085       if (!re_comp_buf.buffer)
6086 	return gettext ("No previous regular expression");
6087       return 0;
6088     }
6089 
6090   if (!re_comp_buf.buffer)
6091     {
6092       re_comp_buf.buffer = (unsigned char *) malloc (200);
6093       if (re_comp_buf.buffer == NULL)
6094         /* CVS: Yes, we're discarding `const' here if !HAVE_LIBINTL.  */
6095         return (char *) gettext (re_error_msgid[(int) REG_ESPACE]);
6096       re_comp_buf.allocated = 200;
6097 
6098       re_comp_buf.fastmap = (char *) malloc (1 << BYTEWIDTH);
6099       if (re_comp_buf.fastmap == NULL)
6100 	/* CVS: Yes, we're discarding `const' here if !HAVE_LIBINTL.  */
6101 	return (char *) gettext (re_error_msgid[(int) REG_ESPACE]);
6102     }
6103 
6104   /* Since `re_exec' always passes NULL for the `regs' argument, we
6105      don't need to initialize the pattern buffer fields which affect it.  */
6106 
6107   /* Match anchors at newlines.  */
6108   re_comp_buf.newline_anchor = 1;
6109 
6110   ret = regex_compile (s, strlen (s), re_syntax_options, &re_comp_buf);
6111 
6112   if (!ret)
6113     return NULL;
6114 
6115   /* Yes, we're discarding `const' here if !HAVE_LIBINTL.  */
6116   return (char *) gettext (re_error_msgid[(int) ret]);
6117 }
6118 
6119 
6120 int
6121 #ifdef _LIBC
6122 weak_function
6123 #endif
re_exec(s)6124 re_exec (s)
6125     const char *s;
6126 {
6127   const int len = strlen (s);
6128   return
6129     0 <= re_search (&re_comp_buf, s, len, 0, len, (struct re_registers *) 0);
6130 }
6131 #endif /* _REGEX_RE_COMP */
6132 
6133 /* POSIX.2 functions.  Don't define these for Emacs.  */
6134 
6135 #ifndef emacs
6136 
6137 /* regcomp takes a regular expression as a string and compiles it.
6138 
6139    PREG is a regex_t *.  We do not expect any fields to be initialized,
6140    since POSIX says we shouldn't.  Thus, we set
6141 
6142      `buffer' to the compiled pattern;
6143      `used' to the length of the compiled pattern;
6144      `syntax' to RE_SYNTAX_POSIX_EXTENDED if the
6145        REG_EXTENDED bit in CFLAGS is set; otherwise, to
6146        RE_SYNTAX_POSIX_BASIC;
6147      `newline_anchor' to REG_NEWLINE being set in CFLAGS;
6148      `fastmap' and `fastmap_accurate' to zero;
6149      `re_nsub' to the number of subexpressions in PATTERN.
6150 
6151    PATTERN is the address of the pattern string.
6152 
6153    CFLAGS is a series of bits which affect compilation.
6154 
6155      If REG_EXTENDED is set, we use POSIX extended syntax; otherwise, we
6156      use POSIX basic syntax.
6157 
6158      If REG_NEWLINE is set, then . and [^...] don't match newline.
6159      Also, regexec will try a match beginning after every newline.
6160 
6161      If REG_ICASE is set, then we considers upper- and lowercase
6162      versions of letters to be equivalent when matching.
6163 
6164      If REG_NOSUB is set, then when PREG is passed to regexec, that
6165      routine will report only success or failure, and nothing about the
6166      registers.
6167 
6168    It returns 0 if it succeeds, nonzero if it doesn't.  (See regex.h for
6169    the return codes and their meanings.)  */
6170 
6171 int
regcomp(preg,pattern,cflags)6172 regcomp (preg, pattern, cflags)
6173     regex_t *preg;
6174     const char *pattern;
6175     int cflags;
6176 {
6177   reg_errcode_t ret;
6178   unsigned syntax
6179     = (cflags & REG_EXTENDED) ?
6180       RE_SYNTAX_POSIX_EXTENDED : RE_SYNTAX_POSIX_BASIC;
6181 
6182   /* regex_compile will allocate the space for the compiled pattern.  */
6183   preg->buffer = 0;
6184   preg->allocated = 0;
6185   preg->used = 0;
6186 
6187   /* Don't bother to use a fastmap when searching.  This simplifies the
6188      REG_NEWLINE case: if we used a fastmap, we'd have to put all the
6189      characters after newlines into the fastmap.  This way, we just try
6190      every character.  */
6191   preg->fastmap = 0;
6192 
6193   if (cflags & REG_ICASE)
6194     {
6195       unsigned i;
6196 
6197       preg->translate
6198 	= (RE_TRANSLATE_TYPE) malloc (CHAR_SET_SIZE
6199 				      * sizeof (*(RE_TRANSLATE_TYPE)0));
6200       if (preg->translate == NULL)
6201         return (int) REG_ESPACE;
6202 
6203       /* Map uppercase characters to corresponding lowercase ones.  */
6204       for (i = 0; i < CHAR_SET_SIZE; i++)
6205         preg->translate[i] = ISUPPER (i) ? tolower (i) : i;
6206     }
6207   else
6208     preg->translate = NULL;
6209 
6210   /* If REG_NEWLINE is set, newlines are treated differently.  */
6211   if (cflags & REG_NEWLINE)
6212     { /* REG_NEWLINE implies neither . nor [^...] match newline.  */
6213       syntax &= ~RE_DOT_NEWLINE;
6214       syntax |= RE_HAT_LISTS_NOT_NEWLINE;
6215       /* It also changes the matching behavior.  */
6216       preg->newline_anchor = 1;
6217     }
6218   else
6219     preg->newline_anchor = 0;
6220 
6221   preg->no_sub = !!(cflags & REG_NOSUB);
6222 
6223   /* POSIX says a null character in the pattern terminates it, so we
6224      can use strlen here in compiling the pattern.  */
6225   ret = regex_compile (pattern, strlen (pattern), syntax, preg);
6226 
6227   /* POSIX doesn't distinguish between an unmatched open-group and an
6228      unmatched close-group: both are REG_EPAREN.  */
6229   if (ret == REG_ERPAREN) ret = REG_EPAREN;
6230 
6231   return (int) ret;
6232 }
6233 
6234 
6235 /* regexec searches for a given pattern, specified by PREG, in the
6236    string STRING.
6237 
6238    If NMATCH is zero or REG_NOSUB was set in the cflags argument to
6239    `regcomp', we ignore PMATCH.  Otherwise, we assume PMATCH has at
6240    least NMATCH elements, and we set them to the offsets of the
6241    corresponding matched substrings.
6242 
6243    EFLAGS specifies `execution flags' which affect matching: if
6244    REG_NOTBOL is set, then ^ does not match at the beginning of the
6245    string; if REG_NOTEOL is set, then $ does not match at the end.
6246 
6247    We return 0 if we find a match and REG_NOMATCH if not.  */
6248 
6249 int
regexec(preg,string,nmatch,pmatch,eflags)6250 regexec (preg, string, nmatch, pmatch, eflags)
6251     const regex_t *preg;
6252     const char *string;
6253     size_t nmatch;
6254     regmatch_t pmatch[];
6255     int eflags;
6256 {
6257   int ret;
6258   struct re_registers regs;
6259   regex_t private_preg;
6260   int len = strlen (string);
6261   boolean want_reg_info = !preg->no_sub && nmatch > 0;
6262 
6263   private_preg = *preg;
6264 
6265   private_preg.not_bol = !!(eflags & REG_NOTBOL);
6266   private_preg.not_eol = !!(eflags & REG_NOTEOL);
6267 
6268   /* The user has told us exactly how many registers to return
6269      information about, via `nmatch'.  We have to pass that on to the
6270      matching routines.  */
6271   private_preg.regs_allocated = REGS_FIXED;
6272 
6273   if (want_reg_info)
6274     {
6275       regs.num_regs = nmatch;
6276       regs.start = TALLOC (nmatch, regoff_t);
6277       regs.end = TALLOC (nmatch, regoff_t);
6278       if (regs.start == NULL || regs.end == NULL)
6279         return (int) REG_NOMATCH;
6280     }
6281 
6282   /* Perform the searching operation.  */
6283   ret = re_search (&private_preg, string, len,
6284                    /* start: */ 0, /* range: */ len,
6285                    want_reg_info ? &regs : (struct re_registers *) 0);
6286 
6287   /* Copy the register information to the POSIX structure.  */
6288   if (want_reg_info)
6289     {
6290       if (ret >= 0)
6291         {
6292           unsigned r;
6293 
6294           for (r = 0; r < nmatch; r++)
6295             {
6296               pmatch[r].rm_so = regs.start[r];
6297               pmatch[r].rm_eo = regs.end[r];
6298             }
6299         }
6300 
6301       /* If we needed the temporary register info, free the space now.  */
6302       free (regs.start);
6303       free (regs.end);
6304     }
6305 
6306   /* We want zero return to mean success, unlike `re_search'.  */
6307   return ret >= 0 ? (int) REG_NOERROR : (int) REG_NOMATCH;
6308 }
6309 
6310 
6311 /* Returns a message corresponding to an error code, ERRCODE, returned
6312    from either regcomp or regexec.   We don't use PREG here.  */
6313 
6314 size_t
regerror(errcode,preg,errbuf,errbuf_size)6315 regerror (errcode, preg, errbuf, errbuf_size)
6316     int errcode;
6317     const regex_t *preg;
6318     char *errbuf;
6319     size_t errbuf_size;
6320 {
6321   const char *msg;
6322   size_t msg_size;
6323 
6324   if (errcode < 0
6325       || errcode >= (sizeof (re_error_msgid) / sizeof (re_error_msgid[0])))
6326     /* Only error codes returned by the rest of the code should be passed
6327        to this routine.  If we are given anything else, or if other regex
6328        code generates an invalid error code, then the program has a bug.
6329        Dump core so we can fix it.  */
6330     abort ();
6331 
6332   msg = gettext (re_error_msgid[errcode]);
6333 
6334   msg_size = strlen (msg) + 1; /* Includes the null.  */
6335 
6336   if (errbuf_size != 0)
6337     {
6338       if (msg_size > errbuf_size)
6339         {
6340           strncpy (errbuf, msg, errbuf_size - 1);
6341           errbuf[errbuf_size - 1] = 0;
6342         }
6343       else
6344         strcpy (errbuf, msg);
6345     }
6346 
6347   return msg_size;
6348 }
6349 
6350 
6351 /* Free dynamically allocated space used by PREG.  */
6352 
6353 void
regfree(preg)6354 regfree (preg)
6355     regex_t *preg;
6356 {
6357   if (preg->buffer != NULL)
6358     free (preg->buffer);
6359   preg->buffer = NULL;
6360 
6361   preg->allocated = 0;
6362   preg->used = 0;
6363 
6364   if (preg->fastmap != NULL)
6365     free (preg->fastmap);
6366   preg->fastmap = NULL;
6367   preg->fastmap_accurate = 0;
6368 
6369   if (preg->translate != NULL)
6370     free (preg->translate);
6371   preg->translate = NULL;
6372 }
6373 
6374 #endif /* not emacs  */
6375