xref: /openbsd-src/gnu/usr.bin/cvs/lib/regex.c (revision a4afd6dad3fba28f80e70208181c06c482259988)
1 /* Extended regular expression matching and search library,
2    version 0.12.
3    (Implements POSIX draft P10003.2/D11.2, except for
4    internationalization features.)
5 
6    Copyright (C) 1993 Free Software Foundation, Inc.
7 
8    This program is free software; you can redistribute it and/or modify
9    it under the terms of the GNU General Public License as published by
10    the Free Software Foundation; either version 2, or (at your option)
11    any later version.
12 
13    This program is distributed in the hope that it will be useful,
14    but WITHOUT ANY WARRANTY; without even the implied warranty of
15    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16    GNU General Public License for more details.
17 
18    You should have received a copy of the GNU General Public License
19    along with this program; if not, write to the Free Software
20    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.  */
21 
22 /* Trying to define this in the makefile would get hairy, unless we can
23    more gracefully do it for NT, OS/2, unix, etc.  */
24 #define REGEX_MALLOC 1
25 
26 /* AIX requires this to be the first thing in the file. */
27 #if defined (_AIX) && !defined (REGEX_MALLOC)
28   #pragma alloca
29 #endif
30 
31 #define _GNU_SOURCE
32 
33 /* We need this for `regex.h', and perhaps for the Emacs include files.  */
34 #include <sys/types.h>
35 
36 #ifdef HAVE_CONFIG_H
37 #include "config.h"
38 #endif
39 
40 /* The `emacs' switch turns on certain matching commands
41    that make sense only in Emacs. */
42 #ifdef emacs
43 
44 #include "lisp.h"
45 #include "buffer.h"
46 #include "syntax.h"
47 
48 /* Emacs uses `NULL' as a predicate.  */
49 #undef NULL
50 
51 #else  /* not emacs */
52 
53 /* We used to test for `BSTRING' here, but only GCC and Emacs define
54    `BSTRING', as far as I know, and neither of them use this code.  */
55 #if HAVE_STRING_H || STDC_HEADERS
56 #include <string.h>
57 #ifndef bcmp
58 #define bcmp(s1, s2, n)	memcmp ((s1), (s2), (n))
59 #endif
60 #ifndef bcopy
61 #define bcopy(s, d, n)	memcpy ((d), (s), (n))
62 #endif
63 #ifndef bzero
64 #define bzero(s, n)	memset ((s), 0, (n))
65 #endif
66 #else
67 #include <strings.h>
68 #endif
69 
70 #ifdef STDC_HEADERS
71 #include <stdlib.h>
72 #else
73 char *malloc ();
74 char *realloc ();
75 #endif
76 
77 
78 /* Define the syntax stuff for \<, \>, etc.  */
79 
80 /* This must be nonzero for the wordchar and notwordchar pattern
81    commands in re_match_2.  */
82 #ifndef Sword
83 #define Sword 1
84 #endif
85 
86 #ifdef SYNTAX_TABLE
87 
88 extern char *re_syntax_table;
89 
90 #else /* not SYNTAX_TABLE */
91 
92 /* How many characters in the character set.  */
93 #define CHAR_SET_SIZE 256
94 
95 static char re_syntax_table[CHAR_SET_SIZE];
96 
97 static void
98 init_syntax_once ()
99 {
100    register int c;
101    static int done = 0;
102 
103    if (done)
104      return;
105 
106    bzero (re_syntax_table, sizeof re_syntax_table);
107 
108    for (c = 'a'; c <= 'z'; c++)
109      re_syntax_table[c] = Sword;
110 
111    for (c = 'A'; c <= 'Z'; c++)
112      re_syntax_table[c] = Sword;
113 
114    for (c = '0'; c <= '9'; c++)
115      re_syntax_table[c] = Sword;
116 
117    re_syntax_table['_'] = Sword;
118 
119    done = 1;
120 }
121 
122 #endif /* not SYNTAX_TABLE */
123 
124 #define SYNTAX(c) re_syntax_table[c]
125 
126 #endif /* not emacs */
127 
128 /* Get the interface, including the syntax bits.  */
129 #include "regex.h"
130 
131 /* isalpha etc. are used for the character classes.  */
132 #include <ctype.h>
133 
134 #ifndef isascii
135 #define isascii(c) 1
136 #endif
137 
138 #ifdef isblank
139 #define ISBLANK(c) (isascii (c) && isblank (c))
140 #else
141 #define ISBLANK(c) ((c) == ' ' || (c) == '\t')
142 #endif
143 #ifdef isgraph
144 #define ISGRAPH(c) (isascii (c) && isgraph (c))
145 #else
146 #define ISGRAPH(c) (isascii (c) && isprint (c) && !isspace (c))
147 #endif
148 
149 #define ISPRINT(c) (isascii (c) && isprint (c))
150 #define ISDIGIT(c) (isascii (c) && isdigit (c))
151 #define ISALNUM(c) (isascii (c) && isalnum (c))
152 #define ISALPHA(c) (isascii (c) && isalpha (c))
153 #define ISCNTRL(c) (isascii (c) && iscntrl (c))
154 #define ISLOWER(c) (isascii (c) && islower (c))
155 #define ISPUNCT(c) (isascii (c) && ispunct (c))
156 #define ISSPACE(c) (isascii (c) && isspace (c))
157 #define ISUPPER(c) (isascii (c) && isupper (c))
158 #define ISXDIGIT(c) (isascii (c) && isxdigit (c))
159 
160 #ifndef NULL
161 #define NULL 0
162 #endif
163 
164 /* We remove any previous definition of `SIGN_EXTEND_CHAR',
165    since ours (we hope) works properly with all combinations of
166    machines, compilers, `char' and `unsigned char' argument types.
167    (Per Bothner suggested the basic approach.)  */
168 #undef SIGN_EXTEND_CHAR
169 #if __STDC__
170 #define SIGN_EXTEND_CHAR(c) ((signed char) (c))
171 #else  /* not __STDC__ */
172 /* As in Harbison and Steele.  */
173 #define SIGN_EXTEND_CHAR(c) ((((unsigned char) (c)) ^ 128) - 128)
174 #endif
175 
176 /* Should we use malloc or alloca?  If REGEX_MALLOC is not defined, we
177    use `alloca' instead of `malloc'.  This is because using malloc in
178    re_search* or re_match* could cause memory leaks when C-g is used in
179    Emacs; also, malloc is slower and causes storage fragmentation.  On
180    the other hand, malloc is more portable, and easier to debug.
181 
182    Because we sometimes use alloca, some routines have to be macros,
183    not functions -- `alloca'-allocated space disappears at the end of the
184    function it is called in.  */
185 
186 #ifdef REGEX_MALLOC
187 
188 #define REGEX_ALLOCATE malloc
189 #define REGEX_REALLOCATE(source, osize, nsize) realloc (source, nsize)
190 
191 #else /* not REGEX_MALLOC  */
192 
193 /* Emacs already defines alloca, sometimes.  */
194 #ifndef alloca
195 
196 /* Make alloca work the best possible way.  */
197 #ifdef __GNUC__
198 #define alloca __builtin_alloca
199 #else /* not __GNUC__ */
200 #if HAVE_ALLOCA_H
201 #include <alloca.h>
202 #else /* not __GNUC__ or HAVE_ALLOCA_H */
203 #ifndef _AIX /* Already did AIX, up at the top.  */
204 char *alloca ();
205 #endif /* not _AIX */
206 #endif /* not HAVE_ALLOCA_H */
207 #endif /* not __GNUC__ */
208 
209 #endif /* not alloca */
210 
211 #define REGEX_ALLOCATE alloca
212 
213 /* Assumes a `char *destination' variable.  */
214 #define REGEX_REALLOCATE(source, osize, nsize)				\
215   (destination = (char *) alloca (nsize),				\
216    bcopy (source, destination, osize),					\
217    destination)
218 
219 #endif /* not REGEX_MALLOC */
220 
221 
222 /* True if `size1' is non-NULL and PTR is pointing anywhere inside
223    `string1' or just past its end.  This works if PTR is NULL, which is
224    a good thing.  */
225 #define FIRST_STRING_P(ptr) 					\
226   (size1 && string1 <= (ptr) && (ptr) <= string1 + size1)
227 
228 /* (Re)Allocate N items of type T using malloc, or fail.  */
229 #define TALLOC(n, t) ((t *) malloc ((n) * sizeof (t)))
230 #define RETALLOC(addr, n, t) ((addr) = (t *) realloc (addr, (n) * sizeof (t)))
231 #define REGEX_TALLOC(n, t) ((t *) REGEX_ALLOCATE ((n) * sizeof (t)))
232 
233 #define BYTEWIDTH 8 /* In bits.  */
234 
235 #define STREQ(s1, s2) ((strcmp (s1, s2) == 0))
236 
237 /* The Mac CodeWarrier9 compiler defines MAX and MIN.  */
238 #ifndef MAX
239 #define MAX(a, b) ((a) > (b) ? (a) : (b))
240 #endif
241 #ifndef MIN
242 #define MIN(a, b) ((a) < (b) ? (a) : (b))
243 #endif
244 
245 typedef char boolean;
246 #define false 0
247 #define true 1
248 
249 /* These are the command codes that appear in compiled regular
250    expressions.  Some opcodes are followed by argument bytes.  A
251    command code can specify any interpretation whatsoever for its
252    arguments.  Zero bytes may appear in the compiled regular expression.
253 
254    The value of `exactn' is needed in search.c (search_buffer) in Emacs.
255    So regex.h defines a symbol `RE_EXACTN_VALUE' to be 1; the value of
256    `exactn' we use here must also be 1.  */
257 
258 typedef enum
259 {
260   no_op = 0,
261 
262         /* Followed by one byte giving n, then by n literal bytes.  */
263   exactn = 1,
264 
265         /* Matches any (more or less) character.  */
266   anychar,
267 
268         /* Matches any one char belonging to specified set.  First
269            following byte is number of bitmap bytes.  Then come bytes
270            for a bitmap saying which chars are in.  Bits in each byte
271            are ordered low-bit-first.  A character is in the set if its
272            bit is 1.  A character too large to have a bit in the map is
273            automatically not in the set.  */
274   charset,
275 
276         /* Same parameters as charset, but match any character that is
277            not one of those specified.  */
278   charset_not,
279 
280         /* Start remembering the text that is matched, for storing in a
281            register.  Followed by one byte with the register number, in
282            the range 0 to one less than the pattern buffer's re_nsub
283            field.  Then followed by one byte with the number of groups
284            inner to this one.  (This last has to be part of the
285            start_memory only because we need it in the on_failure_jump
286            of re_match_2.)  */
287   start_memory,
288 
289         /* Stop remembering the text that is matched and store it in a
290            memory register.  Followed by one byte with the register
291            number, in the range 0 to one less than `re_nsub' in the
292            pattern buffer, and one byte with the number of inner groups,
293            just like `start_memory'.  (We need the number of inner
294            groups here because we don't have any easy way of finding the
295            corresponding start_memory when we're at a stop_memory.)  */
296   stop_memory,
297 
298         /* Match a duplicate of something remembered. Followed by one
299            byte containing the register number.  */
300   duplicate,
301 
302         /* Fail unless at beginning of line.  */
303   begline,
304 
305         /* Fail unless at end of line.  */
306   endline,
307 
308         /* Succeeds if at beginning of buffer (if emacs) or at beginning
309            of string to be matched (if not).  */
310   begbuf,
311 
312         /* Analogously, for end of buffer/string.  */
313   endbuf,
314 
315         /* Followed by two byte relative address to which to jump.  */
316   jump,
317 
318 	/* Same as jump, but marks the end of an alternative.  */
319   jump_past_alt,
320 
321         /* Followed by two-byte relative address of place to resume at
322            in case of failure.  */
323   on_failure_jump,
324 
325         /* Like on_failure_jump, but pushes a placeholder instead of the
326            current string position when executed.  */
327   on_failure_keep_string_jump,
328 
329         /* Throw away latest failure point and then jump to following
330            two-byte relative address.  */
331   pop_failure_jump,
332 
333         /* Change to pop_failure_jump if know won't have to backtrack to
334            match; otherwise change to jump.  This is used to jump
335            back to the beginning of a repeat.  If what follows this jump
336            clearly won't match what the repeat does, such that we can be
337            sure that there is no use backtracking out of repetitions
338            already matched, then we change it to a pop_failure_jump.
339            Followed by two-byte address.  */
340   maybe_pop_jump,
341 
342         /* Jump to following two-byte address, and push a dummy failure
343            point. This failure point will be thrown away if an attempt
344            is made to use it for a failure.  A `+' construct makes this
345            before the first repeat.  Also used as an intermediary kind
346            of jump when compiling an alternative.  */
347   dummy_failure_jump,
348 
349 	/* Push a dummy failure point and continue.  Used at the end of
350 	   alternatives.  */
351   push_dummy_failure,
352 
353         /* Followed by two-byte relative address and two-byte number n.
354            After matching N times, jump to the address upon failure.  */
355   succeed_n,
356 
357         /* Followed by two-byte relative address, and two-byte number n.
358            Jump to the address N times, then fail.  */
359   jump_n,
360 
361         /* Set the following two-byte relative address to the
362            subsequent two-byte number.  The address *includes* the two
363            bytes of number.  */
364   set_number_at,
365 
366   wordchar,	/* Matches any word-constituent character.  */
367   notwordchar,	/* Matches any char that is not a word-constituent.  */
368 
369   wordbeg,	/* Succeeds if at word beginning.  */
370   wordend,	/* Succeeds if at word end.  */
371 
372   wordbound,	/* Succeeds if at a word boundary.  */
373   notwordbound	/* Succeeds if not at a word boundary.  */
374 
375 #ifdef emacs
376   ,before_dot,	/* Succeeds if before point.  */
377   at_dot,	/* Succeeds if at point.  */
378   after_dot,	/* Succeeds if after point.  */
379 
380 	/* Matches any character whose syntax is specified.  Followed by
381            a byte which contains a syntax code, e.g., Sword.  */
382   syntaxspec,
383 
384 	/* Matches any character whose syntax is not that specified.  */
385   notsyntaxspec
386 #endif /* emacs */
387 } re_opcode_t;
388 
389 /* Common operations on the compiled pattern.  */
390 
391 /* Store NUMBER in two contiguous bytes starting at DESTINATION.  */
392 
393 #define STORE_NUMBER(destination, number)				\
394   do {									\
395     (destination)[0] = (number) & 0377;					\
396     (destination)[1] = (number) >> 8;					\
397   } while (0)
398 
399 /* Same as STORE_NUMBER, except increment DESTINATION to
400    the byte after where the number is stored.  Therefore, DESTINATION
401    must be an lvalue.  */
402 
403 #define STORE_NUMBER_AND_INCR(destination, number)			\
404   do {									\
405     STORE_NUMBER (destination, number);					\
406     (destination) += 2;							\
407   } while (0)
408 
409 /* Put into DESTINATION a number stored in two contiguous bytes starting
410    at SOURCE.  */
411 
412 #define EXTRACT_NUMBER(destination, source)				\
413   do {									\
414     (destination) = *(source) & 0377;					\
415     (destination) += SIGN_EXTEND_CHAR (*((source) + 1)) << 8;		\
416   } while (0)
417 
418 #ifdef DEBUG
419 static void
420 extract_number (dest, source)
421     int *dest;
422     unsigned char *source;
423 {
424   int temp = SIGN_EXTEND_CHAR (*(source + 1));
425   *dest = *source & 0377;
426   *dest += temp << 8;
427 }
428 
429 #ifndef EXTRACT_MACROS /* To debug the macros.  */
430 #undef EXTRACT_NUMBER
431 #define EXTRACT_NUMBER(dest, src) extract_number (&dest, src)
432 #endif /* not EXTRACT_MACROS */
433 
434 #endif /* DEBUG */
435 
436 /* Same as EXTRACT_NUMBER, except increment SOURCE to after the number.
437    SOURCE must be an lvalue.  */
438 
439 #define EXTRACT_NUMBER_AND_INCR(destination, source)			\
440   do {									\
441     EXTRACT_NUMBER (destination, source);				\
442     (source) += 2; 							\
443   } while (0)
444 
445 #ifdef DEBUG
446 static void
447 extract_number_and_incr (destination, source)
448     int *destination;
449     unsigned char **source;
450 {
451   extract_number (destination, *source);
452   *source += 2;
453 }
454 
455 #ifndef EXTRACT_MACROS
456 #undef EXTRACT_NUMBER_AND_INCR
457 #define EXTRACT_NUMBER_AND_INCR(dest, src) \
458   extract_number_and_incr (&dest, &src)
459 #endif /* not EXTRACT_MACROS */
460 
461 #endif /* DEBUG */
462 
463 /* If DEBUG is defined, Regex prints many voluminous messages about what
464    it is doing (if the variable `debug' is nonzero).  If linked with the
465    main program in `iregex.c', you can enter patterns and strings
466    interactively.  And if linked with the main program in `main.c' and
467    the other test files, you can run the already-written tests.  */
468 
469 #ifdef DEBUG
470 
471 /* We use standard I/O for debugging.  */
472 #include <stdio.h>
473 
474 /* It is useful to test things that ``must'' be true when debugging.  */
475 #include <assert.h>
476 
477 static int debug = 0;
478 
479 #define DEBUG_STATEMENT(e) e
480 #define DEBUG_PRINT1(x) if (debug) printf (x)
481 #define DEBUG_PRINT2(x1, x2) if (debug) printf (x1, x2)
482 #define DEBUG_PRINT3(x1, x2, x3) if (debug) printf (x1, x2, x3)
483 #define DEBUG_PRINT4(x1, x2, x3, x4) if (debug) printf (x1, x2, x3, x4)
484 #define DEBUG_PRINT_COMPILED_PATTERN(p, s, e) 				\
485   if (debug) print_partial_compiled_pattern (s, e)
486 #define DEBUG_PRINT_DOUBLE_STRING(w, s1, sz1, s2, sz2)			\
487   if (debug) print_double_string (w, s1, sz1, s2, sz2)
488 
489 
490 extern void printchar ();
491 
492 /* Print the fastmap in human-readable form.  */
493 
494 void
495 print_fastmap (fastmap)
496     char *fastmap;
497 {
498   unsigned was_a_range = 0;
499   unsigned i = 0;
500 
501   while (i < (1 << BYTEWIDTH))
502     {
503       if (fastmap[i++])
504 	{
505 	  was_a_range = 0;
506           printchar (i - 1);
507           while (i < (1 << BYTEWIDTH)  &&  fastmap[i])
508             {
509               was_a_range = 1;
510               i++;
511             }
512 	  if (was_a_range)
513             {
514               printf ("-");
515               printchar (i - 1);
516             }
517         }
518     }
519   putchar ('\n');
520 }
521 
522 
523 /* Print a compiled pattern string in human-readable form, starting at
524    the START pointer into it and ending just before the pointer END.  */
525 
526 void
527 print_partial_compiled_pattern (start, end)
528     unsigned char *start;
529     unsigned char *end;
530 {
531   int mcnt, mcnt2;
532   unsigned char *p = start;
533   unsigned char *pend = end;
534 
535   if (start == NULL)
536     {
537       printf ("(null)\n");
538       return;
539     }
540 
541   /* Loop over pattern commands.  */
542   while (p < pend)
543     {
544       switch ((re_opcode_t) *p++)
545 	{
546         case no_op:
547           printf ("/no_op");
548           break;
549 
550 	case exactn:
551 	  mcnt = *p++;
552           printf ("/exactn/%d", mcnt);
553           do
554 	    {
555               putchar ('/');
556 	      printchar (*p++);
557             }
558           while (--mcnt);
559           break;
560 
561 	case start_memory:
562           mcnt = *p++;
563           printf ("/start_memory/%d/%d", mcnt, *p++);
564           break;
565 
566 	case stop_memory:
567           mcnt = *p++;
568 	  printf ("/stop_memory/%d/%d", mcnt, *p++);
569           break;
570 
571 	case duplicate:
572 	  printf ("/duplicate/%d", *p++);
573 	  break;
574 
575 	case anychar:
576 	  printf ("/anychar");
577 	  break;
578 
579 	case charset:
580         case charset_not:
581           {
582             register int c;
583 
584             printf ("/charset%s",
585 	            (re_opcode_t) *(p - 1) == charset_not ? "_not" : "");
586 
587             assert (p + *p < pend);
588 
589             for (c = 0; c < *p; c++)
590               {
591                 unsigned bit;
592                 unsigned char map_byte = p[1 + c];
593 
594                 putchar ('/');
595 
596 		for (bit = 0; bit < BYTEWIDTH; bit++)
597                   if (map_byte & (1 << bit))
598                     printchar (c * BYTEWIDTH + bit);
599               }
600 	    p += 1 + *p;
601 	    break;
602 	  }
603 
604 	case begline:
605 	  printf ("/begline");
606           break;
607 
608 	case endline:
609           printf ("/endline");
610           break;
611 
612 	case on_failure_jump:
613           extract_number_and_incr (&mcnt, &p);
614   	  printf ("/on_failure_jump/0/%d", mcnt);
615           break;
616 
617 	case on_failure_keep_string_jump:
618           extract_number_and_incr (&mcnt, &p);
619   	  printf ("/on_failure_keep_string_jump/0/%d", mcnt);
620           break;
621 
622 	case dummy_failure_jump:
623           extract_number_and_incr (&mcnt, &p);
624   	  printf ("/dummy_failure_jump/0/%d", mcnt);
625           break;
626 
627 	case push_dummy_failure:
628           printf ("/push_dummy_failure");
629           break;
630 
631         case maybe_pop_jump:
632           extract_number_and_incr (&mcnt, &p);
633   	  printf ("/maybe_pop_jump/0/%d", mcnt);
634 	  break;
635 
636         case pop_failure_jump:
637 	  extract_number_and_incr (&mcnt, &p);
638   	  printf ("/pop_failure_jump/0/%d", mcnt);
639 	  break;
640 
641         case jump_past_alt:
642 	  extract_number_and_incr (&mcnt, &p);
643   	  printf ("/jump_past_alt/0/%d", mcnt);
644 	  break;
645 
646         case jump:
647 	  extract_number_and_incr (&mcnt, &p);
648   	  printf ("/jump/0/%d", mcnt);
649 	  break;
650 
651         case succeed_n:
652           extract_number_and_incr (&mcnt, &p);
653           extract_number_and_incr (&mcnt2, &p);
654  	  printf ("/succeed_n/0/%d/0/%d", mcnt, mcnt2);
655           break;
656 
657         case jump_n:
658           extract_number_and_incr (&mcnt, &p);
659           extract_number_and_incr (&mcnt2, &p);
660  	  printf ("/jump_n/0/%d/0/%d", mcnt, mcnt2);
661           break;
662 
663         case set_number_at:
664           extract_number_and_incr (&mcnt, &p);
665           extract_number_and_incr (&mcnt2, &p);
666  	  printf ("/set_number_at/0/%d/0/%d", mcnt, mcnt2);
667           break;
668 
669         case wordbound:
670 	  printf ("/wordbound");
671 	  break;
672 
673 	case notwordbound:
674 	  printf ("/notwordbound");
675           break;
676 
677 	case wordbeg:
678 	  printf ("/wordbeg");
679 	  break;
680 
681 	case wordend:
682 	  printf ("/wordend");
683 
684 #ifdef emacs
685 	case before_dot:
686 	  printf ("/before_dot");
687           break;
688 
689 	case at_dot:
690 	  printf ("/at_dot");
691           break;
692 
693 	case after_dot:
694 	  printf ("/after_dot");
695           break;
696 
697 	case syntaxspec:
698           printf ("/syntaxspec");
699 	  mcnt = *p++;
700 	  printf ("/%d", mcnt);
701           break;
702 
703 	case notsyntaxspec:
704           printf ("/notsyntaxspec");
705 	  mcnt = *p++;
706 	  printf ("/%d", mcnt);
707 	  break;
708 #endif /* emacs */
709 
710 	case wordchar:
711 	  printf ("/wordchar");
712           break;
713 
714 	case notwordchar:
715 	  printf ("/notwordchar");
716           break;
717 
718 	case begbuf:
719 	  printf ("/begbuf");
720           break;
721 
722 	case endbuf:
723 	  printf ("/endbuf");
724           break;
725 
726         default:
727           printf ("?%d", *(p-1));
728 	}
729     }
730   printf ("/\n");
731 }
732 
733 
734 void
735 print_compiled_pattern (bufp)
736     struct re_pattern_buffer *bufp;
737 {
738   unsigned char *buffer = bufp->buffer;
739 
740   print_partial_compiled_pattern (buffer, buffer + bufp->used);
741   printf ("%d bytes used/%d bytes allocated.\n", bufp->used, bufp->allocated);
742 
743   if (bufp->fastmap_accurate && bufp->fastmap)
744     {
745       printf ("fastmap: ");
746       print_fastmap (bufp->fastmap);
747     }
748 
749   printf ("re_nsub: %d\t", bufp->re_nsub);
750   printf ("regs_alloc: %d\t", bufp->regs_allocated);
751   printf ("can_be_null: %d\t", bufp->can_be_null);
752   printf ("newline_anchor: %d\n", bufp->newline_anchor);
753   printf ("no_sub: %d\t", bufp->no_sub);
754   printf ("not_bol: %d\t", bufp->not_bol);
755   printf ("not_eol: %d\t", bufp->not_eol);
756   printf ("syntax: %d\n", bufp->syntax);
757   /* Perhaps we should print the translate table?  */
758 }
759 
760 
761 void
762 print_double_string (where, string1, size1, string2, size2)
763     const char *where;
764     const char *string1;
765     const char *string2;
766     int size1;
767     int size2;
768 {
769   unsigned this_char;
770 
771   if (where == NULL)
772     printf ("(null)");
773   else
774     {
775       if (FIRST_STRING_P (where))
776         {
777           for (this_char = where - string1; this_char < size1; this_char++)
778             printchar (string1[this_char]);
779 
780           where = string2;
781         }
782 
783       for (this_char = where - string2; this_char < size2; this_char++)
784         printchar (string2[this_char]);
785     }
786 }
787 
788 #else /* not DEBUG */
789 
790 #undef assert
791 #define assert(e)
792 
793 #define DEBUG_STATEMENT(e)
794 #define DEBUG_PRINT1(x)
795 #define DEBUG_PRINT2(x1, x2)
796 #define DEBUG_PRINT3(x1, x2, x3)
797 #define DEBUG_PRINT4(x1, x2, x3, x4)
798 #define DEBUG_PRINT_COMPILED_PATTERN(p, s, e)
799 #define DEBUG_PRINT_DOUBLE_STRING(w, s1, sz1, s2, sz2)
800 
801 #endif /* not DEBUG */
802 
803 /* Set by `re_set_syntax' to the current regexp syntax to recognize.  Can
804    also be assigned to arbitrarily: each pattern buffer stores its own
805    syntax, so it can be changed between regex compilations.  */
806 reg_syntax_t re_syntax_options = RE_SYNTAX_EMACS;
807 
808 
809 /* Specify the precise syntax of regexps for compilation.  This provides
810    for compatibility for various utilities which historically have
811    different, incompatible syntaxes.
812 
813    The argument SYNTAX is a bit mask comprised of the various bits
814    defined in regex.h.  We return the old syntax.  */
815 
816 reg_syntax_t
817 re_set_syntax (syntax)
818     reg_syntax_t syntax;
819 {
820   reg_syntax_t ret = re_syntax_options;
821 
822   re_syntax_options = syntax;
823   return ret;
824 }
825 
826 /* This table gives an error message for each of the error codes listed
827    in regex.h.  Obviously the order here has to be same as there.  */
828 
829 static const char *re_error_msg[] =
830   { NULL,					/* REG_NOERROR */
831     "No match",					/* REG_NOMATCH */
832     "Invalid regular expression",		/* REG_BADPAT */
833     "Invalid collation character",		/* REG_ECOLLATE */
834     "Invalid character class name",		/* REG_ECTYPE */
835     "Trailing backslash",			/* REG_EESCAPE */
836     "Invalid back reference",			/* REG_ESUBREG */
837     "Unmatched [ or [^",			/* REG_EBRACK */
838     "Unmatched ( or \\(",			/* REG_EPAREN */
839     "Unmatched \\{",				/* REG_EBRACE */
840     "Invalid content of \\{\\}",		/* REG_BADBR */
841     "Invalid range end",			/* REG_ERANGE */
842     "Memory exhausted",				/* REG_ESPACE */
843     "Invalid preceding regular expression",	/* REG_BADRPT */
844     "Premature end of regular expression",	/* REG_EEND */
845     "Regular expression too big",		/* REG_ESIZE */
846     "Unmatched ) or \\)",			/* REG_ERPAREN */
847   };
848 
849 /* Subroutine declarations and macros for regex_compile.  */
850 
851 static void store_op1 (), store_op2 ();
852 static void insert_op1 (), insert_op2 ();
853 static boolean at_begline_loc_p (), at_endline_loc_p ();
854 static boolean group_in_compile_stack ();
855 static reg_errcode_t compile_range ();
856 
857 /* Fetch the next character in the uncompiled pattern---translating it
858    if necessary.  Also cast from a signed character in the constant
859    string passed to us by the user to an unsigned char that we can use
860    as an array index (in, e.g., `translate').  */
861 #define PATFETCH(c)							\
862   do {if (p == pend) return REG_EEND;					\
863     c = (unsigned char) *p++;						\
864     if (translate) c = translate[c]; 					\
865   } while (0)
866 
867 /* Fetch the next character in the uncompiled pattern, with no
868    translation.  */
869 #define PATFETCH_RAW(c)							\
870   do {if (p == pend) return REG_EEND;					\
871     c = (unsigned char) *p++; 						\
872   } while (0)
873 
874 /* Go backwards one character in the pattern.  */
875 #define PATUNFETCH p--
876 
877 
878 /* If `translate' is non-null, return translate[D], else just D.  We
879    cast the subscript to translate because some data is declared as
880    `char *', to avoid warnings when a string constant is passed.  But
881    when we use a character as a subscript we must make it unsigned.  */
882 #define TRANSLATE(d) (translate ? translate[(unsigned char) (d)] : (d))
883 
884 
885 /* Macros for outputting the compiled pattern into `buffer'.  */
886 
887 /* If the buffer isn't allocated when it comes in, use this.  */
888 #define INIT_BUF_SIZE  32
889 
890 /* Make sure we have at least N more bytes of space in buffer.  */
891 #define GET_BUFFER_SPACE(n)						\
892     while (b - bufp->buffer + (n) > bufp->allocated)			\
893       EXTEND_BUFFER ()
894 
895 /* Make sure we have one more byte of buffer space and then add C to it.  */
896 #define BUF_PUSH(c)							\
897   do {									\
898     GET_BUFFER_SPACE (1);						\
899     *b++ = (unsigned char) (c);						\
900   } while (0)
901 
902 
903 /* Ensure we have two more bytes of buffer space and then append C1 and C2.  */
904 #define BUF_PUSH_2(c1, c2)						\
905   do {									\
906     GET_BUFFER_SPACE (2);						\
907     *b++ = (unsigned char) (c1);					\
908     *b++ = (unsigned char) (c2);					\
909   } while (0)
910 
911 
912 /* As with BUF_PUSH_2, except for three bytes.  */
913 #define BUF_PUSH_3(c1, c2, c3)						\
914   do {									\
915     GET_BUFFER_SPACE (3);						\
916     *b++ = (unsigned char) (c1);					\
917     *b++ = (unsigned char) (c2);					\
918     *b++ = (unsigned char) (c3);					\
919   } while (0)
920 
921 
922 /* Store a jump with opcode OP at LOC to location TO.  We store a
923    relative address offset by the three bytes the jump itself occupies.  */
924 #define STORE_JUMP(op, loc, to) \
925   store_op1 (op, loc, (to) - (loc) - 3)
926 
927 /* Likewise, for a two-argument jump.  */
928 #define STORE_JUMP2(op, loc, to, arg) \
929   store_op2 (op, loc, (to) - (loc) - 3, arg)
930 
931 /* Like `STORE_JUMP', but for inserting.  Assume `b' is the buffer end.  */
932 #define INSERT_JUMP(op, loc, to) \
933   insert_op1 (op, loc, (to) - (loc) - 3, b)
934 
935 /* Like `STORE_JUMP2', but for inserting.  Assume `b' is the buffer end.  */
936 #define INSERT_JUMP2(op, loc, to, arg) \
937   insert_op2 (op, loc, (to) - (loc) - 3, arg, b)
938 
939 
940 /* This is not an arbitrary limit: the arguments which represent offsets
941    into the pattern are two bytes long.  So if 2^16 bytes turns out to
942    be too small, many things would have to change.  */
943 #define MAX_BUF_SIZE (1L << 16)
944 
945 
946 /* Extend the buffer by twice its current size via realloc and
947    reset the pointers that pointed into the old block to point to the
948    correct places in the new one.  If extending the buffer results in it
949    being larger than MAX_BUF_SIZE, then flag memory exhausted.  */
950 #define EXTEND_BUFFER()							\
951   do { 									\
952     unsigned char *old_buffer = bufp->buffer;				\
953     if (bufp->allocated == MAX_BUF_SIZE) 				\
954       return REG_ESIZE;							\
955     bufp->allocated <<= 1;						\
956     if (bufp->allocated > MAX_BUF_SIZE)					\
957       bufp->allocated = MAX_BUF_SIZE; 					\
958     bufp->buffer = (unsigned char *) realloc (bufp->buffer, bufp->allocated);\
959     if (bufp->buffer == NULL)						\
960       return REG_ESPACE;						\
961     /* If the buffer moved, move all the pointers into it.  */		\
962     if (old_buffer != bufp->buffer)					\
963       {									\
964         b = (b - old_buffer) + bufp->buffer;				\
965         begalt = (begalt - old_buffer) + bufp->buffer;			\
966         if (fixup_alt_jump)						\
967           fixup_alt_jump = (fixup_alt_jump - old_buffer) + bufp->buffer;\
968         if (laststart)							\
969           laststart = (laststart - old_buffer) + bufp->buffer;		\
970         if (pending_exact)						\
971           pending_exact = (pending_exact - old_buffer) + bufp->buffer;	\
972       }									\
973   } while (0)
974 
975 
976 /* Since we have one byte reserved for the register number argument to
977    {start,stop}_memory, the maximum number of groups we can report
978    things about is what fits in that byte.  */
979 #define MAX_REGNUM 255
980 
981 /* But patterns can have more than `MAX_REGNUM' registers.  We just
982    ignore the excess.  */
983 typedef unsigned regnum_t;
984 
985 
986 /* Macros for the compile stack.  */
987 
988 /* Since offsets can go either forwards or backwards, this type needs to
989    be able to hold values from -(MAX_BUF_SIZE - 1) to MAX_BUF_SIZE - 1.  */
990 typedef int pattern_offset_t;
991 
992 typedef struct
993 {
994   pattern_offset_t begalt_offset;
995   pattern_offset_t fixup_alt_jump;
996   pattern_offset_t inner_group_offset;
997   pattern_offset_t laststart_offset;
998   regnum_t regnum;
999 } compile_stack_elt_t;
1000 
1001 
1002 typedef struct
1003 {
1004   compile_stack_elt_t *stack;
1005   unsigned size;
1006   unsigned avail;			/* Offset of next open position.  */
1007 } compile_stack_type;
1008 
1009 
1010 #define INIT_COMPILE_STACK_SIZE 32
1011 
1012 #define COMPILE_STACK_EMPTY  (compile_stack.avail == 0)
1013 #define COMPILE_STACK_FULL  (compile_stack.avail == compile_stack.size)
1014 
1015 /* The next available element.  */
1016 #define COMPILE_STACK_TOP (compile_stack.stack[compile_stack.avail])
1017 
1018 
1019 /* Set the bit for character C in a list.  */
1020 #define SET_LIST_BIT(c)                               \
1021   (b[((unsigned char) (c)) / BYTEWIDTH]               \
1022    |= 1 << (((unsigned char) c) % BYTEWIDTH))
1023 
1024 
1025 /* Get the next unsigned number in the uncompiled pattern.  */
1026 #define GET_UNSIGNED_NUMBER(num) 					\
1027   { if (p != pend)							\
1028      {									\
1029        PATFETCH (c); 							\
1030        while (ISDIGIT (c)) 						\
1031          { 								\
1032            if (num < 0)							\
1033               num = 0;							\
1034            num = num * 10 + c - '0'; 					\
1035            if (p == pend) 						\
1036               break; 							\
1037            PATFETCH (c);						\
1038          } 								\
1039        } 								\
1040     }
1041 
1042 #define CHAR_CLASS_MAX_LENGTH  6 /* Namely, `xdigit'.  */
1043 
1044 #define IS_CHAR_CLASS(string)						\
1045    (STREQ (string, "alpha") || STREQ (string, "upper")			\
1046     || STREQ (string, "lower") || STREQ (string, "digit")		\
1047     || STREQ (string, "alnum") || STREQ (string, "xdigit")		\
1048     || STREQ (string, "space") || STREQ (string, "print")		\
1049     || STREQ (string, "punct") || STREQ (string, "graph")		\
1050     || STREQ (string, "cntrl") || STREQ (string, "blank"))
1051 
1052 /* `regex_compile' compiles PATTERN (of length SIZE) according to SYNTAX.
1053    Returns one of error codes defined in `regex.h', or zero for success.
1054 
1055    Assumes the `allocated' (and perhaps `buffer') and `translate'
1056    fields are set in BUFP on entry.
1057 
1058    If it succeeds, results are put in BUFP (if it returns an error, the
1059    contents of BUFP are undefined):
1060      `buffer' is the compiled pattern;
1061      `syntax' is set to SYNTAX;
1062      `used' is set to the length of the compiled pattern;
1063      `fastmap_accurate' is zero;
1064      `re_nsub' is the number of subexpressions in PATTERN;
1065      `not_bol' and `not_eol' are zero;
1066 
1067    The `fastmap' and `newline_anchor' fields are neither
1068    examined nor set.  */
1069 
1070 static reg_errcode_t
1071 regex_compile (pattern, size, syntax, bufp)
1072      const char *pattern;
1073      int size;
1074      reg_syntax_t syntax;
1075      struct re_pattern_buffer *bufp;
1076 {
1077   /* We fetch characters from PATTERN here.  Even though PATTERN is
1078      `char *' (i.e., signed), we declare these variables as unsigned, so
1079      they can be reliably used as array indices.  */
1080   register unsigned char c, c1;
1081 
1082   /* A random tempory spot in PATTERN.  */
1083   const char *p1;
1084 
1085   /* Points to the end of the buffer, where we should append.  */
1086   register unsigned char *b;
1087 
1088   /* Keeps track of unclosed groups.  */
1089   compile_stack_type compile_stack;
1090 
1091   /* Points to the current (ending) position in the pattern.  */
1092   const char *p = pattern;
1093   const char *pend = pattern + size;
1094 
1095   /* How to translate the characters in the pattern.  */
1096   char *translate = bufp->translate;
1097 
1098   /* Address of the count-byte of the most recently inserted `exactn'
1099      command.  This makes it possible to tell if a new exact-match
1100      character can be added to that command or if the character requires
1101      a new `exactn' command.  */
1102   unsigned char *pending_exact = 0;
1103 
1104   /* Address of start of the most recently finished expression.
1105      This tells, e.g., postfix * where to find the start of its
1106      operand.  Reset at the beginning of groups and alternatives.  */
1107   unsigned char *laststart = 0;
1108 
1109   /* Address of beginning of regexp, or inside of last group.  */
1110   unsigned char *begalt;
1111 
1112   /* Place in the uncompiled pattern (i.e., the {) to
1113      which to go back if the interval is invalid.  */
1114   const char *beg_interval;
1115 
1116   /* Address of the place where a forward jump should go to the end of
1117      the containing expression.  Each alternative of an `or' -- except the
1118      last -- ends with a forward jump of this sort.  */
1119   unsigned char *fixup_alt_jump = 0;
1120 
1121   /* Counts open-groups as they are encountered.  Remembered for the
1122      matching close-group on the compile stack, so the same register
1123      number is put in the stop_memory as the start_memory.  */
1124   regnum_t regnum = 0;
1125 
1126 #ifdef DEBUG
1127   DEBUG_PRINT1 ("\nCompiling pattern: ");
1128   if (debug)
1129     {
1130       unsigned debug_count;
1131 
1132       for (debug_count = 0; debug_count < size; debug_count++)
1133         printchar (pattern[debug_count]);
1134       putchar ('\n');
1135     }
1136 #endif /* DEBUG */
1137 
1138   /* Initialize the compile stack.  */
1139   compile_stack.stack = TALLOC (INIT_COMPILE_STACK_SIZE, compile_stack_elt_t);
1140   if (compile_stack.stack == NULL)
1141     return REG_ESPACE;
1142 
1143   compile_stack.size = INIT_COMPILE_STACK_SIZE;
1144   compile_stack.avail = 0;
1145 
1146   /* Initialize the pattern buffer.  */
1147   bufp->syntax = syntax;
1148   bufp->fastmap_accurate = 0;
1149   bufp->not_bol = bufp->not_eol = 0;
1150 
1151   /* Set `used' to zero, so that if we return an error, the pattern
1152      printer (for debugging) will think there's no pattern.  We reset it
1153      at the end.  */
1154   bufp->used = 0;
1155 
1156   /* Always count groups, whether or not bufp->no_sub is set.  */
1157   bufp->re_nsub = 0;
1158 
1159 #if !defined (emacs) && !defined (SYNTAX_TABLE)
1160   /* Initialize the syntax table.  */
1161    init_syntax_once ();
1162 #endif
1163 
1164   if (bufp->allocated == 0)
1165     {
1166       if (bufp->buffer)
1167 	{ /* If zero allocated, but buffer is non-null, try to realloc
1168              enough space.  This loses if buffer's address is bogus, but
1169              that is the user's responsibility.  */
1170           RETALLOC (bufp->buffer, INIT_BUF_SIZE, unsigned char);
1171         }
1172       else
1173         { /* Caller did not allocate a buffer.  Do it for them.  */
1174           bufp->buffer = TALLOC (INIT_BUF_SIZE, unsigned char);
1175         }
1176       if (!bufp->buffer) return REG_ESPACE;
1177 
1178       bufp->allocated = INIT_BUF_SIZE;
1179     }
1180 
1181   begalt = b = bufp->buffer;
1182 
1183   /* Loop through the uncompiled pattern until we're at the end.  */
1184   while (p != pend)
1185     {
1186       PATFETCH (c);
1187 
1188       switch (c)
1189         {
1190         case '^':
1191           {
1192             if (   /* If at start of pattern, it's an operator.  */
1193                    p == pattern + 1
1194                    /* If context independent, it's an operator.  */
1195                 || syntax & RE_CONTEXT_INDEP_ANCHORS
1196                    /* Otherwise, depends on what's come before.  */
1197                 || at_begline_loc_p (pattern, p, syntax))
1198               BUF_PUSH (begline);
1199             else
1200               goto normal_char;
1201           }
1202           break;
1203 
1204 
1205         case '$':
1206           {
1207             if (   /* If at end of pattern, it's an operator.  */
1208                    p == pend
1209                    /* If context independent, it's an operator.  */
1210                 || syntax & RE_CONTEXT_INDEP_ANCHORS
1211                    /* Otherwise, depends on what's next.  */
1212                 || at_endline_loc_p (p, pend, syntax))
1213                BUF_PUSH (endline);
1214              else
1215                goto normal_char;
1216            }
1217            break;
1218 
1219 
1220 	case '+':
1221         case '?':
1222           if ((syntax & RE_BK_PLUS_QM)
1223               || (syntax & RE_LIMITED_OPS))
1224             goto normal_char;
1225         handle_plus:
1226         case '*':
1227           /* If there is no previous pattern... */
1228           if (!laststart)
1229             {
1230               if (syntax & RE_CONTEXT_INVALID_OPS)
1231                 return REG_BADRPT;
1232               else if (!(syntax & RE_CONTEXT_INDEP_OPS))
1233                 goto normal_char;
1234             }
1235 
1236           {
1237             /* Are we optimizing this jump?  */
1238             boolean keep_string_p = false;
1239 
1240             /* 1 means zero (many) matches is allowed.  */
1241             char zero_times_ok = 0, many_times_ok = 0;
1242 
1243             /* If there is a sequence of repetition chars, collapse it
1244                down to just one (the right one).  We can't combine
1245                interval operators with these because of, e.g., `a{2}*',
1246                which should only match an even number of `a's.  */
1247 
1248             for (;;)
1249               {
1250                 zero_times_ok |= c != '+';
1251                 many_times_ok |= c != '?';
1252 
1253                 if (p == pend)
1254                   break;
1255 
1256                 PATFETCH (c);
1257 
1258                 if (c == '*'
1259                     || (!(syntax & RE_BK_PLUS_QM) && (c == '+' || c == '?')))
1260                   ;
1261 
1262                 else if (syntax & RE_BK_PLUS_QM  &&  c == '\\')
1263                   {
1264                     if (p == pend) return REG_EESCAPE;
1265 
1266                     PATFETCH (c1);
1267                     if (!(c1 == '+' || c1 == '?'))
1268                       {
1269                         PATUNFETCH;
1270                         PATUNFETCH;
1271                         break;
1272                       }
1273 
1274                     c = c1;
1275                   }
1276                 else
1277                   {
1278                     PATUNFETCH;
1279                     break;
1280                   }
1281 
1282                 /* If we get here, we found another repeat character.  */
1283                }
1284 
1285             /* Star, etc. applied to an empty pattern is equivalent
1286                to an empty pattern.  */
1287             if (!laststart)
1288               break;
1289 
1290             /* Now we know whether or not zero matches is allowed
1291                and also whether or not two or more matches is allowed.  */
1292             if (many_times_ok)
1293               { /* More than one repetition is allowed, so put in at the
1294                    end a backward relative jump from `b' to before the next
1295                    jump we're going to put in below (which jumps from
1296                    laststart to after this jump).
1297 
1298                    But if we are at the `*' in the exact sequence `.*\n',
1299                    insert an unconditional jump backwards to the .,
1300                    instead of the beginning of the loop.  This way we only
1301                    push a failure point once, instead of every time
1302                    through the loop.  */
1303                 assert (p - 1 > pattern);
1304 
1305                 /* Allocate the space for the jump.  */
1306                 GET_BUFFER_SPACE (3);
1307 
1308                 /* We know we are not at the first character of the pattern,
1309                    because laststart was nonzero.  And we've already
1310                    incremented `p', by the way, to be the character after
1311                    the `*'.  Do we have to do something analogous here
1312                    for null bytes, because of RE_DOT_NOT_NULL?  */
1313                 if (TRANSLATE (*(p - 2)) == TRANSLATE ('.')
1314 		    && zero_times_ok
1315                     && p < pend && TRANSLATE (*p) == TRANSLATE ('\n')
1316                     && !(syntax & RE_DOT_NEWLINE))
1317                   { /* We have .*\n.  */
1318                     STORE_JUMP (jump, b, laststart);
1319                     keep_string_p = true;
1320                   }
1321                 else
1322                   /* Anything else.  */
1323                   STORE_JUMP (maybe_pop_jump, b, laststart - 3);
1324 
1325                 /* We've added more stuff to the buffer.  */
1326                 b += 3;
1327               }
1328 
1329             /* On failure, jump from laststart to b + 3, which will be the
1330                end of the buffer after this jump is inserted.  */
1331             GET_BUFFER_SPACE (3);
1332             INSERT_JUMP (keep_string_p ? on_failure_keep_string_jump
1333                                        : on_failure_jump,
1334                          laststart, b + 3);
1335             pending_exact = 0;
1336             b += 3;
1337 
1338             if (!zero_times_ok)
1339               {
1340                 /* At least one repetition is required, so insert a
1341                    `dummy_failure_jump' before the initial
1342                    `on_failure_jump' instruction of the loop. This
1343                    effects a skip over that instruction the first time
1344                    we hit that loop.  */
1345                 GET_BUFFER_SPACE (3);
1346                 INSERT_JUMP (dummy_failure_jump, laststart, laststart + 6);
1347                 b += 3;
1348               }
1349             }
1350 	  break;
1351 
1352 
1353 	case '.':
1354           laststart = b;
1355           BUF_PUSH (anychar);
1356           break;
1357 
1358 
1359         case '[':
1360           {
1361             boolean had_char_class = false;
1362 
1363             if (p == pend) return REG_EBRACK;
1364 
1365             /* Ensure that we have enough space to push a charset: the
1366                opcode, the length count, and the bitset; 34 bytes in all.  */
1367 	    GET_BUFFER_SPACE (34);
1368 
1369             laststart = b;
1370 
1371             /* We test `*p == '^' twice, instead of using an if
1372                statement, so we only need one BUF_PUSH.  */
1373             BUF_PUSH (*p == '^' ? charset_not : charset);
1374             if (*p == '^')
1375               p++;
1376 
1377             /* Remember the first position in the bracket expression.  */
1378             p1 = p;
1379 
1380             /* Push the number of bytes in the bitmap.  */
1381             BUF_PUSH ((1 << BYTEWIDTH) / BYTEWIDTH);
1382 
1383             /* Clear the whole map.  */
1384             bzero (b, (1 << BYTEWIDTH) / BYTEWIDTH);
1385 
1386             /* charset_not matches newline according to a syntax bit.  */
1387             if ((re_opcode_t) b[-2] == charset_not
1388                 && (syntax & RE_HAT_LISTS_NOT_NEWLINE))
1389               SET_LIST_BIT ('\n');
1390 
1391             /* Read in characters and ranges, setting map bits.  */
1392             for (;;)
1393               {
1394                 if (p == pend) return REG_EBRACK;
1395 
1396                 PATFETCH (c);
1397 
1398                 /* \ might escape characters inside [...] and [^...].  */
1399                 if ((syntax & RE_BACKSLASH_ESCAPE_IN_LISTS) && c == '\\')
1400                   {
1401                     if (p == pend) return REG_EESCAPE;
1402 
1403                     PATFETCH (c1);
1404                     SET_LIST_BIT (c1);
1405                     continue;
1406                   }
1407 
1408                 /* Could be the end of the bracket expression.  If it's
1409                    not (i.e., when the bracket expression is `[]' so
1410                    far), the ']' character bit gets set way below.  */
1411                 if (c == ']' && p != p1 + 1)
1412                   break;
1413 
1414                 /* Look ahead to see if it's a range when the last thing
1415                    was a character class.  */
1416                 if (had_char_class && c == '-' && *p != ']')
1417                   return REG_ERANGE;
1418 
1419                 /* Look ahead to see if it's a range when the last thing
1420                    was a character: if this is a hyphen not at the
1421                    beginning or the end of a list, then it's the range
1422                    operator.  */
1423                 if (c == '-'
1424                     && !(p - 2 >= pattern && p[-2] == '[')
1425                     && !(p - 3 >= pattern && p[-3] == '[' && p[-2] == '^')
1426                     && *p != ']')
1427                   {
1428                     reg_errcode_t ret
1429                       = compile_range (&p, pend, translate, syntax, b);
1430                     if (ret != REG_NOERROR) return ret;
1431                   }
1432 
1433                 else if (p[0] == '-' && p[1] != ']')
1434                   { /* This handles ranges made up of characters only.  */
1435                     reg_errcode_t ret;
1436 
1437 		    /* Move past the `-'.  */
1438                     PATFETCH (c1);
1439 
1440                     ret = compile_range (&p, pend, translate, syntax, b);
1441                     if (ret != REG_NOERROR) return ret;
1442                   }
1443 
1444                 /* See if we're at the beginning of a possible character
1445                    class.  */
1446 
1447                 else if (syntax & RE_CHAR_CLASSES && c == '[' && *p == ':')
1448                   { /* Leave room for the null.  */
1449                     char str[CHAR_CLASS_MAX_LENGTH + 1];
1450 
1451                     PATFETCH (c);
1452                     c1 = 0;
1453 
1454                     /* If pattern is `[[:'.  */
1455                     if (p == pend) return REG_EBRACK;
1456 
1457                     for (;;)
1458                       {
1459                         PATFETCH (c);
1460                         if (c == ':' || c == ']' || p == pend
1461                             || c1 == CHAR_CLASS_MAX_LENGTH)
1462                           break;
1463                         str[c1++] = c;
1464                       }
1465                     str[c1] = '\0';
1466 
1467                     /* If isn't a word bracketed by `[:' and:`]':
1468                        undo the ending character, the letters, and leave
1469                        the leading `:' and `[' (but set bits for them).  */
1470                     if (c == ':' && *p == ']')
1471                       {
1472                         int ch;
1473                         boolean is_alnum = STREQ (str, "alnum");
1474                         boolean is_alpha = STREQ (str, "alpha");
1475                         boolean is_blank = STREQ (str, "blank");
1476                         boolean is_cntrl = STREQ (str, "cntrl");
1477                         boolean is_digit = STREQ (str, "digit");
1478                         boolean is_graph = STREQ (str, "graph");
1479                         boolean is_lower = STREQ (str, "lower");
1480                         boolean is_print = STREQ (str, "print");
1481                         boolean is_punct = STREQ (str, "punct");
1482                         boolean is_space = STREQ (str, "space");
1483                         boolean is_upper = STREQ (str, "upper");
1484                         boolean is_xdigit = STREQ (str, "xdigit");
1485 
1486                         if (!IS_CHAR_CLASS (str)) return REG_ECTYPE;
1487 
1488                         /* Throw away the ] at the end of the character
1489                            class.  */
1490                         PATFETCH (c);
1491 
1492                         if (p == pend) return REG_EBRACK;
1493 
1494                         for (ch = 0; ch < 1 << BYTEWIDTH; ch++)
1495                           {
1496                             if (   (is_alnum  && ISALNUM (ch))
1497                                 || (is_alpha  && ISALPHA (ch))
1498                                 || (is_blank  && ISBLANK (ch))
1499                                 || (is_cntrl  && ISCNTRL (ch))
1500                                 || (is_digit  && ISDIGIT (ch))
1501                                 || (is_graph  && ISGRAPH (ch))
1502                                 || (is_lower  && ISLOWER (ch))
1503                                 || (is_print  && ISPRINT (ch))
1504                                 || (is_punct  && ISPUNCT (ch))
1505                                 || (is_space  && ISSPACE (ch))
1506                                 || (is_upper  && ISUPPER (ch))
1507                                 || (is_xdigit && ISXDIGIT (ch)))
1508                             SET_LIST_BIT (ch);
1509                           }
1510                         had_char_class = true;
1511                       }
1512                     else
1513                       {
1514                         c1++;
1515                         while (c1--)
1516                           PATUNFETCH;
1517                         SET_LIST_BIT ('[');
1518                         SET_LIST_BIT (':');
1519                         had_char_class = false;
1520                       }
1521                   }
1522                 else
1523                   {
1524                     had_char_class = false;
1525                     SET_LIST_BIT (c);
1526                   }
1527               }
1528 
1529             /* Discard any (non)matching list bytes that are all 0 at the
1530                end of the map.  Decrease the map-length byte too.  */
1531             while ((int) b[-1] > 0 && b[b[-1] - 1] == 0)
1532               b[-1]--;
1533             b += b[-1];
1534           }
1535           break;
1536 
1537 
1538 	case '(':
1539           if (syntax & RE_NO_BK_PARENS)
1540             goto handle_open;
1541           else
1542             goto normal_char;
1543 
1544 
1545         case ')':
1546           if (syntax & RE_NO_BK_PARENS)
1547             goto handle_close;
1548           else
1549             goto normal_char;
1550 
1551 
1552         case '\n':
1553           if (syntax & RE_NEWLINE_ALT)
1554             goto handle_alt;
1555           else
1556             goto normal_char;
1557 
1558 
1559 	case '|':
1560           if (syntax & RE_NO_BK_VBAR)
1561             goto handle_alt;
1562           else
1563             goto normal_char;
1564 
1565 
1566         case '{':
1567            if (syntax & RE_INTERVALS && syntax & RE_NO_BK_BRACES)
1568              goto handle_interval;
1569            else
1570              goto normal_char;
1571 
1572 
1573         case '\\':
1574           if (p == pend) return REG_EESCAPE;
1575 
1576           /* Do not translate the character after the \, so that we can
1577              distinguish, e.g., \B from \b, even if we normally would
1578              translate, e.g., B to b.  */
1579           PATFETCH_RAW (c);
1580 
1581           switch (c)
1582             {
1583             case '(':
1584               if (syntax & RE_NO_BK_PARENS)
1585                 goto normal_backslash;
1586 
1587             handle_open:
1588               bufp->re_nsub++;
1589               regnum++;
1590 
1591               if (COMPILE_STACK_FULL)
1592                 {
1593                   RETALLOC (compile_stack.stack, compile_stack.size << 1,
1594                             compile_stack_elt_t);
1595                   if (compile_stack.stack == NULL) return REG_ESPACE;
1596 
1597                   compile_stack.size <<= 1;
1598                 }
1599 
1600               /* These are the values to restore when we hit end of this
1601                  group.  They are all relative offsets, so that if the
1602                  whole pattern moves because of realloc, they will still
1603                  be valid.  */
1604               COMPILE_STACK_TOP.begalt_offset = begalt - bufp->buffer;
1605               COMPILE_STACK_TOP.fixup_alt_jump
1606                 = fixup_alt_jump ? fixup_alt_jump - bufp->buffer + 1 : 0;
1607               COMPILE_STACK_TOP.laststart_offset = b - bufp->buffer;
1608               COMPILE_STACK_TOP.regnum = regnum;
1609 
1610               /* We will eventually replace the 0 with the number of
1611                  groups inner to this one.  But do not push a
1612                  start_memory for groups beyond the last one we can
1613                  represent in the compiled pattern.  */
1614               if (regnum <= MAX_REGNUM)
1615                 {
1616                   COMPILE_STACK_TOP.inner_group_offset = b - bufp->buffer + 2;
1617                   BUF_PUSH_3 (start_memory, regnum, 0);
1618                 }
1619 
1620               compile_stack.avail++;
1621 
1622               fixup_alt_jump = 0;
1623               laststart = 0;
1624               begalt = b;
1625 	      /* If we've reached MAX_REGNUM groups, then this open
1626 		 won't actually generate any code, so we'll have to
1627 		 clear pending_exact explicitly.  */
1628 	      pending_exact = 0;
1629               break;
1630 
1631 
1632             case ')':
1633               if (syntax & RE_NO_BK_PARENS) goto normal_backslash;
1634 
1635               if (COMPILE_STACK_EMPTY)
1636                 if (syntax & RE_UNMATCHED_RIGHT_PAREN_ORD)
1637                   goto normal_backslash;
1638                 else
1639                   return REG_ERPAREN;
1640 
1641             handle_close:
1642               if (fixup_alt_jump)
1643                 { /* Push a dummy failure point at the end of the
1644                      alternative for a possible future
1645                      `pop_failure_jump' to pop.  See comments at
1646                      `push_dummy_failure' in `re_match_2'.  */
1647                   BUF_PUSH (push_dummy_failure);
1648 
1649                   /* We allocated space for this jump when we assigned
1650                      to `fixup_alt_jump', in the `handle_alt' case below.  */
1651                   STORE_JUMP (jump_past_alt, fixup_alt_jump, b - 1);
1652                 }
1653 
1654               /* See similar code for backslashed left paren above.  */
1655               if (COMPILE_STACK_EMPTY)
1656                 if (syntax & RE_UNMATCHED_RIGHT_PAREN_ORD)
1657                   goto normal_char;
1658                 else
1659                   return REG_ERPAREN;
1660 
1661               /* Since we just checked for an empty stack above, this
1662                  ``can't happen''.  */
1663               assert (compile_stack.avail != 0);
1664               {
1665                 /* We don't just want to restore into `regnum', because
1666                    later groups should continue to be numbered higher,
1667                    as in `(ab)c(de)' -- the second group is #2.  */
1668                 regnum_t this_group_regnum;
1669 
1670                 compile_stack.avail--;
1671                 begalt = bufp->buffer + COMPILE_STACK_TOP.begalt_offset;
1672                 fixup_alt_jump
1673                   = COMPILE_STACK_TOP.fixup_alt_jump
1674                     ? bufp->buffer + COMPILE_STACK_TOP.fixup_alt_jump - 1
1675                     : 0;
1676                 laststart = bufp->buffer + COMPILE_STACK_TOP.laststart_offset;
1677                 this_group_regnum = COMPILE_STACK_TOP.regnum;
1678 		/* If we've reached MAX_REGNUM groups, then this open
1679 		   won't actually generate any code, so we'll have to
1680 		   clear pending_exact explicitly.  */
1681 		pending_exact = 0;
1682 
1683                 /* We're at the end of the group, so now we know how many
1684                    groups were inside this one.  */
1685                 if (this_group_regnum <= MAX_REGNUM)
1686                   {
1687                     unsigned char *inner_group_loc
1688                       = bufp->buffer + COMPILE_STACK_TOP.inner_group_offset;
1689 
1690                     *inner_group_loc = regnum - this_group_regnum;
1691                     BUF_PUSH_3 (stop_memory, this_group_regnum,
1692                                 regnum - this_group_regnum);
1693                   }
1694               }
1695               break;
1696 
1697 
1698             case '|':					/* `\|'.  */
1699               if (syntax & RE_LIMITED_OPS || syntax & RE_NO_BK_VBAR)
1700                 goto normal_backslash;
1701             handle_alt:
1702               if (syntax & RE_LIMITED_OPS)
1703                 goto normal_char;
1704 
1705               /* Insert before the previous alternative a jump which
1706                  jumps to this alternative if the former fails.  */
1707               GET_BUFFER_SPACE (3);
1708               INSERT_JUMP (on_failure_jump, begalt, b + 6);
1709               pending_exact = 0;
1710               b += 3;
1711 
1712               /* The alternative before this one has a jump after it
1713                  which gets executed if it gets matched.  Adjust that
1714                  jump so it will jump to this alternative's analogous
1715                  jump (put in below, which in turn will jump to the next
1716                  (if any) alternative's such jump, etc.).  The last such
1717                  jump jumps to the correct final destination.  A picture:
1718                           _____ _____
1719                           |   | |   |
1720                           |   v |   v
1721                          a | b   | c
1722 
1723                  If we are at `b', then fixup_alt_jump right now points to a
1724                  three-byte space after `a'.  We'll put in the jump, set
1725                  fixup_alt_jump to right after `b', and leave behind three
1726                  bytes which we'll fill in when we get to after `c'.  */
1727 
1728               if (fixup_alt_jump)
1729                 STORE_JUMP (jump_past_alt, fixup_alt_jump, b);
1730 
1731               /* Mark and leave space for a jump after this alternative,
1732                  to be filled in later either by next alternative or
1733                  when know we're at the end of a series of alternatives.  */
1734               fixup_alt_jump = b;
1735               GET_BUFFER_SPACE (3);
1736               b += 3;
1737 
1738               laststart = 0;
1739               begalt = b;
1740               break;
1741 
1742 
1743             case '{':
1744               /* If \{ is a literal.  */
1745               if (!(syntax & RE_INTERVALS)
1746                      /* If we're at `\{' and it's not the open-interval
1747                         operator.  */
1748                   || ((syntax & RE_INTERVALS) && (syntax & RE_NO_BK_BRACES))
1749                   || (p - 2 == pattern  &&  p == pend))
1750                 goto normal_backslash;
1751 
1752             handle_interval:
1753               {
1754                 /* If got here, then the syntax allows intervals.  */
1755 
1756                 /* At least (most) this many matches must be made.  */
1757                 int lower_bound = -1, upper_bound = -1;
1758 
1759                 beg_interval = p - 1;
1760 
1761                 if (p == pend)
1762                   {
1763                     if (syntax & RE_NO_BK_BRACES)
1764                       goto unfetch_interval;
1765                     else
1766                       return REG_EBRACE;
1767                   }
1768 
1769                 GET_UNSIGNED_NUMBER (lower_bound);
1770 
1771                 if (c == ',')
1772                   {
1773                     GET_UNSIGNED_NUMBER (upper_bound);
1774                     if (upper_bound < 0) upper_bound = RE_DUP_MAX;
1775                   }
1776                 else
1777                   /* Interval such as `{1}' => match exactly once. */
1778                   upper_bound = lower_bound;
1779 
1780                 if (lower_bound < 0 || upper_bound > RE_DUP_MAX
1781                     || lower_bound > upper_bound)
1782                   {
1783                     if (syntax & RE_NO_BK_BRACES)
1784                       goto unfetch_interval;
1785                     else
1786                       return REG_BADBR;
1787                   }
1788 
1789                 if (!(syntax & RE_NO_BK_BRACES))
1790                   {
1791                     if (c != '\\') return REG_EBRACE;
1792 
1793                     PATFETCH (c);
1794                   }
1795 
1796                 if (c != '}')
1797                   {
1798                     if (syntax & RE_NO_BK_BRACES)
1799                       goto unfetch_interval;
1800                     else
1801                       return REG_BADBR;
1802                   }
1803 
1804                 /* We just parsed a valid interval.  */
1805 
1806                 /* If it's invalid to have no preceding re.  */
1807                 if (!laststart)
1808                   {
1809                     if (syntax & RE_CONTEXT_INVALID_OPS)
1810                       return REG_BADRPT;
1811                     else if (syntax & RE_CONTEXT_INDEP_OPS)
1812                       laststart = b;
1813                     else
1814                       goto unfetch_interval;
1815                   }
1816 
1817                 /* If the upper bound is zero, don't want to succeed at
1818                    all; jump from `laststart' to `b + 3', which will be
1819                    the end of the buffer after we insert the jump.  */
1820                  if (upper_bound == 0)
1821                    {
1822                      GET_BUFFER_SPACE (3);
1823                      INSERT_JUMP (jump, laststart, b + 3);
1824                      b += 3;
1825                    }
1826 
1827                  /* Otherwise, we have a nontrivial interval.  When
1828                     we're all done, the pattern will look like:
1829                       set_number_at <jump count> <upper bound>
1830                       set_number_at <succeed_n count> <lower bound>
1831                       succeed_n <after jump addr> <succed_n count>
1832                       <body of loop>
1833                       jump_n <succeed_n addr> <jump count>
1834                     (The upper bound and `jump_n' are omitted if
1835                     `upper_bound' is 1, though.)  */
1836                  else
1837                    { /* If the upper bound is > 1, we need to insert
1838                         more at the end of the loop.  */
1839                      unsigned nbytes = 10 + (upper_bound > 1) * 10;
1840 
1841                      GET_BUFFER_SPACE (nbytes);
1842 
1843                      /* Initialize lower bound of the `succeed_n', even
1844                         though it will be set during matching by its
1845                         attendant `set_number_at' (inserted next),
1846                         because `re_compile_fastmap' needs to know.
1847                         Jump to the `jump_n' we might insert below.  */
1848                      INSERT_JUMP2 (succeed_n, laststart,
1849                                    b + 5 + (upper_bound > 1) * 5,
1850                                    lower_bound);
1851                      b += 5;
1852 
1853                      /* Code to initialize the lower bound.  Insert
1854                         before the `succeed_n'.  The `5' is the last two
1855                         bytes of this `set_number_at', plus 3 bytes of
1856                         the following `succeed_n'.  */
1857                      insert_op2 (set_number_at, laststart, 5, lower_bound, b);
1858                      b += 5;
1859 
1860                      if (upper_bound > 1)
1861                        { /* More than one repetition is allowed, so
1862                             append a backward jump to the `succeed_n'
1863                             that starts this interval.
1864 
1865                             When we've reached this during matching,
1866                             we'll have matched the interval once, so
1867                             jump back only `upper_bound - 1' times.  */
1868                          STORE_JUMP2 (jump_n, b, laststart + 5,
1869                                       upper_bound - 1);
1870                          b += 5;
1871 
1872                          /* The location we want to set is the second
1873                             parameter of the `jump_n'; that is `b-2' as
1874                             an absolute address.  `laststart' will be
1875                             the `set_number_at' we're about to insert;
1876                             `laststart+3' the number to set, the source
1877                             for the relative address.  But we are
1878                             inserting into the middle of the pattern --
1879                             so everything is getting moved up by 5.
1880                             Conclusion: (b - 2) - (laststart + 3) + 5,
1881                             i.e., b - laststart.
1882 
1883                             We insert this at the beginning of the loop
1884                             so that if we fail during matching, we'll
1885                             reinitialize the bounds.  */
1886                          insert_op2 (set_number_at, laststart, b - laststart,
1887                                      upper_bound - 1, b);
1888                          b += 5;
1889                        }
1890                    }
1891                 pending_exact = 0;
1892                 beg_interval = NULL;
1893               }
1894               break;
1895 
1896             unfetch_interval:
1897               /* If an invalid interval, match the characters as literals.  */
1898                assert (beg_interval);
1899                p = beg_interval;
1900                beg_interval = NULL;
1901 
1902                /* normal_char and normal_backslash need `c'.  */
1903                PATFETCH (c);
1904 
1905                if (!(syntax & RE_NO_BK_BRACES))
1906                  {
1907                    if (p > pattern  &&  p[-1] == '\\')
1908                      goto normal_backslash;
1909                  }
1910                goto normal_char;
1911 
1912 #ifdef emacs
1913             /* There is no way to specify the before_dot and after_dot
1914                operators.  rms says this is ok.  --karl  */
1915             case '=':
1916               BUF_PUSH (at_dot);
1917               break;
1918 
1919             case 's':
1920               laststart = b;
1921               PATFETCH (c);
1922               BUF_PUSH_2 (syntaxspec, syntax_spec_code[c]);
1923               break;
1924 
1925             case 'S':
1926               laststart = b;
1927               PATFETCH (c);
1928               BUF_PUSH_2 (notsyntaxspec, syntax_spec_code[c]);
1929               break;
1930 #endif /* emacs */
1931 
1932 
1933             case 'w':
1934               laststart = b;
1935               BUF_PUSH (wordchar);
1936               break;
1937 
1938 
1939             case 'W':
1940               laststart = b;
1941               BUF_PUSH (notwordchar);
1942               break;
1943 
1944 
1945             case '<':
1946               BUF_PUSH (wordbeg);
1947               break;
1948 
1949             case '>':
1950               BUF_PUSH (wordend);
1951               break;
1952 
1953             case 'b':
1954               BUF_PUSH (wordbound);
1955               break;
1956 
1957             case 'B':
1958               BUF_PUSH (notwordbound);
1959               break;
1960 
1961             case '`':
1962               BUF_PUSH (begbuf);
1963               break;
1964 
1965             case '\'':
1966               BUF_PUSH (endbuf);
1967               break;
1968 
1969             case '1': case '2': case '3': case '4': case '5':
1970             case '6': case '7': case '8': case '9':
1971               if (syntax & RE_NO_BK_REFS)
1972                 goto normal_char;
1973 
1974               c1 = c - '0';
1975 
1976               if (c1 > regnum)
1977                 return REG_ESUBREG;
1978 
1979               /* Can't back reference to a subexpression if inside of it.  */
1980               if (group_in_compile_stack (compile_stack, c1))
1981                 goto normal_char;
1982 
1983               laststart = b;
1984               BUF_PUSH_2 (duplicate, c1);
1985               break;
1986 
1987 
1988             case '+':
1989             case '?':
1990               if (syntax & RE_BK_PLUS_QM)
1991                 goto handle_plus;
1992               else
1993                 goto normal_backslash;
1994 
1995             default:
1996             normal_backslash:
1997               /* You might think it would be useful for \ to mean
1998                  not to translate; but if we don't translate it
1999                  it will never match anything.  */
2000               c = TRANSLATE (c);
2001               goto normal_char;
2002             }
2003           break;
2004 
2005 
2006 	default:
2007         /* Expects the character in `c'.  */
2008 	normal_char:
2009 	      /* If no exactn currently being built.  */
2010           if (!pending_exact
2011 
2012               /* If last exactn not at current position.  */
2013               || pending_exact + *pending_exact + 1 != b
2014 
2015               /* We have only one byte following the exactn for the count.  */
2016 	      || *pending_exact == (1 << BYTEWIDTH) - 1
2017 
2018               /* If followed by a repetition operator.  */
2019               || *p == '*' || *p == '^'
2020 	      || ((syntax & RE_BK_PLUS_QM)
2021 		  ? *p == '\\' && (p[1] == '+' || p[1] == '?')
2022 		  : (*p == '+' || *p == '?'))
2023 	      || ((syntax & RE_INTERVALS)
2024                   && ((syntax & RE_NO_BK_BRACES)
2025 		      ? *p == '{'
2026                       : (p[0] == '\\' && p[1] == '{'))))
2027 	    {
2028 	      /* Start building a new exactn.  */
2029 
2030               laststart = b;
2031 
2032 	      BUF_PUSH_2 (exactn, 0);
2033 	      pending_exact = b - 1;
2034             }
2035 
2036 	  BUF_PUSH (c);
2037           (*pending_exact)++;
2038 	  break;
2039         } /* switch (c) */
2040     } /* while p != pend */
2041 
2042 
2043   /* Through the pattern now.  */
2044 
2045   if (fixup_alt_jump)
2046     STORE_JUMP (jump_past_alt, fixup_alt_jump, b);
2047 
2048   if (!COMPILE_STACK_EMPTY)
2049     return REG_EPAREN;
2050 
2051   free (compile_stack.stack);
2052 
2053   /* We have succeeded; set the length of the buffer.  */
2054   bufp->used = b - bufp->buffer;
2055 
2056 #ifdef DEBUG
2057   if (debug)
2058     {
2059       DEBUG_PRINT1 ("\nCompiled pattern: ");
2060       print_compiled_pattern (bufp);
2061     }
2062 #endif /* DEBUG */
2063 
2064   return REG_NOERROR;
2065 } /* regex_compile */
2066 
2067 /* Subroutines for `regex_compile'.  */
2068 
2069 /* Store OP at LOC followed by two-byte integer parameter ARG.  */
2070 
2071 static void
2072 store_op1 (op, loc, arg)
2073     re_opcode_t op;
2074     unsigned char *loc;
2075     int arg;
2076 {
2077   *loc = (unsigned char) op;
2078   STORE_NUMBER (loc + 1, arg);
2079 }
2080 
2081 
2082 /* Like `store_op1', but for two two-byte parameters ARG1 and ARG2.  */
2083 
2084 static void
2085 store_op2 (op, loc, arg1, arg2)
2086     re_opcode_t op;
2087     unsigned char *loc;
2088     int arg1, arg2;
2089 {
2090   *loc = (unsigned char) op;
2091   STORE_NUMBER (loc + 1, arg1);
2092   STORE_NUMBER (loc + 3, arg2);
2093 }
2094 
2095 
2096 /* Copy the bytes from LOC to END to open up three bytes of space at LOC
2097    for OP followed by two-byte integer parameter ARG.  */
2098 
2099 static void
2100 insert_op1 (op, loc, arg, end)
2101     re_opcode_t op;
2102     unsigned char *loc;
2103     int arg;
2104     unsigned char *end;
2105 {
2106   register unsigned char *pfrom = end;
2107   register unsigned char *pto = end + 3;
2108 
2109   while (pfrom != loc)
2110     *--pto = *--pfrom;
2111 
2112   store_op1 (op, loc, arg);
2113 }
2114 
2115 
2116 /* Like `insert_op1', but for two two-byte parameters ARG1 and ARG2.  */
2117 
2118 static void
2119 insert_op2 (op, loc, arg1, arg2, end)
2120     re_opcode_t op;
2121     unsigned char *loc;
2122     int arg1, arg2;
2123     unsigned char *end;
2124 {
2125   register unsigned char *pfrom = end;
2126   register unsigned char *pto = end + 5;
2127 
2128   while (pfrom != loc)
2129     *--pto = *--pfrom;
2130 
2131   store_op2 (op, loc, arg1, arg2);
2132 }
2133 
2134 
2135 /* P points to just after a ^ in PATTERN.  Return true if that ^ comes
2136    after an alternative or a begin-subexpression.  We assume there is at
2137    least one character before the ^.  */
2138 
2139 static boolean
2140 at_begline_loc_p (pattern, p, syntax)
2141     const char *pattern, *p;
2142     reg_syntax_t syntax;
2143 {
2144   const char *prev = p - 2;
2145   boolean prev_prev_backslash = prev > pattern && prev[-1] == '\\';
2146 
2147   return
2148        /* After a subexpression?  */
2149        (*prev == '(' && (syntax & RE_NO_BK_PARENS || prev_prev_backslash))
2150        /* After an alternative?  */
2151     || (*prev == '|' && (syntax & RE_NO_BK_VBAR || prev_prev_backslash));
2152 }
2153 
2154 
2155 /* The dual of at_begline_loc_p.  This one is for $.  We assume there is
2156    at least one character after the $, i.e., `P < PEND'.  */
2157 
2158 static boolean
2159 at_endline_loc_p (p, pend, syntax)
2160     const char *p, *pend;
2161     int syntax;
2162 {
2163   const char *next = p;
2164   boolean next_backslash = *next == '\\';
2165   const char *next_next = p + 1 < pend ? p + 1 : NULL;
2166 
2167   return
2168        /* Before a subexpression?  */
2169        (syntax & RE_NO_BK_PARENS ? *next == ')'
2170         : next_backslash && next_next && *next_next == ')')
2171        /* Before an alternative?  */
2172     || (syntax & RE_NO_BK_VBAR ? *next == '|'
2173         : next_backslash && next_next && *next_next == '|');
2174 }
2175 
2176 
2177 /* Returns true if REGNUM is in one of COMPILE_STACK's elements and
2178    false if it's not.  */
2179 
2180 static boolean
2181 group_in_compile_stack (compile_stack, regnum)
2182     compile_stack_type compile_stack;
2183     regnum_t regnum;
2184 {
2185   int this_element;
2186 
2187   for (this_element = compile_stack.avail - 1;
2188        this_element >= 0;
2189        this_element--)
2190     if (compile_stack.stack[this_element].regnum == regnum)
2191       return true;
2192 
2193   return false;
2194 }
2195 
2196 
2197 /* Read the ending character of a range (in a bracket expression) from the
2198    uncompiled pattern *P_PTR (which ends at PEND).  We assume the
2199    starting character is in `P[-2]'.  (`P[-1]' is the character `-'.)
2200    Then we set the translation of all bits between the starting and
2201    ending characters (inclusive) in the compiled pattern B.
2202 
2203    Return an error code.
2204 
2205    We use these short variable names so we can use the same macros as
2206    `regex_compile' itself.  */
2207 
2208 static reg_errcode_t
2209 compile_range (p_ptr, pend, translate, syntax, b)
2210     const char **p_ptr, *pend;
2211     char *translate;
2212     reg_syntax_t syntax;
2213     unsigned char *b;
2214 {
2215   unsigned this_char;
2216 
2217   const char *p = *p_ptr;
2218   int range_start, range_end;
2219 
2220   if (p == pend)
2221     return REG_ERANGE;
2222 
2223   /* Even though the pattern is a signed `char *', we need to fetch
2224      with unsigned char *'s; if the high bit of the pattern character
2225      is set, the range endpoints will be negative if we fetch using a
2226      signed char *.
2227 
2228      We also want to fetch the endpoints without translating them; the
2229      appropriate translation is done in the bit-setting loop below.  */
2230   range_start = ((unsigned char *) p)[-2];
2231   range_end   = ((unsigned char *) p)[0];
2232 
2233   /* Have to increment the pointer into the pattern string, so the
2234      caller isn't still at the ending character.  */
2235   (*p_ptr)++;
2236 
2237   /* If the start is after the end, the range is empty.  */
2238   if (range_start > range_end)
2239     return syntax & RE_NO_EMPTY_RANGES ? REG_ERANGE : REG_NOERROR;
2240 
2241   /* Here we see why `this_char' has to be larger than an `unsigned
2242      char' -- the range is inclusive, so if `range_end' == 0xff
2243      (assuming 8-bit characters), we would otherwise go into an infinite
2244      loop, since all characters <= 0xff.  */
2245   for (this_char = range_start; this_char <= range_end; this_char++)
2246     {
2247       SET_LIST_BIT (TRANSLATE (this_char));
2248     }
2249 
2250   return REG_NOERROR;
2251 }
2252 
2253 /* Failure stack declarations and macros; both re_compile_fastmap and
2254    re_match_2 use a failure stack.  These have to be macros because of
2255    REGEX_ALLOCATE.  */
2256 
2257 
2258 /* Number of failure points for which to initially allocate space
2259    when matching.  If this number is exceeded, we allocate more
2260    space, so it is not a hard limit.  */
2261 #ifndef INIT_FAILURE_ALLOC
2262 #define INIT_FAILURE_ALLOC 5
2263 #endif
2264 
2265 /* Roughly the maximum number of failure points on the stack.  Would be
2266    exactly that if always used MAX_FAILURE_SPACE each time we failed.
2267    This is a variable only so users of regex can assign to it; we never
2268    change it ourselves.  */
2269 int re_max_failures = 2000;
2270 
2271 typedef const unsigned char *fail_stack_elt_t;
2272 
2273 typedef struct
2274 {
2275   fail_stack_elt_t *stack;
2276   unsigned size;
2277   unsigned avail;			/* Offset of next open position.  */
2278 } fail_stack_type;
2279 
2280 #define FAIL_STACK_EMPTY()     (fail_stack.avail == 0)
2281 #define FAIL_STACK_PTR_EMPTY() (fail_stack_ptr->avail == 0)
2282 #define FAIL_STACK_FULL()      (fail_stack.avail == fail_stack.size)
2283 #define FAIL_STACK_TOP()       (fail_stack.stack[fail_stack.avail])
2284 
2285 
2286 /* Initialize `fail_stack'.  Do `return -2' if the alloc fails.  */
2287 
2288 #define INIT_FAIL_STACK()						\
2289   do {									\
2290     fail_stack.stack = (fail_stack_elt_t *)				\
2291       REGEX_ALLOCATE (INIT_FAILURE_ALLOC * sizeof (fail_stack_elt_t));	\
2292 									\
2293     if (fail_stack.stack == NULL)					\
2294       return -2;							\
2295 									\
2296     fail_stack.size = INIT_FAILURE_ALLOC;				\
2297     fail_stack.avail = 0;						\
2298   } while (0)
2299 
2300 
2301 /* Double the size of FAIL_STACK, up to approximately `re_max_failures' items.
2302 
2303    Return 1 if succeeds, and 0 if either ran out of memory
2304    allocating space for it or it was already too large.
2305 
2306    REGEX_REALLOCATE requires `destination' be declared.   */
2307 
2308 #define DOUBLE_FAIL_STACK(fail_stack)					\
2309   ((fail_stack).size > re_max_failures * MAX_FAILURE_ITEMS		\
2310    ? 0									\
2311    : ((fail_stack).stack = (fail_stack_elt_t *)				\
2312         REGEX_REALLOCATE ((fail_stack).stack, 				\
2313           (fail_stack).size * sizeof (fail_stack_elt_t),		\
2314           ((fail_stack).size << 1) * sizeof (fail_stack_elt_t)),	\
2315 									\
2316       (fail_stack).stack == NULL					\
2317       ? 0								\
2318       : ((fail_stack).size <<= 1, 					\
2319          1)))
2320 
2321 
2322 /* Push PATTERN_OP on FAIL_STACK.
2323 
2324    Return 1 if was able to do so and 0 if ran out of memory allocating
2325    space to do so.  */
2326 #define PUSH_PATTERN_OP(pattern_op, fail_stack)				\
2327   ((FAIL_STACK_FULL ()							\
2328     && !DOUBLE_FAIL_STACK (fail_stack))					\
2329     ? 0									\
2330     : ((fail_stack).stack[(fail_stack).avail++] = pattern_op,		\
2331        1))
2332 
2333 /* This pushes an item onto the failure stack.  Must be a four-byte
2334    value.  Assumes the variable `fail_stack'.  Probably should only
2335    be called from within `PUSH_FAILURE_POINT'.  */
2336 #define PUSH_FAILURE_ITEM(item)						\
2337   fail_stack.stack[fail_stack.avail++] = (fail_stack_elt_t) item
2338 
2339 /* The complement operation.  Assumes `fail_stack' is nonempty.  */
2340 #define POP_FAILURE_ITEM() fail_stack.stack[--fail_stack.avail]
2341 
2342 /* Used to omit pushing failure point id's when we're not debugging.  */
2343 #ifdef DEBUG
2344 #define DEBUG_PUSH PUSH_FAILURE_ITEM
2345 #define DEBUG_POP(item_addr) *(item_addr) = POP_FAILURE_ITEM ()
2346 #else
2347 #define DEBUG_PUSH(item)
2348 #define DEBUG_POP(item_addr)
2349 #endif
2350 
2351 
2352 /* Push the information about the state we will need
2353    if we ever fail back to it.
2354 
2355    Requires variables fail_stack, regstart, regend, reg_info, and
2356    num_regs be declared.  DOUBLE_FAIL_STACK requires `destination' be
2357    declared.
2358 
2359    Does `return FAILURE_CODE' if runs out of memory.  */
2360 
2361 #define PUSH_FAILURE_POINT(pattern_place, string_place, failure_code)	\
2362   do {									\
2363     char *destination;							\
2364     /* Must be int, so when we don't save any registers, the arithmetic	\
2365        of 0 + -1 isn't done as unsigned.  */				\
2366     int this_reg;							\
2367     									\
2368     DEBUG_STATEMENT (failure_id++);					\
2369     DEBUG_STATEMENT (nfailure_points_pushed++);				\
2370     DEBUG_PRINT2 ("\nPUSH_FAILURE_POINT #%u:\n", failure_id);		\
2371     DEBUG_PRINT2 ("  Before push, next avail: %d\n", (fail_stack).avail);\
2372     DEBUG_PRINT2 ("                     size: %d\n", (fail_stack).size);\
2373 									\
2374     DEBUG_PRINT2 ("  slots needed: %d\n", NUM_FAILURE_ITEMS);		\
2375     DEBUG_PRINT2 ("     available: %d\n", REMAINING_AVAIL_SLOTS);	\
2376 									\
2377     /* Ensure we have enough space allocated for what we will push.  */	\
2378     while (REMAINING_AVAIL_SLOTS < NUM_FAILURE_ITEMS)			\
2379       {									\
2380         if (!DOUBLE_FAIL_STACK (fail_stack))			\
2381           return failure_code;						\
2382 									\
2383         DEBUG_PRINT2 ("\n  Doubled stack; size now: %d\n",		\
2384 		       (fail_stack).size);				\
2385         DEBUG_PRINT2 ("  slots available: %d\n", REMAINING_AVAIL_SLOTS);\
2386       }									\
2387 									\
2388     /* Push the info, starting with the registers.  */			\
2389     DEBUG_PRINT1 ("\n");						\
2390 									\
2391     for (this_reg = lowest_active_reg; this_reg <= highest_active_reg;	\
2392          this_reg++)							\
2393       {									\
2394 	DEBUG_PRINT2 ("  Pushing reg: %d\n", this_reg);			\
2395         DEBUG_STATEMENT (num_regs_pushed++);				\
2396 									\
2397 	DEBUG_PRINT2 ("    start: 0x%x\n", regstart[this_reg]);		\
2398         PUSH_FAILURE_ITEM (regstart[this_reg]);				\
2399                                                                         \
2400 	DEBUG_PRINT2 ("    end: 0x%x\n", regend[this_reg]);		\
2401         PUSH_FAILURE_ITEM (regend[this_reg]);				\
2402 									\
2403 	DEBUG_PRINT2 ("    info: 0x%x\n      ", reg_info[this_reg]);	\
2404         DEBUG_PRINT2 (" match_null=%d",					\
2405                       REG_MATCH_NULL_STRING_P (reg_info[this_reg]));	\
2406         DEBUG_PRINT2 (" active=%d", IS_ACTIVE (reg_info[this_reg]));	\
2407         DEBUG_PRINT2 (" matched_something=%d",				\
2408                       MATCHED_SOMETHING (reg_info[this_reg]));		\
2409         DEBUG_PRINT2 (" ever_matched=%d",				\
2410                       EVER_MATCHED_SOMETHING (reg_info[this_reg]));	\
2411 	DEBUG_PRINT1 ("\n");						\
2412         PUSH_FAILURE_ITEM (reg_info[this_reg].word);			\
2413       }									\
2414 									\
2415     DEBUG_PRINT2 ("  Pushing  low active reg: %d\n", lowest_active_reg);\
2416     PUSH_FAILURE_ITEM (lowest_active_reg);				\
2417 									\
2418     DEBUG_PRINT2 ("  Pushing high active reg: %d\n", highest_active_reg);\
2419     PUSH_FAILURE_ITEM (highest_active_reg);				\
2420 									\
2421     DEBUG_PRINT2 ("  Pushing pattern 0x%x: ", pattern_place);		\
2422     DEBUG_PRINT_COMPILED_PATTERN (bufp, pattern_place, pend);		\
2423     PUSH_FAILURE_ITEM (pattern_place);					\
2424 									\
2425     DEBUG_PRINT2 ("  Pushing string 0x%x: `", string_place);		\
2426     DEBUG_PRINT_DOUBLE_STRING (string_place, string1, size1, string2,   \
2427 				 size2);				\
2428     DEBUG_PRINT1 ("'\n");						\
2429     PUSH_FAILURE_ITEM (string_place);					\
2430 									\
2431     DEBUG_PRINT2 ("  Pushing failure id: %u\n", failure_id);		\
2432     DEBUG_PUSH (failure_id);						\
2433   } while (0)
2434 
2435 /* This is the number of items that are pushed and popped on the stack
2436    for each register.  */
2437 #define NUM_REG_ITEMS  3
2438 
2439 /* Individual items aside from the registers.  */
2440 #ifdef DEBUG
2441 #define NUM_NONREG_ITEMS 5 /* Includes failure point id.  */
2442 #else
2443 #define NUM_NONREG_ITEMS 4
2444 #endif
2445 
2446 /* We push at most this many items on the stack.  */
2447 #define MAX_FAILURE_ITEMS ((num_regs - 1) * NUM_REG_ITEMS + NUM_NONREG_ITEMS)
2448 
2449 /* We actually push this many items.  */
2450 #define NUM_FAILURE_ITEMS						\
2451   ((highest_active_reg - lowest_active_reg + 1) * NUM_REG_ITEMS 	\
2452     + NUM_NONREG_ITEMS)
2453 
2454 /* How many items can still be added to the stack without overflowing it.  */
2455 #define REMAINING_AVAIL_SLOTS ((fail_stack).size - (fail_stack).avail)
2456 
2457 
2458 /* Pops what PUSH_FAIL_STACK pushes.
2459 
2460    We restore into the parameters, all of which should be lvalues:
2461      STR -- the saved data position.
2462      PAT -- the saved pattern position.
2463      LOW_REG, HIGH_REG -- the highest and lowest active registers.
2464      REGSTART, REGEND -- arrays of string positions.
2465      REG_INFO -- array of information about each subexpression.
2466 
2467    Also assumes the variables `fail_stack' and (if debugging), `bufp',
2468    `pend', `string1', `size1', `string2', and `size2'.  */
2469 
2470 #define POP_FAILURE_POINT(str, pat, low_reg, high_reg, regstart, regend, reg_info)\
2471 {									\
2472   DEBUG_STATEMENT (fail_stack_elt_t failure_id;)			\
2473   int this_reg;								\
2474   const unsigned char *string_temp;					\
2475 									\
2476   assert (!FAIL_STACK_EMPTY ());					\
2477 									\
2478   /* Remove failure points and point to how many regs pushed.  */	\
2479   DEBUG_PRINT1 ("POP_FAILURE_POINT:\n");				\
2480   DEBUG_PRINT2 ("  Before pop, next avail: %d\n", fail_stack.avail);	\
2481   DEBUG_PRINT2 ("                    size: %d\n", fail_stack.size);	\
2482 									\
2483   assert (fail_stack.avail >= NUM_NONREG_ITEMS);			\
2484 									\
2485   DEBUG_POP (&failure_id);						\
2486   DEBUG_PRINT2 ("  Popping failure id: %u\n", failure_id);		\
2487 									\
2488   /* If the saved string location is NULL, it came from an		\
2489      on_failure_keep_string_jump opcode, and we want to throw away the	\
2490      saved NULL, thus retaining our current position in the string.  */	\
2491   string_temp = POP_FAILURE_ITEM ();					\
2492   if (string_temp != NULL)						\
2493     str = (const char *) string_temp;					\
2494 									\
2495   DEBUG_PRINT2 ("  Popping string 0x%x: `", str);			\
2496   DEBUG_PRINT_DOUBLE_STRING (str, string1, size1, string2, size2);	\
2497   DEBUG_PRINT1 ("'\n");							\
2498 									\
2499   pat = (unsigned char *) POP_FAILURE_ITEM ();				\
2500   DEBUG_PRINT2 ("  Popping pattern 0x%x: ", pat);			\
2501   DEBUG_PRINT_COMPILED_PATTERN (bufp, pat, pend);			\
2502 									\
2503   /* Restore register info.  */						\
2504   high_reg = (unsigned) POP_FAILURE_ITEM ();				\
2505   DEBUG_PRINT2 ("  Popping high active reg: %d\n", high_reg);		\
2506 									\
2507   low_reg = (unsigned) POP_FAILURE_ITEM ();				\
2508   DEBUG_PRINT2 ("  Popping  low active reg: %d\n", low_reg);		\
2509 									\
2510   for (this_reg = high_reg; this_reg >= low_reg; this_reg--)		\
2511     {									\
2512       DEBUG_PRINT2 ("    Popping reg: %d\n", this_reg);			\
2513 									\
2514       reg_info[this_reg].word = POP_FAILURE_ITEM ();			\
2515       DEBUG_PRINT2 ("      info: 0x%x\n", reg_info[this_reg]);		\
2516 									\
2517       regend[this_reg] = (const char *) POP_FAILURE_ITEM ();		\
2518       DEBUG_PRINT2 ("      end: 0x%x\n", regend[this_reg]);		\
2519 									\
2520       regstart[this_reg] = (const char *) POP_FAILURE_ITEM ();		\
2521       DEBUG_PRINT2 ("      start: 0x%x\n", regstart[this_reg]);		\
2522     }									\
2523 									\
2524   DEBUG_STATEMENT (nfailure_points_popped++);				\
2525 } /* POP_FAILURE_POINT */
2526 
2527 /* re_compile_fastmap computes a ``fastmap'' for the compiled pattern in
2528    BUFP.  A fastmap records which of the (1 << BYTEWIDTH) possible
2529    characters can start a string that matches the pattern.  This fastmap
2530    is used by re_search to skip quickly over impossible starting points.
2531 
2532    The caller must supply the address of a (1 << BYTEWIDTH)-byte data
2533    area as BUFP->fastmap.
2534 
2535    We set the `fastmap', `fastmap_accurate', and `can_be_null' fields in
2536    the pattern buffer.
2537 
2538    Returns 0 if we succeed, -2 if an internal error.   */
2539 
2540 int
2541 re_compile_fastmap (bufp)
2542      struct re_pattern_buffer *bufp;
2543 {
2544   int j, k;
2545   fail_stack_type fail_stack;
2546 #ifndef REGEX_MALLOC
2547   char *destination;
2548 #endif
2549   /* We don't push any register information onto the failure stack.  */
2550   unsigned num_regs = 0;
2551 
2552   register char *fastmap = bufp->fastmap;
2553   unsigned char *pattern = bufp->buffer;
2554   unsigned long size = bufp->used;
2555   const unsigned char *p = pattern;
2556   register unsigned char *pend = pattern + size;
2557 
2558   /* Assume that each path through the pattern can be null until
2559      proven otherwise.  We set this false at the bottom of switch
2560      statement, to which we get only if a particular path doesn't
2561      match the empty string.  */
2562   boolean path_can_be_null = true;
2563 
2564   /* We aren't doing a `succeed_n' to begin with.  */
2565   boolean succeed_n_p = false;
2566 
2567   assert (fastmap != NULL && p != NULL);
2568 
2569   INIT_FAIL_STACK ();
2570   bzero (fastmap, 1 << BYTEWIDTH);  /* Assume nothing's valid.  */
2571   bufp->fastmap_accurate = 1;	    /* It will be when we're done.  */
2572   bufp->can_be_null = 0;
2573 
2574   while (p != pend || !FAIL_STACK_EMPTY ())
2575     {
2576       if (p == pend)
2577         {
2578           bufp->can_be_null |= path_can_be_null;
2579 
2580           /* Reset for next path.  */
2581           path_can_be_null = true;
2582 
2583           p = fail_stack.stack[--fail_stack.avail];
2584 	}
2585 
2586       /* We should never be about to go beyond the end of the pattern.  */
2587       assert (p < pend);
2588 
2589 #ifdef SWITCH_ENUM_BUG
2590       switch ((int) ((re_opcode_t) *p++))
2591 #else
2592       switch ((re_opcode_t) *p++)
2593 #endif
2594 	{
2595 
2596         /* I guess the idea here is to simply not bother with a fastmap
2597            if a backreference is used, since it's too hard to figure out
2598            the fastmap for the corresponding group.  Setting
2599            `can_be_null' stops `re_search_2' from using the fastmap, so
2600            that is all we do.  */
2601 	case duplicate:
2602 	  bufp->can_be_null = 1;
2603           return 0;
2604 
2605 
2606       /* Following are the cases which match a character.  These end
2607          with `break'.  */
2608 
2609 	case exactn:
2610           fastmap[p[1]] = 1;
2611 	  break;
2612 
2613 
2614         case charset:
2615           for (j = *p++ * BYTEWIDTH - 1; j >= 0; j--)
2616 	    if (p[j / BYTEWIDTH] & (1 << (j % BYTEWIDTH)))
2617               fastmap[j] = 1;
2618 	  break;
2619 
2620 
2621 	case charset_not:
2622 	  /* Chars beyond end of map must be allowed.  */
2623 	  for (j = *p * BYTEWIDTH; j < (1 << BYTEWIDTH); j++)
2624             fastmap[j] = 1;
2625 
2626 	  for (j = *p++ * BYTEWIDTH - 1; j >= 0; j--)
2627 	    if (!(p[j / BYTEWIDTH] & (1 << (j % BYTEWIDTH))))
2628               fastmap[j] = 1;
2629           break;
2630 
2631 
2632 	case wordchar:
2633 	  for (j = 0; j < (1 << BYTEWIDTH); j++)
2634 	    if (SYNTAX (j) == Sword)
2635 	      fastmap[j] = 1;
2636 	  break;
2637 
2638 
2639 	case notwordchar:
2640 	  for (j = 0; j < (1 << BYTEWIDTH); j++)
2641 	    if (SYNTAX (j) != Sword)
2642 	      fastmap[j] = 1;
2643 	  break;
2644 
2645 
2646         case anychar:
2647           /* `.' matches anything ...  */
2648 	  for (j = 0; j < (1 << BYTEWIDTH); j++)
2649             fastmap[j] = 1;
2650 
2651           /* ... except perhaps newline.  */
2652           if (!(bufp->syntax & RE_DOT_NEWLINE))
2653             fastmap['\n'] = 0;
2654 
2655           /* Return if we have already set `can_be_null'; if we have,
2656              then the fastmap is irrelevant.  Something's wrong here.  */
2657 	  else if (bufp->can_be_null)
2658 	    return 0;
2659 
2660           /* Otherwise, have to check alternative paths.  */
2661 	  break;
2662 
2663 
2664 #ifdef emacs
2665         case syntaxspec:
2666 	  k = *p++;
2667 	  for (j = 0; j < (1 << BYTEWIDTH); j++)
2668 	    if (SYNTAX (j) == (enum syntaxcode) k)
2669 	      fastmap[j] = 1;
2670 	  break;
2671 
2672 
2673 	case notsyntaxspec:
2674 	  k = *p++;
2675 	  for (j = 0; j < (1 << BYTEWIDTH); j++)
2676 	    if (SYNTAX (j) != (enum syntaxcode) k)
2677 	      fastmap[j] = 1;
2678 	  break;
2679 
2680 
2681       /* All cases after this match the empty string.  These end with
2682          `continue'.  */
2683 
2684 
2685 	case before_dot:
2686 	case at_dot:
2687 	case after_dot:
2688           continue;
2689 #endif /* not emacs */
2690 
2691 
2692         case no_op:
2693         case begline:
2694         case endline:
2695 	case begbuf:
2696 	case endbuf:
2697 	case wordbound:
2698 	case notwordbound:
2699 	case wordbeg:
2700 	case wordend:
2701         case push_dummy_failure:
2702           continue;
2703 
2704 
2705 	case jump_n:
2706         case pop_failure_jump:
2707 	case maybe_pop_jump:
2708 	case jump:
2709         case jump_past_alt:
2710 	case dummy_failure_jump:
2711           EXTRACT_NUMBER_AND_INCR (j, p);
2712 	  p += j;
2713 	  if (j > 0)
2714 	    continue;
2715 
2716           /* Jump backward implies we just went through the body of a
2717              loop and matched nothing.  Opcode jumped to should be
2718              `on_failure_jump' or `succeed_n'.  Just treat it like an
2719              ordinary jump.  For a * loop, it has pushed its failure
2720              point already; if so, discard that as redundant.  */
2721           if ((re_opcode_t) *p != on_failure_jump
2722 	      && (re_opcode_t) *p != succeed_n)
2723 	    continue;
2724 
2725           p++;
2726           EXTRACT_NUMBER_AND_INCR (j, p);
2727           p += j;
2728 
2729           /* If what's on the stack is where we are now, pop it.  */
2730           if (!FAIL_STACK_EMPTY ()
2731 	      && fail_stack.stack[fail_stack.avail - 1] == p)
2732             fail_stack.avail--;
2733 
2734           continue;
2735 
2736 
2737         case on_failure_jump:
2738         case on_failure_keep_string_jump:
2739 	handle_on_failure_jump:
2740           EXTRACT_NUMBER_AND_INCR (j, p);
2741 
2742           /* For some patterns, e.g., `(a?)?', `p+j' here points to the
2743              end of the pattern.  We don't want to push such a point,
2744              since when we restore it above, entering the switch will
2745              increment `p' past the end of the pattern.  We don't need
2746              to push such a point since we obviously won't find any more
2747              fastmap entries beyond `pend'.  Such a pattern can match
2748              the null string, though.  */
2749           if (p + j < pend)
2750             {
2751               if (!PUSH_PATTERN_OP (p + j, fail_stack))
2752                 return -2;
2753             }
2754           else
2755             bufp->can_be_null = 1;
2756 
2757           if (succeed_n_p)
2758             {
2759               EXTRACT_NUMBER_AND_INCR (k, p);	/* Skip the n.  */
2760               succeed_n_p = false;
2761 	    }
2762 
2763           continue;
2764 
2765 
2766 	case succeed_n:
2767           /* Get to the number of times to succeed.  */
2768           p += 2;
2769 
2770           /* Increment p past the n for when k != 0.  */
2771           EXTRACT_NUMBER_AND_INCR (k, p);
2772           if (k == 0)
2773 	    {
2774               p -= 4;
2775   	      succeed_n_p = true;  /* Spaghetti code alert.  */
2776               goto handle_on_failure_jump;
2777             }
2778           continue;
2779 
2780 
2781 	case set_number_at:
2782           p += 4;
2783           continue;
2784 
2785 
2786 	case start_memory:
2787         case stop_memory:
2788 	  p += 2;
2789 	  continue;
2790 
2791 
2792 	default:
2793           abort (); /* We have listed all the cases.  */
2794         } /* switch *p++ */
2795 
2796       /* Getting here means we have found the possible starting
2797          characters for one path of the pattern -- and that the empty
2798          string does not match.  We need not follow this path further.
2799          Instead, look at the next alternative (remembered on the
2800          stack), or quit if no more.  The test at the top of the loop
2801          does these things.  */
2802       path_can_be_null = false;
2803       p = pend;
2804     } /* while p */
2805 
2806   /* Set `can_be_null' for the last path (also the first path, if the
2807      pattern is empty).  */
2808   bufp->can_be_null |= path_can_be_null;
2809   return 0;
2810 } /* re_compile_fastmap */
2811 
2812 /* Set REGS to hold NUM_REGS registers, storing them in STARTS and
2813    ENDS.  Subsequent matches using PATTERN_BUFFER and REGS will use
2814    this memory for recording register information.  STARTS and ENDS
2815    must be allocated using the malloc library routine, and must each
2816    be at least NUM_REGS * sizeof (regoff_t) bytes long.
2817 
2818    If NUM_REGS == 0, then subsequent matches should allocate their own
2819    register data.
2820 
2821    Unless this function is called, the first search or match using
2822    PATTERN_BUFFER will allocate its own register data, without
2823    freeing the old data.  */
2824 
2825 void
2826 re_set_registers (bufp, regs, num_regs, starts, ends)
2827     struct re_pattern_buffer *bufp;
2828     struct re_registers *regs;
2829     unsigned num_regs;
2830     regoff_t *starts, *ends;
2831 {
2832   if (num_regs)
2833     {
2834       bufp->regs_allocated = REGS_REALLOCATE;
2835       regs->num_regs = num_regs;
2836       regs->start = starts;
2837       regs->end = ends;
2838     }
2839   else
2840     {
2841       bufp->regs_allocated = REGS_UNALLOCATED;
2842       regs->num_regs = 0;
2843       regs->start = regs->end = (regoff_t *) 0;
2844     }
2845 }
2846 
2847 /* Searching routines.  */
2848 
2849 /* Like re_search_2, below, but only one string is specified, and
2850    doesn't let you say where to stop matching. */
2851 
2852 int
2853 re_search (bufp, string, size, startpos, range, regs)
2854      struct re_pattern_buffer *bufp;
2855      const char *string;
2856      int size, startpos, range;
2857      struct re_registers *regs;
2858 {
2859   return re_search_2 (bufp, NULL, 0, string, size, startpos, range,
2860 		      regs, size);
2861 }
2862 
2863 
2864 /* Using the compiled pattern in BUFP->buffer, first tries to match the
2865    virtual concatenation of STRING1 and STRING2, starting first at index
2866    STARTPOS, then at STARTPOS + 1, and so on.
2867 
2868    STRING1 and STRING2 have length SIZE1 and SIZE2, respectively.
2869 
2870    RANGE is how far to scan while trying to match.  RANGE = 0 means try
2871    only at STARTPOS; in general, the last start tried is STARTPOS +
2872    RANGE.
2873 
2874    In REGS, return the indices of the virtual concatenation of STRING1
2875    and STRING2 that matched the entire BUFP->buffer and its contained
2876    subexpressions.
2877 
2878    Do not consider matching one past the index STOP in the virtual
2879    concatenation of STRING1 and STRING2.
2880 
2881    We return either the position in the strings at which the match was
2882    found, -1 if no match, or -2 if error (such as failure
2883    stack overflow).  */
2884 
2885 int
2886 re_search_2 (bufp, string1, size1, string2, size2, startpos, range, regs, stop)
2887      struct re_pattern_buffer *bufp;
2888      const char *string1, *string2;
2889      int size1, size2;
2890      int startpos;
2891      int range;
2892      struct re_registers *regs;
2893      int stop;
2894 {
2895   int val;
2896   register char *fastmap = bufp->fastmap;
2897   register char *translate = bufp->translate;
2898   int total_size = size1 + size2;
2899   int endpos = startpos + range;
2900 
2901   /* Check for out-of-range STARTPOS.  */
2902   if (startpos < 0 || startpos > total_size)
2903     return -1;
2904 
2905   /* Fix up RANGE if it might eventually take us outside
2906      the virtual concatenation of STRING1 and STRING2.  */
2907   if (endpos < -1)
2908     range = -1 - startpos;
2909   else if (endpos > total_size)
2910     range = total_size - startpos;
2911 
2912   /* If the search isn't to be a backwards one, don't waste time in a
2913      search for a pattern that must be anchored.  */
2914   if (bufp->used > 0 && (re_opcode_t) bufp->buffer[0] == begbuf && range > 0)
2915     {
2916       if (startpos > 0)
2917 	return -1;
2918       else
2919 	range = 1;
2920     }
2921 
2922   /* Update the fastmap now if not correct already.  */
2923   if (fastmap && !bufp->fastmap_accurate)
2924     if (re_compile_fastmap (bufp) == -2)
2925       return -2;
2926 
2927   /* Loop through the string, looking for a place to start matching.  */
2928   for (;;)
2929     {
2930       /* If a fastmap is supplied, skip quickly over characters that
2931          cannot be the start of a match.  If the pattern can match the
2932          null string, however, we don't need to skip characters; we want
2933          the first null string.  */
2934       if (fastmap && startpos < total_size && !bufp->can_be_null)
2935 	{
2936 	  if (range > 0)	/* Searching forwards.  */
2937 	    {
2938 	      register const char *d;
2939 	      register int lim = 0;
2940 	      int irange = range;
2941 
2942               if (startpos < size1 && startpos + range >= size1)
2943                 lim = range - (size1 - startpos);
2944 
2945 	      d = (startpos >= size1 ? string2 - size1 : string1) + startpos;
2946 
2947               /* Written out as an if-else to avoid testing `translate'
2948                  inside the loop.  */
2949 	      if (translate)
2950                 while (range > lim
2951                        && !fastmap[(unsigned char)
2952 				   translate[(unsigned char) *d++]])
2953                   range--;
2954 	      else
2955                 while (range > lim && !fastmap[(unsigned char) *d++])
2956                   range--;
2957 
2958 	      startpos += irange - range;
2959 	    }
2960 	  else				/* Searching backwards.  */
2961 	    {
2962 	      register char c = (size1 == 0 || startpos >= size1
2963                                  ? string2[startpos - size1]
2964                                  : string1[startpos]);
2965 
2966 	      if (!fastmap[(unsigned char) TRANSLATE (c)])
2967 		goto advance;
2968 	    }
2969 	}
2970 
2971       /* If can't match the null string, and that's all we have left, fail.  */
2972       if (range >= 0 && startpos == total_size && fastmap
2973           && !bufp->can_be_null)
2974 	return -1;
2975 
2976       val = re_match_2 (bufp, string1, size1, string2, size2,
2977 	                startpos, regs, stop);
2978       if (val >= 0)
2979 	return startpos;
2980 
2981       if (val == -2)
2982 	return -2;
2983 
2984     advance:
2985       if (!range)
2986         break;
2987       else if (range > 0)
2988         {
2989           range--;
2990           startpos++;
2991         }
2992       else
2993         {
2994           range++;
2995           startpos--;
2996         }
2997     }
2998   return -1;
2999 } /* re_search_2 */
3000 
3001 /* Declarations and macros for re_match_2.  */
3002 
3003 static int bcmp_translate ();
3004 static boolean alt_match_null_string_p (),
3005                common_op_match_null_string_p (),
3006                group_match_null_string_p ();
3007 
3008 /* Structure for per-register (a.k.a. per-group) information.
3009    This must not be longer than one word, because we push this value
3010    onto the failure stack.  Other register information, such as the
3011    starting and ending positions (which are addresses), and the list of
3012    inner groups (which is a bits list) are maintained in separate
3013    variables.
3014 
3015    We are making a (strictly speaking) nonportable assumption here: that
3016    the compiler will pack our bit fields into something that fits into
3017    the type of `word', i.e., is something that fits into one item on the
3018    failure stack.  */
3019 typedef union
3020 {
3021   fail_stack_elt_t word;
3022   struct
3023   {
3024       /* This field is one if this group can match the empty string,
3025          zero if not.  If not yet determined,  `MATCH_NULL_UNSET_VALUE'.  */
3026 #define MATCH_NULL_UNSET_VALUE 3
3027     unsigned match_null_string_p : 2;
3028     unsigned is_active : 1;
3029     unsigned matched_something : 1;
3030     unsigned ever_matched_something : 1;
3031   } bits;
3032 } register_info_type;
3033 
3034 #define REG_MATCH_NULL_STRING_P(R)  ((R).bits.match_null_string_p)
3035 #define IS_ACTIVE(R)  ((R).bits.is_active)
3036 #define MATCHED_SOMETHING(R)  ((R).bits.matched_something)
3037 #define EVER_MATCHED_SOMETHING(R)  ((R).bits.ever_matched_something)
3038 
3039 
3040 /* Call this when have matched a real character; it sets `matched' flags
3041    for the subexpressions which we are currently inside.  Also records
3042    that those subexprs have matched.  */
3043 #define SET_REGS_MATCHED()						\
3044   do									\
3045     {									\
3046       unsigned r;							\
3047       for (r = lowest_active_reg; r <= highest_active_reg; r++)		\
3048         {								\
3049           MATCHED_SOMETHING (reg_info[r])				\
3050             = EVER_MATCHED_SOMETHING (reg_info[r])			\
3051             = 1;							\
3052         }								\
3053     }									\
3054   while (0)
3055 
3056 
3057 /* This converts PTR, a pointer into one of the search strings `string1'
3058    and `string2' into an offset from the beginning of that string.  */
3059 #define POINTER_TO_OFFSET(ptr)						\
3060   (FIRST_STRING_P (ptr) ? (ptr) - string1 : (ptr) - string2 + size1)
3061 
3062 /* Registers are set to a sentinel when they haven't yet matched.  */
3063 #define REG_UNSET_VALUE ((char *) -1)
3064 #define REG_UNSET(e) ((e) == REG_UNSET_VALUE)
3065 
3066 
3067 /* Macros for dealing with the split strings in re_match_2.  */
3068 
3069 #define MATCHING_IN_FIRST_STRING  (dend == end_match_1)
3070 
3071 /* Call before fetching a character with *d.  This switches over to
3072    string2 if necessary.  */
3073 #define PREFETCH()							\
3074   while (d == dend)						    	\
3075     {									\
3076       /* End of string2 => fail.  */					\
3077       if (dend == end_match_2) 						\
3078         goto fail;							\
3079       /* End of string1 => advance to string2.  */ 			\
3080       d = string2;						        \
3081       dend = end_match_2;						\
3082     }
3083 
3084 
3085 /* Test if at very beginning or at very end of the virtual concatenation
3086    of `string1' and `string2'.  If only one string, it's `string2'.  */
3087 #define AT_STRINGS_BEG(d) ((d) == (size1 ? string1 : string2) || !size2)
3088 #define AT_STRINGS_END(d) ((d) == end2)
3089 
3090 
3091 /* Test if D points to a character which is word-constituent.  We have
3092    two special cases to check for: if past the end of string1, look at
3093    the first character in string2; and if before the beginning of
3094    string2, look at the last character in string1.  */
3095 #define WORDCHAR_P(d)							\
3096   (SYNTAX ((d) == end1 ? *string2					\
3097            : (d) == string2 - 1 ? *(end1 - 1) : *(d))			\
3098    == Sword)
3099 
3100 /* Test if the character before D and the one at D differ with respect
3101    to being word-constituent.  */
3102 #define AT_WORD_BOUNDARY(d)						\
3103   (AT_STRINGS_BEG (d) || AT_STRINGS_END (d)				\
3104    || WORDCHAR_P (d - 1) != WORDCHAR_P (d))
3105 
3106 
3107 /* Free everything we malloc.  */
3108 #ifdef REGEX_MALLOC
3109 #define FREE_VAR(var) if (var) free (var); var = NULL
3110 #define FREE_VARIABLES()						\
3111   do {									\
3112     FREE_VAR (fail_stack.stack);					\
3113     FREE_VAR (regstart);						\
3114     FREE_VAR (regend);							\
3115     FREE_VAR (old_regstart);						\
3116     FREE_VAR (old_regend);						\
3117     FREE_VAR (best_regstart);						\
3118     FREE_VAR (best_regend);						\
3119     FREE_VAR (reg_info);						\
3120     FREE_VAR (reg_dummy);						\
3121     FREE_VAR (reg_info_dummy);						\
3122   } while (0)
3123 #else /* not REGEX_MALLOC */
3124 /* Some MIPS systems (at least) want this to free alloca'd storage.  */
3125 #define FREE_VARIABLES() alloca (0)
3126 #endif /* not REGEX_MALLOC */
3127 
3128 
3129 /* These values must meet several constraints.  They must not be valid
3130    register values; since we have a limit of 255 registers (because
3131    we use only one byte in the pattern for the register number), we can
3132    use numbers larger than 255.  They must differ by 1, because of
3133    NUM_FAILURE_ITEMS above.  And the value for the lowest register must
3134    be larger than the value for the highest register, so we do not try
3135    to actually save any registers when none are active.  */
3136 #define NO_HIGHEST_ACTIVE_REG (1 << BYTEWIDTH)
3137 #define NO_LOWEST_ACTIVE_REG (NO_HIGHEST_ACTIVE_REG + 1)
3138 
3139 /* Matching routines.  */
3140 
3141 #ifndef emacs   /* Emacs never uses this.  */
3142 /* re_match is like re_match_2 except it takes only a single string.  */
3143 
3144 int
3145 re_match (bufp, string, size, pos, regs)
3146      struct re_pattern_buffer *bufp;
3147      const char *string;
3148      int size, pos;
3149      struct re_registers *regs;
3150  {
3151   return re_match_2 (bufp, NULL, 0, string, size, pos, regs, size);
3152 }
3153 #endif /* not emacs */
3154 
3155 
3156 /* re_match_2 matches the compiled pattern in BUFP against the
3157    the (virtual) concatenation of STRING1 and STRING2 (of length SIZE1
3158    and SIZE2, respectively).  We start matching at POS, and stop
3159    matching at STOP.
3160 
3161    If REGS is non-null and the `no_sub' field of BUFP is nonzero, we
3162    store offsets for the substring each group matched in REGS.  See the
3163    documentation for exactly how many groups we fill.
3164 
3165    We return -1 if no match, -2 if an internal error (such as the
3166    failure stack overflowing).  Otherwise, we return the length of the
3167    matched substring.  */
3168 
3169 int
3170 re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop)
3171      struct re_pattern_buffer *bufp;
3172      const char *string1, *string2;
3173      int size1, size2;
3174      int pos;
3175      struct re_registers *regs;
3176      int stop;
3177 {
3178   /* General temporaries.  */
3179   int mcnt;
3180   unsigned char *p1;
3181 
3182   /* Just past the end of the corresponding string.  */
3183   const char *end1, *end2;
3184 
3185   /* Pointers into string1 and string2, just past the last characters in
3186      each to consider matching.  */
3187   const char *end_match_1, *end_match_2;
3188 
3189   /* Where we are in the data, and the end of the current string.  */
3190   const char *d, *dend;
3191 
3192   /* Where we are in the pattern, and the end of the pattern.  */
3193   unsigned char *p = bufp->buffer;
3194   register unsigned char *pend = p + bufp->used;
3195 
3196   /* We use this to map every character in the string.  */
3197   char *translate = bufp->translate;
3198 
3199   /* Failure point stack.  Each place that can handle a failure further
3200      down the line pushes a failure point on this stack.  It consists of
3201      restart, regend, and reg_info for all registers corresponding to
3202      the subexpressions we're currently inside, plus the number of such
3203      registers, and, finally, two char *'s.  The first char * is where
3204      to resume scanning the pattern; the second one is where to resume
3205      scanning the strings.  If the latter is zero, the failure point is
3206      a ``dummy''; if a failure happens and the failure point is a dummy,
3207      it gets discarded and the next next one is tried.  */
3208   fail_stack_type fail_stack;
3209 #ifdef DEBUG
3210   static unsigned failure_id = 0;
3211   unsigned nfailure_points_pushed = 0, nfailure_points_popped = 0;
3212 #endif
3213 
3214   /* We fill all the registers internally, independent of what we
3215      return, for use in backreferences.  The number here includes
3216      an element for register zero.  */
3217   unsigned num_regs = bufp->re_nsub + 1;
3218 
3219   /* The currently active registers.  */
3220   unsigned lowest_active_reg = NO_LOWEST_ACTIVE_REG;
3221   unsigned highest_active_reg = NO_HIGHEST_ACTIVE_REG;
3222 
3223   /* Information on the contents of registers. These are pointers into
3224      the input strings; they record just what was matched (on this
3225      attempt) by a subexpression part of the pattern, that is, the
3226      regnum-th regstart pointer points to where in the pattern we began
3227      matching and the regnum-th regend points to right after where we
3228      stopped matching the regnum-th subexpression.  (The zeroth register
3229      keeps track of what the whole pattern matches.)  */
3230   const char **regstart, **regend;
3231 
3232   /* If a group that's operated upon by a repetition operator fails to
3233      match anything, then the register for its start will need to be
3234      restored because it will have been set to wherever in the string we
3235      are when we last see its open-group operator.  Similarly for a
3236      register's end.  */
3237   const char **old_regstart, **old_regend;
3238 
3239   /* The is_active field of reg_info helps us keep track of which (possibly
3240      nested) subexpressions we are currently in. The matched_something
3241      field of reg_info[reg_num] helps us tell whether or not we have
3242      matched any of the pattern so far this time through the reg_num-th
3243      subexpression.  These two fields get reset each time through any
3244      loop their register is in.  */
3245   register_info_type *reg_info;
3246 
3247   /* The following record the register info as found in the above
3248      variables when we find a match better than any we've seen before.
3249      This happens as we backtrack through the failure points, which in
3250      turn happens only if we have not yet matched the entire string. */
3251   unsigned best_regs_set = false;
3252   const char **best_regstart, **best_regend;
3253 
3254   /* Logically, this is `best_regend[0]'.  But we don't want to have to
3255      allocate space for that if we're not allocating space for anything
3256      else (see below).  Also, we never need info about register 0 for
3257      any of the other register vectors, and it seems rather a kludge to
3258      treat `best_regend' differently than the rest.  So we keep track of
3259      the end of the best match so far in a separate variable.  We
3260      initialize this to NULL so that when we backtrack the first time
3261      and need to test it, it's not garbage.  */
3262   const char *match_end = NULL;
3263 
3264   /* Used when we pop values we don't care about.  */
3265   const char **reg_dummy;
3266   register_info_type *reg_info_dummy;
3267 
3268 #ifdef DEBUG
3269   /* Counts the total number of registers pushed.  */
3270   unsigned num_regs_pushed = 0;
3271 #endif
3272 
3273   DEBUG_PRINT1 ("\n\nEntering re_match_2.\n");
3274 
3275   INIT_FAIL_STACK ();
3276 
3277   /* Do not bother to initialize all the register variables if there are
3278      no groups in the pattern, as it takes a fair amount of time.  If
3279      there are groups, we include space for register 0 (the whole
3280      pattern), even though we never use it, since it simplifies the
3281      array indexing.  We should fix this.  */
3282   if (bufp->re_nsub)
3283     {
3284       regstart = REGEX_TALLOC (num_regs, const char *);
3285       regend = REGEX_TALLOC (num_regs, const char *);
3286       old_regstart = REGEX_TALLOC (num_regs, const char *);
3287       old_regend = REGEX_TALLOC (num_regs, const char *);
3288       best_regstart = REGEX_TALLOC (num_regs, const char *);
3289       best_regend = REGEX_TALLOC (num_regs, const char *);
3290       reg_info = REGEX_TALLOC (num_regs, register_info_type);
3291       reg_dummy = REGEX_TALLOC (num_regs, const char *);
3292       reg_info_dummy = REGEX_TALLOC (num_regs, register_info_type);
3293 
3294       if (!(regstart && regend && old_regstart && old_regend && reg_info
3295             && best_regstart && best_regend && reg_dummy && reg_info_dummy))
3296         {
3297           FREE_VARIABLES ();
3298           return -2;
3299         }
3300     }
3301 #ifdef REGEX_MALLOC
3302   else
3303     {
3304       /* We must initialize all our variables to NULL, so that
3305          `FREE_VARIABLES' doesn't try to free them.  */
3306       regstart = regend = old_regstart = old_regend = best_regstart
3307         = best_regend = reg_dummy = NULL;
3308       reg_info = reg_info_dummy = (register_info_type *) NULL;
3309     }
3310 #endif /* REGEX_MALLOC */
3311 
3312   /* The starting position is bogus.  */
3313   if (pos < 0 || pos > size1 + size2)
3314     {
3315       FREE_VARIABLES ();
3316       return -1;
3317     }
3318 
3319   /* Initialize subexpression text positions to -1 to mark ones that no
3320      start_memory/stop_memory has been seen for. Also initialize the
3321      register information struct.  */
3322   for (mcnt = 1; mcnt < num_regs; mcnt++)
3323     {
3324       regstart[mcnt] = regend[mcnt]
3325         = old_regstart[mcnt] = old_regend[mcnt] = REG_UNSET_VALUE;
3326 
3327       REG_MATCH_NULL_STRING_P (reg_info[mcnt]) = MATCH_NULL_UNSET_VALUE;
3328       IS_ACTIVE (reg_info[mcnt]) = 0;
3329       MATCHED_SOMETHING (reg_info[mcnt]) = 0;
3330       EVER_MATCHED_SOMETHING (reg_info[mcnt]) = 0;
3331     }
3332 
3333   /* We move `string1' into `string2' if the latter's empty -- but not if
3334      `string1' is null.  */
3335   if (size2 == 0 && string1 != NULL)
3336     {
3337       string2 = string1;
3338       size2 = size1;
3339       string1 = 0;
3340       size1 = 0;
3341     }
3342   end1 = string1 + size1;
3343   end2 = string2 + size2;
3344 
3345   /* Compute where to stop matching, within the two strings.  */
3346   if (stop <= size1)
3347     {
3348       end_match_1 = string1 + stop;
3349       end_match_2 = string2;
3350     }
3351   else
3352     {
3353       end_match_1 = end1;
3354       end_match_2 = string2 + stop - size1;
3355     }
3356 
3357   /* `p' scans through the pattern as `d' scans through the data.
3358      `dend' is the end of the input string that `d' points within.  `d'
3359      is advanced into the following input string whenever necessary, but
3360      this happens before fetching; therefore, at the beginning of the
3361      loop, `d' can be pointing at the end of a string, but it cannot
3362      equal `string2'.  */
3363   if (size1 > 0 && pos <= size1)
3364     {
3365       d = string1 + pos;
3366       dend = end_match_1;
3367     }
3368   else
3369     {
3370       d = string2 + pos - size1;
3371       dend = end_match_2;
3372     }
3373 
3374   DEBUG_PRINT1 ("The compiled pattern is: ");
3375   DEBUG_PRINT_COMPILED_PATTERN (bufp, p, pend);
3376   DEBUG_PRINT1 ("The string to match is: `");
3377   DEBUG_PRINT_DOUBLE_STRING (d, string1, size1, string2, size2);
3378   DEBUG_PRINT1 ("'\n");
3379 
3380   /* This loops over pattern commands.  It exits by returning from the
3381      function if the match is complete, or it drops through if the match
3382      fails at this starting point in the input data.  */
3383   for (;;)
3384     {
3385       DEBUG_PRINT2 ("\n0x%x: ", p);
3386 
3387       if (p == pend)
3388 	{ /* End of pattern means we might have succeeded.  */
3389           DEBUG_PRINT1 ("end of pattern ... ");
3390 
3391 	  /* If we haven't matched the entire string, and we want the
3392              longest match, try backtracking.  */
3393           if (d != end_match_2)
3394 	    {
3395               DEBUG_PRINT1 ("backtracking.\n");
3396 
3397               if (!FAIL_STACK_EMPTY ())
3398                 { /* More failure points to try.  */
3399                   boolean same_str_p = (FIRST_STRING_P (match_end)
3400 	        	                == MATCHING_IN_FIRST_STRING);
3401 
3402                   /* If exceeds best match so far, save it.  */
3403                   if (!best_regs_set
3404                       || (same_str_p && d > match_end)
3405                       || (!same_str_p && !MATCHING_IN_FIRST_STRING))
3406                     {
3407                       best_regs_set = true;
3408                       match_end = d;
3409 
3410                       DEBUG_PRINT1 ("\nSAVING match as best so far.\n");
3411 
3412                       for (mcnt = 1; mcnt < num_regs; mcnt++)
3413                         {
3414                           best_regstart[mcnt] = regstart[mcnt];
3415                           best_regend[mcnt] = regend[mcnt];
3416                         }
3417                     }
3418                   goto fail;
3419                 }
3420 
3421               /* If no failure points, don't restore garbage.  */
3422               else if (best_regs_set)
3423                 {
3424   	        restore_best_regs:
3425                   /* Restore best match.  It may happen that `dend ==
3426                      end_match_1' while the restored d is in string2.
3427                      For example, the pattern `x.*y.*z' against the
3428                      strings `x-' and `y-z-', if the two strings are
3429                      not consecutive in memory.  */
3430                   DEBUG_PRINT1 ("Restoring best registers.\n");
3431 
3432                   d = match_end;
3433                   dend = ((d >= string1 && d <= end1)
3434 		           ? end_match_1 : end_match_2);
3435 
3436 		  for (mcnt = 1; mcnt < num_regs; mcnt++)
3437 		    {
3438 		      regstart[mcnt] = best_regstart[mcnt];
3439 		      regend[mcnt] = best_regend[mcnt];
3440 		    }
3441                 }
3442             } /* d != end_match_2 */
3443 
3444           DEBUG_PRINT1 ("Accepting match.\n");
3445 
3446           /* If caller wants register contents data back, do it.  */
3447           if (regs && !bufp->no_sub)
3448 	    {
3449               /* Have the register data arrays been allocated?  */
3450               if (bufp->regs_allocated == REGS_UNALLOCATED)
3451                 { /* No.  So allocate them with malloc.  We need one
3452                      extra element beyond `num_regs' for the `-1' marker
3453                      GNU code uses.  */
3454                   regs->num_regs = MAX (RE_NREGS, num_regs + 1);
3455                   regs->start = TALLOC (regs->num_regs, regoff_t);
3456                   regs->end = TALLOC (regs->num_regs, regoff_t);
3457                   if (regs->start == NULL || regs->end == NULL)
3458                     return -2;
3459                   bufp->regs_allocated = REGS_REALLOCATE;
3460                 }
3461               else if (bufp->regs_allocated == REGS_REALLOCATE)
3462                 { /* Yes.  If we need more elements than were already
3463                      allocated, reallocate them.  If we need fewer, just
3464                      leave it alone.  */
3465                   if (regs->num_regs < num_regs + 1)
3466                     {
3467                       regs->num_regs = num_regs + 1;
3468                       RETALLOC (regs->start, regs->num_regs, regoff_t);
3469                       RETALLOC (regs->end, regs->num_regs, regoff_t);
3470                       if (regs->start == NULL || regs->end == NULL)
3471                         return -2;
3472                     }
3473                 }
3474               else
3475                 assert (bufp->regs_allocated == REGS_FIXED);
3476 
3477               /* Convert the pointer data in `regstart' and `regend' to
3478                  indices.  Register zero has to be set differently,
3479                  since we haven't kept track of any info for it.  */
3480               if (regs->num_regs > 0)
3481                 {
3482                   regs->start[0] = pos;
3483                   regs->end[0] = (MATCHING_IN_FIRST_STRING ? d - string1
3484 			          : d - string2 + size1);
3485                 }
3486 
3487               /* Go through the first `min (num_regs, regs->num_regs)'
3488                  registers, since that is all we initialized.  */
3489 	      for (mcnt = 1; mcnt < MIN (num_regs, regs->num_regs); mcnt++)
3490 		{
3491                   if (REG_UNSET (regstart[mcnt]) || REG_UNSET (regend[mcnt]))
3492                     regs->start[mcnt] = regs->end[mcnt] = -1;
3493                   else
3494                     {
3495 		      regs->start[mcnt] = POINTER_TO_OFFSET (regstart[mcnt]);
3496                       regs->end[mcnt] = POINTER_TO_OFFSET (regend[mcnt]);
3497                     }
3498 		}
3499 
3500               /* If the regs structure we return has more elements than
3501                  were in the pattern, set the extra elements to -1.  If
3502                  we (re)allocated the registers, this is the case,
3503                  because we always allocate enough to have at least one
3504                  -1 at the end.  */
3505               for (mcnt = num_regs; mcnt < regs->num_regs; mcnt++)
3506                 regs->start[mcnt] = regs->end[mcnt] = -1;
3507 	    } /* regs && !bufp->no_sub */
3508 
3509           FREE_VARIABLES ();
3510           DEBUG_PRINT4 ("%u failure points pushed, %u popped (%u remain).\n",
3511                         nfailure_points_pushed, nfailure_points_popped,
3512                         nfailure_points_pushed - nfailure_points_popped);
3513           DEBUG_PRINT2 ("%u registers pushed.\n", num_regs_pushed);
3514 
3515           mcnt = d - pos - (MATCHING_IN_FIRST_STRING
3516 			    ? string1
3517 			    : string2 - size1);
3518 
3519           DEBUG_PRINT2 ("Returning %d from re_match_2.\n", mcnt);
3520 
3521           return mcnt;
3522         }
3523 
3524       /* Otherwise match next pattern command.  */
3525 #ifdef SWITCH_ENUM_BUG
3526       switch ((int) ((re_opcode_t) *p++))
3527 #else
3528       switch ((re_opcode_t) *p++)
3529 #endif
3530 	{
3531         /* Ignore these.  Used to ignore the n of succeed_n's which
3532            currently have n == 0.  */
3533         case no_op:
3534           DEBUG_PRINT1 ("EXECUTING no_op.\n");
3535           break;
3536 
3537 
3538         /* Match the next n pattern characters exactly.  The following
3539            byte in the pattern defines n, and the n bytes after that
3540            are the characters to match.  */
3541 	case exactn:
3542 	  mcnt = *p++;
3543           DEBUG_PRINT2 ("EXECUTING exactn %d.\n", mcnt);
3544 
3545           /* This is written out as an if-else so we don't waste time
3546              testing `translate' inside the loop.  */
3547           if (translate)
3548 	    {
3549 	      do
3550 		{
3551 		  PREFETCH ();
3552 		  if (translate[(unsigned char) *d++] != (char) *p++)
3553                     goto fail;
3554 		}
3555 	      while (--mcnt);
3556 	    }
3557 	  else
3558 	    {
3559 	      do
3560 		{
3561 		  PREFETCH ();
3562 		  if (*d++ != (char) *p++) goto fail;
3563 		}
3564 	      while (--mcnt);
3565 	    }
3566 	  SET_REGS_MATCHED ();
3567           break;
3568 
3569 
3570         /* Match any character except possibly a newline or a null.  */
3571 	case anychar:
3572           DEBUG_PRINT1 ("EXECUTING anychar.\n");
3573 
3574           PREFETCH ();
3575 
3576           if ((!(bufp->syntax & RE_DOT_NEWLINE) && TRANSLATE (*d) == '\n')
3577               || (bufp->syntax & RE_DOT_NOT_NULL && TRANSLATE (*d) == '\000'))
3578 	    goto fail;
3579 
3580           SET_REGS_MATCHED ();
3581           DEBUG_PRINT2 ("  Matched `%d'.\n", *d);
3582           d++;
3583 	  break;
3584 
3585 
3586 	case charset:
3587 	case charset_not:
3588 	  {
3589 	    register unsigned char c;
3590 	    boolean not = (re_opcode_t) *(p - 1) == charset_not;
3591 
3592             DEBUG_PRINT2 ("EXECUTING charset%s.\n", not ? "_not" : "");
3593 
3594 	    PREFETCH ();
3595 	    c = TRANSLATE (*d); /* The character to match.  */
3596 
3597             /* Cast to `unsigned' instead of `unsigned char' in case the
3598                bit list is a full 32 bytes long.  */
3599 	    if (c < (unsigned) (*p * BYTEWIDTH)
3600 		&& p[1 + c / BYTEWIDTH] & (1 << (c % BYTEWIDTH)))
3601 	      not = !not;
3602 
3603 	    p += 1 + *p;
3604 
3605 	    if (!not) goto fail;
3606 
3607 	    SET_REGS_MATCHED ();
3608             d++;
3609 	    break;
3610 	  }
3611 
3612 
3613         /* The beginning of a group is represented by start_memory.
3614            The arguments are the register number in the next byte, and the
3615            number of groups inner to this one in the next.  The text
3616            matched within the group is recorded (in the internal
3617            registers data structure) under the register number.  */
3618         case start_memory:
3619 	  DEBUG_PRINT3 ("EXECUTING start_memory %d (%d):\n", *p, p[1]);
3620 
3621           /* Find out if this group can match the empty string.  */
3622 	  p1 = p;		/* To send to group_match_null_string_p.  */
3623 
3624           if (REG_MATCH_NULL_STRING_P (reg_info[*p]) == MATCH_NULL_UNSET_VALUE)
3625             REG_MATCH_NULL_STRING_P (reg_info[*p])
3626               = group_match_null_string_p (&p1, pend, reg_info);
3627 
3628           /* Save the position in the string where we were the last time
3629              we were at this open-group operator in case the group is
3630              operated upon by a repetition operator, e.g., with `(a*)*b'
3631              against `ab'; then we want to ignore where we are now in
3632              the string in case this attempt to match fails.  */
3633           old_regstart[*p] = REG_MATCH_NULL_STRING_P (reg_info[*p])
3634                              ? REG_UNSET (regstart[*p]) ? d : regstart[*p]
3635                              : regstart[*p];
3636 	  DEBUG_PRINT2 ("  old_regstart: %d\n",
3637 			 POINTER_TO_OFFSET (old_regstart[*p]));
3638 
3639           regstart[*p] = d;
3640 	  DEBUG_PRINT2 ("  regstart: %d\n", POINTER_TO_OFFSET (regstart[*p]));
3641 
3642           IS_ACTIVE (reg_info[*p]) = 1;
3643           MATCHED_SOMETHING (reg_info[*p]) = 0;
3644 
3645           /* This is the new highest active register.  */
3646           highest_active_reg = *p;
3647 
3648           /* If nothing was active before, this is the new lowest active
3649              register.  */
3650           if (lowest_active_reg == NO_LOWEST_ACTIVE_REG)
3651             lowest_active_reg = *p;
3652 
3653           /* Move past the register number and inner group count.  */
3654           p += 2;
3655           break;
3656 
3657 
3658         /* The stop_memory opcode represents the end of a group.  Its
3659            arguments are the same as start_memory's: the register
3660            number, and the number of inner groups.  */
3661 	case stop_memory:
3662 	  DEBUG_PRINT3 ("EXECUTING stop_memory %d (%d):\n", *p, p[1]);
3663 
3664           /* We need to save the string position the last time we were at
3665              this close-group operator in case the group is operated
3666              upon by a repetition operator, e.g., with `((a*)*(b*)*)*'
3667              against `aba'; then we want to ignore where we are now in
3668              the string in case this attempt to match fails.  */
3669           old_regend[*p] = REG_MATCH_NULL_STRING_P (reg_info[*p])
3670                            ? REG_UNSET (regend[*p]) ? d : regend[*p]
3671 			   : regend[*p];
3672 	  DEBUG_PRINT2 ("      old_regend: %d\n",
3673 			 POINTER_TO_OFFSET (old_regend[*p]));
3674 
3675           regend[*p] = d;
3676 	  DEBUG_PRINT2 ("      regend: %d\n", POINTER_TO_OFFSET (regend[*p]));
3677 
3678           /* This register isn't active anymore.  */
3679           IS_ACTIVE (reg_info[*p]) = 0;
3680 
3681           /* If this was the only register active, nothing is active
3682              anymore.  */
3683           if (lowest_active_reg == highest_active_reg)
3684             {
3685               lowest_active_reg = NO_LOWEST_ACTIVE_REG;
3686               highest_active_reg = NO_HIGHEST_ACTIVE_REG;
3687             }
3688           else
3689             { /* We must scan for the new highest active register, since
3690                  it isn't necessarily one less than now: consider
3691                  (a(b)c(d(e)f)g).  When group 3 ends, after the f), the
3692                  new highest active register is 1.  */
3693               unsigned char r = *p - 1;
3694               while (r > 0 && !IS_ACTIVE (reg_info[r]))
3695                 r--;
3696 
3697               /* If we end up at register zero, that means that we saved
3698                  the registers as the result of an `on_failure_jump', not
3699                  a `start_memory', and we jumped to past the innermost
3700                  `stop_memory'.  For example, in ((.)*) we save
3701                  registers 1 and 2 as a result of the *, but when we pop
3702                  back to the second ), we are at the stop_memory 1.
3703                  Thus, nothing is active.  */
3704 	      if (r == 0)
3705                 {
3706                   lowest_active_reg = NO_LOWEST_ACTIVE_REG;
3707                   highest_active_reg = NO_HIGHEST_ACTIVE_REG;
3708                 }
3709               else
3710                 highest_active_reg = r;
3711             }
3712 
3713           /* If just failed to match something this time around with a
3714              group that's operated on by a repetition operator, try to
3715              force exit from the ``loop'', and restore the register
3716              information for this group that we had before trying this
3717              last match.  */
3718           if ((!MATCHED_SOMETHING (reg_info[*p])
3719                || (re_opcode_t) p[-3] == start_memory)
3720 	      && (p + 2) < pend)
3721             {
3722               boolean is_a_jump_n = false;
3723 
3724               p1 = p + 2;
3725               mcnt = 0;
3726               switch ((re_opcode_t) *p1++)
3727                 {
3728                   case jump_n:
3729 		    is_a_jump_n = true;
3730                   case pop_failure_jump:
3731 		  case maybe_pop_jump:
3732 		  case jump:
3733 		  case dummy_failure_jump:
3734                     EXTRACT_NUMBER_AND_INCR (mcnt, p1);
3735 		    if (is_a_jump_n)
3736 		      p1 += 2;
3737                     break;
3738 
3739                   default:
3740                     /* do nothing */ ;
3741                 }
3742 	      p1 += mcnt;
3743 
3744               /* If the next operation is a jump backwards in the pattern
3745 	         to an on_failure_jump right before the start_memory
3746                  corresponding to this stop_memory, exit from the loop
3747                  by forcing a failure after pushing on the stack the
3748                  on_failure_jump's jump in the pattern, and d.  */
3749               if (mcnt < 0 && (re_opcode_t) *p1 == on_failure_jump
3750                   && (re_opcode_t) p1[3] == start_memory && p1[4] == *p)
3751 		{
3752                   /* If this group ever matched anything, then restore
3753                      what its registers were before trying this last
3754                      failed match, e.g., with `(a*)*b' against `ab' for
3755                      regstart[1], and, e.g., with `((a*)*(b*)*)*'
3756                      against `aba' for regend[3].
3757 
3758                      Also restore the registers for inner groups for,
3759                      e.g., `((a*)(b*))*' against `aba' (register 3 would
3760                      otherwise get trashed).  */
3761 
3762                   if (EVER_MATCHED_SOMETHING (reg_info[*p]))
3763 		    {
3764 		      unsigned r;
3765 
3766                       EVER_MATCHED_SOMETHING (reg_info[*p]) = 0;
3767 
3768 		      /* Restore this and inner groups' (if any) registers.  */
3769                       for (r = *p; r < *p + *(p + 1); r++)
3770                         {
3771                           regstart[r] = old_regstart[r];
3772 
3773                           /* xx why this test?  */
3774                           if ((int) old_regend[r] >= (int) regstart[r])
3775                             regend[r] = old_regend[r];
3776                         }
3777                     }
3778 		  p1++;
3779                   EXTRACT_NUMBER_AND_INCR (mcnt, p1);
3780                   PUSH_FAILURE_POINT (p1 + mcnt, d, -2);
3781 
3782                   goto fail;
3783                 }
3784             }
3785 
3786           /* Move past the register number and the inner group count.  */
3787           p += 2;
3788           break;
3789 
3790 
3791 	/* \<digit> has been turned into a `duplicate' command which is
3792            followed by the numeric value of <digit> as the register number.  */
3793         case duplicate:
3794 	  {
3795 	    register const char *d2, *dend2;
3796 	    int regno = *p++;   /* Get which register to match against.  */
3797 	    DEBUG_PRINT2 ("EXECUTING duplicate %d.\n", regno);
3798 
3799 	    /* Can't back reference a group which we've never matched.  */
3800             if (REG_UNSET (regstart[regno]) || REG_UNSET (regend[regno]))
3801               goto fail;
3802 
3803             /* Where in input to try to start matching.  */
3804             d2 = regstart[regno];
3805 
3806             /* Where to stop matching; if both the place to start and
3807                the place to stop matching are in the same string, then
3808                set to the place to stop, otherwise, for now have to use
3809                the end of the first string.  */
3810 
3811             dend2 = ((FIRST_STRING_P (regstart[regno])
3812 		      == FIRST_STRING_P (regend[regno]))
3813 		     ? regend[regno] : end_match_1);
3814 	    for (;;)
3815 	      {
3816 		/* If necessary, advance to next segment in register
3817                    contents.  */
3818 		while (d2 == dend2)
3819 		  {
3820 		    if (dend2 == end_match_2) break;
3821 		    if (dend2 == regend[regno]) break;
3822 
3823                     /* End of string1 => advance to string2. */
3824                     d2 = string2;
3825                     dend2 = regend[regno];
3826 		  }
3827 		/* At end of register contents => success */
3828 		if (d2 == dend2) break;
3829 
3830 		/* If necessary, advance to next segment in data.  */
3831 		PREFETCH ();
3832 
3833 		/* How many characters left in this segment to match.  */
3834 		mcnt = dend - d;
3835 
3836 		/* Want how many consecutive characters we can match in
3837                    one shot, so, if necessary, adjust the count.  */
3838                 if (mcnt > dend2 - d2)
3839 		  mcnt = dend2 - d2;
3840 
3841 		/* Compare that many; failure if mismatch, else move
3842                    past them.  */
3843 		if (translate
3844                     ? bcmp_translate (d, d2, mcnt, translate)
3845                     : bcmp (d, d2, mcnt))
3846 		  goto fail;
3847 		d += mcnt, d2 += mcnt;
3848 	      }
3849 	  }
3850 	  break;
3851 
3852 
3853         /* begline matches the empty string at the beginning of the string
3854            (unless `not_bol' is set in `bufp'), and, if
3855            `newline_anchor' is set, after newlines.  */
3856 	case begline:
3857           DEBUG_PRINT1 ("EXECUTING begline.\n");
3858 
3859           if (AT_STRINGS_BEG (d))
3860             {
3861               if (!bufp->not_bol) break;
3862             }
3863           else if (d[-1] == '\n' && bufp->newline_anchor)
3864             {
3865               break;
3866             }
3867           /* In all other cases, we fail.  */
3868           goto fail;
3869 
3870 
3871         /* endline is the dual of begline.  */
3872 	case endline:
3873           DEBUG_PRINT1 ("EXECUTING endline.\n");
3874 
3875           if (AT_STRINGS_END (d))
3876             {
3877               if (!bufp->not_eol) break;
3878             }
3879 
3880           /* We have to ``prefetch'' the next character.  */
3881           else if ((d == end1 ? *string2 : *d) == '\n'
3882                    && bufp->newline_anchor)
3883             {
3884               break;
3885             }
3886           goto fail;
3887 
3888 
3889 	/* Match at the very beginning of the data.  */
3890         case begbuf:
3891           DEBUG_PRINT1 ("EXECUTING begbuf.\n");
3892           if (AT_STRINGS_BEG (d))
3893             break;
3894           goto fail;
3895 
3896 
3897 	/* Match at the very end of the data.  */
3898         case endbuf:
3899           DEBUG_PRINT1 ("EXECUTING endbuf.\n");
3900 	  if (AT_STRINGS_END (d))
3901 	    break;
3902           goto fail;
3903 
3904 
3905         /* on_failure_keep_string_jump is used to optimize `.*\n'.  It
3906            pushes NULL as the value for the string on the stack.  Then
3907            `pop_failure_point' will keep the current value for the
3908            string, instead of restoring it.  To see why, consider
3909            matching `foo\nbar' against `.*\n'.  The .* matches the foo;
3910            then the . fails against the \n.  But the next thing we want
3911            to do is match the \n against the \n; if we restored the
3912            string value, we would be back at the foo.
3913 
3914            Because this is used only in specific cases, we don't need to
3915            check all the things that `on_failure_jump' does, to make
3916            sure the right things get saved on the stack.  Hence we don't
3917            share its code.  The only reason to push anything on the
3918            stack at all is that otherwise we would have to change
3919            `anychar's code to do something besides goto fail in this
3920            case; that seems worse than this.  */
3921         case on_failure_keep_string_jump:
3922           DEBUG_PRINT1 ("EXECUTING on_failure_keep_string_jump");
3923 
3924           EXTRACT_NUMBER_AND_INCR (mcnt, p);
3925           DEBUG_PRINT3 (" %d (to 0x%x):\n", mcnt, p + mcnt);
3926 
3927           PUSH_FAILURE_POINT (p + mcnt, NULL, -2);
3928           break;
3929 
3930 
3931 	/* Uses of on_failure_jump:
3932 
3933            Each alternative starts with an on_failure_jump that points
3934            to the beginning of the next alternative.  Each alternative
3935            except the last ends with a jump that in effect jumps past
3936            the rest of the alternatives.  (They really jump to the
3937            ending jump of the following alternative, because tensioning
3938            these jumps is a hassle.)
3939 
3940            Repeats start with an on_failure_jump that points past both
3941            the repetition text and either the following jump or
3942            pop_failure_jump back to this on_failure_jump.  */
3943 	case on_failure_jump:
3944         on_failure:
3945           DEBUG_PRINT1 ("EXECUTING on_failure_jump");
3946 
3947           EXTRACT_NUMBER_AND_INCR (mcnt, p);
3948           DEBUG_PRINT3 (" %d (to 0x%x)", mcnt, p + mcnt);
3949 
3950           /* If this on_failure_jump comes right before a group (i.e.,
3951              the original * applied to a group), save the information
3952              for that group and all inner ones, so that if we fail back
3953              to this point, the group's information will be correct.
3954              For example, in \(a*\)*\1, we need the preceding group,
3955              and in \(\(a*\)b*\)\2, we need the inner group.  */
3956 
3957           /* We can't use `p' to check ahead because we push
3958              a failure point to `p + mcnt' after we do this.  */
3959           p1 = p;
3960 
3961           /* We need to skip no_op's before we look for the
3962              start_memory in case this on_failure_jump is happening as
3963              the result of a completed succeed_n, as in \(a\)\{1,3\}b\1
3964              against aba.  */
3965           while (p1 < pend && (re_opcode_t) *p1 == no_op)
3966             p1++;
3967 
3968           if (p1 < pend && (re_opcode_t) *p1 == start_memory)
3969             {
3970               /* We have a new highest active register now.  This will
3971                  get reset at the start_memory we are about to get to,
3972                  but we will have saved all the registers relevant to
3973                  this repetition op, as described above.  */
3974               highest_active_reg = *(p1 + 1) + *(p1 + 2);
3975               if (lowest_active_reg == NO_LOWEST_ACTIVE_REG)
3976                 lowest_active_reg = *(p1 + 1);
3977             }
3978 
3979           DEBUG_PRINT1 (":\n");
3980           PUSH_FAILURE_POINT (p + mcnt, d, -2);
3981           break;
3982 
3983 
3984         /* A smart repeat ends with `maybe_pop_jump'.
3985 	   We change it to either `pop_failure_jump' or `jump'.  */
3986         case maybe_pop_jump:
3987           EXTRACT_NUMBER_AND_INCR (mcnt, p);
3988           DEBUG_PRINT2 ("EXECUTING maybe_pop_jump %d.\n", mcnt);
3989           {
3990 	    register unsigned char *p2 = p;
3991 
3992             /* Compare the beginning of the repeat with what in the
3993                pattern follows its end. If we can establish that there
3994                is nothing that they would both match, i.e., that we
3995                would have to backtrack because of (as in, e.g., `a*a')
3996                then we can change to pop_failure_jump, because we'll
3997                never have to backtrack.
3998 
3999                This is not true in the case of alternatives: in
4000                `(a|ab)*' we do need to backtrack to the `ab' alternative
4001                (e.g., if the string was `ab').  But instead of trying to
4002                detect that here, the alternative has put on a dummy
4003                failure point which is what we will end up popping.  */
4004 
4005 	    /* Skip over open/close-group commands.  */
4006 	    while (p2 + 2 < pend
4007 		   && ((re_opcode_t) *p2 == stop_memory
4008 		       || (re_opcode_t) *p2 == start_memory))
4009 	      p2 += 3;			/* Skip over args, too.  */
4010 
4011             /* If we're at the end of the pattern, we can change.  */
4012             if (p2 == pend)
4013 	      {
4014 		/* Consider what happens when matching ":\(.*\)"
4015 		   against ":/".  I don't really understand this code
4016 		   yet.  */
4017   	        p[-3] = (unsigned char) pop_failure_jump;
4018                 DEBUG_PRINT1
4019                   ("  End of pattern: change to `pop_failure_jump'.\n");
4020               }
4021 
4022             else if ((re_opcode_t) *p2 == exactn
4023 		     || (bufp->newline_anchor && (re_opcode_t) *p2 == endline))
4024 	      {
4025 		register unsigned char c
4026                   = *p2 == (unsigned char) endline ? '\n' : p2[2];
4027 		p1 = p + mcnt;
4028 
4029                 /* p1[0] ... p1[2] are the `on_failure_jump' corresponding
4030                    to the `maybe_finalize_jump' of this case.  Examine what
4031                    follows.  */
4032                 if ((re_opcode_t) p1[3] == exactn && p1[5] != c)
4033                   {
4034   		    p[-3] = (unsigned char) pop_failure_jump;
4035                     DEBUG_PRINT3 ("  %c != %c => pop_failure_jump.\n",
4036                                   c, p1[5]);
4037                   }
4038 
4039 		else if ((re_opcode_t) p1[3] == charset
4040 			 || (re_opcode_t) p1[3] == charset_not)
4041 		  {
4042 		    int not = (re_opcode_t) p1[3] == charset_not;
4043 
4044 		    if (c < (unsigned char) (p1[4] * BYTEWIDTH)
4045 			&& p1[5 + c / BYTEWIDTH] & (1 << (c % BYTEWIDTH)))
4046 		      not = !not;
4047 
4048                     /* `not' is equal to 1 if c would match, which means
4049                         that we can't change to pop_failure_jump.  */
4050 		    if (!not)
4051                       {
4052   		        p[-3] = (unsigned char) pop_failure_jump;
4053                         DEBUG_PRINT1 ("  No match => pop_failure_jump.\n");
4054                       }
4055 		  }
4056 	      }
4057 	  }
4058 	  p -= 2;		/* Point at relative address again.  */
4059 	  if ((re_opcode_t) p[-1] != pop_failure_jump)
4060 	    {
4061 	      p[-1] = (unsigned char) jump;
4062               DEBUG_PRINT1 ("  Match => jump.\n");
4063 	      goto unconditional_jump;
4064 	    }
4065         /* Note fall through.  */
4066 
4067 
4068 	/* The end of a simple repeat has a pop_failure_jump back to
4069            its matching on_failure_jump, where the latter will push a
4070            failure point.  The pop_failure_jump takes off failure
4071            points put on by this pop_failure_jump's matching
4072            on_failure_jump; we got through the pattern to here from the
4073            matching on_failure_jump, so didn't fail.  */
4074         case pop_failure_jump:
4075           {
4076             /* We need to pass separate storage for the lowest and
4077                highest registers, even though we don't care about the
4078                actual values.  Otherwise, we will restore only one
4079                register from the stack, since lowest will == highest in
4080                `pop_failure_point'.  */
4081             unsigned dummy_low_reg, dummy_high_reg;
4082             unsigned char *pdummy;
4083             const char *sdummy;
4084 
4085             DEBUG_PRINT1 ("EXECUTING pop_failure_jump.\n");
4086             POP_FAILURE_POINT (sdummy, pdummy,
4087                                dummy_low_reg, dummy_high_reg,
4088                                reg_dummy, reg_dummy, reg_info_dummy);
4089           }
4090           /* Note fall through.  */
4091 
4092 
4093         /* Unconditionally jump (without popping any failure points).  */
4094         case jump:
4095 	unconditional_jump:
4096 	  EXTRACT_NUMBER_AND_INCR (mcnt, p);	/* Get the amount to jump.  */
4097           DEBUG_PRINT2 ("EXECUTING jump %d ", mcnt);
4098 	  p += mcnt;				/* Do the jump.  */
4099           DEBUG_PRINT2 ("(to 0x%x).\n", p);
4100 	  break;
4101 
4102 
4103         /* We need this opcode so we can detect where alternatives end
4104            in `group_match_null_string_p' et al.  */
4105         case jump_past_alt:
4106           DEBUG_PRINT1 ("EXECUTING jump_past_alt.\n");
4107           goto unconditional_jump;
4108 
4109 
4110         /* Normally, the on_failure_jump pushes a failure point, which
4111            then gets popped at pop_failure_jump.  We will end up at
4112            pop_failure_jump, also, and with a pattern of, say, `a+', we
4113            are skipping over the on_failure_jump, so we have to push
4114            something meaningless for pop_failure_jump to pop.  */
4115         case dummy_failure_jump:
4116           DEBUG_PRINT1 ("EXECUTING dummy_failure_jump.\n");
4117           /* It doesn't matter what we push for the string here.  What
4118              the code at `fail' tests is the value for the pattern.  */
4119           PUSH_FAILURE_POINT (0, 0, -2);
4120           goto unconditional_jump;
4121 
4122 
4123         /* At the end of an alternative, we need to push a dummy failure
4124            point in case we are followed by a `pop_failure_jump', because
4125            we don't want the failure point for the alternative to be
4126            popped.  For example, matching `(a|ab)*' against `aab'
4127            requires that we match the `ab' alternative.  */
4128         case push_dummy_failure:
4129           DEBUG_PRINT1 ("EXECUTING push_dummy_failure.\n");
4130           /* See comments just above at `dummy_failure_jump' about the
4131              two zeroes.  */
4132           PUSH_FAILURE_POINT (0, 0, -2);
4133           break;
4134 
4135         /* Have to succeed matching what follows at least n times.
4136            After that, handle like `on_failure_jump'.  */
4137         case succeed_n:
4138           EXTRACT_NUMBER (mcnt, p + 2);
4139           DEBUG_PRINT2 ("EXECUTING succeed_n %d.\n", mcnt);
4140 
4141           assert (mcnt >= 0);
4142           /* Originally, this is how many times we HAVE to succeed.  */
4143           if (mcnt > 0)
4144             {
4145                mcnt--;
4146 	       p += 2;
4147                STORE_NUMBER_AND_INCR (p, mcnt);
4148                DEBUG_PRINT3 ("  Setting 0x%x to %d.\n", p, mcnt);
4149             }
4150 	  else if (mcnt == 0)
4151             {
4152               DEBUG_PRINT2 ("  Setting two bytes from 0x%x to no_op.\n", p+2);
4153 	      p[2] = (unsigned char) no_op;
4154               p[3] = (unsigned char) no_op;
4155               goto on_failure;
4156             }
4157           break;
4158 
4159         case jump_n:
4160           EXTRACT_NUMBER (mcnt, p + 2);
4161           DEBUG_PRINT2 ("EXECUTING jump_n %d.\n", mcnt);
4162 
4163           /* Originally, this is how many times we CAN jump.  */
4164           if (mcnt)
4165             {
4166                mcnt--;
4167                STORE_NUMBER (p + 2, mcnt);
4168 	       goto unconditional_jump;
4169             }
4170           /* If don't have to jump any more, skip over the rest of command.  */
4171 	  else
4172 	    p += 4;
4173           break;
4174 
4175 	case set_number_at:
4176 	  {
4177             DEBUG_PRINT1 ("EXECUTING set_number_at.\n");
4178 
4179             EXTRACT_NUMBER_AND_INCR (mcnt, p);
4180             p1 = p + mcnt;
4181             EXTRACT_NUMBER_AND_INCR (mcnt, p);
4182             DEBUG_PRINT3 ("  Setting 0x%x to %d.\n", p1, mcnt);
4183 	    STORE_NUMBER (p1, mcnt);
4184             break;
4185           }
4186 
4187         case wordbound:
4188           DEBUG_PRINT1 ("EXECUTING wordbound.\n");
4189           if (AT_WORD_BOUNDARY (d))
4190 	    break;
4191           goto fail;
4192 
4193 	case notwordbound:
4194           DEBUG_PRINT1 ("EXECUTING notwordbound.\n");
4195 	  if (AT_WORD_BOUNDARY (d))
4196 	    goto fail;
4197           break;
4198 
4199 	case wordbeg:
4200           DEBUG_PRINT1 ("EXECUTING wordbeg.\n");
4201 	  if (WORDCHAR_P (d) && (AT_STRINGS_BEG (d) || !WORDCHAR_P (d - 1)))
4202 	    break;
4203           goto fail;
4204 
4205 	case wordend:
4206           DEBUG_PRINT1 ("EXECUTING wordend.\n");
4207 	  if (!AT_STRINGS_BEG (d) && WORDCHAR_P (d - 1)
4208               && (!WORDCHAR_P (d) || AT_STRINGS_END (d)))
4209 	    break;
4210           goto fail;
4211 
4212 #ifdef emacs
4213 #ifdef emacs19
4214   	case before_dot:
4215           DEBUG_PRINT1 ("EXECUTING before_dot.\n");
4216  	  if (PTR_CHAR_POS ((unsigned char *) d) >= point)
4217   	    goto fail;
4218   	  break;
4219 
4220   	case at_dot:
4221           DEBUG_PRINT1 ("EXECUTING at_dot.\n");
4222  	  if (PTR_CHAR_POS ((unsigned char *) d) != point)
4223   	    goto fail;
4224   	  break;
4225 
4226   	case after_dot:
4227           DEBUG_PRINT1 ("EXECUTING after_dot.\n");
4228           if (PTR_CHAR_POS ((unsigned char *) d) <= point)
4229   	    goto fail;
4230   	  break;
4231 #else /* not emacs19 */
4232 	case at_dot:
4233           DEBUG_PRINT1 ("EXECUTING at_dot.\n");
4234 	  if (PTR_CHAR_POS ((unsigned char *) d) + 1 != point)
4235 	    goto fail;
4236 	  break;
4237 #endif /* not emacs19 */
4238 
4239 	case syntaxspec:
4240           DEBUG_PRINT2 ("EXECUTING syntaxspec %d.\n", mcnt);
4241 	  mcnt = *p++;
4242 	  goto matchsyntax;
4243 
4244         case wordchar:
4245           DEBUG_PRINT1 ("EXECUTING Emacs wordchar.\n");
4246 	  mcnt = (int) Sword;
4247         matchsyntax:
4248 	  PREFETCH ();
4249 	  if (SYNTAX (*d++) != (enum syntaxcode) mcnt)
4250             goto fail;
4251           SET_REGS_MATCHED ();
4252 	  break;
4253 
4254 	case notsyntaxspec:
4255           DEBUG_PRINT2 ("EXECUTING notsyntaxspec %d.\n", mcnt);
4256 	  mcnt = *p++;
4257 	  goto matchnotsyntax;
4258 
4259         case notwordchar:
4260           DEBUG_PRINT1 ("EXECUTING Emacs notwordchar.\n");
4261 	  mcnt = (int) Sword;
4262         matchnotsyntax:
4263 	  PREFETCH ();
4264 	  if (SYNTAX (*d++) == (enum syntaxcode) mcnt)
4265             goto fail;
4266 	  SET_REGS_MATCHED ();
4267           break;
4268 
4269 #else /* not emacs */
4270 	case wordchar:
4271           DEBUG_PRINT1 ("EXECUTING non-Emacs wordchar.\n");
4272 	  PREFETCH ();
4273           if (!WORDCHAR_P (d))
4274             goto fail;
4275 	  SET_REGS_MATCHED ();
4276           d++;
4277 	  break;
4278 
4279 	case notwordchar:
4280           DEBUG_PRINT1 ("EXECUTING non-Emacs notwordchar.\n");
4281 	  PREFETCH ();
4282 	  if (WORDCHAR_P (d))
4283             goto fail;
4284           SET_REGS_MATCHED ();
4285           d++;
4286 	  break;
4287 #endif /* not emacs */
4288 
4289         default:
4290           abort ();
4291 	}
4292       continue;  /* Successfully executed one pattern command; keep going.  */
4293 
4294 
4295     /* We goto here if a matching operation fails. */
4296     fail:
4297       if (!FAIL_STACK_EMPTY ())
4298 	{ /* A restart point is known.  Restore to that state.  */
4299           DEBUG_PRINT1 ("\nFAIL:\n");
4300           POP_FAILURE_POINT (d, p,
4301                              lowest_active_reg, highest_active_reg,
4302                              regstart, regend, reg_info);
4303 
4304           /* If this failure point is a dummy, try the next one.  */
4305           if (!p)
4306 	    goto fail;
4307 
4308           /* If we failed to the end of the pattern, don't examine *p.  */
4309 	  assert (p <= pend);
4310           if (p < pend)
4311             {
4312               boolean is_a_jump_n = false;
4313 
4314               /* If failed to a backwards jump that's part of a repetition
4315                  loop, need to pop this failure point and use the next one.  */
4316               switch ((re_opcode_t) *p)
4317                 {
4318                 case jump_n:
4319                   is_a_jump_n = true;
4320                 case maybe_pop_jump:
4321                 case pop_failure_jump:
4322                 case jump:
4323                   p1 = p + 1;
4324                   EXTRACT_NUMBER_AND_INCR (mcnt, p1);
4325                   p1 += mcnt;
4326 
4327                   if ((is_a_jump_n && (re_opcode_t) *p1 == succeed_n)
4328                       || (!is_a_jump_n
4329                           && (re_opcode_t) *p1 == on_failure_jump))
4330                     goto fail;
4331                   break;
4332                 default:
4333                   /* do nothing */ ;
4334                 }
4335             }
4336 
4337           if (d >= string1 && d <= end1)
4338 	    dend = end_match_1;
4339         }
4340       else
4341         break;   /* Matching at this starting point really fails.  */
4342     } /* for (;;) */
4343 
4344   if (best_regs_set)
4345     goto restore_best_regs;
4346 
4347   FREE_VARIABLES ();
4348 
4349   return -1;         			/* Failure to match.  */
4350 } /* re_match_2 */
4351 
4352 /* Subroutine definitions for re_match_2.  */
4353 
4354 
4355 /* We are passed P pointing to a register number after a start_memory.
4356 
4357    Return true if the pattern up to the corresponding stop_memory can
4358    match the empty string, and false otherwise.
4359 
4360    If we find the matching stop_memory, sets P to point to one past its number.
4361    Otherwise, sets P to an undefined byte less than or equal to END.
4362 
4363    We don't handle duplicates properly (yet).  */
4364 
4365 static boolean
4366 group_match_null_string_p (p, end, reg_info)
4367     unsigned char **p, *end;
4368     register_info_type *reg_info;
4369 {
4370   int mcnt;
4371   /* Point to after the args to the start_memory.  */
4372   unsigned char *p1 = *p + 2;
4373 
4374   while (p1 < end)
4375     {
4376       /* Skip over opcodes that can match nothing, and return true or
4377 	 false, as appropriate, when we get to one that can't, or to the
4378          matching stop_memory.  */
4379 
4380       switch ((re_opcode_t) *p1)
4381         {
4382         /* Could be either a loop or a series of alternatives.  */
4383         case on_failure_jump:
4384           p1++;
4385           EXTRACT_NUMBER_AND_INCR (mcnt, p1);
4386 
4387           /* If the next operation is not a jump backwards in the
4388 	     pattern.  */
4389 
4390 	  if (mcnt >= 0)
4391 	    {
4392               /* Go through the on_failure_jumps of the alternatives,
4393                  seeing if any of the alternatives cannot match nothing.
4394                  The last alternative starts with only a jump,
4395                  whereas the rest start with on_failure_jump and end
4396                  with a jump, e.g., here is the pattern for `a|b|c':
4397 
4398                  /on_failure_jump/0/6/exactn/1/a/jump_past_alt/0/6
4399                  /on_failure_jump/0/6/exactn/1/b/jump_past_alt/0/3
4400                  /exactn/1/c
4401 
4402                  So, we have to first go through the first (n-1)
4403                  alternatives and then deal with the last one separately.  */
4404 
4405 
4406               /* Deal with the first (n-1) alternatives, which start
4407                  with an on_failure_jump (see above) that jumps to right
4408                  past a jump_past_alt.  */
4409 
4410               while ((re_opcode_t) p1[mcnt-3] == jump_past_alt)
4411                 {
4412                   /* `mcnt' holds how many bytes long the alternative
4413                      is, including the ending `jump_past_alt' and
4414                      its number.  */
4415 
4416                   if (!alt_match_null_string_p (p1, p1 + mcnt - 3,
4417 				                      reg_info))
4418                     return false;
4419 
4420                   /* Move to right after this alternative, including the
4421 		     jump_past_alt.  */
4422                   p1 += mcnt;
4423 
4424                   /* Break if it's the beginning of an n-th alternative
4425                      that doesn't begin with an on_failure_jump.  */
4426                   if ((re_opcode_t) *p1 != on_failure_jump)
4427                     break;
4428 
4429 		  /* Still have to check that it's not an n-th
4430 		     alternative that starts with an on_failure_jump.  */
4431 		  p1++;
4432                   EXTRACT_NUMBER_AND_INCR (mcnt, p1);
4433                   if ((re_opcode_t) p1[mcnt-3] != jump_past_alt)
4434                     {
4435 		      /* Get to the beginning of the n-th alternative.  */
4436                       p1 -= 3;
4437                       break;
4438                     }
4439                 }
4440 
4441               /* Deal with the last alternative: go back and get number
4442                  of the `jump_past_alt' just before it.  `mcnt' contains
4443                  the length of the alternative.  */
4444               EXTRACT_NUMBER (mcnt, p1 - 2);
4445 
4446               if (!alt_match_null_string_p (p1, p1 + mcnt, reg_info))
4447                 return false;
4448 
4449               p1 += mcnt;	/* Get past the n-th alternative.  */
4450             } /* if mcnt > 0 */
4451           break;
4452 
4453 
4454         case stop_memory:
4455 	  assert (p1[1] == **p);
4456           *p = p1 + 2;
4457           return true;
4458 
4459 
4460         default:
4461           if (!common_op_match_null_string_p (&p1, end, reg_info))
4462             return false;
4463         }
4464     } /* while p1 < end */
4465 
4466   return false;
4467 } /* group_match_null_string_p */
4468 
4469 
4470 /* Similar to group_match_null_string_p, but doesn't deal with alternatives:
4471    It expects P to be the first byte of a single alternative and END one
4472    byte past the last. The alternative can contain groups.  */
4473 
4474 static boolean
4475 alt_match_null_string_p (p, end, reg_info)
4476     unsigned char *p, *end;
4477     register_info_type *reg_info;
4478 {
4479   int mcnt;
4480   unsigned char *p1 = p;
4481 
4482   while (p1 < end)
4483     {
4484       /* Skip over opcodes that can match nothing, and break when we get
4485          to one that can't.  */
4486 
4487       switch ((re_opcode_t) *p1)
4488         {
4489 	/* It's a loop.  */
4490         case on_failure_jump:
4491           p1++;
4492           EXTRACT_NUMBER_AND_INCR (mcnt, p1);
4493           p1 += mcnt;
4494           break;
4495 
4496 	default:
4497           if (!common_op_match_null_string_p (&p1, end, reg_info))
4498             return false;
4499         }
4500     }  /* while p1 < end */
4501 
4502   return true;
4503 } /* alt_match_null_string_p */
4504 
4505 
4506 /* Deals with the ops common to group_match_null_string_p and
4507    alt_match_null_string_p.
4508 
4509    Sets P to one after the op and its arguments, if any.  */
4510 
4511 static boolean
4512 common_op_match_null_string_p (p, end, reg_info)
4513     unsigned char **p, *end;
4514     register_info_type *reg_info;
4515 {
4516   int mcnt;
4517   boolean ret;
4518   int reg_no;
4519   unsigned char *p1 = *p;
4520 
4521   switch ((re_opcode_t) *p1++)
4522     {
4523     case no_op:
4524     case begline:
4525     case endline:
4526     case begbuf:
4527     case endbuf:
4528     case wordbeg:
4529     case wordend:
4530     case wordbound:
4531     case notwordbound:
4532 #ifdef emacs
4533     case before_dot:
4534     case at_dot:
4535     case after_dot:
4536 #endif
4537       break;
4538 
4539     case start_memory:
4540       reg_no = *p1;
4541       assert (reg_no > 0 && reg_no <= MAX_REGNUM);
4542       ret = group_match_null_string_p (&p1, end, reg_info);
4543 
4544       /* Have to set this here in case we're checking a group which
4545          contains a group and a back reference to it.  */
4546 
4547       if (REG_MATCH_NULL_STRING_P (reg_info[reg_no]) == MATCH_NULL_UNSET_VALUE)
4548         REG_MATCH_NULL_STRING_P (reg_info[reg_no]) = ret;
4549 
4550       if (!ret)
4551         return false;
4552       break;
4553 
4554     /* If this is an optimized succeed_n for zero times, make the jump.  */
4555     case jump:
4556       EXTRACT_NUMBER_AND_INCR (mcnt, p1);
4557       if (mcnt >= 0)
4558         p1 += mcnt;
4559       else
4560         return false;
4561       break;
4562 
4563     case succeed_n:
4564       /* Get to the number of times to succeed.  */
4565       p1 += 2;
4566       EXTRACT_NUMBER_AND_INCR (mcnt, p1);
4567 
4568       if (mcnt == 0)
4569         {
4570           p1 -= 4;
4571           EXTRACT_NUMBER_AND_INCR (mcnt, p1);
4572           p1 += mcnt;
4573         }
4574       else
4575         return false;
4576       break;
4577 
4578     case duplicate:
4579       if (!REG_MATCH_NULL_STRING_P (reg_info[*p1]))
4580         return false;
4581       break;
4582 
4583     case set_number_at:
4584       p1 += 4;
4585 
4586     default:
4587       /* All other opcodes mean we cannot match the empty string.  */
4588       return false;
4589   }
4590 
4591   *p = p1;
4592   return true;
4593 } /* common_op_match_null_string_p */
4594 
4595 
4596 /* Return zero if TRANSLATE[S1] and TRANSLATE[S2] are identical for LEN
4597    bytes; nonzero otherwise.  */
4598 
4599 static int
4600 bcmp_translate (s1, s2, len, translate)
4601      unsigned char *s1, *s2;
4602      register int len;
4603      char *translate;
4604 {
4605   register unsigned char *p1 = s1, *p2 = s2;
4606   while (len)
4607     {
4608       if (translate[*p1++] != translate[*p2++]) return 1;
4609       len--;
4610     }
4611   return 0;
4612 }
4613 
4614 /* Entry points for GNU code.  */
4615 
4616 /* re_compile_pattern is the GNU regular expression compiler: it
4617    compiles PATTERN (of length SIZE) and puts the result in BUFP.
4618    Returns 0 if the pattern was valid, otherwise an error string.
4619 
4620    Assumes the `allocated' (and perhaps `buffer') and `translate' fields
4621    are set in BUFP on entry.
4622 
4623    We call regex_compile to do the actual compilation.  */
4624 
4625 const char *
4626 re_compile_pattern (pattern, length, bufp)
4627      const char *pattern;
4628      int length;
4629      struct re_pattern_buffer *bufp;
4630 {
4631   reg_errcode_t ret;
4632 
4633   /* GNU code is written to assume at least RE_NREGS registers will be set
4634      (and at least one extra will be -1).  */
4635   bufp->regs_allocated = REGS_UNALLOCATED;
4636 
4637   /* And GNU code determines whether or not to get register information
4638      by passing null for the REGS argument to re_match, etc., not by
4639      setting no_sub.  */
4640   bufp->no_sub = 0;
4641 
4642   /* Match anchors at newline.  */
4643   bufp->newline_anchor = 1;
4644 
4645   ret = regex_compile (pattern, length, re_syntax_options, bufp);
4646 
4647   return re_error_msg[(int) ret];
4648 }
4649 
4650 /* Entry points compatible with 4.2 BSD regex library.  We don't define
4651    them if this is an Emacs or POSIX compilation.  */
4652 
4653 #if !defined (emacs)
4654 
4655 /* BSD has one and only one pattern buffer.  */
4656 static struct re_pattern_buffer re_comp_buf;
4657 
4658 char *
4659 re_comp (s)
4660     const char *s;
4661 {
4662   reg_errcode_t ret;
4663 
4664   if (!s)
4665     {
4666       if (!re_comp_buf.buffer)
4667 	return "No previous regular expression";
4668       return 0;
4669     }
4670 
4671   if (!re_comp_buf.buffer)
4672     {
4673       re_comp_buf.buffer = (unsigned char *) malloc (200);
4674       if (re_comp_buf.buffer == NULL)
4675         return "Memory exhausted";
4676       re_comp_buf.allocated = 200;
4677 
4678       re_comp_buf.fastmap = (char *) malloc (1 << BYTEWIDTH);
4679       if (re_comp_buf.fastmap == NULL)
4680 	return "Memory exhausted";
4681     }
4682 
4683   /* Since `re_exec' always passes NULL for the `regs' argument, we
4684      don't need to initialize the pattern buffer fields which affect it.  */
4685 
4686   /* Match anchors at newlines.  */
4687   re_comp_buf.newline_anchor = 1;
4688 
4689   ret = regex_compile (s, strlen (s), re_syntax_options, &re_comp_buf);
4690 
4691   /* Yes, we're discarding `const' here.  */
4692   return (char *) re_error_msg[(int) ret];
4693 }
4694 
4695 
4696 int
4697 re_exec (s)
4698     const char *s;
4699 {
4700   const int len = strlen (s);
4701   return
4702     0 <= re_search (&re_comp_buf, s, len, 0, len, (struct re_registers *) 0);
4703 }
4704 #endif /* not emacs and not _POSIX_SOURCE */
4705 
4706 /* POSIX.2 functions.  Don't define these for Emacs.  */
4707 
4708 #ifndef emacs
4709 
4710 /* regcomp takes a regular expression as a string and compiles it.
4711 
4712    PREG is a regex_t *.  We do not expect any fields to be initialized,
4713    since POSIX says we shouldn't.  Thus, we set
4714 
4715      `buffer' to the compiled pattern;
4716      `used' to the length of the compiled pattern;
4717      `syntax' to RE_SYNTAX_POSIX_EXTENDED if the
4718        REG_EXTENDED bit in CFLAGS is set; otherwise, to
4719        RE_SYNTAX_POSIX_BASIC;
4720      `newline_anchor' to REG_NEWLINE being set in CFLAGS;
4721      `fastmap' and `fastmap_accurate' to zero;
4722      `re_nsub' to the number of subexpressions in PATTERN.
4723 
4724    PATTERN is the address of the pattern string.
4725 
4726    CFLAGS is a series of bits which affect compilation.
4727 
4728      If REG_EXTENDED is set, we use POSIX extended syntax; otherwise, we
4729      use POSIX basic syntax.
4730 
4731      If REG_NEWLINE is set, then . and [^...] don't match newline.
4732      Also, regexec will try a match beginning after every newline.
4733 
4734      If REG_ICASE is set, then we considers upper- and lowercase
4735      versions of letters to be equivalent when matching.
4736 
4737      If REG_NOSUB is set, then when PREG is passed to regexec, that
4738      routine will report only success or failure, and nothing about the
4739      registers.
4740 
4741    It returns 0 if it succeeds, nonzero if it doesn't.  (See regex.h for
4742    the return codes and their meanings.)  */
4743 
4744 int
4745 regcomp (preg, pattern, cflags)
4746     regex_t *preg;
4747     const char *pattern;
4748     int cflags;
4749 {
4750   reg_errcode_t ret;
4751   unsigned syntax
4752     = (cflags & REG_EXTENDED) ?
4753       RE_SYNTAX_POSIX_EXTENDED : RE_SYNTAX_POSIX_BASIC;
4754 
4755   /* regex_compile will allocate the space for the compiled pattern.  */
4756   preg->buffer = 0;
4757   preg->allocated = 0;
4758 
4759   /* Don't bother to use a fastmap when searching.  This simplifies the
4760      REG_NEWLINE case: if we used a fastmap, we'd have to put all the
4761      characters after newlines into the fastmap.  This way, we just try
4762      every character.  */
4763   preg->fastmap = 0;
4764 
4765   if (cflags & REG_ICASE)
4766     {
4767       unsigned i;
4768 
4769       preg->translate = (char *) malloc (CHAR_SET_SIZE);
4770       if (preg->translate == NULL)
4771         return (int) REG_ESPACE;
4772 
4773       /* Map uppercase characters to corresponding lowercase ones.  */
4774       for (i = 0; i < CHAR_SET_SIZE; i++)
4775         preg->translate[i] = ISUPPER (i) ? tolower (i) : i;
4776     }
4777   else
4778     preg->translate = NULL;
4779 
4780   /* If REG_NEWLINE is set, newlines are treated differently.  */
4781   if (cflags & REG_NEWLINE)
4782     { /* REG_NEWLINE implies neither . nor [^...] match newline.  */
4783       syntax &= ~RE_DOT_NEWLINE;
4784       syntax |= RE_HAT_LISTS_NOT_NEWLINE;
4785       /* It also changes the matching behavior.  */
4786       preg->newline_anchor = 1;
4787     }
4788   else
4789     preg->newline_anchor = 0;
4790 
4791   preg->no_sub = !!(cflags & REG_NOSUB);
4792 
4793   /* POSIX says a null character in the pattern terminates it, so we
4794      can use strlen here in compiling the pattern.  */
4795   ret = regex_compile (pattern, strlen (pattern), syntax, preg);
4796 
4797   /* POSIX doesn't distinguish between an unmatched open-group and an
4798      unmatched close-group: both are REG_EPAREN.  */
4799   if (ret == REG_ERPAREN) ret = REG_EPAREN;
4800 
4801   return (int) ret;
4802 }
4803 
4804 
4805 /* regexec searches for a given pattern, specified by PREG, in the
4806    string STRING.
4807 
4808    If NMATCH is zero or REG_NOSUB was set in the cflags argument to
4809    `regcomp', we ignore PMATCH.  Otherwise, we assume PMATCH has at
4810    least NMATCH elements, and we set them to the offsets of the
4811    corresponding matched substrings.
4812 
4813    EFLAGS specifies `execution flags' which affect matching: if
4814    REG_NOTBOL is set, then ^ does not match at the beginning of the
4815    string; if REG_NOTEOL is set, then $ does not match at the end.
4816 
4817    We return 0 if we find a match and REG_NOMATCH if not.  */
4818 
4819 int
4820 regexec (preg, string, nmatch, pmatch, eflags)
4821     const regex_t *preg;
4822     const char *string;
4823     size_t nmatch;
4824     regmatch_t pmatch[];
4825     int eflags;
4826 {
4827   int ret;
4828   struct re_registers regs;
4829   regex_t private_preg;
4830   int len = strlen (string);
4831   boolean want_reg_info = !preg->no_sub && nmatch > 0;
4832 
4833   private_preg = *preg;
4834 
4835   private_preg.not_bol = !!(eflags & REG_NOTBOL);
4836   private_preg.not_eol = !!(eflags & REG_NOTEOL);
4837 
4838   /* The user has told us exactly how many registers to return
4839      information about, via `nmatch'.  We have to pass that on to the
4840      matching routines.  */
4841   private_preg.regs_allocated = REGS_FIXED;
4842 
4843   if (want_reg_info)
4844     {
4845       regs.num_regs = nmatch;
4846       regs.start = TALLOC (nmatch, regoff_t);
4847       regs.end = TALLOC (nmatch, regoff_t);
4848       if (regs.start == NULL || regs.end == NULL)
4849         return (int) REG_NOMATCH;
4850     }
4851 
4852   /* Perform the searching operation.  */
4853   ret = re_search (&private_preg, string, len,
4854                    /* start: */ 0, /* range: */ len,
4855                    want_reg_info ? &regs : (struct re_registers *) 0);
4856 
4857   /* Copy the register information to the POSIX structure.  */
4858   if (want_reg_info)
4859     {
4860       if (ret >= 0)
4861         {
4862           unsigned r;
4863 
4864           for (r = 0; r < nmatch; r++)
4865             {
4866               pmatch[r].rm_so = regs.start[r];
4867               pmatch[r].rm_eo = regs.end[r];
4868             }
4869         }
4870 
4871       /* If we needed the temporary register info, free the space now.  */
4872       free (regs.start);
4873       free (regs.end);
4874     }
4875 
4876   /* We want zero return to mean success, unlike `re_search'.  */
4877   return ret >= 0 ? (int) REG_NOERROR : (int) REG_NOMATCH;
4878 }
4879 
4880 
4881 /* Returns a message corresponding to an error code, ERRCODE, returned
4882    from either regcomp or regexec.   We don't use PREG here.  */
4883 
4884 size_t
4885 regerror (errcode, preg, errbuf, errbuf_size)
4886     int errcode;
4887     const regex_t *preg;
4888     char *errbuf;
4889     size_t errbuf_size;
4890 {
4891   const char *msg;
4892   size_t msg_size;
4893 
4894   if (errcode < 0
4895       || errcode >= (sizeof (re_error_msg) / sizeof (re_error_msg[0])))
4896     /* Only error codes returned by the rest of the code should be passed
4897        to this routine.  If we are given anything else, or if other regex
4898        code generates an invalid error code, then the program has a bug.
4899        Dump core so we can fix it.  */
4900     abort ();
4901 
4902   msg = re_error_msg[errcode];
4903 
4904   /* POSIX doesn't require that we do anything in this case, but why
4905      not be nice.  */
4906   if (! msg)
4907     msg = "Success";
4908 
4909   msg_size = strlen (msg) + 1; /* Includes the null.  */
4910 
4911   if (errbuf_size != 0)
4912     {
4913       if (msg_size > errbuf_size)
4914         {
4915           strncpy (errbuf, msg, errbuf_size - 1);
4916           errbuf[errbuf_size - 1] = 0;
4917         }
4918       else
4919         strcpy (errbuf, msg);
4920     }
4921 
4922   return msg_size;
4923 }
4924 
4925 
4926 /* Free dynamically allocated space used by PREG.  */
4927 
4928 void
4929 regfree (preg)
4930     regex_t *preg;
4931 {
4932   if (preg->buffer != NULL)
4933     free (preg->buffer);
4934   preg->buffer = NULL;
4935 
4936   preg->allocated = 0;
4937   preg->used = 0;
4938 
4939   if (preg->fastmap != NULL)
4940     free (preg->fastmap);
4941   preg->fastmap = NULL;
4942   preg->fastmap_accurate = 0;
4943 
4944   if (preg->translate != NULL)
4945     free (preg->translate);
4946   preg->translate = NULL;
4947 }
4948 
4949 #endif /* not emacs  */
4950 
4951 /*
4952 Local variables:
4953 make-backup-files: t
4954 version-control: t
4955 trim-versions-without-asking: nil
4956 End:
4957 */
4958