xref: /openbsd-src/gnu/usr.bin/cvs/lib/regex.c (revision 62a742911104f98b9185b2c6b6007d9b1c36396c)
1 /* Extended regular expression matching and search library, version
2    0.12.  (Implements POSIX draft P10003.2/D11.2, except for
3    internationalization features.)
4 
5    Copyright (C) 1993, 1994, 1995, 1996 Free Software Foundation, Inc.
6 
7    This program is free software; you can redistribute it and/or modify
8    it under the terms of the GNU General Public License as published by
9    the Free Software Foundation; either version 2, or (at your option)
10    any later version.
11 
12    This program is distributed in the hope that it will be useful,
13    but WITHOUT ANY WARRANTY; without even the implied warranty of
14    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15    GNU General Public License for more details.  */
16 
17 /* Trying to define this in the makefile would get hairy, unless we can
18    more gracefully do it for NT, OS/2, unix, etc.  */
19 #define REGEX_MALLOC 1
20 
21 /* AIX requires this to be the first thing in the file. */
22 #if defined (_AIX) && !defined (REGEX_MALLOC)
23   #pragma alloca
24 #endif
25 
26 #define _GNU_SOURCE
27 
28 /* We need this for `regex.h', and perhaps for the Emacs include files.  */
29 #include <sys/types.h>
30 
31 #ifdef HAVE_CONFIG_H
32 #include "config.h"
33 #endif
34 
35 /* The `emacs' switch turns on certain matching commands
36    that make sense only in Emacs. */
37 #ifdef emacs
38 
39 #include "lisp.h"
40 #include "buffer.h"
41 
42 /* Make syntax table lookup grant data in gl_state.  */
43 #define SYNTAX_ENTRY_VIA_PROPERTY
44 
45 #include "syntax.h"
46 #include "charset.h"
47 #include "category.h"
48 
49 #define malloc xmalloc
50 #define free xfree
51 
52 #else  /* not emacs */
53 
54 /* We used to test for `BSTRING' here, but only GCC and Emacs define
55    `BSTRING', as far as I know, and neither of them use this code.  */
56 #if HAVE_STRING_H || STDC_HEADERS
57 #include <string.h>
58 #ifndef bcmp
59 #define bcmp(s1, s2, n)	memcmp ((s1), (s2), (n))
60 #endif
61 #ifndef bcopy
62 #define bcopy(s, d, n)	memcpy ((d), (s), (n))
63 #endif
64 #ifndef bzero
65 #define bzero(s, n)	memset ((s), 0, (n))
66 #endif
67 #else
68 #include <strings.h>
69 #endif
70 
71 #ifdef STDC_HEADERS
72 #include <stdlib.h>
73 #else
74 char *malloc ();
75 char *realloc ();
76 #endif
77 
78 
79 /* Define the syntax stuff for \<, \>, etc.  */
80 
81 /* This must be nonzero for the wordchar and notwordchar pattern
82    commands in re_match_2.  */
83 #ifndef Sword
84 #define Sword 1
85 #endif
86 
87 #ifdef SYNTAX_TABLE
88 
89 extern char *re_syntax_table;
90 
91 #else /* not SYNTAX_TABLE */
92 
93 /* How many characters in the character set.  */
94 #define CHAR_SET_SIZE 256
95 
96 static char re_syntax_table[CHAR_SET_SIZE];
97 
98 static void
99 init_syntax_once ()
100 {
101    register int c;
102    static int done = 0;
103 
104    if (done)
105      return;
106 
107    bzero (re_syntax_table, sizeof re_syntax_table);
108 
109    for (c = 'a'; c <= 'z'; c++)
110      re_syntax_table[c] = Sword;
111 
112    for (c = 'A'; c <= 'Z'; c++)
113      re_syntax_table[c] = Sword;
114 
115    for (c = '0'; c <= '9'; c++)
116      re_syntax_table[c] = Sword;
117 
118    re_syntax_table['_'] = Sword;
119 
120    done = 1;
121 }
122 
123 #endif /* not SYNTAX_TABLE */
124 
125 #define SYNTAX(c) re_syntax_table[c]
126 
127 #endif /* not emacs */
128 
129 /* Get the interface, including the syntax bits.  */
130 #include "regex.h"
131 
132 /* isalpha etc. are used for the character classes.  */
133 #include <ctype.h>
134 
135 #ifndef isascii
136 #define isascii(c) 1
137 #endif
138 
139 #ifdef isblank
140 #define ISBLANK(c) (isascii (c) && isblank (c))
141 #else
142 #define ISBLANK(c) ((c) == ' ' || (c) == '\t')
143 #endif
144 #ifdef isgraph
145 #define ISGRAPH(c) (isascii (c) && isgraph (c))
146 #else
147 #define ISGRAPH(c) (isascii (c) && isprint (c) && !isspace (c))
148 #endif
149 
150 #define ISPRINT(c) (isascii (c) && isprint (c))
151 #define ISDIGIT(c) (isascii (c) && isdigit (c))
152 #define ISALNUM(c) (isascii (c) && isalnum (c))
153 #define ISALPHA(c) (isascii (c) && isalpha (c))
154 #define ISCNTRL(c) (isascii (c) && iscntrl (c))
155 #define ISLOWER(c) (isascii (c) && islower (c))
156 #define ISPUNCT(c) (isascii (c) && ispunct (c))
157 #define ISSPACE(c) (isascii (c) && isspace (c))
158 #define ISUPPER(c) (isascii (c) && isupper (c))
159 #define ISXDIGIT(c) (isascii (c) && isxdigit (c))
160 
161 #ifndef NULL
162 #define NULL 0
163 #endif
164 
165 /* We remove any previous definition of `SIGN_EXTEND_CHAR',
166    since ours (we hope) works properly with all combinations of
167    machines, compilers, `char' and `unsigned char' argument types.
168    (Per Bothner suggested the basic approach.)	*/
169 #undef SIGN_EXTEND_CHAR
170 #if __STDC__
171 #define SIGN_EXTEND_CHAR(c) ((signed char) (c))
172 #else  /* not __STDC__ */
173 /* As in Harbison and Steele.  */
174 #define SIGN_EXTEND_CHAR(c) ((((unsigned char) (c)) ^ 128) - 128)
175 #endif
176 
177 /* Should we use malloc or alloca?  If REGEX_MALLOC is not defined, we
178    use `alloca' instead of `malloc'.  This is because using malloc in
179    re_search* or re_match* could cause memory leaks when C-g is used in
180    Emacs; also, malloc is slower and causes storage fragmentation.  On
181    the other hand, malloc is more portable, and easier to debug.
182 
183    Because we sometimes use alloca, some routines have to be macros,
184    not functions -- `alloca'-allocated space disappears at the end of the
185    function it is called in.  */
186 
187 #ifdef REGEX_MALLOC
188 
189 #define REGEX_ALLOCATE malloc
190 #define REGEX_REALLOCATE(source, osize, nsize) realloc (source, nsize)
191 
192 #else /* not REGEX_MALLOC  */
193 
194 /* Emacs already defines alloca, sometimes.  */
195 #ifndef alloca
196 
197 /* Make alloca work the best possible way.  */
198 #ifdef __GNUC__
199 #define alloca __builtin_alloca
200 #else /* not __GNUC__ */
201 #if HAVE_ALLOCA_H
202 #include <alloca.h>
203 #else /* not __GNUC__ or HAVE_ALLOCA_H */
204 #ifndef _AIX /* Already did AIX, up at the top.  */
205 char *alloca ();
206 #endif /* not _AIX */
207 #endif /* not HAVE_ALLOCA_H */
208 #endif /* not __GNUC__ */
209 
210 #endif /* not alloca */
211 
212 #define REGEX_ALLOCATE alloca
213 
214 /* Assumes a `char *destination' variable.  */
215 #define REGEX_REALLOCATE(source, osize, nsize)				\
216   (destination = (char *) alloca (nsize),				\
217    bcopy (source, destination, osize),					\
218    destination)
219 
220 #endif /* not REGEX_MALLOC */
221 
222 
223 /* True if `size1' is non-NULL and PTR is pointing anywhere inside
224    `string1' or just past its end.  This works if PTR is NULL, which is
225    a good thing.  */
226 #define FIRST_STRING_P(ptr) 					\
227   (size1 && string1 <= (ptr) && (ptr) <= string1 + size1)
228 
229 /* (Re)Allocate N items of type T using malloc, or fail.  */
230 #define TALLOC(n, t) ((t *) malloc ((n) * sizeof (t)))
231 #define RETALLOC(addr, n, t) ((addr) = (t *) realloc (addr, (n) * sizeof (t)))
232 #define REGEX_TALLOC(n, t) ((t *) REGEX_ALLOCATE ((n) * sizeof (t)))
233 
234 #define BYTEWIDTH 8 /* In bits.  */
235 
236 #define STREQ(s1, s2) ((strcmp (s1, s2) == 0))
237 
238 #undef MAX
239 #undef MIN
240 #define MAX(a, b) ((a) > (b) ? (a) : (b))
241 #define MIN(a, b) ((a) < (b) ? (a) : (b))
242 
243 typedef char boolean;
244 #define false 0
245 #define true 1
246 
247 /* These are the command codes that appear in compiled regular
248    expressions.  Some opcodes are followed by argument bytes.  A
249    command code can specify any interpretation whatsoever for its
250    arguments.  Zero bytes may appear in the compiled regular expression.
251 
252    The value of `exactn' is needed in search.c (search_buffer) in Emacs.
253    So regex.h defines a symbol `RE_EXACTN_VALUE' to be 1; the value of
254    `exactn' we use here must also be 1.  */
255 
256 typedef enum
257 {
258   no_op = 0,
259 
260         /* Followed by one byte giving n, then by n literal bytes.  */
261   exactn = 1,
262 
263         /* Matches any (more or less) character.  */
264   anychar,
265 
266         /* Matches any one char belonging to specified set.  First
267            following byte is number of bitmap bytes.  Then come bytes
268            for a bitmap saying which chars are in.  Bits in each byte
269            are ordered low-bit-first.  A character is in the set if its
270            bit is 1.  A character too large to have a bit in the map is
271            automatically not in the set.  */
272   charset,
273 
274         /* Same parameters as charset, but match any character that is
275            not one of those specified.  */
276   charset_not,
277 
278         /* Start remembering the text that is matched, for storing in a
279            register.  Followed by one byte with the register number, in
280            the range 0 to one less than the pattern buffer's re_nsub
281            field.  Then followed by one byte with the number of groups
282            inner to this one.  (This last has to be part of the
283            start_memory only because we need it in the on_failure_jump
284            of re_match_2.)  */
285   start_memory,
286 
287         /* Stop remembering the text that is matched and store it in a
288            memory register.  Followed by one byte with the register
289            number, in the range 0 to one less than `re_nsub' in the
290            pattern buffer, and one byte with the number of inner groups,
291            just like `start_memory'.  (We need the number of inner
292            groups here because we don't have any easy way of finding the
293            corresponding start_memory when we're at a stop_memory.)  */
294   stop_memory,
295 
296         /* Match a duplicate of something remembered. Followed by one
297            byte containing the register number.  */
298   duplicate,
299 
300         /* Fail unless at beginning of line.  */
301   begline,
302 
303         /* Fail unless at end of line.  */
304   endline,
305 
306         /* Succeeds if at beginning of buffer (if emacs) or at beginning
307            of string to be matched (if not).  */
308   begbuf,
309 
310         /* Analogously, for end of buffer/string.  */
311   endbuf,
312 
313         /* Followed by two byte relative address to which to jump.  */
314   jump,
315 
316 	/* Same as jump, but marks the end of an alternative.  */
317   jump_past_alt,
318 
319         /* Followed by two-byte relative address of place to resume at
320            in case of failure.  */
321   on_failure_jump,
322 
323         /* Like on_failure_jump, but pushes a placeholder instead of the
324            current string position when executed.  */
325   on_failure_keep_string_jump,
326 
327         /* Throw away latest failure point and then jump to following
328            two-byte relative address.  */
329   pop_failure_jump,
330 
331         /* Change to pop_failure_jump if know won't have to backtrack to
332            match; otherwise change to jump.  This is used to jump
333            back to the beginning of a repeat.  If what follows this jump
334            clearly won't match what the repeat does, such that we can be
335            sure that there is no use backtracking out of repetitions
336            already matched, then we change it to a pop_failure_jump.
337            Followed by two-byte address.  */
338   maybe_pop_jump,
339 
340         /* Jump to following two-byte address, and push a dummy failure
341            point. This failure point will be thrown away if an attempt
342            is made to use it for a failure.  A `+' construct makes this
343            before the first repeat.  Also used as an intermediary kind
344            of jump when compiling an alternative.  */
345   dummy_failure_jump,
346 
347 	/* Push a dummy failure point and continue.  Used at the end of
348 	   alternatives.  */
349   push_dummy_failure,
350 
351         /* Followed by two-byte relative address and two-byte number n.
352            After matching N times, jump to the address upon failure.  */
353   succeed_n,
354 
355         /* Followed by two-byte relative address, and two-byte number n.
356            Jump to the address N times, then fail.  */
357   jump_n,
358 
359         /* Set the following two-byte relative address to the
360            subsequent two-byte number.  The address *includes* the two
361            bytes of number.  */
362   set_number_at,
363 
364   wordchar,	/* Matches any word-constituent character.  */
365   notwordchar,	/* Matches any char that is not a word-constituent.  */
366 
367   wordbeg,	/* Succeeds if at word beginning.  */
368   wordend,	/* Succeeds if at word end.  */
369 
370   wordbound,	/* Succeeds if at a word boundary.  */
371   notwordbound	/* Succeeds if not at a word boundary.  */
372 
373 #ifdef emacs
374   ,before_dot,	/* Succeeds if before point.  */
375   at_dot,	/* Succeeds if at point.  */
376   after_dot,	/* Succeeds if after point.  */
377 
378 	/* Matches any character whose syntax is specified.  Followed by
379            a byte which contains a syntax code, e.g., Sword.  */
380   syntaxspec,
381 
382 	/* Matches any character whose syntax is not that specified.  */
383   notsyntaxspec
384 #endif /* emacs */
385 } re_opcode_t;
386 
387 /* Common operations on the compiled pattern.  */
388 
389 /* Store NUMBER in two contiguous bytes starting at DESTINATION.  */
390 
391 #define STORE_NUMBER(destination, number)				\
392   do {									\
393     (destination)[0] = (number) & 0377;					\
394     (destination)[1] = (number) >> 8;					\
395   } while (0)
396 
397 /* Same as STORE_NUMBER, except increment DESTINATION to
398    the byte after where the number is stored.  Therefore, DESTINATION
399    must be an lvalue.  */
400 
401 #define STORE_NUMBER_AND_INCR(destination, number)			\
402   do {									\
403     STORE_NUMBER (destination, number);					\
404     (destination) += 2;							\
405   } while (0)
406 
407 /* Put into DESTINATION a number stored in two contiguous bytes starting
408    at SOURCE.  */
409 
410 #define EXTRACT_NUMBER(destination, source)				\
411   do {									\
412     (destination) = *(source) & 0377;					\
413     (destination) += SIGN_EXTEND_CHAR (*((source) + 1)) << 8;		\
414   } while (0)
415 
416 #ifdef DEBUG
417 static void
418 extract_number (dest, source)
419     int *dest;
420     unsigned char *source;
421 {
422   int temp = SIGN_EXTEND_CHAR (*(source + 1));
423   *dest = *source & 0377;
424   *dest += temp << 8;
425 }
426 
427 #ifndef EXTRACT_MACROS /* To debug the macros.  */
428 #undef EXTRACT_NUMBER
429 #define EXTRACT_NUMBER(dest, src) extract_number (&dest, src)
430 #endif /* not EXTRACT_MACROS */
431 
432 #endif /* DEBUG */
433 
434 /* Same as EXTRACT_NUMBER, except increment SOURCE to after the number.
435    SOURCE must be an lvalue.  */
436 
437 #define EXTRACT_NUMBER_AND_INCR(destination, source)			\
438   do {									\
439     EXTRACT_NUMBER (destination, source);				\
440     (source) += 2; 							\
441   } while (0)
442 
443 #ifdef DEBUG
444 static void
445 extract_number_and_incr (destination, source)
446     int *destination;
447     unsigned char **source;
448 {
449   extract_number (destination, *source);
450   *source += 2;
451 }
452 
453 #ifndef EXTRACT_MACROS
454 #undef EXTRACT_NUMBER_AND_INCR
455 #define EXTRACT_NUMBER_AND_INCR(dest, src) \
456   extract_number_and_incr (&dest, &src)
457 #endif /* not EXTRACT_MACROS */
458 
459 #endif /* DEBUG */
460 
461 /* If DEBUG is defined, Regex prints many voluminous messages about what
462    it is doing (if the variable `debug' is nonzero).  If linked with the
463    main program in `iregex.c', you can enter patterns and strings
464    interactively.  And if linked with the main program in `main.c' and
465    the other test files, you can run the already-written tests.  */
466 
467 #ifdef DEBUG
468 
469 /* We use standard I/O for debugging.  */
470 #include <stdio.h>
471 
472 /* It is useful to test things that ``must'' be true when debugging.  */
473 #include <assert.h>
474 
475 static int debug = 0;
476 
477 #define DEBUG_STATEMENT(e) e
478 #define DEBUG_PRINT1(x) if (debug) printf (x)
479 #define DEBUG_PRINT2(x1, x2) if (debug) printf (x1, x2)
480 #define DEBUG_PRINT3(x1, x2, x3) if (debug) printf (x1, x2, x3)
481 #define DEBUG_PRINT4(x1, x2, x3, x4) if (debug) printf (x1, x2, x3, x4)
482 #define DEBUG_PRINT_COMPILED_PATTERN(p, s, e) 				\
483   if (debug) print_partial_compiled_pattern (s, e)
484 #define DEBUG_PRINT_DOUBLE_STRING(w, s1, sz1, s2, sz2)			\
485   if (debug) print_double_string (w, s1, sz1, s2, sz2)
486 
487 
488 extern void printchar ();
489 
490 /* Print the fastmap in human-readable form.  */
491 
492 void
493 print_fastmap (fastmap)
494     char *fastmap;
495 {
496   unsigned was_a_range = 0;
497   unsigned i = 0;
498 
499   while (i < (1 << BYTEWIDTH))
500     {
501       if (fastmap[i++])
502 	{
503 	  was_a_range = 0;
504           printchar (i - 1);
505           while (i < (1 << BYTEWIDTH)  &&  fastmap[i])
506             {
507               was_a_range = 1;
508               i++;
509             }
510 	  if (was_a_range)
511             {
512               printf ("-");
513               printchar (i - 1);
514             }
515         }
516     }
517   putchar ('\n');
518 }
519 
520 
521 /* Print a compiled pattern string in human-readable form, starting at
522    the START pointer into it and ending just before the pointer END.  */
523 
524 void
525 print_partial_compiled_pattern (start, end)
526     unsigned char *start;
527     unsigned char *end;
528 {
529   int mcnt, mcnt2;
530   unsigned char *p = start;
531   unsigned char *pend = end;
532 
533   if (start == NULL)
534     {
535       printf ("(null)\n");
536       return;
537     }
538 
539   /* Loop over pattern commands.  */
540   while (p < pend)
541     {
542       switch ((re_opcode_t) *p++)
543 	{
544         case no_op:
545           printf ("/no_op");
546           break;
547 
548 	case exactn:
549 	  mcnt = *p++;
550           printf ("/exactn/%d", mcnt);
551           do
552 	    {
553               putchar ('/');
554 	      printchar (*p++);
555             }
556           while (--mcnt);
557           break;
558 
559 	case start_memory:
560           mcnt = *p++;
561           printf ("/start_memory/%d/%d", mcnt, *p++);
562           break;
563 
564 	case stop_memory:
565           mcnt = *p++;
566 	  printf ("/stop_memory/%d/%d", mcnt, *p++);
567           break;
568 
569 	case duplicate:
570 	  printf ("/duplicate/%d", *p++);
571 	  break;
572 
573 	case anychar:
574 	  printf ("/anychar");
575 	  break;
576 
577 	case charset:
578         case charset_not:
579           {
580             register int c;
581 
582             printf ("/charset%s",
583 	            (re_opcode_t) *(p - 1) == charset_not ? "_not" : "");
584 
585             assert (p + *p < pend);
586 
587             for (c = 0; c < *p; c++)
588               {
589                 unsigned bit;
590                 unsigned char map_byte = p[1 + c];
591 
592                 putchar ('/');
593 
594 		for (bit = 0; bit < BYTEWIDTH; bit++)
595                   if (map_byte & (1 << bit))
596                     printchar (c * BYTEWIDTH + bit);
597               }
598 	    p += 1 + *p;
599 	    break;
600 	  }
601 
602 	case begline:
603 	  printf ("/begline");
604           break;
605 
606 	case endline:
607           printf ("/endline");
608           break;
609 
610 	case on_failure_jump:
611           extract_number_and_incr (&mcnt, &p);
612   	  printf ("/on_failure_jump/0/%d", mcnt);
613           break;
614 
615 	case on_failure_keep_string_jump:
616           extract_number_and_incr (&mcnt, &p);
617   	  printf ("/on_failure_keep_string_jump/0/%d", mcnt);
618           break;
619 
620 	case dummy_failure_jump:
621           extract_number_and_incr (&mcnt, &p);
622   	  printf ("/dummy_failure_jump/0/%d", mcnt);
623           break;
624 
625 	case push_dummy_failure:
626           printf ("/push_dummy_failure");
627           break;
628 
629         case maybe_pop_jump:
630           extract_number_and_incr (&mcnt, &p);
631   	  printf ("/maybe_pop_jump/0/%d", mcnt);
632 	  break;
633 
634         case pop_failure_jump:
635 	  extract_number_and_incr (&mcnt, &p);
636   	  printf ("/pop_failure_jump/0/%d", mcnt);
637 	  break;
638 
639         case jump_past_alt:
640 	  extract_number_and_incr (&mcnt, &p);
641   	  printf ("/jump_past_alt/0/%d", mcnt);
642 	  break;
643 
644         case jump:
645 	  extract_number_and_incr (&mcnt, &p);
646   	  printf ("/jump/0/%d", mcnt);
647 	  break;
648 
649         case succeed_n:
650           extract_number_and_incr (&mcnt, &p);
651           extract_number_and_incr (&mcnt2, &p);
652  	  printf ("/succeed_n/0/%d/0/%d", mcnt, mcnt2);
653           break;
654 
655         case jump_n:
656           extract_number_and_incr (&mcnt, &p);
657           extract_number_and_incr (&mcnt2, &p);
658  	  printf ("/jump_n/0/%d/0/%d", mcnt, mcnt2);
659           break;
660 
661         case set_number_at:
662           extract_number_and_incr (&mcnt, &p);
663           extract_number_and_incr (&mcnt2, &p);
664  	  printf ("/set_number_at/0/%d/0/%d", mcnt, mcnt2);
665           break;
666 
667         case wordbound:
668 	  printf ("/wordbound");
669 	  break;
670 
671 	case notwordbound:
672 	  printf ("/notwordbound");
673           break;
674 
675 	case wordbeg:
676 	  printf ("/wordbeg");
677 	  break;
678 
679 	case wordend:
680 	  printf ("/wordend");
681 
682 #ifdef emacs
683 	case before_dot:
684 	  printf ("/before_dot");
685           break;
686 
687 	case at_dot:
688 	  printf ("/at_dot");
689           break;
690 
691 	case after_dot:
692 	  printf ("/after_dot");
693           break;
694 
695 	case syntaxspec:
696           printf ("/syntaxspec");
697 	  mcnt = *p++;
698 	  printf ("/%d", mcnt);
699           break;
700 
701 	case notsyntaxspec:
702           printf ("/notsyntaxspec");
703 	  mcnt = *p++;
704 	  printf ("/%d", mcnt);
705 	  break;
706 #endif /* emacs */
707 
708 	case wordchar:
709 	  printf ("/wordchar");
710           break;
711 
712 	case notwordchar:
713 	  printf ("/notwordchar");
714           break;
715 
716 	case begbuf:
717 	  printf ("/begbuf");
718           break;
719 
720 	case endbuf:
721 	  printf ("/endbuf");
722           break;
723 
724         default:
725           printf ("?%d", *(p-1));
726 	}
727     }
728   printf ("/\n");
729 }
730 
731 
732 void
733 print_compiled_pattern (bufp)
734     struct re_pattern_buffer *bufp;
735 {
736   unsigned char *buffer = bufp->buffer;
737 
738   print_partial_compiled_pattern (buffer, buffer + bufp->used);
739   printf ("%d bytes used/%d bytes allocated.\n", bufp->used, bufp->allocated);
740 
741   if (bufp->fastmap_accurate && bufp->fastmap)
742     {
743       printf ("fastmap: ");
744       print_fastmap (bufp->fastmap);
745     }
746 
747   printf ("re_nsub: %d\t", bufp->re_nsub);
748   printf ("regs_alloc: %d\t", bufp->regs_allocated);
749   printf ("can_be_null: %d\t", bufp->can_be_null);
750   printf ("newline_anchor: %d\n", bufp->newline_anchor);
751   printf ("no_sub: %d\t", bufp->no_sub);
752   printf ("not_bol: %d\t", bufp->not_bol);
753   printf ("not_eol: %d\t", bufp->not_eol);
754   printf ("syntax: %d\n", bufp->syntax);
755   /* Perhaps we should print the translate table?  */
756 }
757 
758 
759 void
760 print_double_string (where, string1, size1, string2, size2)
761     const char *where;
762     const char *string1;
763     const char *string2;
764     int size1;
765     int size2;
766 {
767   unsigned this_char;
768 
769   if (where == NULL)
770     printf ("(null)");
771   else
772     {
773       if (FIRST_STRING_P (where))
774         {
775           for (this_char = where - string1; this_char < size1; this_char++)
776             printchar (string1[this_char]);
777 
778           where = string2;
779         }
780 
781       for (this_char = where - string2; this_char < size2; this_char++)
782         printchar (string2[this_char]);
783     }
784 }
785 
786 #else /* not DEBUG */
787 
788 #undef assert
789 #define assert(e)
790 
791 #define DEBUG_STATEMENT(e)
792 #define DEBUG_PRINT1(x)
793 #define DEBUG_PRINT2(x1, x2)
794 #define DEBUG_PRINT3(x1, x2, x3)
795 #define DEBUG_PRINT4(x1, x2, x3, x4)
796 #define DEBUG_PRINT_COMPILED_PATTERN(p, s, e)
797 #define DEBUG_PRINT_DOUBLE_STRING(w, s1, sz1, s2, sz2)
798 
799 #endif /* not DEBUG */
800 
801 /* Set by `re_set_syntax' to the current regexp syntax to recognize.  Can
802    also be assigned to arbitrarily: each pattern buffer stores its own
803    syntax, so it can be changed between regex compilations.  */
804 reg_syntax_t re_syntax_options = RE_SYNTAX_EMACS;
805 
806 
807 /* Specify the precise syntax of regexps for compilation.  This provides
808    for compatibility for various utilities which historically have
809    different, incompatible syntaxes.
810 
811    The argument SYNTAX is a bit mask comprised of the various bits
812    defined in regex.h.  We return the old syntax.  */
813 
814 reg_syntax_t
815 re_set_syntax (syntax)
816     reg_syntax_t syntax;
817 {
818   reg_syntax_t ret = re_syntax_options;
819 
820   re_syntax_options = syntax;
821   return ret;
822 }
823 
824 /* This table gives an error message for each of the error codes listed
825    in regex.h.  Obviously the order here has to be same as there.  */
826 
827 static const char *re_error_msg[] =
828   { NULL,					/* REG_NOERROR */
829     "No match",					/* REG_NOMATCH */
830     "Invalid regular expression",		/* REG_BADPAT */
831     "Invalid collation character",		/* REG_ECOLLATE */
832     "Invalid character class name",		/* REG_ECTYPE */
833     "Trailing backslash",			/* REG_EESCAPE */
834     "Invalid back reference",			/* REG_ESUBREG */
835     "Unmatched [ or [^",			/* REG_EBRACK */
836     "Unmatched ( or \\(",			/* REG_EPAREN */
837     "Unmatched \\{",				/* REG_EBRACE */
838     "Invalid content of \\{\\}",		/* REG_BADBR */
839     "Invalid range end",			/* REG_ERANGE */
840     "Memory exhausted",				/* REG_ESPACE */
841     "Invalid preceding regular expression",	/* REG_BADRPT */
842     "Premature end of regular expression",	/* REG_EEND */
843     "Regular expression too big",		/* REG_ESIZE */
844     "Unmatched ) or \\)",			/* REG_ERPAREN */
845   };
846 
847 /* Subroutine declarations and macros for regex_compile.  */
848 
849 static void store_op1 (), store_op2 ();
850 static void insert_op1 (), insert_op2 ();
851 static boolean at_begline_loc_p (), at_endline_loc_p ();
852 static boolean group_in_compile_stack ();
853 static reg_errcode_t compile_range ();
854 
855 /* Fetch the next character in the uncompiled pattern---translating it
856    if necessary.  Also cast from a signed character in the constant
857    string passed to us by the user to an unsigned char that we can use
858    as an array index (in, e.g., `translate').  */
859 #define PATFETCH(c)							\
860   do {if (p == pend) return REG_EEND;					\
861     c = (unsigned char) *p++;						\
862     if (translate) c = translate[c]; 					\
863   } while (0)
864 
865 /* Fetch the next character in the uncompiled pattern, with no
866    translation.  */
867 #define PATFETCH_RAW(c)							\
868   do {if (p == pend) return REG_EEND;					\
869     c = (unsigned char) *p++; 						\
870   } while (0)
871 
872 /* Go backwards one character in the pattern.  */
873 #define PATUNFETCH p--
874 
875 
876 /* If `translate' is non-null, return translate[D], else just D.  We
877    cast the subscript to translate because some data is declared as
878    `char *', to avoid warnings when a string constant is passed.  But
879    when we use a character as a subscript we must make it unsigned.  */
880 #define TRANSLATE(d) (translate ? translate[(unsigned char) (d)] : (d))
881 
882 
883 /* Macros for outputting the compiled pattern into `buffer'.  */
884 
885 /* If the buffer isn't allocated when it comes in, use this.  */
886 #define INIT_BUF_SIZE  32
887 
888 /* Make sure we have at least N more bytes of space in buffer.  */
889 #define GET_BUFFER_SPACE(n)						\
890     while (b - bufp->buffer + (n) > bufp->allocated)			\
891       EXTEND_BUFFER ()
892 
893 /* Make sure we have one more byte of buffer space and then add C to it.  */
894 #define BUF_PUSH(c)							\
895   do {									\
896     GET_BUFFER_SPACE (1);						\
897     *b++ = (unsigned char) (c);						\
898   } while (0)
899 
900 
901 /* Ensure we have two more bytes of buffer space and then append C1 and C2.  */
902 #define BUF_PUSH_2(c1, c2)						\
903   do {									\
904     GET_BUFFER_SPACE (2);						\
905     *b++ = (unsigned char) (c1);					\
906     *b++ = (unsigned char) (c2);					\
907   } while (0)
908 
909 
910 /* As with BUF_PUSH_2, except for three bytes.  */
911 #define BUF_PUSH_3(c1, c2, c3)						\
912   do {									\
913     GET_BUFFER_SPACE (3);						\
914     *b++ = (unsigned char) (c1);					\
915     *b++ = (unsigned char) (c2);					\
916     *b++ = (unsigned char) (c3);					\
917   } while (0)
918 
919 
920 /* Store a jump with opcode OP at LOC to location TO.  We store a
921    relative address offset by the three bytes the jump itself occupies.  */
922 #define STORE_JUMP(op, loc, to) \
923   store_op1 (op, loc, (to) - (loc) - 3)
924 
925 /* Likewise, for a two-argument jump.  */
926 #define STORE_JUMP2(op, loc, to, arg) \
927   store_op2 (op, loc, (to) - (loc) - 3, arg)
928 
929 /* Like `STORE_JUMP', but for inserting.  Assume `b' is the buffer end.  */
930 #define INSERT_JUMP(op, loc, to) \
931   insert_op1 (op, loc, (to) - (loc) - 3, b)
932 
933 /* Like `STORE_JUMP2', but for inserting.  Assume `b' is the buffer end.  */
934 #define INSERT_JUMP2(op, loc, to, arg) \
935   insert_op2 (op, loc, (to) - (loc) - 3, arg, b)
936 
937 
938 /* This is not an arbitrary limit: the arguments which represent offsets
939    into the pattern are two bytes long.  So if 2^16 bytes turns out to
940    be too small, many things would have to change.  */
941 #define MAX_BUF_SIZE (1L << 16)
942 
943 
944 /* Extend the buffer by twice its current size via realloc and
945    reset the pointers that pointed into the old block to point to the
946    correct places in the new one.  If extending the buffer results in it
947    being larger than MAX_BUF_SIZE, then flag memory exhausted.  */
948 #define EXTEND_BUFFER()							\
949   do { 									\
950     unsigned char *old_buffer = bufp->buffer;				\
951     if (bufp->allocated == MAX_BUF_SIZE) 				\
952       return REG_ESIZE;							\
953     bufp->allocated <<= 1;						\
954     if (bufp->allocated > MAX_BUF_SIZE)					\
955       bufp->allocated = MAX_BUF_SIZE; 					\
956     bufp->buffer = (unsigned char *) realloc (bufp->buffer, bufp->allocated);\
957     if (bufp->buffer == NULL)						\
958       return REG_ESPACE;						\
959     /* If the buffer moved, move all the pointers into it.  */		\
960     if (old_buffer != bufp->buffer)					\
961       {									\
962         b = (b - old_buffer) + bufp->buffer;				\
963         begalt = (begalt - old_buffer) + bufp->buffer;			\
964         if (fixup_alt_jump)						\
965           fixup_alt_jump = (fixup_alt_jump - old_buffer) + bufp->buffer;\
966         if (laststart)							\
967           laststart = (laststart - old_buffer) + bufp->buffer;		\
968         if (pending_exact)						\
969           pending_exact = (pending_exact - old_buffer) + bufp->buffer;	\
970       }									\
971   } while (0)
972 
973 
974 /* Since we have one byte reserved for the register number argument to
975    {start,stop}_memory, the maximum number of groups we can report
976    things about is what fits in that byte.  */
977 #define MAX_REGNUM 255
978 
979 /* But patterns can have more than `MAX_REGNUM' registers.  We just
980    ignore the excess.  */
981 typedef unsigned regnum_t;
982 
983 
984 /* Macros for the compile stack.  */
985 
986 /* Since offsets can go either forwards or backwards, this type needs to
987    be able to hold values from -(MAX_BUF_SIZE - 1) to MAX_BUF_SIZE - 1.  */
988 typedef int pattern_offset_t;
989 
990 typedef struct
991 {
992   pattern_offset_t begalt_offset;
993   pattern_offset_t fixup_alt_jump;
994   pattern_offset_t inner_group_offset;
995   pattern_offset_t laststart_offset;
996   regnum_t regnum;
997 } compile_stack_elt_t;
998 
999 
1000 typedef struct
1001 {
1002   compile_stack_elt_t *stack;
1003   unsigned size;
1004   unsigned avail;			/* Offset of next open position.  */
1005 } compile_stack_type;
1006 
1007 
1008 #define INIT_COMPILE_STACK_SIZE 32
1009 
1010 #define COMPILE_STACK_EMPTY  (compile_stack.avail == 0)
1011 #define COMPILE_STACK_FULL  (compile_stack.avail == compile_stack.size)
1012 
1013 /* The next available element.  */
1014 #define COMPILE_STACK_TOP (compile_stack.stack[compile_stack.avail])
1015 
1016 
1017 /* Set the bit for character C in a list.  */
1018 #define SET_LIST_BIT(c)                               \
1019   (b[((unsigned char) (c)) / BYTEWIDTH]               \
1020    |= 1 << (((unsigned char) c) % BYTEWIDTH))
1021 
1022 
1023 /* Get the next unsigned number in the uncompiled pattern.  */
1024 #define GET_UNSIGNED_NUMBER(num) 					\
1025   { if (p != pend)							\
1026      {									\
1027        PATFETCH (c); 							\
1028        while (ISDIGIT (c)) 						\
1029          { 								\
1030            if (num < 0)							\
1031               num = 0;							\
1032            num = num * 10 + c - '0'; 					\
1033            if (p == pend) 						\
1034               break; 							\
1035            PATFETCH (c);						\
1036          } 								\
1037        } 								\
1038     }
1039 
1040 #define CHAR_CLASS_MAX_LENGTH  6 /* Namely, `xdigit'.  */
1041 
1042 #define IS_CHAR_CLASS(string)						\
1043    (STREQ (string, "alpha") || STREQ (string, "upper")			\
1044     || STREQ (string, "lower") || STREQ (string, "digit")		\
1045     || STREQ (string, "alnum") || STREQ (string, "xdigit")		\
1046     || STREQ (string, "space") || STREQ (string, "print")		\
1047     || STREQ (string, "punct") || STREQ (string, "graph")		\
1048     || STREQ (string, "cntrl") || STREQ (string, "blank"))
1049 
1050 /* `regex_compile' compiles PATTERN (of length SIZE) according to SYNTAX.
1051    Returns one of error codes defined in `regex.h', or zero for success.
1052 
1053    Assumes the `allocated' (and perhaps `buffer') and `translate'
1054    fields are set in BUFP on entry.
1055 
1056    If it succeeds, results are put in BUFP (if it returns an error, the
1057    contents of BUFP are undefined):
1058      `buffer' is the compiled pattern;
1059      `syntax' is set to SYNTAX;
1060      `used' is set to the length of the compiled pattern;
1061      `fastmap_accurate' is zero;
1062      `re_nsub' is the number of subexpressions in PATTERN;
1063      `not_bol' and `not_eol' are zero;
1064 
1065    The `fastmap' and `newline_anchor' fields are neither
1066    examined nor set.  */
1067 
1068 static reg_errcode_t
1069 regex_compile (pattern, size, syntax, bufp)
1070      const char *pattern;
1071      int size;
1072      reg_syntax_t syntax;
1073      struct re_pattern_buffer *bufp;
1074 {
1075   /* We fetch characters from PATTERN here.  Even though PATTERN is
1076      `char *' (i.e., signed), we declare these variables as unsigned, so
1077      they can be reliably used as array indices.  */
1078   register unsigned char c, c1;
1079 
1080   /* A random temporary spot in PATTERN.  */
1081   const char *p1;
1082 
1083   /* Points to the end of the buffer, where we should append.  */
1084   register unsigned char *b;
1085 
1086   /* Keeps track of unclosed groups.  */
1087   compile_stack_type compile_stack;
1088 
1089   /* Points to the current (ending) position in the pattern.  */
1090   const char *p = pattern;
1091   const char *pend = pattern + size;
1092 
1093   /* How to translate the characters in the pattern.  */
1094   char *translate = bufp->translate;
1095 
1096   /* Address of the count-byte of the most recently inserted `exactn'
1097      command.  This makes it possible to tell if a new exact-match
1098      character can be added to that command or if the character requires
1099      a new `exactn' command.  */
1100   unsigned char *pending_exact = 0;
1101 
1102   /* Address of start of the most recently finished expression.
1103      This tells, e.g., postfix * where to find the start of its
1104      operand.  Reset at the beginning of groups and alternatives.  */
1105   unsigned char *laststart = 0;
1106 
1107   /* Address of beginning of regexp, or inside of last group.  */
1108   unsigned char *begalt;
1109 
1110   /* Place in the uncompiled pattern (i.e., the {) to
1111      which to go back if the interval is invalid.  */
1112   const char *beg_interval;
1113 
1114   /* Address of the place where a forward jump should go to the end of
1115      the containing expression.  Each alternative of an `or' -- except the
1116      last -- ends with a forward jump of this sort.  */
1117   unsigned char *fixup_alt_jump = 0;
1118 
1119   /* Counts open-groups as they are encountered.  Remembered for the
1120      matching close-group on the compile stack, so the same register
1121      number is put in the stop_memory as the start_memory.  */
1122   regnum_t regnum = 0;
1123 
1124 #ifdef DEBUG
1125   DEBUG_PRINT1 ("\nCompiling pattern: ");
1126   if (debug)
1127     {
1128       unsigned debug_count;
1129 
1130       for (debug_count = 0; debug_count < size; debug_count++)
1131         printchar (pattern[debug_count]);
1132       putchar ('\n');
1133     }
1134 #endif /* DEBUG */
1135 
1136   /* Initialize the compile stack.  */
1137   compile_stack.stack = TALLOC (INIT_COMPILE_STACK_SIZE, compile_stack_elt_t);
1138   if (compile_stack.stack == NULL)
1139     return REG_ESPACE;
1140 
1141   compile_stack.size = INIT_COMPILE_STACK_SIZE;
1142   compile_stack.avail = 0;
1143 
1144   /* Initialize the pattern buffer.  */
1145   bufp->syntax = syntax;
1146   bufp->fastmap_accurate = 0;
1147   bufp->not_bol = bufp->not_eol = 0;
1148 
1149   /* Set `used' to zero, so that if we return an error, the pattern
1150      printer (for debugging) will think there's no pattern.  We reset it
1151      at the end.  */
1152   bufp->used = 0;
1153 
1154   /* Always count groups, whether or not bufp->no_sub is set.  */
1155   bufp->re_nsub = 0;
1156 
1157 #if !defined (emacs) && !defined (SYNTAX_TABLE)
1158   /* Initialize the syntax table.  */
1159    init_syntax_once ();
1160 #endif
1161 
1162   if (bufp->allocated == 0)
1163     {
1164       if (bufp->buffer)
1165 	{ /* If zero allocated, but buffer is non-null, try to realloc
1166              enough space.  This loses if buffer's address is bogus, but
1167              that is the user's responsibility.  */
1168           RETALLOC (bufp->buffer, INIT_BUF_SIZE, unsigned char);
1169         }
1170       else
1171         { /* Caller did not allocate a buffer.  Do it for them.  */
1172           bufp->buffer = TALLOC (INIT_BUF_SIZE, unsigned char);
1173         }
1174       if (!bufp->buffer) return REG_ESPACE;
1175 
1176       bufp->allocated = INIT_BUF_SIZE;
1177     }
1178 
1179   begalt = b = bufp->buffer;
1180 
1181   /* Loop through the uncompiled pattern until we're at the end.  */
1182   while (p != pend)
1183     {
1184       PATFETCH (c);
1185 
1186       switch (c)
1187         {
1188         case '^':
1189           {
1190             if (   /* If at start of pattern, it's an operator.  */
1191                    p == pattern + 1
1192                    /* If context independent, it's an operator.  */
1193                 || syntax & RE_CONTEXT_INDEP_ANCHORS
1194                    /* Otherwise, depends on what's come before.  */
1195                 || at_begline_loc_p (pattern, p, syntax))
1196               BUF_PUSH (begline);
1197             else
1198               goto normal_char;
1199           }
1200           break;
1201 
1202 
1203         case '$':
1204           {
1205             if (   /* If at end of pattern, it's an operator.  */
1206                    p == pend
1207                    /* If context independent, it's an operator.  */
1208                 || syntax & RE_CONTEXT_INDEP_ANCHORS
1209                    /* Otherwise, depends on what's next.  */
1210                 || at_endline_loc_p (p, pend, syntax))
1211                BUF_PUSH (endline);
1212              else
1213                goto normal_char;
1214            }
1215            break;
1216 
1217 
1218 	case '+':
1219         case '?':
1220           if ((syntax & RE_BK_PLUS_QM)
1221               || (syntax & RE_LIMITED_OPS))
1222             goto normal_char;
1223         handle_plus:
1224         case '*':
1225           /* If there is no previous pattern... */
1226           if (!laststart)
1227             {
1228               if (syntax & RE_CONTEXT_INVALID_OPS)
1229                 return REG_BADRPT;
1230               else if (!(syntax & RE_CONTEXT_INDEP_OPS))
1231                 goto normal_char;
1232             }
1233 
1234           {
1235             /* Are we optimizing this jump?  */
1236             boolean keep_string_p = false;
1237 
1238             /* 1 means zero (many) matches is allowed.  */
1239             char zero_times_ok = 0, many_times_ok = 0;
1240 
1241             /* If there is a sequence of repetition chars, collapse it
1242                down to just one (the right one).  We can't combine
1243                interval operators with these because of, e.g., `a{2}*',
1244                which should only match an even number of `a's.  */
1245 
1246             for (;;)
1247               {
1248                 zero_times_ok |= c != '+';
1249                 many_times_ok |= c != '?';
1250 
1251                 if (p == pend)
1252                   break;
1253 
1254                 PATFETCH (c);
1255 
1256                 if (c == '*'
1257                     || (!(syntax & RE_BK_PLUS_QM) && (c == '+' || c == '?')))
1258                   ;
1259 
1260                 else if (syntax & RE_BK_PLUS_QM  &&  c == '\\')
1261                   {
1262                     if (p == pend) return REG_EESCAPE;
1263 
1264                     PATFETCH (c1);
1265                     if (!(c1 == '+' || c1 == '?'))
1266                       {
1267                         PATUNFETCH;
1268                         PATUNFETCH;
1269                         break;
1270                       }
1271 
1272                     c = c1;
1273                   }
1274                 else
1275                   {
1276                     PATUNFETCH;
1277                     break;
1278                   }
1279 
1280                 /* If we get here, we found another repeat character.  */
1281                }
1282 
1283             /* Star, etc. applied to an empty pattern is equivalent
1284                to an empty pattern.  */
1285             if (!laststart)
1286               break;
1287 
1288             /* Now we know whether or not zero matches is allowed
1289                and also whether or not two or more matches is allowed.  */
1290             if (many_times_ok)
1291               { /* More than one repetition is allowed, so put in at the
1292                    end a backward relative jump from `b' to before the next
1293                    jump we're going to put in below (which jumps from
1294                    laststart to after this jump).
1295 
1296                    But if we are at the `*' in the exact sequence `.*\n',
1297                    insert an unconditional jump backwards to the .,
1298                    instead of the beginning of the loop.  This way we only
1299                    push a failure point once, instead of every time
1300                    through the loop.  */
1301                 assert (p - 1 > pattern);
1302 
1303                 /* Allocate the space for the jump.  */
1304                 GET_BUFFER_SPACE (3);
1305 
1306                 /* We know we are not at the first character of the pattern,
1307                    because laststart was nonzero.  And we've already
1308                    incremented `p', by the way, to be the character after
1309                    the `*'.  Do we have to do something analogous here
1310                    for null bytes, because of RE_DOT_NOT_NULL?  */
1311                 if (TRANSLATE (*(p - 2)) == TRANSLATE ('.')
1312 		    && zero_times_ok
1313                     && p < pend && TRANSLATE (*p) == TRANSLATE ('\n')
1314                     && !(syntax & RE_DOT_NEWLINE))
1315                   { /* We have .*\n.  */
1316                     STORE_JUMP (jump, b, laststart);
1317                     keep_string_p = true;
1318                   }
1319                 else
1320                   /* Anything else.  */
1321                   STORE_JUMP (maybe_pop_jump, b, laststart - 3);
1322 
1323                 /* We've added more stuff to the buffer.  */
1324                 b += 3;
1325               }
1326 
1327             /* On failure, jump from laststart to b + 3, which will be the
1328                end of the buffer after this jump is inserted.  */
1329             GET_BUFFER_SPACE (3);
1330             INSERT_JUMP (keep_string_p ? on_failure_keep_string_jump
1331                                        : on_failure_jump,
1332                          laststart, b + 3);
1333             pending_exact = 0;
1334             b += 3;
1335 
1336             if (!zero_times_ok)
1337               {
1338                 /* At least one repetition is required, so insert a
1339                    `dummy_failure_jump' before the initial
1340                    `on_failure_jump' instruction of the loop. This
1341                    effects a skip over that instruction the first time
1342                    we hit that loop.  */
1343                 GET_BUFFER_SPACE (3);
1344                 INSERT_JUMP (dummy_failure_jump, laststart, laststart + 6);
1345                 b += 3;
1346               }
1347             }
1348 	  break;
1349 
1350 
1351 	case '.':
1352           laststart = b;
1353           BUF_PUSH (anychar);
1354           break;
1355 
1356 
1357         case '[':
1358           {
1359             boolean had_char_class = false;
1360 
1361             if (p == pend) return REG_EBRACK;
1362 
1363             /* Ensure that we have enough space to push a charset: the
1364                opcode, the length count, and the bitset; 34 bytes in all.  */
1365 	    GET_BUFFER_SPACE (34);
1366 
1367             laststart = b;
1368 
1369             /* We test `*p == '^' twice, instead of using an if
1370                statement, so we only need one BUF_PUSH.  */
1371             BUF_PUSH (*p == '^' ? charset_not : charset);
1372             if (*p == '^')
1373               p++;
1374 
1375             /* Remember the first position in the bracket expression.  */
1376             p1 = p;
1377 
1378             /* Push the number of bytes in the bitmap.  */
1379             BUF_PUSH ((1 << BYTEWIDTH) / BYTEWIDTH);
1380 
1381             /* Clear the whole map.  */
1382             bzero (b, (1 << BYTEWIDTH) / BYTEWIDTH);
1383 
1384             /* charset_not matches newline according to a syntax bit.  */
1385             if ((re_opcode_t) b[-2] == charset_not
1386                 && (syntax & RE_HAT_LISTS_NOT_NEWLINE))
1387               SET_LIST_BIT ('\n');
1388 
1389             /* Read in characters and ranges, setting map bits.  */
1390             for (;;)
1391               {
1392                 if (p == pend) return REG_EBRACK;
1393 
1394                 PATFETCH (c);
1395 
1396                 /* \ might escape characters inside [...] and [^...].  */
1397                 if ((syntax & RE_BACKSLASH_ESCAPE_IN_LISTS) && c == '\\')
1398                   {
1399                     if (p == pend) return REG_EESCAPE;
1400 
1401                     PATFETCH (c1);
1402                     SET_LIST_BIT (c1);
1403                     continue;
1404                   }
1405 
1406                 /* Could be the end of the bracket expression.  If it's
1407                    not (i.e., when the bracket expression is `[]' so
1408                    far), the ']' character bit gets set way below.  */
1409                 if (c == ']' && p != p1 + 1)
1410                   break;
1411 
1412                 /* Look ahead to see if it's a range when the last thing
1413                    was a character class.  */
1414                 if (had_char_class && c == '-' && *p != ']')
1415                   return REG_ERANGE;
1416 
1417                 /* Look ahead to see if it's a range when the last thing
1418                    was a character: if this is a hyphen not at the
1419                    beginning or the end of a list, then it's the range
1420                    operator.  */
1421                 if (c == '-'
1422                     && !(p - 2 >= pattern && p[-2] == '[')
1423                     && !(p - 3 >= pattern && p[-3] == '[' && p[-2] == '^')
1424                     && *p != ']')
1425                   {
1426                     reg_errcode_t ret
1427                       = compile_range (&p, pend, translate, syntax, b);
1428                     if (ret != REG_NOERROR) return ret;
1429                   }
1430 
1431                 else if (p[0] == '-' && p[1] != ']')
1432                   { /* This handles ranges made up of characters only.  */
1433                     reg_errcode_t ret;
1434 
1435 		    /* Move past the `-'.  */
1436                     PATFETCH (c1);
1437 
1438                     ret = compile_range (&p, pend, translate, syntax, b);
1439                     if (ret != REG_NOERROR) return ret;
1440                   }
1441 
1442                 /* See if we're at the beginning of a possible character
1443                    class.  */
1444 
1445                 else if (syntax & RE_CHAR_CLASSES && c == '[' && *p == ':')
1446                   { /* Leave room for the null.  */
1447                     char str[CHAR_CLASS_MAX_LENGTH + 1];
1448 
1449                     PATFETCH (c);
1450                     c1 = 0;
1451 
1452                     /* If pattern is `[[:'.  */
1453                     if (p == pend) return REG_EBRACK;
1454 
1455                     for (;;)
1456                       {
1457                         PATFETCH (c);
1458                         if (c == ':' || c == ']' || p == pend
1459                             || c1 == CHAR_CLASS_MAX_LENGTH)
1460                           break;
1461                         str[c1++] = c;
1462                       }
1463                     str[c1] = '\0';
1464 
1465                     /* If isn't a word bracketed by `[:' and:`]':
1466                        undo the ending character, the letters, and leave
1467                        the leading `:' and `[' (but set bits for them).  */
1468                     if (c == ':' && *p == ']')
1469                       {
1470                         int ch;
1471                         boolean is_alnum = STREQ (str, "alnum");
1472                         boolean is_alpha = STREQ (str, "alpha");
1473                         boolean is_blank = STREQ (str, "blank");
1474                         boolean is_cntrl = STREQ (str, "cntrl");
1475                         boolean is_digit = STREQ (str, "digit");
1476                         boolean is_graph = STREQ (str, "graph");
1477                         boolean is_lower = STREQ (str, "lower");
1478                         boolean is_print = STREQ (str, "print");
1479                         boolean is_punct = STREQ (str, "punct");
1480                         boolean is_space = STREQ (str, "space");
1481                         boolean is_upper = STREQ (str, "upper");
1482                         boolean is_xdigit = STREQ (str, "xdigit");
1483 
1484                         if (!IS_CHAR_CLASS (str)) return REG_ECTYPE;
1485 
1486                         /* Throw away the ] at the end of the character
1487                            class.  */
1488                         PATFETCH (c);
1489 
1490                         if (p == pend) return REG_EBRACK;
1491 
1492                         for (ch = 0; ch < 1 << BYTEWIDTH; ch++)
1493                           {
1494                             if (   (is_alnum  && ISALNUM (ch))
1495                                 || (is_alpha  && ISALPHA (ch))
1496                                 || (is_blank  && ISBLANK (ch))
1497                                 || (is_cntrl  && ISCNTRL (ch))
1498                                 || (is_digit  && ISDIGIT (ch))
1499                                 || (is_graph  && ISGRAPH (ch))
1500                                 || (is_lower  && ISLOWER (ch))
1501                                 || (is_print  && ISPRINT (ch))
1502                                 || (is_punct  && ISPUNCT (ch))
1503                                 || (is_space  && ISSPACE (ch))
1504                                 || (is_upper  && ISUPPER (ch))
1505                                 || (is_xdigit && ISXDIGIT (ch)))
1506                             SET_LIST_BIT (ch);
1507                           }
1508                         had_char_class = true;
1509                       }
1510                     else
1511                       {
1512                         c1++;
1513                         while (c1--)
1514                           PATUNFETCH;
1515                         SET_LIST_BIT ('[');
1516                         SET_LIST_BIT (':');
1517                         had_char_class = false;
1518                       }
1519                   }
1520                 else
1521                   {
1522                     had_char_class = false;
1523                     SET_LIST_BIT (c);
1524                   }
1525               }
1526 
1527             /* Discard any (non)matching list bytes that are all 0 at the
1528                end of the map.  Decrease the map-length byte too.  */
1529             while ((int) b[-1] > 0 && b[b[-1] - 1] == 0)
1530               b[-1]--;
1531             b += b[-1];
1532           }
1533           break;
1534 
1535 
1536 	case '(':
1537           if (syntax & RE_NO_BK_PARENS)
1538             goto handle_open;
1539           else
1540             goto normal_char;
1541 
1542 
1543         case ')':
1544           if (syntax & RE_NO_BK_PARENS)
1545             goto handle_close;
1546           else
1547             goto normal_char;
1548 
1549 
1550         case '\n':
1551           if (syntax & RE_NEWLINE_ALT)
1552             goto handle_alt;
1553           else
1554             goto normal_char;
1555 
1556 
1557 	case '|':
1558           if (syntax & RE_NO_BK_VBAR)
1559             goto handle_alt;
1560           else
1561             goto normal_char;
1562 
1563 
1564         case '{':
1565            if (syntax & RE_INTERVALS && syntax & RE_NO_BK_BRACES)
1566              goto handle_interval;
1567            else
1568              goto normal_char;
1569 
1570 
1571         case '\\':
1572           if (p == pend) return REG_EESCAPE;
1573 
1574           /* Do not translate the character after the \, so that we can
1575              distinguish, e.g., \B from \b, even if we normally would
1576              translate, e.g., B to b.  */
1577           PATFETCH_RAW (c);
1578 
1579           switch (c)
1580             {
1581             case '(':
1582               if (syntax & RE_NO_BK_PARENS)
1583                 goto normal_backslash;
1584 
1585             handle_open:
1586               bufp->re_nsub++;
1587               regnum++;
1588 
1589               if (COMPILE_STACK_FULL)
1590                 {
1591                   RETALLOC (compile_stack.stack, compile_stack.size << 1,
1592                             compile_stack_elt_t);
1593                   if (compile_stack.stack == NULL) return REG_ESPACE;
1594 
1595                   compile_stack.size <<= 1;
1596                 }
1597 
1598               /* These are the values to restore when we hit end of this
1599                  group.  They are all relative offsets, so that if the
1600                  whole pattern moves because of realloc, they will still
1601                  be valid.  */
1602               COMPILE_STACK_TOP.begalt_offset = begalt - bufp->buffer;
1603               COMPILE_STACK_TOP.fixup_alt_jump
1604                 = fixup_alt_jump ? fixup_alt_jump - bufp->buffer + 1 : 0;
1605               COMPILE_STACK_TOP.laststart_offset = b - bufp->buffer;
1606               COMPILE_STACK_TOP.regnum = regnum;
1607 
1608               /* We will eventually replace the 0 with the number of
1609                  groups inner to this one.  But do not push a
1610                  start_memory for groups beyond the last one we can
1611                  represent in the compiled pattern.  */
1612               if (regnum <= MAX_REGNUM)
1613                 {
1614                   COMPILE_STACK_TOP.inner_group_offset = b - bufp->buffer + 2;
1615                   BUF_PUSH_3 (start_memory, regnum, 0);
1616                 }
1617 
1618               compile_stack.avail++;
1619 
1620               fixup_alt_jump = 0;
1621               laststart = 0;
1622               begalt = b;
1623 	      /* If we've reached MAX_REGNUM groups, then this open
1624 		 won't actually generate any code, so we'll have to
1625 		 clear pending_exact explicitly.  */
1626 	      pending_exact = 0;
1627               break;
1628 
1629 
1630             case ')':
1631               if (syntax & RE_NO_BK_PARENS) goto normal_backslash;
1632 
1633               if (COMPILE_STACK_EMPTY)
1634                 if (syntax & RE_UNMATCHED_RIGHT_PAREN_ORD)
1635                   goto normal_backslash;
1636                 else
1637                   return REG_ERPAREN;
1638 
1639             handle_close:
1640               if (fixup_alt_jump)
1641                 { /* Push a dummy failure point at the end of the
1642                      alternative for a possible future
1643                      `pop_failure_jump' to pop.  See comments at
1644                      `push_dummy_failure' in `re_match_2'.  */
1645                   BUF_PUSH (push_dummy_failure);
1646 
1647                   /* We allocated space for this jump when we assigned
1648                      to `fixup_alt_jump', in the `handle_alt' case below.  */
1649                   STORE_JUMP (jump_past_alt, fixup_alt_jump, b - 1);
1650                 }
1651 
1652               /* See similar code for backslashed left paren above.  */
1653               if (COMPILE_STACK_EMPTY)
1654                 if (syntax & RE_UNMATCHED_RIGHT_PAREN_ORD)
1655                   goto normal_char;
1656                 else
1657                   return REG_ERPAREN;
1658 
1659               /* Since we just checked for an empty stack above, this
1660                  ``can't happen''.  */
1661               assert (compile_stack.avail != 0);
1662               {
1663                 /* We don't just want to restore into `regnum', because
1664                    later groups should continue to be numbered higher,
1665                    as in `(ab)c(de)' -- the second group is #2.  */
1666                 regnum_t this_group_regnum;
1667 
1668                 compile_stack.avail--;
1669                 begalt = bufp->buffer + COMPILE_STACK_TOP.begalt_offset;
1670                 fixup_alt_jump
1671                   = COMPILE_STACK_TOP.fixup_alt_jump
1672                     ? bufp->buffer + COMPILE_STACK_TOP.fixup_alt_jump - 1
1673                     : 0;
1674                 laststart = bufp->buffer + COMPILE_STACK_TOP.laststart_offset;
1675                 this_group_regnum = COMPILE_STACK_TOP.regnum;
1676 		/* If we've reached MAX_REGNUM groups, then this open
1677 		   won't actually generate any code, so we'll have to
1678 		   clear pending_exact explicitly.  */
1679 		pending_exact = 0;
1680 
1681                 /* We're at the end of the group, so now we know how many
1682                    groups were inside this one.  */
1683                 if (this_group_regnum <= MAX_REGNUM)
1684                   {
1685                     unsigned char *inner_group_loc
1686                       = bufp->buffer + COMPILE_STACK_TOP.inner_group_offset;
1687 
1688                     *inner_group_loc = regnum - this_group_regnum;
1689                     BUF_PUSH_3 (stop_memory, this_group_regnum,
1690                                 regnum - this_group_regnum);
1691                   }
1692               }
1693               break;
1694 
1695 
1696             case '|':					/* `\|'.  */
1697               if (syntax & RE_LIMITED_OPS || syntax & RE_NO_BK_VBAR)
1698                 goto normal_backslash;
1699             handle_alt:
1700               if (syntax & RE_LIMITED_OPS)
1701                 goto normal_char;
1702 
1703               /* Insert before the previous alternative a jump which
1704                  jumps to this alternative if the former fails.  */
1705               GET_BUFFER_SPACE (3);
1706               INSERT_JUMP (on_failure_jump, begalt, b + 6);
1707               pending_exact = 0;
1708               b += 3;
1709 
1710               /* The alternative before this one has a jump after it
1711                  which gets executed if it gets matched.  Adjust that
1712                  jump so it will jump to this alternative's analogous
1713                  jump (put in below, which in turn will jump to the next
1714                  (if any) alternative's such jump, etc.).  The last such
1715                  jump jumps to the correct final destination.  A picture:
1716                           _____ _____
1717                           |   | |   |
1718                           |   v |   v
1719                          a | b   | c
1720 
1721                  If we are at `b', then fixup_alt_jump right now points to a
1722                  three-byte space after `a'.  We'll put in the jump, set
1723                  fixup_alt_jump to right after `b', and leave behind three
1724                  bytes which we'll fill in when we get to after `c'.  */
1725 
1726               if (fixup_alt_jump)
1727                 STORE_JUMP (jump_past_alt, fixup_alt_jump, b);
1728 
1729               /* Mark and leave space for a jump after this alternative,
1730                  to be filled in later either by next alternative or
1731                  when know we're at the end of a series of alternatives.  */
1732               fixup_alt_jump = b;
1733               GET_BUFFER_SPACE (3);
1734               b += 3;
1735 
1736               laststart = 0;
1737               begalt = b;
1738               break;
1739 
1740 
1741             case '{':
1742               /* If \{ is a literal.  */
1743               if (!(syntax & RE_INTERVALS)
1744                      /* If we're at `\{' and it's not the open-interval
1745                         operator.  */
1746                   || ((syntax & RE_INTERVALS) && (syntax & RE_NO_BK_BRACES))
1747                   || (p - 2 == pattern  &&  p == pend))
1748                 goto normal_backslash;
1749 
1750             handle_interval:
1751               {
1752                 /* If got here, then the syntax allows intervals.  */
1753 
1754                 /* At least (most) this many matches must be made.  */
1755                 int lower_bound = -1, upper_bound = -1;
1756 
1757                 beg_interval = p - 1;
1758 
1759                 if (p == pend)
1760                   {
1761                     if (syntax & RE_NO_BK_BRACES)
1762                       goto unfetch_interval;
1763                     else
1764                       return REG_EBRACE;
1765                   }
1766 
1767                 GET_UNSIGNED_NUMBER (lower_bound);
1768 
1769                 if (c == ',')
1770                   {
1771                     GET_UNSIGNED_NUMBER (upper_bound);
1772                     if (upper_bound < 0) upper_bound = RE_DUP_MAX;
1773                   }
1774                 else
1775                   /* Interval such as `{1}' => match exactly once. */
1776                   upper_bound = lower_bound;
1777 
1778                 if (lower_bound < 0 || upper_bound > RE_DUP_MAX
1779                     || lower_bound > upper_bound)
1780                   {
1781                     if (syntax & RE_NO_BK_BRACES)
1782                       goto unfetch_interval;
1783                     else
1784                       return REG_BADBR;
1785                   }
1786 
1787                 if (!(syntax & RE_NO_BK_BRACES))
1788                   {
1789                     if (c != '\\') return REG_EBRACE;
1790 
1791                     PATFETCH (c);
1792                   }
1793 
1794                 if (c != '}')
1795                   {
1796                     if (syntax & RE_NO_BK_BRACES)
1797                       goto unfetch_interval;
1798                     else
1799                       return REG_BADBR;
1800                   }
1801 
1802                 /* We just parsed a valid interval.  */
1803 
1804                 /* If it's invalid to have no preceding re.  */
1805                 if (!laststart)
1806                   {
1807                     if (syntax & RE_CONTEXT_INVALID_OPS)
1808                       return REG_BADRPT;
1809                     else if (syntax & RE_CONTEXT_INDEP_OPS)
1810                       laststart = b;
1811                     else
1812                       goto unfetch_interval;
1813                   }
1814 
1815                 /* If the upper bound is zero, don't want to succeed at
1816                    all; jump from `laststart' to `b + 3', which will be
1817                    the end of the buffer after we insert the jump.  */
1818                  if (upper_bound == 0)
1819                    {
1820                      GET_BUFFER_SPACE (3);
1821                      INSERT_JUMP (jump, laststart, b + 3);
1822                      b += 3;
1823                    }
1824 
1825                  /* Otherwise, we have a nontrivial interval.  When
1826                     we're all done, the pattern will look like:
1827                       set_number_at <jump count> <upper bound>
1828                       set_number_at <succeed_n count> <lower bound>
1829                       succeed_n <after jump addr> <succeed_n count>
1830                       <body of loop>
1831                       jump_n <succeed_n addr> <jump count>
1832                     (The upper bound and `jump_n' are omitted if
1833                     `upper_bound' is 1, though.)  */
1834                  else
1835                    { /* If the upper bound is > 1, we need to insert
1836                         more at the end of the loop.  */
1837                      unsigned nbytes = 10 + (upper_bound > 1) * 10;
1838 
1839                      GET_BUFFER_SPACE (nbytes);
1840 
1841                      /* Initialize lower bound of the `succeed_n', even
1842                         though it will be set during matching by its
1843                         attendant `set_number_at' (inserted next),
1844                         because `re_compile_fastmap' needs to know.
1845                         Jump to the `jump_n' we might insert below.  */
1846                      INSERT_JUMP2 (succeed_n, laststart,
1847                                    b + 5 + (upper_bound > 1) * 5,
1848                                    lower_bound);
1849                      b += 5;
1850 
1851                      /* Code to initialize the lower bound.  Insert
1852                         before the `succeed_n'.  The `5' is the last two
1853                         bytes of this `set_number_at', plus 3 bytes of
1854                         the following `succeed_n'.  */
1855                      insert_op2 (set_number_at, laststart, 5, lower_bound, b);
1856                      b += 5;
1857 
1858                      if (upper_bound > 1)
1859                        { /* More than one repetition is allowed, so
1860                             append a backward jump to the `succeed_n'
1861                             that starts this interval.
1862 
1863                             When we've reached this during matching,
1864                             we'll have matched the interval once, so
1865                             jump back only `upper_bound - 1' times.  */
1866                          STORE_JUMP2 (jump_n, b, laststart + 5,
1867                                       upper_bound - 1);
1868                          b += 5;
1869 
1870                          /* The location we want to set is the second
1871                             parameter of the `jump_n'; that is `b-2' as
1872                             an absolute address.  `laststart' will be
1873                             the `set_number_at' we're about to insert;
1874                             `laststart+3' the number to set, the source
1875                             for the relative address.  But we are
1876                             inserting into the middle of the pattern --
1877                             so everything is getting moved up by 5.
1878                             Conclusion: (b - 2) - (laststart + 3) + 5,
1879                             i.e., b - laststart.
1880 
1881                             We insert this at the beginning of the loop
1882                             so that if we fail during matching, we'll
1883                             reinitialize the bounds.  */
1884                          insert_op2 (set_number_at, laststart, b - laststart,
1885                                      upper_bound - 1, b);
1886                          b += 5;
1887                        }
1888                    }
1889                 pending_exact = 0;
1890                 beg_interval = NULL;
1891               }
1892               break;
1893 
1894             unfetch_interval:
1895               /* If an invalid interval, match the characters as literals.  */
1896                assert (beg_interval);
1897                p = beg_interval;
1898                beg_interval = NULL;
1899 
1900                /* normal_char and normal_backslash need `c'.  */
1901                PATFETCH (c);
1902 
1903                if (!(syntax & RE_NO_BK_BRACES))
1904                  {
1905                    if (p > pattern  &&  p[-1] == '\\')
1906                      goto normal_backslash;
1907                  }
1908                goto normal_char;
1909 
1910 #ifdef emacs
1911             /* There is no way to specify the before_dot and after_dot
1912                operators.  rms says this is ok.  --karl  */
1913             case '=':
1914               BUF_PUSH (at_dot);
1915               break;
1916 
1917             case 's':
1918               laststart = b;
1919               PATFETCH (c);
1920               BUF_PUSH_2 (syntaxspec, syntax_spec_code[c]);
1921               break;
1922 
1923             case 'S':
1924               laststart = b;
1925               PATFETCH (c);
1926               BUF_PUSH_2 (notsyntaxspec, syntax_spec_code[c]);
1927               break;
1928 #endif /* emacs */
1929 
1930 
1931             case 'w':
1932               laststart = b;
1933               BUF_PUSH (wordchar);
1934               break;
1935 
1936 
1937             case 'W':
1938               laststart = b;
1939               BUF_PUSH (notwordchar);
1940               break;
1941 
1942 
1943             case '<':
1944               BUF_PUSH (wordbeg);
1945               break;
1946 
1947             case '>':
1948               BUF_PUSH (wordend);
1949               break;
1950 
1951             case 'b':
1952               BUF_PUSH (wordbound);
1953               break;
1954 
1955             case 'B':
1956               BUF_PUSH (notwordbound);
1957               break;
1958 
1959             case '`':
1960               BUF_PUSH (begbuf);
1961               break;
1962 
1963             case '\'':
1964               BUF_PUSH (endbuf);
1965               break;
1966 
1967             case '1': case '2': case '3': case '4': case '5':
1968             case '6': case '7': case '8': case '9':
1969               if (syntax & RE_NO_BK_REFS)
1970                 goto normal_char;
1971 
1972               c1 = c - '0';
1973 
1974               if (c1 > regnum)
1975                 return REG_ESUBREG;
1976 
1977               /* Can't back reference to a subexpression if inside of it.  */
1978               if (group_in_compile_stack (compile_stack, c1))
1979                 goto normal_char;
1980 
1981               laststart = b;
1982               BUF_PUSH_2 (duplicate, c1);
1983               break;
1984 
1985 
1986             case '+':
1987             case '?':
1988               if (syntax & RE_BK_PLUS_QM)
1989                 goto handle_plus;
1990               else
1991                 goto normal_backslash;
1992 
1993             default:
1994             normal_backslash:
1995               /* You might think it would be useful for \ to mean
1996                  not to translate; but if we don't translate it
1997                  it will never match anything.  */
1998               c = TRANSLATE (c);
1999               goto normal_char;
2000             }
2001           break;
2002 
2003 
2004 	default:
2005         /* Expects the character in `c'.  */
2006 	normal_char:
2007 	      /* If no exactn currently being built.  */
2008           if (!pending_exact
2009 
2010               /* If last exactn not at current position.  */
2011               || pending_exact + *pending_exact + 1 != b
2012 
2013               /* We have only one byte following the exactn for the count.  */
2014 	      || *pending_exact == (1 << BYTEWIDTH) - 1
2015 
2016               /* If followed by a repetition operator.  */
2017               || *p == '*' || *p == '^'
2018 	      || ((syntax & RE_BK_PLUS_QM)
2019 		  ? *p == '\\' && (p[1] == '+' || p[1] == '?')
2020 		  : (*p == '+' || *p == '?'))
2021 	      || ((syntax & RE_INTERVALS)
2022                   && ((syntax & RE_NO_BK_BRACES)
2023 		      ? *p == '{'
2024                       : (p[0] == '\\' && p[1] == '{'))))
2025 	    {
2026 	      /* Start building a new exactn.  */
2027 
2028               laststart = b;
2029 
2030 	      BUF_PUSH_2 (exactn, 0);
2031 	      pending_exact = b - 1;
2032             }
2033 
2034 	  BUF_PUSH (c);
2035           (*pending_exact)++;
2036 	  break;
2037         } /* switch (c) */
2038     } /* while p != pend */
2039 
2040 
2041   /* Through the pattern now.  */
2042 
2043   if (fixup_alt_jump)
2044     STORE_JUMP (jump_past_alt, fixup_alt_jump, b);
2045 
2046   if (!COMPILE_STACK_EMPTY)
2047     return REG_EPAREN;
2048 
2049   free (compile_stack.stack);
2050 
2051   /* We have succeeded; set the length of the buffer.  */
2052   bufp->used = b - bufp->buffer;
2053 
2054 #ifdef DEBUG
2055   if (debug)
2056     {
2057       DEBUG_PRINT1 ("\nCompiled pattern: ");
2058       print_compiled_pattern (bufp);
2059     }
2060 #endif /* DEBUG */
2061 
2062   return REG_NOERROR;
2063 } /* regex_compile */
2064 
2065 /* Subroutines for `regex_compile'.  */
2066 
2067 /* Store OP at LOC followed by two-byte integer parameter ARG.  */
2068 
2069 static void
2070 store_op1 (op, loc, arg)
2071     re_opcode_t op;
2072     unsigned char *loc;
2073     int arg;
2074 {
2075   *loc = (unsigned char) op;
2076   STORE_NUMBER (loc + 1, arg);
2077 }
2078 
2079 
2080 /* Like `store_op1', but for two two-byte parameters ARG1 and ARG2.  */
2081 
2082 static void
2083 store_op2 (op, loc, arg1, arg2)
2084     re_opcode_t op;
2085     unsigned char *loc;
2086     int arg1, arg2;
2087 {
2088   *loc = (unsigned char) op;
2089   STORE_NUMBER (loc + 1, arg1);
2090   STORE_NUMBER (loc + 3, arg2);
2091 }
2092 
2093 
2094 /* Copy the bytes from LOC to END to open up three bytes of space at LOC
2095    for OP followed by two-byte integer parameter ARG.  */
2096 
2097 static void
2098 insert_op1 (op, loc, arg, end)
2099     re_opcode_t op;
2100     unsigned char *loc;
2101     int arg;
2102     unsigned char *end;
2103 {
2104   register unsigned char *pfrom = end;
2105   register unsigned char *pto = end + 3;
2106 
2107   while (pfrom != loc)
2108     *--pto = *--pfrom;
2109 
2110   store_op1 (op, loc, arg);
2111 }
2112 
2113 
2114 /* Like `insert_op1', but for two two-byte parameters ARG1 and ARG2.  */
2115 
2116 static void
2117 insert_op2 (op, loc, arg1, arg2, end)
2118     re_opcode_t op;
2119     unsigned char *loc;
2120     int arg1, arg2;
2121     unsigned char *end;
2122 {
2123   register unsigned char *pfrom = end;
2124   register unsigned char *pto = end + 5;
2125 
2126   while (pfrom != loc)
2127     *--pto = *--pfrom;
2128 
2129   store_op2 (op, loc, arg1, arg2);
2130 }
2131 
2132 
2133 /* P points to just after a ^ in PATTERN.  Return true if that ^ comes
2134    after an alternative or a begin-subexpression.  We assume there is at
2135    least one character before the ^.  */
2136 
2137 static boolean
2138 at_begline_loc_p (pattern, p, syntax)
2139     const char *pattern, *p;
2140     reg_syntax_t syntax;
2141 {
2142   const char *prev = p - 2;
2143   boolean prev_prev_backslash = prev > pattern && prev[-1] == '\\';
2144 
2145   return
2146        /* After a subexpression?  */
2147        (*prev == '(' && (syntax & RE_NO_BK_PARENS || prev_prev_backslash))
2148        /* After an alternative?  */
2149     || (*prev == '|' && (syntax & RE_NO_BK_VBAR || prev_prev_backslash));
2150 }
2151 
2152 
2153 /* The dual of at_begline_loc_p.  This one is for $.  We assume there is
2154    at least one character after the $, i.e., `P < PEND'.  */
2155 
2156 static boolean
2157 at_endline_loc_p (p, pend, syntax)
2158     const char *p, *pend;
2159     int syntax;
2160 {
2161   const char *next = p;
2162   boolean next_backslash = *next == '\\';
2163   const char *next_next = p + 1 < pend ? p + 1 : NULL;
2164 
2165   return
2166        /* Before a subexpression?  */
2167        (syntax & RE_NO_BK_PARENS ? *next == ')'
2168         : next_backslash && next_next && *next_next == ')')
2169        /* Before an alternative?  */
2170     || (syntax & RE_NO_BK_VBAR ? *next == '|'
2171         : next_backslash && next_next && *next_next == '|');
2172 }
2173 
2174 
2175 /* Returns true if REGNUM is in one of COMPILE_STACK's elements and
2176    false if it's not.  */
2177 
2178 static boolean
2179 group_in_compile_stack (compile_stack, regnum)
2180     compile_stack_type compile_stack;
2181     regnum_t regnum;
2182 {
2183   int this_element;
2184 
2185   for (this_element = compile_stack.avail - 1;
2186        this_element >= 0;
2187        this_element--)
2188     if (compile_stack.stack[this_element].regnum == regnum)
2189       return true;
2190 
2191   return false;
2192 }
2193 
2194 
2195 /* Read the ending character of a range (in a bracket expression) from the
2196    uncompiled pattern *P_PTR (which ends at PEND).  We assume the
2197    starting character is in `P[-2]'.  (`P[-1]' is the character `-'.)
2198    Then we set the translation of all bits between the starting and
2199    ending characters (inclusive) in the compiled pattern B.
2200 
2201    Return an error code.
2202 
2203    We use these short variable names so we can use the same macros as
2204    `regex_compile' itself.  */
2205 
2206 static reg_errcode_t
2207 compile_range (p_ptr, pend, translate, syntax, b)
2208     const char **p_ptr, *pend;
2209     char *translate;
2210     reg_syntax_t syntax;
2211     unsigned char *b;
2212 {
2213   unsigned this_char;
2214 
2215   const char *p = *p_ptr;
2216   int range_start, range_end;
2217 
2218   if (p == pend)
2219     return REG_ERANGE;
2220 
2221   /* Even though the pattern is a signed `char *', we need to fetch
2222      with unsigned char *'s; if the high bit of the pattern character
2223      is set, the range endpoints will be negative if we fetch using a
2224      signed char *.
2225 
2226      We also want to fetch the endpoints without translating them; the
2227      appropriate translation is done in the bit-setting loop below.  */
2228   range_start = ((unsigned char *) p)[-2];
2229   range_end   = ((unsigned char *) p)[0];
2230 
2231   /* Have to increment the pointer into the pattern string, so the
2232      caller isn't still at the ending character.  */
2233   (*p_ptr)++;
2234 
2235   /* If the start is after the end, the range is empty.  */
2236   if (range_start > range_end)
2237     return syntax & RE_NO_EMPTY_RANGES ? REG_ERANGE : REG_NOERROR;
2238 
2239   /* Here we see why `this_char' has to be larger than an `unsigned
2240      char' -- the range is inclusive, so if `range_end' == 0xff
2241      (assuming 8-bit characters), we would otherwise go into an infinite
2242      loop, since all characters <= 0xff.  */
2243   for (this_char = range_start; this_char <= range_end; this_char++)
2244     {
2245       SET_LIST_BIT (TRANSLATE (this_char));
2246     }
2247 
2248   return REG_NOERROR;
2249 }
2250 
2251 /* Failure stack declarations and macros; both re_compile_fastmap and
2252    re_match_2 use a failure stack.  These have to be macros because of
2253    REGEX_ALLOCATE.  */
2254 
2255 
2256 /* Number of failure points for which to initially allocate space
2257    when matching.  If this number is exceeded, we allocate more
2258    space, so it is not a hard limit.  */
2259 #ifndef INIT_FAILURE_ALLOC
2260 #define INIT_FAILURE_ALLOC 5
2261 #endif
2262 
2263 /* Roughly the maximum number of failure points on the stack.  Would be
2264    exactly that if always used MAX_FAILURE_SPACE each time we failed.
2265    This is a variable only so users of regex can assign to it; we never
2266    change it ourselves.  */
2267 int re_max_failures = 2000;
2268 
2269 union fail_stack_elt
2270 {
2271   unsigned char *pointer;
2272   int integer;
2273 };
2274 
2275 typedef union fail_stack_elt fail_stack_elt_t;
2276 
2277 typedef struct
2278 {
2279   fail_stack_elt_t *stack;
2280   unsigned size;
2281   unsigned avail;			/* Offset of next open position.  */
2282 } fail_stack_type;
2283 
2284 #define FAIL_STACK_EMPTY()     (fail_stack.avail == 0)
2285 #define FAIL_STACK_PTR_EMPTY() (fail_stack_ptr->avail == 0)
2286 #define FAIL_STACK_FULL()      (fail_stack.avail == fail_stack.size)
2287 #define FAIL_STACK_TOP()       (fail_stack.stack[fail_stack.avail])
2288 
2289 
2290 /* Initialize `fail_stack'.  Do `return -2' if the alloc fails.  */
2291 
2292 #define INIT_FAIL_STACK()						\
2293   do {									\
2294     fail_stack.stack = (fail_stack_elt_t *)				\
2295       REGEX_ALLOCATE (INIT_FAILURE_ALLOC * sizeof (fail_stack_elt_t));	\
2296 									\
2297     if (fail_stack.stack == NULL)					\
2298       return -2;							\
2299 									\
2300     fail_stack.size = INIT_FAILURE_ALLOC;				\
2301     fail_stack.avail = 0;						\
2302   } while (0)
2303 
2304 
2305 /* Double the size of FAIL_STACK, up to approximately `re_max_failures' items.
2306 
2307    Return 1 if succeeds, and 0 if either ran out of memory
2308    allocating space for it or it was already too large.
2309 
2310    REGEX_REALLOCATE requires `destination' be declared.   */
2311 
2312 #define DOUBLE_FAIL_STACK(fail_stack)					\
2313   ((fail_stack).size > re_max_failures * MAX_FAILURE_ITEMS		\
2314    ? 0									\
2315    : ((fail_stack).stack = (fail_stack_elt_t *)				\
2316         REGEX_REALLOCATE ((fail_stack).stack, 				\
2317           (fail_stack).size * sizeof (fail_stack_elt_t),		\
2318           ((fail_stack).size << 1) * sizeof (fail_stack_elt_t)),	\
2319 									\
2320       (fail_stack).stack == NULL					\
2321       ? 0								\
2322       : ((fail_stack).size <<= 1, 					\
2323          1)))
2324 
2325 
2326 /* Push PATTERN_OP on FAIL_STACK.
2327 
2328    Return 1 if was able to do so and 0 if ran out of memory allocating
2329    space to do so.  */
2330 #define PUSH_PATTERN_OP(POINTER, FAIL_STACK)				\
2331   ((FAIL_STACK_FULL ()							\
2332     && !DOUBLE_FAIL_STACK (FAIL_STACK))					\
2333    ? 0									\
2334    : ((FAIL_STACK).stack[(FAIL_STACK).avail++].pointer = POINTER,	\
2335       1))
2336 
2337 /* Push a pointer value onto the failure stack.
2338    Assumes the variable `fail_stack'.  Probably should only
2339    be called from within `PUSH_FAILURE_POINT'.  */
2340 #define PUSH_FAILURE_POINTER(item)					\
2341   fail_stack.stack[fail_stack.avail++].pointer = (unsigned char *) (item)
2342 
2343 /* This pushes an integer-valued item onto the failure stack.
2344    Assumes the variable `fail_stack'.  Probably should only
2345    be called from within `PUSH_FAILURE_POINT'.  */
2346 #define PUSH_FAILURE_INT(item)					\
2347   fail_stack.stack[fail_stack.avail++].integer = (item)
2348 
2349 /* Push a fail_stack_elt_t value onto the failure stack.
2350    Assumes the variable `fail_stack'.  Probably should only
2351    be called from within `PUSH_FAILURE_POINT'.  */
2352 #define PUSH_FAILURE_ELT(item)					\
2353   fail_stack.stack[fail_stack.avail++] =  (item)
2354 
2355 /* These three POP... operations complement the three PUSH... operations.
2356    All assume that `fail_stack' is nonempty.  */
2357 #define POP_FAILURE_POINTER() fail_stack.stack[--fail_stack.avail].pointer
2358 #define POP_FAILURE_INT() fail_stack.stack[--fail_stack.avail].integer
2359 #define POP_FAILURE_ELT() fail_stack.stack[--fail_stack.avail]
2360 
2361 /* Used to omit pushing failure point id's when we're not debugging.  */
2362 #ifdef DEBUG
2363 #define DEBUG_PUSH PUSH_FAILURE_INT
2364 #define DEBUG_POP(item_addr) *(item_addr) = POP_FAILURE_INT ()
2365 #else
2366 #define DEBUG_PUSH(item)
2367 #define DEBUG_POP(item_addr)
2368 #endif
2369 
2370 
2371 /* Push the information about the state we will need
2372    if we ever fail back to it.
2373 
2374    Requires variables fail_stack, regstart, regend, reg_info, and
2375    num_regs be declared.  DOUBLE_FAIL_STACK requires `destination' be
2376    declared.
2377 
2378    Does `return FAILURE_CODE' if runs out of memory.  */
2379 
2380 #define PUSH_FAILURE_POINT(pattern_place, string_place, failure_code)	\
2381   do {									\
2382     char *destination;							\
2383     /* Must be int, so when we don't save any registers, the arithmetic	\
2384        of 0 + -1 isn't done as unsigned.  */				\
2385     int this_reg;							\
2386     									\
2387     DEBUG_STATEMENT (failure_id++);					\
2388     DEBUG_STATEMENT (nfailure_points_pushed++);				\
2389     DEBUG_PRINT2 ("\nPUSH_FAILURE_POINT #%u:\n", failure_id);		\
2390     DEBUG_PRINT2 ("  Before push, next avail: %d\n", (fail_stack).avail);\
2391     DEBUG_PRINT2 ("                     size: %d\n", (fail_stack).size);\
2392 									\
2393     DEBUG_PRINT2 ("  slots needed: %d\n", NUM_FAILURE_ITEMS);		\
2394     DEBUG_PRINT2 ("     available: %d\n", REMAINING_AVAIL_SLOTS);	\
2395 									\
2396     /* Ensure we have enough space allocated for what we will push.  */	\
2397     while (REMAINING_AVAIL_SLOTS < NUM_FAILURE_ITEMS)			\
2398       {									\
2399         if (!DOUBLE_FAIL_STACK (fail_stack))				\
2400           return failure_code;						\
2401 									\
2402         DEBUG_PRINT2 ("\n  Doubled stack; size now: %d\n",		\
2403 		       (fail_stack).size);				\
2404         DEBUG_PRINT2 ("  slots available: %d\n", REMAINING_AVAIL_SLOTS);\
2405       }									\
2406 									\
2407     /* Push the info, starting with the registers.  */			\
2408     DEBUG_PRINT1 ("\n");						\
2409 									\
2410     if (1)								\
2411       for (this_reg = lowest_active_reg; this_reg <= highest_active_reg; \
2412 	   this_reg++)							\
2413 	{								\
2414 	  DEBUG_PRINT2 ("  Pushing reg: %d\n", this_reg);		\
2415 	  DEBUG_STATEMENT (num_regs_pushed++);				\
2416 									\
2417 	  DEBUG_PRINT2 ("    start: 0x%x\n", regstart[this_reg]);	\
2418 	  PUSH_FAILURE_POINTER (regstart[this_reg]);			\
2419 									\
2420 	  DEBUG_PRINT2 ("    end: 0x%x\n", regend[this_reg]);		\
2421 	  PUSH_FAILURE_POINTER (regend[this_reg]);			\
2422 									\
2423 	  DEBUG_PRINT2 ("    info: 0x%x\n      ", reg_info[this_reg]);	\
2424 	  DEBUG_PRINT2 (" match_null=%d",				\
2425 			REG_MATCH_NULL_STRING_P (reg_info[this_reg]));	\
2426 	  DEBUG_PRINT2 (" active=%d", IS_ACTIVE (reg_info[this_reg]));	\
2427 	  DEBUG_PRINT2 (" matched_something=%d",			\
2428 			MATCHED_SOMETHING (reg_info[this_reg]));	\
2429 	  DEBUG_PRINT2 (" ever_matched=%d",				\
2430 			EVER_MATCHED_SOMETHING (reg_info[this_reg]));	\
2431 	  DEBUG_PRINT1 ("\n");						\
2432 	  PUSH_FAILURE_ELT (reg_info[this_reg].word);			\
2433 	}								\
2434 									\
2435     DEBUG_PRINT2 ("  Pushing  low active reg: %d\n", lowest_active_reg);\
2436     PUSH_FAILURE_INT (lowest_active_reg);				\
2437 									\
2438     DEBUG_PRINT2 ("  Pushing high active reg: %d\n", highest_active_reg);\
2439     PUSH_FAILURE_INT (highest_active_reg);				\
2440 									\
2441     DEBUG_PRINT2 ("  Pushing pattern 0x%x: ", pattern_place);		\
2442     DEBUG_PRINT_COMPILED_PATTERN (bufp, pattern_place, pend);		\
2443     PUSH_FAILURE_POINTER (pattern_place);				\
2444 									\
2445     DEBUG_PRINT2 ("  Pushing string 0x%x: `", string_place);		\
2446     DEBUG_PRINT_DOUBLE_STRING (string_place, string1, size1, string2,   \
2447 				 size2);				\
2448     DEBUG_PRINT1 ("'\n");						\
2449     PUSH_FAILURE_POINTER (string_place);				\
2450 									\
2451     DEBUG_PRINT2 ("  Pushing failure id: %u\n", failure_id);		\
2452     DEBUG_PUSH (failure_id);						\
2453   } while (0)
2454 
2455 /* This is the number of items that are pushed and popped on the stack
2456    for each register.  */
2457 #define NUM_REG_ITEMS  3
2458 
2459 /* Individual items aside from the registers.  */
2460 #ifdef DEBUG
2461 #define NUM_NONREG_ITEMS 5 /* Includes failure point id.  */
2462 #else
2463 #define NUM_NONREG_ITEMS 4
2464 #endif
2465 
2466 /* We push at most this many items on the stack.  */
2467 #define MAX_FAILURE_ITEMS ((num_regs - 1) * NUM_REG_ITEMS + NUM_NONREG_ITEMS)
2468 
2469 /* We actually push this many items.  */
2470 #define NUM_FAILURE_ITEMS						\
2471   ((highest_active_reg - lowest_active_reg + 1) * NUM_REG_ITEMS 	\
2472     + NUM_NONREG_ITEMS)
2473 
2474 /* How many items can still be added to the stack without overflowing it.  */
2475 #define REMAINING_AVAIL_SLOTS ((fail_stack).size - (fail_stack).avail)
2476 
2477 
2478 /* Pops what PUSH_FAIL_STACK pushes.
2479 
2480    We restore into the parameters, all of which should be lvalues:
2481      STR -- the saved data position.
2482      PAT -- the saved pattern position.
2483      LOW_REG, HIGH_REG -- the highest and lowest active registers.
2484      REGSTART, REGEND -- arrays of string positions.
2485      REG_INFO -- array of information about each subexpression.
2486 
2487    Also assumes the variables `fail_stack' and (if debugging), `bufp',
2488    `pend', `string1', `size1', `string2', and `size2'.  */
2489 
2490 #define POP_FAILURE_POINT(str, pat, low_reg, high_reg, regstart, regend, reg_info)\
2491 {									\
2492   DEBUG_STATEMENT (fail_stack_elt_t failure_id;)			\
2493   int this_reg;								\
2494   const unsigned char *string_temp;					\
2495 									\
2496   assert (!FAIL_STACK_EMPTY ());					\
2497 									\
2498   /* Remove failure points and point to how many regs pushed.  */	\
2499   DEBUG_PRINT1 ("POP_FAILURE_POINT:\n");				\
2500   DEBUG_PRINT2 ("  Before pop, next avail: %d\n", fail_stack.avail);	\
2501   DEBUG_PRINT2 ("                    size: %d\n", fail_stack.size);	\
2502 									\
2503   assert (fail_stack.avail >= NUM_NONREG_ITEMS);			\
2504 									\
2505   DEBUG_POP (&failure_id);						\
2506   DEBUG_PRINT2 ("  Popping failure id: %u\n", failure_id);		\
2507 									\
2508   /* If the saved string location is NULL, it came from an		\
2509      on_failure_keep_string_jump opcode, and we want to throw away the	\
2510      saved NULL, thus retaining our current position in the string.  */	\
2511   string_temp = POP_FAILURE_POINTER ();					\
2512   if (string_temp != NULL)						\
2513     str = (const char *) string_temp;					\
2514 									\
2515   DEBUG_PRINT2 ("  Popping string 0x%x: `", str);			\
2516   DEBUG_PRINT_DOUBLE_STRING (str, string1, size1, string2, size2);	\
2517   DEBUG_PRINT1 ("'\n");							\
2518 									\
2519   pat = (unsigned char *) POP_FAILURE_POINTER ();			\
2520   DEBUG_PRINT2 ("  Popping pattern 0x%x: ", pat);			\
2521   DEBUG_PRINT_COMPILED_PATTERN (bufp, pat, pend);			\
2522 									\
2523   /* Restore register info.  */						\
2524   high_reg = (unsigned) POP_FAILURE_INT ();				\
2525   DEBUG_PRINT2 ("  Popping high active reg: %d\n", high_reg);		\
2526 									\
2527   low_reg = (unsigned) POP_FAILURE_INT ();				\
2528   DEBUG_PRINT2 ("  Popping  low active reg: %d\n", low_reg);		\
2529 									\
2530   if (1)								\
2531     for (this_reg = high_reg; this_reg >= low_reg; this_reg--)		\
2532       {									\
2533 	DEBUG_PRINT2 ("    Popping reg: %d\n", this_reg);		\
2534 									\
2535 	reg_info[this_reg].word = POP_FAILURE_ELT ();			\
2536 	DEBUG_PRINT2 ("      info: 0x%x\n", reg_info[this_reg]);	\
2537 									\
2538 	regend[this_reg] = (const char *) POP_FAILURE_POINTER ();	\
2539 	DEBUG_PRINT2 ("      end: 0x%x\n", regend[this_reg]);		\
2540 									\
2541 	regstart[this_reg] = (const char *) POP_FAILURE_POINTER ();	\
2542 	DEBUG_PRINT2 ("      start: 0x%x\n", regstart[this_reg]);	\
2543       }									\
2544   else									\
2545     {									\
2546       for (this_reg = highest_active_reg; this_reg > high_reg; this_reg--) \
2547 	{								\
2548 	  reg_info[this_reg].word.integer = 0;				\
2549 	  regend[this_reg] = 0;						\
2550 	  regstart[this_reg] = 0;					\
2551 	}								\
2552       highest_active_reg = high_reg;					\
2553     }									\
2554 									\
2555   DEBUG_STATEMENT (nfailure_points_popped++);				\
2556 } /* POP_FAILURE_POINT */
2557 
2558 /* re_compile_fastmap computes a ``fastmap'' for the compiled pattern in
2559    BUFP.  A fastmap records which of the (1 << BYTEWIDTH) possible
2560    characters can start a string that matches the pattern.  This fastmap
2561    is used by re_search to skip quickly over impossible starting points.
2562 
2563    The caller must supply the address of a (1 << BYTEWIDTH)-byte data
2564    area as BUFP->fastmap.
2565 
2566    We set the `fastmap', `fastmap_accurate', and `can_be_null' fields in
2567    the pattern buffer.
2568 
2569    Returns 0 if we succeed, -2 if an internal error.   */
2570 
2571 int
2572 re_compile_fastmap (bufp)
2573      struct re_pattern_buffer *bufp;
2574 {
2575   int j, k;
2576   fail_stack_type fail_stack;
2577 #ifndef REGEX_MALLOC
2578   char *destination;
2579 #endif
2580   /* We don't push any register information onto the failure stack.  */
2581   unsigned num_regs = 0;
2582 
2583   register char *fastmap = bufp->fastmap;
2584   unsigned char *pattern = bufp->buffer;
2585   unsigned long size = bufp->used;
2586   unsigned char *p = pattern;
2587   register unsigned char *pend = pattern + size;
2588 
2589   /* Assume that each path through the pattern can be null until
2590      proven otherwise.  We set this false at the bottom of switch
2591      statement, to which we get only if a particular path doesn't
2592      match the empty string.  */
2593   boolean path_can_be_null = true;
2594 
2595   /* We aren't doing a `succeed_n' to begin with.  */
2596   boolean succeed_n_p = false;
2597 
2598   assert (fastmap != NULL && p != NULL);
2599 
2600   INIT_FAIL_STACK ();
2601   bzero (fastmap, 1 << BYTEWIDTH);  /* Assume nothing's valid.  */
2602   bufp->fastmap_accurate = 1;	    /* It will be when we're done.  */
2603   bufp->can_be_null = 0;
2604 
2605   while (p != pend || !FAIL_STACK_EMPTY ())
2606     {
2607       if (p == pend)
2608         {
2609           bufp->can_be_null |= path_can_be_null;
2610 
2611           /* Reset for next path.  */
2612           path_can_be_null = true;
2613 
2614           p = fail_stack.stack[--fail_stack.avail].pointer;
2615 	}
2616 
2617       /* We should never be about to go beyond the end of the pattern.  */
2618       assert (p < pend);
2619 
2620 #ifdef SWITCH_ENUM_BUG
2621       switch ((int) ((re_opcode_t) *p++))
2622 #else
2623       switch ((re_opcode_t) *p++)
2624 #endif
2625 	{
2626 
2627         /* I guess the idea here is to simply not bother with a fastmap
2628            if a backreference is used, since it's too hard to figure out
2629            the fastmap for the corresponding group.  Setting
2630            `can_be_null' stops `re_search_2' from using the fastmap, so
2631            that is all we do.  */
2632 	case duplicate:
2633 	  bufp->can_be_null = 1;
2634           return 0;
2635 
2636 
2637       /* Following are the cases which match a character.  These end
2638          with `break'.  */
2639 
2640 	case exactn:
2641           fastmap[p[1]] = 1;
2642 	  break;
2643 
2644 
2645         case charset:
2646           for (j = *p++ * BYTEWIDTH - 1; j >= 0; j--)
2647 	    if (p[j / BYTEWIDTH] & (1 << (j % BYTEWIDTH)))
2648               fastmap[j] = 1;
2649 	  break;
2650 
2651 
2652 	case charset_not:
2653 	  /* Chars beyond end of map must be allowed.  */
2654 	  for (j = *p * BYTEWIDTH; j < (1 << BYTEWIDTH); j++)
2655             fastmap[j] = 1;
2656 
2657 	  for (j = *p++ * BYTEWIDTH - 1; j >= 0; j--)
2658 	    if (!(p[j / BYTEWIDTH] & (1 << (j % BYTEWIDTH))))
2659               fastmap[j] = 1;
2660           break;
2661 
2662 
2663 	case wordchar:
2664 	  for (j = 0; j < (1 << BYTEWIDTH); j++)
2665 	    if (SYNTAX (j) == Sword)
2666 	      fastmap[j] = 1;
2667 	  break;
2668 
2669 
2670 	case notwordchar:
2671 	  for (j = 0; j < (1 << BYTEWIDTH); j++)
2672 	    if (SYNTAX (j) != Sword)
2673 	      fastmap[j] = 1;
2674 	  break;
2675 
2676 
2677         case anychar:
2678           /* `.' matches anything ...  */
2679 	  for (j = 0; j < (1 << BYTEWIDTH); j++)
2680             fastmap[j] = 1;
2681 
2682           /* ... except perhaps newline.  */
2683           if (!(bufp->syntax & RE_DOT_NEWLINE))
2684             fastmap['\n'] = 0;
2685 
2686           /* Return if we have already set `can_be_null'; if we have,
2687              then the fastmap is irrelevant.  Something's wrong here.  */
2688 	  else if (bufp->can_be_null)
2689 	    return 0;
2690 
2691           /* Otherwise, have to check alternative paths.  */
2692 	  break;
2693 
2694 
2695 #ifdef emacs
2696         case syntaxspec:
2697 	  k = *p++;
2698 	  for (j = 0; j < (1 << BYTEWIDTH); j++)
2699 	    if (SYNTAX (j) == (enum syntaxcode) k)
2700 	      fastmap[j] = 1;
2701 	  break;
2702 
2703 
2704 	case notsyntaxspec:
2705 	  k = *p++;
2706 	  for (j = 0; j < (1 << BYTEWIDTH); j++)
2707 	    if (SYNTAX (j) != (enum syntaxcode) k)
2708 	      fastmap[j] = 1;
2709 	  break;
2710 
2711 
2712       /* All cases after this match the empty string.  These end with
2713          `continue'.  */
2714 
2715 
2716 	case before_dot:
2717 	case at_dot:
2718 	case after_dot:
2719           continue;
2720 #endif /* not emacs */
2721 
2722 
2723         case no_op:
2724         case begline:
2725         case endline:
2726 	case begbuf:
2727 	case endbuf:
2728 	case wordbound:
2729 	case notwordbound:
2730 	case wordbeg:
2731 	case wordend:
2732         case push_dummy_failure:
2733           continue;
2734 
2735 
2736 	case jump_n:
2737         case pop_failure_jump:
2738 	case maybe_pop_jump:
2739 	case jump:
2740         case jump_past_alt:
2741 	case dummy_failure_jump:
2742           EXTRACT_NUMBER_AND_INCR (j, p);
2743 	  p += j;
2744 	  if (j > 0)
2745 	    continue;
2746 
2747           /* Jump backward implies we just went through the body of a
2748              loop and matched nothing.  Opcode jumped to should be
2749              `on_failure_jump' or `succeed_n'.  Just treat it like an
2750              ordinary jump.  For a * loop, it has pushed its failure
2751              point already; if so, discard that as redundant.  */
2752           if ((re_opcode_t) *p != on_failure_jump
2753 	      && (re_opcode_t) *p != succeed_n)
2754 	    continue;
2755 
2756           p++;
2757           EXTRACT_NUMBER_AND_INCR (j, p);
2758           p += j;
2759 
2760           /* If what's on the stack is where we are now, pop it.  */
2761           if (!FAIL_STACK_EMPTY ()
2762 	      && fail_stack.stack[fail_stack.avail - 1].pointer == p)
2763             fail_stack.avail--;
2764 
2765           continue;
2766 
2767 
2768         case on_failure_jump:
2769         case on_failure_keep_string_jump:
2770 	handle_on_failure_jump:
2771           EXTRACT_NUMBER_AND_INCR (j, p);
2772 
2773           /* For some patterns, e.g., `(a?)?', `p+j' here points to the
2774              end of the pattern.  We don't want to push such a point,
2775              since when we restore it above, entering the switch will
2776              increment `p' past the end of the pattern.  We don't need
2777              to push such a point since we obviously won't find any more
2778              fastmap entries beyond `pend'.  Such a pattern can match
2779              the null string, though.  */
2780           if (p + j < pend)
2781             {
2782               if (!PUSH_PATTERN_OP (p + j, fail_stack))
2783                 return -2;
2784             }
2785           else
2786             bufp->can_be_null = 1;
2787 
2788           if (succeed_n_p)
2789             {
2790               EXTRACT_NUMBER_AND_INCR (k, p);	/* Skip the n.  */
2791               succeed_n_p = false;
2792 	    }
2793 
2794           continue;
2795 
2796 
2797 	case succeed_n:
2798           /* Get to the number of times to succeed.  */
2799           p += 2;
2800 
2801           /* Increment p past the n for when k != 0.  */
2802           EXTRACT_NUMBER_AND_INCR (k, p);
2803           if (k == 0)
2804 	    {
2805               p -= 4;
2806   	      succeed_n_p = true;  /* Spaghetti code alert.  */
2807               goto handle_on_failure_jump;
2808             }
2809           continue;
2810 
2811 
2812 	case set_number_at:
2813           p += 4;
2814           continue;
2815 
2816 
2817 	case start_memory:
2818         case stop_memory:
2819 	  p += 2;
2820 	  continue;
2821 
2822 
2823 	default:
2824           abort (); /* We have listed all the cases.  */
2825         } /* switch *p++ */
2826 
2827       /* Getting here means we have found the possible starting
2828          characters for one path of the pattern -- and that the empty
2829          string does not match.  We need not follow this path further.
2830          Instead, look at the next alternative (remembered on the
2831          stack), or quit if no more.  The test at the top of the loop
2832          does these things.  */
2833       path_can_be_null = false;
2834       p = pend;
2835     } /* while p */
2836 
2837   /* Set `can_be_null' for the last path (also the first path, if the
2838      pattern is empty).  */
2839   bufp->can_be_null |= path_can_be_null;
2840   return 0;
2841 } /* re_compile_fastmap */
2842 
2843 /* Set REGS to hold NUM_REGS registers, storing them in STARTS and
2844    ENDS.  Subsequent matches using PATTERN_BUFFER and REGS will use
2845    this memory for recording register information.  STARTS and ENDS
2846    must be allocated using the malloc library routine, and must each
2847    be at least NUM_REGS * sizeof (regoff_t) bytes long.
2848 
2849    If NUM_REGS == 0, then subsequent matches should allocate their own
2850    register data.
2851 
2852    Unless this function is called, the first search or match using
2853    PATTERN_BUFFER will allocate its own register data, without
2854    freeing the old data.  */
2855 
2856 void
2857 re_set_registers (bufp, regs, num_regs, starts, ends)
2858     struct re_pattern_buffer *bufp;
2859     struct re_registers *regs;
2860     unsigned num_regs;
2861     regoff_t *starts, *ends;
2862 {
2863   if (num_regs)
2864     {
2865       bufp->regs_allocated = REGS_REALLOCATE;
2866       regs->num_regs = num_regs;
2867       regs->start = starts;
2868       regs->end = ends;
2869     }
2870   else
2871     {
2872       bufp->regs_allocated = REGS_UNALLOCATED;
2873       regs->num_regs = 0;
2874       regs->start = regs->end = (regoff_t *) 0;
2875     }
2876 }
2877 
2878 /* Searching routines.  */
2879 
2880 /* Like re_search_2, below, but only one string is specified, and
2881    doesn't let you say where to stop matching. */
2882 
2883 int
2884 re_search (bufp, string, size, startpos, range, regs)
2885      struct re_pattern_buffer *bufp;
2886      const char *string;
2887      int size, startpos, range;
2888      struct re_registers *regs;
2889 {
2890   return re_search_2 (bufp, NULL, 0, string, size, startpos, range,
2891 		      regs, size);
2892 }
2893 
2894 
2895 /* Using the compiled pattern in BUFP->buffer, first tries to match the
2896    virtual concatenation of STRING1 and STRING2, starting first at index
2897    STARTPOS, then at STARTPOS + 1, and so on.
2898 
2899    STRING1 and STRING2 have length SIZE1 and SIZE2, respectively.
2900 
2901    RANGE is how far to scan while trying to match.  RANGE = 0 means try
2902    only at STARTPOS; in general, the last start tried is STARTPOS +
2903    RANGE.
2904 
2905    In REGS, return the indices of the virtual concatenation of STRING1
2906    and STRING2 that matched the entire BUFP->buffer and its contained
2907    subexpressions.
2908 
2909    Do not consider matching one past the index STOP in the virtual
2910    concatenation of STRING1 and STRING2.
2911 
2912    We return either the position in the strings at which the match was
2913    found, -1 if no match, or -2 if error (such as failure
2914    stack overflow).  */
2915 
2916 int
2917 re_search_2 (bufp, string1, size1, string2, size2, startpos, range, regs, stop)
2918      struct re_pattern_buffer *bufp;
2919      const char *string1, *string2;
2920      int size1, size2;
2921      int startpos;
2922      int range;
2923      struct re_registers *regs;
2924      int stop;
2925 {
2926   int val;
2927   register char *fastmap = bufp->fastmap;
2928   register char *translate = bufp->translate;
2929   int total_size = size1 + size2;
2930   int endpos = startpos + range;
2931 
2932   /* Check for out-of-range STARTPOS.  */
2933   if (startpos < 0 || startpos > total_size)
2934     return -1;
2935 
2936   /* Fix up RANGE if it might eventually take us outside
2937      the virtual concatenation of STRING1 and STRING2.  */
2938   if (endpos < -1)
2939     range = -1 - startpos;
2940   else if (endpos > total_size)
2941     range = total_size - startpos;
2942 
2943   /* If the search isn't to be a backwards one, don't waste time in a
2944      search for a pattern that must be anchored.  */
2945   if (bufp->used > 0 && (re_opcode_t) bufp->buffer[0] == begbuf && range > 0)
2946     {
2947       if (startpos > 0)
2948 	return -1;
2949       else
2950 	range = 1;
2951     }
2952 
2953 #ifdef emacs
2954   /* In a forward search for something that starts with \=.
2955      don't keep searching past point.  */
2956   if (bufp->used > 0 && (re_opcode_t) bufp->buffer[0] == at_dot && range > 0)
2957     {
2958       range = PT - startpos;
2959       if (range <= 0)
2960 	return -1;
2961     }
2962 #endif /* emacs */
2963 
2964   /* Update the fastmap now if not correct already.  */
2965   if (fastmap && !bufp->fastmap_accurate)
2966     if (re_compile_fastmap (bufp) == -2)
2967       return -2;
2968 
2969   /* Loop through the string, looking for a place to start matching.  */
2970   for (;;)
2971     {
2972       /* If a fastmap is supplied, skip quickly over characters that
2973          cannot be the start of a match.  If the pattern can match the
2974          null string, however, we don't need to skip characters; we want
2975          the first null string.  */
2976       if (fastmap && startpos < total_size && !bufp->can_be_null)
2977 	{
2978 	  if (range > 0)	/* Searching forwards.  */
2979 	    {
2980 	      register const char *d;
2981 	      register int lim = 0;
2982 	      int irange = range;
2983 
2984               if (startpos < size1 && startpos + range >= size1)
2985                 lim = range - (size1 - startpos);
2986 
2987 	      d = (startpos >= size1 ? string2 - size1 : string1) + startpos;
2988 
2989               /* Written out as an if-else to avoid testing `translate'
2990                  inside the loop.  */
2991 	      if (translate)
2992                 while (range > lim
2993                        && !fastmap[(unsigned char)
2994 				   translate[(unsigned char) *d++]])
2995                   range--;
2996 	      else
2997                 while (range > lim && !fastmap[(unsigned char) *d++])
2998                   range--;
2999 
3000 	      startpos += irange - range;
3001 	    }
3002 	  else				/* Searching backwards.  */
3003 	    {
3004 	      register char c = (size1 == 0 || startpos >= size1
3005                                  ? string2[startpos - size1]
3006                                  : string1[startpos]);
3007 
3008 	      if (!fastmap[(unsigned char) TRANSLATE (c)])
3009 		goto advance;
3010 	    }
3011 	}
3012 
3013       /* If can't match the null string, and that's all we have left, fail.  */
3014       if (range >= 0 && startpos == total_size && fastmap
3015           && !bufp->can_be_null)
3016 	return -1;
3017 
3018       val = re_match_2 (bufp, string1, size1, string2, size2,
3019 	                startpos, regs, stop);
3020       if (val >= 0)
3021 	return startpos;
3022 
3023       if (val == -2)
3024 	return -2;
3025 
3026     advance:
3027       if (!range)
3028         break;
3029       else if (range > 0)
3030         {
3031           range--;
3032           startpos++;
3033         }
3034       else
3035         {
3036           range++;
3037           startpos--;
3038         }
3039     }
3040   return -1;
3041 } /* re_search_2 */
3042 
3043 /* Declarations and macros for re_match_2.  */
3044 
3045 static int bcmp_translate ();
3046 static boolean alt_match_null_string_p (),
3047                common_op_match_null_string_p (),
3048                group_match_null_string_p ();
3049 
3050 /* Structure for per-register (a.k.a. per-group) information.
3051    Other register information, such as the
3052    starting and ending positions (which are addresses), and the list of
3053    inner groups (which is a bits list) are maintained in separate
3054    variables.
3055 
3056    We are making a (strictly speaking) nonportable assumption here: that
3057    the compiler will pack our bit fields into something that fits into
3058    the type of `word', i.e., is something that fits into one item on the
3059    failure stack.  */
3060 
3061 typedef union
3062 {
3063   fail_stack_elt_t word;
3064   struct
3065   {
3066       /* This field is one if this group can match the empty string,
3067          zero if not.  If not yet determined,  `MATCH_NULL_UNSET_VALUE'.  */
3068 #define MATCH_NULL_UNSET_VALUE 3
3069     unsigned match_null_string_p : 2;
3070     unsigned is_active : 1;
3071     unsigned matched_something : 1;
3072     unsigned ever_matched_something : 1;
3073   } bits;
3074 } register_info_type;
3075 
3076 #define REG_MATCH_NULL_STRING_P(R)  ((R).bits.match_null_string_p)
3077 #define IS_ACTIVE(R)  ((R).bits.is_active)
3078 #define MATCHED_SOMETHING(R)  ((R).bits.matched_something)
3079 #define EVER_MATCHED_SOMETHING(R)  ((R).bits.ever_matched_something)
3080 
3081 
3082 /* Call this when have matched a real character; it sets `matched' flags
3083    for the subexpressions which we are currently inside.  Also records
3084    that those subexprs have matched.  */
3085 #define SET_REGS_MATCHED()						\
3086   do									\
3087     {									\
3088       unsigned r;							\
3089       for (r = lowest_active_reg; r <= highest_active_reg; r++)		\
3090         {								\
3091           MATCHED_SOMETHING (reg_info[r])				\
3092             = EVER_MATCHED_SOMETHING (reg_info[r])			\
3093             = 1;							\
3094         }								\
3095     }									\
3096   while (0)
3097 
3098 
3099 /* This converts PTR, a pointer into one of the search strings `string1'
3100    and `string2' into an offset from the beginning of that string.  */
3101 #define POINTER_TO_OFFSET(ptr)						\
3102   (FIRST_STRING_P (ptr) ? (ptr) - string1 : (ptr) - string2 + size1)
3103 
3104 /* Registers are set to a sentinel when they haven't yet matched.  */
3105 #define REG_UNSET_VALUE ((char *) -1)
3106 #define REG_UNSET(e) ((e) == REG_UNSET_VALUE)
3107 
3108 
3109 /* Macros for dealing with the split strings in re_match_2.  */
3110 
3111 #define MATCHING_IN_FIRST_STRING  (dend == end_match_1)
3112 
3113 /* Call before fetching a character with *d.  This switches over to
3114    string2 if necessary.  */
3115 #define PREFETCH()							\
3116   while (d == dend)						    	\
3117     {									\
3118       /* End of string2 => fail.  */					\
3119       if (dend == end_match_2) 						\
3120         goto fail;							\
3121       /* End of string1 => advance to string2.  */ 			\
3122       d = string2;						        \
3123       dend = end_match_2;						\
3124     }
3125 
3126 
3127 /* Test if at very beginning or at very end of the virtual concatenation
3128    of `string1' and `string2'.  If only one string, it's `string2'.  */
3129 #define AT_STRINGS_BEG(d) ((d) == (size1 ? string1 : string2) || !size2)
3130 #define AT_STRINGS_END(d) ((d) == end2)
3131 
3132 
3133 /* Test if D points to a character which is word-constituent.  We have
3134    two special cases to check for: if past the end of string1, look at
3135    the first character in string2; and if before the beginning of
3136    string2, look at the last character in string1.  */
3137 #define WORDCHAR_P(d)							\
3138   (SYNTAX ((d) == end1 ? *string2					\
3139            : (d) == string2 - 1 ? *(end1 - 1) : *(d))			\
3140    == Sword)
3141 
3142 /* Test if the character before D and the one at D differ with respect
3143    to being word-constituent.  */
3144 #define AT_WORD_BOUNDARY(d)						\
3145   (AT_STRINGS_BEG (d) || AT_STRINGS_END (d)				\
3146    || WORDCHAR_P (d - 1) != WORDCHAR_P (d))
3147 
3148 
3149 /* Free everything we malloc.  */
3150 #ifdef REGEX_MALLOC
3151 #define FREE_VAR(var) if (var) free (var); var = NULL
3152 #define FREE_VARIABLES()						\
3153   do {									\
3154     FREE_VAR (fail_stack.stack);					\
3155     FREE_VAR (regstart);						\
3156     FREE_VAR (regend);							\
3157     FREE_VAR (old_regstart);						\
3158     FREE_VAR (old_regend);						\
3159     FREE_VAR (best_regstart);						\
3160     FREE_VAR (best_regend);						\
3161     FREE_VAR (reg_info);						\
3162     FREE_VAR (reg_dummy);						\
3163     FREE_VAR (reg_info_dummy);						\
3164   } while (0)
3165 #else /* not REGEX_MALLOC */
3166 /* Some MIPS systems (at least) want this to free alloca'd storage.  */
3167 #define FREE_VARIABLES() alloca (0)
3168 #endif /* not REGEX_MALLOC */
3169 
3170 
3171 /* These values must meet several constraints.  They must not be valid
3172    register values; since we have a limit of 255 registers (because
3173    we use only one byte in the pattern for the register number), we can
3174    use numbers larger than 255.  They must differ by 1, because of
3175    NUM_FAILURE_ITEMS above.  And the value for the lowest register must
3176    be larger than the value for the highest register, so we do not try
3177    to actually save any registers when none are active.  */
3178 #define NO_HIGHEST_ACTIVE_REG (1 << BYTEWIDTH)
3179 #define NO_LOWEST_ACTIVE_REG (NO_HIGHEST_ACTIVE_REG + 1)
3180 
3181 /* Matching routines.  */
3182 
3183 #ifndef emacs   /* Emacs never uses this.  */
3184 /* re_match is like re_match_2 except it takes only a single string.  */
3185 
3186 int
3187 re_match (bufp, string, size, pos, regs)
3188      struct re_pattern_buffer *bufp;
3189      const char *string;
3190      int size, pos;
3191      struct re_registers *regs;
3192  {
3193   return re_match_2 (bufp, NULL, 0, string, size, pos, regs, size);
3194 }
3195 #endif /* not emacs */
3196 
3197 
3198 /* re_match_2 matches the compiled pattern in BUFP against the
3199    the (virtual) concatenation of STRING1 and STRING2 (of length SIZE1
3200    and SIZE2, respectively).  We start matching at POS, and stop
3201    matching at STOP.
3202 
3203    If REGS is non-null and the `no_sub' field of BUFP is nonzero, we
3204    store offsets for the substring each group matched in REGS.  See the
3205    documentation for exactly how many groups we fill.
3206 
3207    We return -1 if no match, -2 if an internal error (such as the
3208    failure stack overflowing).  Otherwise, we return the length of the
3209    matched substring.  */
3210 
3211 int
3212 re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop)
3213      struct re_pattern_buffer *bufp;
3214      const char *string1, *string2;
3215      int size1, size2;
3216      int pos;
3217      struct re_registers *regs;
3218      int stop;
3219 {
3220   /* General temporaries.  */
3221   int mcnt;
3222   unsigned char *p1;
3223 
3224   /* Just past the end of the corresponding string.  */
3225   const char *end1, *end2;
3226 
3227   /* Pointers into string1 and string2, just past the last characters in
3228      each to consider matching.  */
3229   const char *end_match_1, *end_match_2;
3230 
3231   /* Where we are in the data, and the end of the current string.  */
3232   const char *d, *dend;
3233 
3234   /* Where we are in the pattern, and the end of the pattern.  */
3235   unsigned char *p = bufp->buffer;
3236   register unsigned char *pend = p + bufp->used;
3237 
3238   /* We use this to map every character in the string.  */
3239   char *translate = bufp->translate;
3240 
3241   /* Failure point stack.  Each place that can handle a failure further
3242      down the line pushes a failure point on this stack.  It consists of
3243      restart, regend, and reg_info for all registers corresponding to
3244      the subexpressions we're currently inside, plus the number of such
3245      registers, and, finally, two char *'s.  The first char * is where
3246      to resume scanning the pattern; the second one is where to resume
3247      scanning the strings.  If the latter is zero, the failure point is
3248      a ``dummy''; if a failure happens and the failure point is a dummy,
3249      it gets discarded and the next next one is tried.  */
3250   fail_stack_type fail_stack;
3251 #ifdef DEBUG
3252   static unsigned failure_id = 0;
3253   unsigned nfailure_points_pushed = 0, nfailure_points_popped = 0;
3254 #endif
3255 
3256   /* We fill all the registers internally, independent of what we
3257      return, for use in backreferences.  The number here includes
3258      an element for register zero.  */
3259   unsigned num_regs = bufp->re_nsub + 1;
3260 
3261   /* The currently active registers.  */
3262   unsigned lowest_active_reg = NO_LOWEST_ACTIVE_REG;
3263   unsigned highest_active_reg = NO_HIGHEST_ACTIVE_REG;
3264 
3265   /* Information on the contents of registers. These are pointers into
3266      the input strings; they record just what was matched (on this
3267      attempt) by a subexpression part of the pattern, that is, the
3268      regnum-th regstart pointer points to where in the pattern we began
3269      matching and the regnum-th regend points to right after where we
3270      stopped matching the regnum-th subexpression.  (The zeroth register
3271      keeps track of what the whole pattern matches.)  */
3272   const char **regstart, **regend;
3273 
3274   /* If a group that's operated upon by a repetition operator fails to
3275      match anything, then the register for its start will need to be
3276      restored because it will have been set to wherever in the string we
3277      are when we last see its open-group operator.  Similarly for a
3278      register's end.  */
3279   const char **old_regstart, **old_regend;
3280 
3281   /* The is_active field of reg_info helps us keep track of which (possibly
3282      nested) subexpressions we are currently in. The matched_something
3283      field of reg_info[reg_num] helps us tell whether or not we have
3284      matched any of the pattern so far this time through the reg_num-th
3285      subexpression.  These two fields get reset each time through any
3286      loop their register is in.  */
3287   register_info_type *reg_info;
3288 
3289   /* The following record the register info as found in the above
3290      variables when we find a match better than any we've seen before.
3291      This happens as we backtrack through the failure points, which in
3292      turn happens only if we have not yet matched the entire string. */
3293   unsigned best_regs_set = false;
3294   const char **best_regstart, **best_regend;
3295 
3296   /* Logically, this is `best_regend[0]'.  But we don't want to have to
3297      allocate space for that if we're not allocating space for anything
3298      else (see below).  Also, we never need info about register 0 for
3299      any of the other register vectors, and it seems rather a kludge to
3300      treat `best_regend' differently than the rest.  So we keep track of
3301      the end of the best match so far in a separate variable.  We
3302      initialize this to NULL so that when we backtrack the first time
3303      and need to test it, it's not garbage.  */
3304   const char *match_end = NULL;
3305 
3306   /* Used when we pop values we don't care about.  */
3307   const char **reg_dummy;
3308   register_info_type *reg_info_dummy;
3309 
3310 #ifdef DEBUG
3311   /* Counts the total number of registers pushed.  */
3312   unsigned num_regs_pushed = 0;
3313 #endif
3314 
3315   DEBUG_PRINT1 ("\n\nEntering re_match_2.\n");
3316 
3317   INIT_FAIL_STACK ();
3318 
3319   /* Do not bother to initialize all the register variables if there are
3320      no groups in the pattern, as it takes a fair amount of time.  If
3321      there are groups, we include space for register 0 (the whole
3322      pattern), even though we never use it, since it simplifies the
3323      array indexing.  We should fix this.  */
3324   if (bufp->re_nsub)
3325     {
3326       regstart = REGEX_TALLOC (num_regs, const char *);
3327       regend = REGEX_TALLOC (num_regs, const char *);
3328       old_regstart = REGEX_TALLOC (num_regs, const char *);
3329       old_regend = REGEX_TALLOC (num_regs, const char *);
3330       best_regstart = REGEX_TALLOC (num_regs, const char *);
3331       best_regend = REGEX_TALLOC (num_regs, const char *);
3332       reg_info = REGEX_TALLOC (num_regs, register_info_type);
3333       reg_dummy = REGEX_TALLOC (num_regs, const char *);
3334       reg_info_dummy = REGEX_TALLOC (num_regs, register_info_type);
3335 
3336       if (!(regstart && regend && old_regstart && old_regend && reg_info
3337             && best_regstart && best_regend && reg_dummy && reg_info_dummy))
3338         {
3339           FREE_VARIABLES ();
3340           return -2;
3341         }
3342     }
3343 #ifdef REGEX_MALLOC
3344   else
3345     {
3346       /* We must initialize all our variables to NULL, so that
3347          `FREE_VARIABLES' doesn't try to free them.  */
3348       regstart = regend = old_regstart = old_regend = best_regstart
3349         = best_regend = reg_dummy = NULL;
3350       reg_info = reg_info_dummy = (register_info_type *) NULL;
3351     }
3352 #endif /* REGEX_MALLOC */
3353 
3354   /* The starting position is bogus.  */
3355   if (pos < 0 || pos > size1 + size2)
3356     {
3357       FREE_VARIABLES ();
3358       return -1;
3359     }
3360 
3361   /* Initialize subexpression text positions to -1 to mark ones that no
3362      start_memory/stop_memory has been seen for. Also initialize the
3363      register information struct.  */
3364   for (mcnt = 1; mcnt < num_regs; mcnt++)
3365     {
3366       regstart[mcnt] = regend[mcnt]
3367         = old_regstart[mcnt] = old_regend[mcnt] = REG_UNSET_VALUE;
3368 
3369       REG_MATCH_NULL_STRING_P (reg_info[mcnt]) = MATCH_NULL_UNSET_VALUE;
3370       IS_ACTIVE (reg_info[mcnt]) = 0;
3371       MATCHED_SOMETHING (reg_info[mcnt]) = 0;
3372       EVER_MATCHED_SOMETHING (reg_info[mcnt]) = 0;
3373     }
3374 
3375   /* We move `string1' into `string2' if the latter's empty -- but not if
3376      `string1' is null.  */
3377   if (size2 == 0 && string1 != NULL)
3378     {
3379       string2 = string1;
3380       size2 = size1;
3381       string1 = 0;
3382       size1 = 0;
3383     }
3384   end1 = string1 + size1;
3385   end2 = string2 + size2;
3386 
3387   /* Compute where to stop matching, within the two strings.  */
3388   if (stop <= size1)
3389     {
3390       end_match_1 = string1 + stop;
3391       end_match_2 = string2;
3392     }
3393   else
3394     {
3395       end_match_1 = end1;
3396       end_match_2 = string2 + stop - size1;
3397     }
3398 
3399   /* `p' scans through the pattern as `d' scans through the data.
3400      `dend' is the end of the input string that `d' points within.  `d'
3401      is advanced into the following input string whenever necessary, but
3402      this happens before fetching; therefore, at the beginning of the
3403      loop, `d' can be pointing at the end of a string, but it cannot
3404      equal `string2'.  */
3405   if (size1 > 0 && pos <= size1)
3406     {
3407       d = string1 + pos;
3408       dend = end_match_1;
3409     }
3410   else
3411     {
3412       d = string2 + pos - size1;
3413       dend = end_match_2;
3414     }
3415 
3416   DEBUG_PRINT1 ("The compiled pattern is: ");
3417   DEBUG_PRINT_COMPILED_PATTERN (bufp, p, pend);
3418   DEBUG_PRINT1 ("The string to match is: `");
3419   DEBUG_PRINT_DOUBLE_STRING (d, string1, size1, string2, size2);
3420   DEBUG_PRINT1 ("'\n");
3421 
3422   /* This loops over pattern commands.  It exits by returning from the
3423      function if the match is complete, or it drops through if the match
3424      fails at this starting point in the input data.  */
3425   for (;;)
3426     {
3427       DEBUG_PRINT2 ("\n0x%x: ", p);
3428 
3429       if (p == pend)
3430 	{ /* End of pattern means we might have succeeded.  */
3431           DEBUG_PRINT1 ("end of pattern ... ");
3432 
3433 	  /* If we haven't matched the entire string, and we want the
3434              longest match, try backtracking.  */
3435           if (d != end_match_2)
3436 	    {
3437               DEBUG_PRINT1 ("backtracking.\n");
3438 
3439               if (!FAIL_STACK_EMPTY ())
3440                 { /* More failure points to try.  */
3441                   boolean same_str_p = (FIRST_STRING_P (match_end)
3442 	        	                == MATCHING_IN_FIRST_STRING);
3443 
3444                   /* If exceeds best match so far, save it.  */
3445                   if (!best_regs_set
3446                       || (same_str_p && d > match_end)
3447                       || (!same_str_p && !MATCHING_IN_FIRST_STRING))
3448                     {
3449                       best_regs_set = true;
3450                       match_end = d;
3451 
3452                       DEBUG_PRINT1 ("\nSAVING match as best so far.\n");
3453 
3454                       for (mcnt = 1; mcnt < num_regs; mcnt++)
3455                         {
3456                           best_regstart[mcnt] = regstart[mcnt];
3457                           best_regend[mcnt] = regend[mcnt];
3458                         }
3459                     }
3460                   goto fail;
3461                 }
3462 
3463               /* If no failure points, don't restore garbage.  */
3464               else if (best_regs_set)
3465                 {
3466   	        restore_best_regs:
3467                   /* Restore best match.  It may happen that `dend ==
3468                      end_match_1' while the restored d is in string2.
3469                      For example, the pattern `x.*y.*z' against the
3470                      strings `x-' and `y-z-', if the two strings are
3471                      not consecutive in memory.  */
3472                   DEBUG_PRINT1 ("Restoring best registers.\n");
3473 
3474                   d = match_end;
3475                   dend = ((d >= string1 && d <= end1)
3476 		           ? end_match_1 : end_match_2);
3477 
3478 		  for (mcnt = 1; mcnt < num_regs; mcnt++)
3479 		    {
3480 		      regstart[mcnt] = best_regstart[mcnt];
3481 		      regend[mcnt] = best_regend[mcnt];
3482 		    }
3483                 }
3484             } /* d != end_match_2 */
3485 
3486           DEBUG_PRINT1 ("Accepting match.\n");
3487 
3488           /* If caller wants register contents data back, do it.  */
3489           if (regs && !bufp->no_sub)
3490 	    {
3491               /* Have the register data arrays been allocated?  */
3492               if (bufp->regs_allocated == REGS_UNALLOCATED)
3493                 { /* No.  So allocate them with malloc.  We need one
3494                      extra element beyond `num_regs' for the `-1' marker
3495                      GNU code uses.  */
3496                   regs->num_regs = MAX (RE_NREGS, num_regs + 1);
3497                   regs->start = TALLOC (regs->num_regs, regoff_t);
3498                   regs->end = TALLOC (regs->num_regs, regoff_t);
3499                   if (regs->start == NULL || regs->end == NULL)
3500                     return -2;
3501                   bufp->regs_allocated = REGS_REALLOCATE;
3502                 }
3503               else if (bufp->regs_allocated == REGS_REALLOCATE)
3504                 { /* Yes.  If we need more elements than were already
3505                      allocated, reallocate them.  If we need fewer, just
3506                      leave it alone.  */
3507                   if (regs->num_regs < num_regs + 1)
3508                     {
3509                       regs->num_regs = num_regs + 1;
3510                       RETALLOC (regs->start, regs->num_regs, regoff_t);
3511                       RETALLOC (regs->end, regs->num_regs, regoff_t);
3512                       if (regs->start == NULL || regs->end == NULL)
3513                         return -2;
3514                     }
3515                 }
3516               else
3517 		{
3518 		  /* These braces fend off a "empty body in an else-statement"
3519 		     warning under GCC when assert expands to nothing.  */
3520 		  assert (bufp->regs_allocated == REGS_FIXED);
3521 		}
3522 
3523               /* Convert the pointer data in `regstart' and `regend' to
3524                  indices.  Register zero has to be set differently,
3525                  since we haven't kept track of any info for it.  */
3526               if (regs->num_regs > 0)
3527                 {
3528                   regs->start[0] = pos;
3529                   regs->end[0] = (MATCHING_IN_FIRST_STRING ? d - string1
3530 			          : d - string2 + size1);
3531                 }
3532 
3533               /* Go through the first `min (num_regs, regs->num_regs)'
3534                  registers, since that is all we initialized.  */
3535 	      for (mcnt = 1; mcnt < MIN (num_regs, regs->num_regs); mcnt++)
3536 		{
3537                   if (REG_UNSET (regstart[mcnt]) || REG_UNSET (regend[mcnt]))
3538                     regs->start[mcnt] = regs->end[mcnt] = -1;
3539                   else
3540                     {
3541 		      regs->start[mcnt] = POINTER_TO_OFFSET (regstart[mcnt]);
3542                       regs->end[mcnt] = POINTER_TO_OFFSET (regend[mcnt]);
3543                     }
3544 		}
3545 
3546               /* If the regs structure we return has more elements than
3547                  were in the pattern, set the extra elements to -1.  If
3548                  we (re)allocated the registers, this is the case,
3549                  because we always allocate enough to have at least one
3550                  -1 at the end.  */
3551               for (mcnt = num_regs; mcnt < regs->num_regs; mcnt++)
3552                 regs->start[mcnt] = regs->end[mcnt] = -1;
3553 	    } /* regs && !bufp->no_sub */
3554 
3555           FREE_VARIABLES ();
3556           DEBUG_PRINT4 ("%u failure points pushed, %u popped (%u remain).\n",
3557                         nfailure_points_pushed, nfailure_points_popped,
3558                         nfailure_points_pushed - nfailure_points_popped);
3559           DEBUG_PRINT2 ("%u registers pushed.\n", num_regs_pushed);
3560 
3561           mcnt = d - pos - (MATCHING_IN_FIRST_STRING
3562 			    ? string1
3563 			    : string2 - size1);
3564 
3565           DEBUG_PRINT2 ("Returning %d from re_match_2.\n", mcnt);
3566 
3567           return mcnt;
3568         }
3569 
3570       /* Otherwise match next pattern command.  */
3571 #ifdef SWITCH_ENUM_BUG
3572       switch ((int) ((re_opcode_t) *p++))
3573 #else
3574       switch ((re_opcode_t) *p++)
3575 #endif
3576 	{
3577         /* Ignore these.  Used to ignore the n of succeed_n's which
3578            currently have n == 0.  */
3579         case no_op:
3580           DEBUG_PRINT1 ("EXECUTING no_op.\n");
3581           break;
3582 
3583 
3584         /* Match the next n pattern characters exactly.  The following
3585            byte in the pattern defines n, and the n bytes after that
3586            are the characters to match.  */
3587 	case exactn:
3588 	  mcnt = *p++;
3589           DEBUG_PRINT2 ("EXECUTING exactn %d.\n", mcnt);
3590 
3591           /* This is written out as an if-else so we don't waste time
3592              testing `translate' inside the loop.  */
3593           if (translate)
3594 	    {
3595 	      do
3596 		{
3597 		  PREFETCH ();
3598 		  if (translate[(unsigned char) *d++] != (char) *p++)
3599                     goto fail;
3600 		}
3601 	      while (--mcnt);
3602 	    }
3603 	  else
3604 	    {
3605 	      do
3606 		{
3607 		  PREFETCH ();
3608 		  if (*d++ != (char) *p++) goto fail;
3609 		}
3610 	      while (--mcnt);
3611 	    }
3612 	  SET_REGS_MATCHED ();
3613           break;
3614 
3615 
3616         /* Match any character except possibly a newline or a null.  */
3617 	case anychar:
3618           DEBUG_PRINT1 ("EXECUTING anychar.\n");
3619 
3620           PREFETCH ();
3621 
3622           if ((!(bufp->syntax & RE_DOT_NEWLINE) && TRANSLATE (*d) == '\n')
3623               || (bufp->syntax & RE_DOT_NOT_NULL && TRANSLATE (*d) == '\000'))
3624 	    goto fail;
3625 
3626           SET_REGS_MATCHED ();
3627           DEBUG_PRINT2 ("  Matched `%d'.\n", *d);
3628           d++;
3629 	  break;
3630 
3631 
3632 	case charset:
3633 	case charset_not:
3634 	  {
3635 	    register unsigned char c;
3636 	    boolean not = (re_opcode_t) *(p - 1) == charset_not;
3637 
3638             DEBUG_PRINT2 ("EXECUTING charset%s.\n", not ? "_not" : "");
3639 
3640 	    PREFETCH ();
3641 	    c = TRANSLATE (*d); /* The character to match.  */
3642 
3643             /* Cast to `unsigned' instead of `unsigned char' in case the
3644                bit list is a full 32 bytes long.  */
3645 	    if (c < (unsigned) (*p * BYTEWIDTH)
3646 		&& p[1 + c / BYTEWIDTH] & (1 << (c % BYTEWIDTH)))
3647 	      not = !not;
3648 
3649 	    p += 1 + *p;
3650 
3651 	    if (!not) goto fail;
3652 
3653 	    SET_REGS_MATCHED ();
3654             d++;
3655 	    break;
3656 	  }
3657 
3658 
3659         /* The beginning of a group is represented by start_memory.
3660            The arguments are the register number in the next byte, and the
3661            number of groups inner to this one in the next.  The text
3662            matched within the group is recorded (in the internal
3663            registers data structure) under the register number.  */
3664         case start_memory:
3665 	  DEBUG_PRINT3 ("EXECUTING start_memory %d (%d):\n", *p, p[1]);
3666 
3667           /* Find out if this group can match the empty string.  */
3668 	  p1 = p;		/* To send to group_match_null_string_p.  */
3669 
3670           if (REG_MATCH_NULL_STRING_P (reg_info[*p]) == MATCH_NULL_UNSET_VALUE)
3671             REG_MATCH_NULL_STRING_P (reg_info[*p])
3672               = group_match_null_string_p (&p1, pend, reg_info);
3673 
3674           /* Save the position in the string where we were the last time
3675              we were at this open-group operator in case the group is
3676              operated upon by a repetition operator, e.g., with `(a*)*b'
3677              against `ab'; then we want to ignore where we are now in
3678              the string in case this attempt to match fails.  */
3679           old_regstart[*p] = REG_MATCH_NULL_STRING_P (reg_info[*p])
3680                              ? REG_UNSET (regstart[*p]) ? d : regstart[*p]
3681                              : regstart[*p];
3682 	  DEBUG_PRINT2 ("  old_regstart: %d\n",
3683 			 POINTER_TO_OFFSET (old_regstart[*p]));
3684 
3685           regstart[*p] = d;
3686 	  DEBUG_PRINT2 ("  regstart: %d\n", POINTER_TO_OFFSET (regstart[*p]));
3687 
3688           IS_ACTIVE (reg_info[*p]) = 1;
3689           MATCHED_SOMETHING (reg_info[*p]) = 0;
3690 
3691           /* This is the new highest active register.  */
3692           highest_active_reg = *p;
3693 
3694           /* If nothing was active before, this is the new lowest active
3695              register.  */
3696           if (lowest_active_reg == NO_LOWEST_ACTIVE_REG)
3697             lowest_active_reg = *p;
3698 
3699           /* Move past the register number and inner group count.  */
3700           p += 2;
3701           break;
3702 
3703 
3704         /* The stop_memory opcode represents the end of a group.  Its
3705            arguments are the same as start_memory's: the register
3706            number, and the number of inner groups.  */
3707 	case stop_memory:
3708 	  DEBUG_PRINT3 ("EXECUTING stop_memory %d (%d):\n", *p, p[1]);
3709 
3710           /* We need to save the string position the last time we were at
3711              this close-group operator in case the group is operated
3712              upon by a repetition operator, e.g., with `((a*)*(b*)*)*'
3713              against `aba'; then we want to ignore where we are now in
3714              the string in case this attempt to match fails.  */
3715           old_regend[*p] = REG_MATCH_NULL_STRING_P (reg_info[*p])
3716                            ? REG_UNSET (regend[*p]) ? d : regend[*p]
3717 			   : regend[*p];
3718 	  DEBUG_PRINT2 ("      old_regend: %d\n",
3719 			 POINTER_TO_OFFSET (old_regend[*p]));
3720 
3721           regend[*p] = d;
3722 	  DEBUG_PRINT2 ("      regend: %d\n", POINTER_TO_OFFSET (regend[*p]));
3723 
3724           /* This register isn't active anymore.  */
3725           IS_ACTIVE (reg_info[*p]) = 0;
3726 
3727           /* If this was the only register active, nothing is active
3728              anymore.  */
3729           if (lowest_active_reg == highest_active_reg)
3730             {
3731               lowest_active_reg = NO_LOWEST_ACTIVE_REG;
3732               highest_active_reg = NO_HIGHEST_ACTIVE_REG;
3733             }
3734           else
3735             { /* We must scan for the new highest active register, since
3736                  it isn't necessarily one less than now: consider
3737                  (a(b)c(d(e)f)g).  When group 3 ends, after the f), the
3738                  new highest active register is 1.  */
3739               unsigned char r = *p - 1;
3740               while (r > 0 && !IS_ACTIVE (reg_info[r]))
3741                 r--;
3742 
3743               /* If we end up at register zero, that means that we saved
3744                  the registers as the result of an `on_failure_jump', not
3745                  a `start_memory', and we jumped to past the innermost
3746                  `stop_memory'.  For example, in ((.)*) we save
3747                  registers 1 and 2 as a result of the *, but when we pop
3748                  back to the second ), we are at the stop_memory 1.
3749                  Thus, nothing is active.  */
3750 	      if (r == 0)
3751                 {
3752                   lowest_active_reg = NO_LOWEST_ACTIVE_REG;
3753                   highest_active_reg = NO_HIGHEST_ACTIVE_REG;
3754                 }
3755               else
3756                 highest_active_reg = r;
3757             }
3758 
3759           /* If just failed to match something this time around with a
3760              group that's operated on by a repetition operator, try to
3761              force exit from the ``loop'', and restore the register
3762              information for this group that we had before trying this
3763              last match.  */
3764           if ((!MATCHED_SOMETHING (reg_info[*p])
3765                || (re_opcode_t) p[-3] == start_memory)
3766 	      && (p + 2) < pend)
3767             {
3768               boolean is_a_jump_n = false;
3769 
3770               p1 = p + 2;
3771               mcnt = 0;
3772               switch ((re_opcode_t) *p1++)
3773                 {
3774                   case jump_n:
3775 		    is_a_jump_n = true;
3776                   case pop_failure_jump:
3777 		  case maybe_pop_jump:
3778 		  case jump:
3779 		  case dummy_failure_jump:
3780                     EXTRACT_NUMBER_AND_INCR (mcnt, p1);
3781 		    if (is_a_jump_n)
3782 		      p1 += 2;
3783                     break;
3784 
3785                   default:
3786                     /* do nothing */ ;
3787                 }
3788 	      p1 += mcnt;
3789 
3790               /* If the next operation is a jump backwards in the pattern
3791 	         to an on_failure_jump right before the start_memory
3792                  corresponding to this stop_memory, exit from the loop
3793                  by forcing a failure after pushing on the stack the
3794                  on_failure_jump's jump in the pattern, and d.  */
3795               if (mcnt < 0 && (re_opcode_t) *p1 == on_failure_jump
3796                   && (re_opcode_t) p1[3] == start_memory && p1[4] == *p)
3797 		{
3798                   /* If this group ever matched anything, then restore
3799                      what its registers were before trying this last
3800                      failed match, e.g., with `(a*)*b' against `ab' for
3801                      regstart[1], and, e.g., with `((a*)*(b*)*)*'
3802                      against `aba' for regend[3].
3803 
3804                      Also restore the registers for inner groups for,
3805                      e.g., `((a*)(b*))*' against `aba' (register 3 would
3806                      otherwise get trashed).  */
3807 
3808                   if (EVER_MATCHED_SOMETHING (reg_info[*p]))
3809 		    {
3810 		      unsigned r;
3811 
3812                       EVER_MATCHED_SOMETHING (reg_info[*p]) = 0;
3813 
3814 		      /* Restore this and inner groups' (if any) registers.  */
3815 		      for (r = *p; r < *p + *(p + 1); r++)
3816 			{
3817 			  regstart[r] = old_regstart[r];
3818 
3819 			  /* xx why this test?	*/
3820 			  if (old_regend[r] >= regstart[r])
3821 			    regend[r] = old_regend[r];
3822 			}
3823 		    }
3824 		  p1++;
3825                   EXTRACT_NUMBER_AND_INCR (mcnt, p1);
3826                   PUSH_FAILURE_POINT (p1 + mcnt, d, -2);
3827 
3828                   goto fail;
3829                 }
3830             }
3831 
3832           /* Move past the register number and the inner group count.  */
3833           p += 2;
3834           break;
3835 
3836 
3837 	/* \<digit> has been turned into a `duplicate' command which is
3838            followed by the numeric value of <digit> as the register number.  */
3839         case duplicate:
3840 	  {
3841 	    register const char *d2, *dend2;
3842 	    int regno = *p++;   /* Get which register to match against.  */
3843 	    DEBUG_PRINT2 ("EXECUTING duplicate %d.\n", regno);
3844 
3845 	    /* Can't back reference a group which we've never matched.  */
3846             if (REG_UNSET (regstart[regno]) || REG_UNSET (regend[regno]))
3847               goto fail;
3848 
3849             /* Where in input to try to start matching.  */
3850             d2 = regstart[regno];
3851 
3852             /* Where to stop matching; if both the place to start and
3853                the place to stop matching are in the same string, then
3854                set to the place to stop, otherwise, for now have to use
3855                the end of the first string.  */
3856 
3857             dend2 = ((FIRST_STRING_P (regstart[regno])
3858 		      == FIRST_STRING_P (regend[regno]))
3859 		     ? regend[regno] : end_match_1);
3860 	    for (;;)
3861 	      {
3862 		/* If necessary, advance to next segment in register
3863                    contents.  */
3864 		while (d2 == dend2)
3865 		  {
3866 		    if (dend2 == end_match_2) break;
3867 		    if (dend2 == regend[regno]) break;
3868 
3869                     /* End of string1 => advance to string2. */
3870                     d2 = string2;
3871                     dend2 = regend[regno];
3872 		  }
3873 		/* At end of register contents => success */
3874 		if (d2 == dend2) break;
3875 
3876 		/* If necessary, advance to next segment in data.  */
3877 		PREFETCH ();
3878 
3879 		/* How many characters left in this segment to match.  */
3880 		mcnt = dend - d;
3881 
3882 		/* Want how many consecutive characters we can match in
3883                    one shot, so, if necessary, adjust the count.  */
3884                 if (mcnt > dend2 - d2)
3885 		  mcnt = dend2 - d2;
3886 
3887 		/* Compare that many; failure if mismatch, else move
3888                    past them.  */
3889 		if (translate
3890                     ? bcmp_translate (d, d2, mcnt, translate)
3891                     : bcmp (d, d2, mcnt))
3892 		  goto fail;
3893 		d += mcnt, d2 += mcnt;
3894 	      }
3895 	  }
3896 	  break;
3897 
3898 
3899         /* begline matches the empty string at the beginning of the string
3900            (unless `not_bol' is set in `bufp'), and, if
3901            `newline_anchor' is set, after newlines.  */
3902 	case begline:
3903           DEBUG_PRINT1 ("EXECUTING begline.\n");
3904 
3905           if (AT_STRINGS_BEG (d))
3906             {
3907               if (!bufp->not_bol) break;
3908             }
3909           else if (d[-1] == '\n' && bufp->newline_anchor)
3910             {
3911               break;
3912             }
3913           /* In all other cases, we fail.  */
3914           goto fail;
3915 
3916 
3917         /* endline is the dual of begline.  */
3918 	case endline:
3919           DEBUG_PRINT1 ("EXECUTING endline.\n");
3920 
3921           if (AT_STRINGS_END (d))
3922             {
3923               if (!bufp->not_eol) break;
3924             }
3925 
3926           /* We have to ``prefetch'' the next character.  */
3927           else if ((d == end1 ? *string2 : *d) == '\n'
3928                    && bufp->newline_anchor)
3929             {
3930               break;
3931             }
3932           goto fail;
3933 
3934 
3935 	/* Match at the very beginning of the data.  */
3936         case begbuf:
3937           DEBUG_PRINT1 ("EXECUTING begbuf.\n");
3938           if (AT_STRINGS_BEG (d))
3939             break;
3940           goto fail;
3941 
3942 
3943 	/* Match at the very end of the data.  */
3944         case endbuf:
3945           DEBUG_PRINT1 ("EXECUTING endbuf.\n");
3946 	  if (AT_STRINGS_END (d))
3947 	    break;
3948           goto fail;
3949 
3950 
3951         /* on_failure_keep_string_jump is used to optimize `.*\n'.  It
3952            pushes NULL as the value for the string on the stack.  Then
3953            `pop_failure_point' will keep the current value for the
3954            string, instead of restoring it.  To see why, consider
3955            matching `foo\nbar' against `.*\n'.  The .* matches the foo;
3956            then the . fails against the \n.  But the next thing we want
3957            to do is match the \n against the \n; if we restored the
3958            string value, we would be back at the foo.
3959 
3960            Because this is used only in specific cases, we don't need to
3961            check all the things that `on_failure_jump' does, to make
3962            sure the right things get saved on the stack.  Hence we don't
3963            share its code.  The only reason to push anything on the
3964            stack at all is that otherwise we would have to change
3965            `anychar's code to do something besides goto fail in this
3966            case; that seems worse than this.  */
3967         case on_failure_keep_string_jump:
3968           DEBUG_PRINT1 ("EXECUTING on_failure_keep_string_jump");
3969 
3970           EXTRACT_NUMBER_AND_INCR (mcnt, p);
3971           DEBUG_PRINT3 (" %d (to 0x%x):\n", mcnt, p + mcnt);
3972 
3973           PUSH_FAILURE_POINT (p + mcnt, NULL, -2);
3974           break;
3975 
3976 
3977 	/* Uses of on_failure_jump:
3978 
3979            Each alternative starts with an on_failure_jump that points
3980            to the beginning of the next alternative.  Each alternative
3981            except the last ends with a jump that in effect jumps past
3982            the rest of the alternatives.  (They really jump to the
3983            ending jump of the following alternative, because tensioning
3984            these jumps is a hassle.)
3985 
3986            Repeats start with an on_failure_jump that points past both
3987            the repetition text and either the following jump or
3988            pop_failure_jump back to this on_failure_jump.  */
3989 	case on_failure_jump:
3990         on_failure:
3991           DEBUG_PRINT1 ("EXECUTING on_failure_jump");
3992 
3993           EXTRACT_NUMBER_AND_INCR (mcnt, p);
3994           DEBUG_PRINT3 (" %d (to 0x%x)", mcnt, p + mcnt);
3995 
3996           /* If this on_failure_jump comes right before a group (i.e.,
3997              the original * applied to a group), save the information
3998              for that group and all inner ones, so that if we fail back
3999              to this point, the group's information will be correct.
4000              For example, in \(a*\)*\1, we need the preceding group,
4001              and in \(\(a*\)b*\)\2, we need the inner group.  */
4002 
4003           /* We can't use `p' to check ahead because we push
4004              a failure point to `p + mcnt' after we do this.  */
4005           p1 = p;
4006 
4007           /* We need to skip no_op's before we look for the
4008              start_memory in case this on_failure_jump is happening as
4009              the result of a completed succeed_n, as in \(a\)\{1,3\}b\1
4010              against aba.  */
4011           while (p1 < pend && (re_opcode_t) *p1 == no_op)
4012             p1++;
4013 
4014           if (p1 < pend && (re_opcode_t) *p1 == start_memory)
4015             {
4016               /* We have a new highest active register now.  This will
4017                  get reset at the start_memory we are about to get to,
4018                  but we will have saved all the registers relevant to
4019                  this repetition op, as described above.  */
4020               highest_active_reg = *(p1 + 1) + *(p1 + 2);
4021               if (lowest_active_reg == NO_LOWEST_ACTIVE_REG)
4022                 lowest_active_reg = *(p1 + 1);
4023             }
4024 
4025           DEBUG_PRINT1 (":\n");
4026           PUSH_FAILURE_POINT (p + mcnt, d, -2);
4027           break;
4028 
4029 
4030         /* A smart repeat ends with `maybe_pop_jump'.
4031 	   We change it to either `pop_failure_jump' or `jump'.  */
4032         case maybe_pop_jump:
4033           EXTRACT_NUMBER_AND_INCR (mcnt, p);
4034           DEBUG_PRINT2 ("EXECUTING maybe_pop_jump %d.\n", mcnt);
4035           {
4036 	    register unsigned char *p2 = p;
4037 
4038             /* Compare the beginning of the repeat with what in the
4039                pattern follows its end. If we can establish that there
4040                is nothing that they would both match, i.e., that we
4041                would have to backtrack because of (as in, e.g., `a*a')
4042                then we can change to pop_failure_jump, because we'll
4043                never have to backtrack.
4044 
4045                This is not true in the case of alternatives: in
4046                `(a|ab)*' we do need to backtrack to the `ab' alternative
4047                (e.g., if the string was `ab').  But instead of trying to
4048                detect that here, the alternative has put on a dummy
4049                failure point which is what we will end up popping.  */
4050 
4051 	    /* Skip over open/close-group commands.  */
4052 	    while (p2 + 2 < pend
4053 		   && ((re_opcode_t) *p2 == stop_memory
4054 		       || (re_opcode_t) *p2 == start_memory))
4055 	      p2 += 3;			/* Skip over args, too.  */
4056 
4057             /* If we're at the end of the pattern, we can change.  */
4058             if (p2 == pend)
4059 	      {
4060 		/* Consider what happens when matching ":\(.*\)"
4061 		   against ":/".  I don't really understand this code
4062 		   yet.  */
4063   	        p[-3] = (unsigned char) pop_failure_jump;
4064                 DEBUG_PRINT1
4065                   ("  End of pattern: change to `pop_failure_jump'.\n");
4066               }
4067 
4068             else if ((re_opcode_t) *p2 == exactn
4069 		     || (bufp->newline_anchor && (re_opcode_t) *p2 == endline))
4070 	      {
4071 		register unsigned char c
4072                   = *p2 == (unsigned char) endline ? '\n' : p2[2];
4073 		p1 = p + mcnt;
4074 
4075                 /* p1[0] ... p1[2] are the `on_failure_jump' corresponding
4076                    to the `maybe_finalize_jump' of this case.  Examine what
4077                    follows.  */
4078                 if ((re_opcode_t) p1[3] == exactn && p1[5] != c)
4079                   {
4080   		    p[-3] = (unsigned char) pop_failure_jump;
4081                     DEBUG_PRINT3 ("  %c != %c => pop_failure_jump.\n",
4082                                   c, p1[5]);
4083                   }
4084 
4085 		else if ((re_opcode_t) p1[3] == charset
4086 			 || (re_opcode_t) p1[3] == charset_not)
4087 		  {
4088 		    int not = (re_opcode_t) p1[3] == charset_not;
4089 
4090 		    if (c < (unsigned char) (p1[4] * BYTEWIDTH)
4091 			&& p1[5 + c / BYTEWIDTH] & (1 << (c % BYTEWIDTH)))
4092 		      not = !not;
4093 
4094                     /* `not' is equal to 1 if c would match, which means
4095                         that we can't change to pop_failure_jump.  */
4096 		    if (!not)
4097                       {
4098   		        p[-3] = (unsigned char) pop_failure_jump;
4099                         DEBUG_PRINT1 ("  No match => pop_failure_jump.\n");
4100                       }
4101 		  }
4102 	      }
4103 	  }
4104 	  p -= 2;		/* Point at relative address again.  */
4105 	  if ((re_opcode_t) p[-1] != pop_failure_jump)
4106 	    {
4107 	      p[-1] = (unsigned char) jump;
4108               DEBUG_PRINT1 ("  Match => jump.\n");
4109 	      goto unconditional_jump;
4110 	    }
4111         /* Note fall through.  */
4112 
4113 
4114 	/* The end of a simple repeat has a pop_failure_jump back to
4115            its matching on_failure_jump, where the latter will push a
4116            failure point.  The pop_failure_jump takes off failure
4117            points put on by this pop_failure_jump's matching
4118            on_failure_jump; we got through the pattern to here from the
4119            matching on_failure_jump, so didn't fail.  */
4120         case pop_failure_jump:
4121           {
4122             /* We need to pass separate storage for the lowest and
4123                highest registers, even though we don't care about the
4124                actual values.  Otherwise, we will restore only one
4125                register from the stack, since lowest will == highest in
4126                `pop_failure_point'.  */
4127             unsigned dummy_low_reg, dummy_high_reg;
4128             unsigned char *pdummy;
4129             const char *sdummy;
4130 
4131             DEBUG_PRINT1 ("EXECUTING pop_failure_jump.\n");
4132             POP_FAILURE_POINT (sdummy, pdummy,
4133                                dummy_low_reg, dummy_high_reg,
4134                                reg_dummy, reg_dummy, reg_info_dummy);
4135           }
4136           /* Note fall through.  */
4137 
4138 
4139         /* Unconditionally jump (without popping any failure points).  */
4140         case jump:
4141 	unconditional_jump:
4142 	  EXTRACT_NUMBER_AND_INCR (mcnt, p);	/* Get the amount to jump.  */
4143           DEBUG_PRINT2 ("EXECUTING jump %d ", mcnt);
4144 	  p += mcnt;				/* Do the jump.  */
4145           DEBUG_PRINT2 ("(to 0x%x).\n", p);
4146 	  break;
4147 
4148 
4149         /* We need this opcode so we can detect where alternatives end
4150            in `group_match_null_string_p' et al.  */
4151         case jump_past_alt:
4152           DEBUG_PRINT1 ("EXECUTING jump_past_alt.\n");
4153           goto unconditional_jump;
4154 
4155 
4156         /* Normally, the on_failure_jump pushes a failure point, which
4157            then gets popped at pop_failure_jump.  We will end up at
4158            pop_failure_jump, also, and with a pattern of, say, `a+', we
4159            are skipping over the on_failure_jump, so we have to push
4160            something meaningless for pop_failure_jump to pop.  */
4161         case dummy_failure_jump:
4162           DEBUG_PRINT1 ("EXECUTING dummy_failure_jump.\n");
4163           /* It doesn't matter what we push for the string here.  What
4164              the code at `fail' tests is the value for the pattern.  */
4165           PUSH_FAILURE_POINT (0, 0, -2);
4166           goto unconditional_jump;
4167 
4168 
4169         /* At the end of an alternative, we need to push a dummy failure
4170            point in case we are followed by a `pop_failure_jump', because
4171            we don't want the failure point for the alternative to be
4172            popped.  For example, matching `(a|ab)*' against `aab'
4173            requires that we match the `ab' alternative.  */
4174         case push_dummy_failure:
4175           DEBUG_PRINT1 ("EXECUTING push_dummy_failure.\n");
4176           /* See comments just above at `dummy_failure_jump' about the
4177              two zeroes.  */
4178           PUSH_FAILURE_POINT (0, 0, -2);
4179           break;
4180 
4181         /* Have to succeed matching what follows at least n times.
4182            After that, handle like `on_failure_jump'.  */
4183         case succeed_n:
4184           EXTRACT_NUMBER (mcnt, p + 2);
4185           DEBUG_PRINT2 ("EXECUTING succeed_n %d.\n", mcnt);
4186 
4187           assert (mcnt >= 0);
4188           /* Originally, this is how many times we HAVE to succeed.  */
4189           if (mcnt > 0)
4190             {
4191                mcnt--;
4192 	       p += 2;
4193                STORE_NUMBER_AND_INCR (p, mcnt);
4194                DEBUG_PRINT3 ("  Setting 0x%x to %d.\n", p, mcnt);
4195             }
4196 	  else if (mcnt == 0)
4197             {
4198               DEBUG_PRINT2 ("  Setting two bytes from 0x%x to no_op.\n", p+2);
4199 	      p[2] = (unsigned char) no_op;
4200               p[3] = (unsigned char) no_op;
4201               goto on_failure;
4202             }
4203           break;
4204 
4205         case jump_n:
4206           EXTRACT_NUMBER (mcnt, p + 2);
4207           DEBUG_PRINT2 ("EXECUTING jump_n %d.\n", mcnt);
4208 
4209           /* Originally, this is how many times we CAN jump.  */
4210           if (mcnt)
4211             {
4212                mcnt--;
4213                STORE_NUMBER (p + 2, mcnt);
4214 	       goto unconditional_jump;
4215             }
4216           /* If don't have to jump any more, skip over the rest of command.  */
4217 	  else
4218 	    p += 4;
4219           break;
4220 
4221 	case set_number_at:
4222 	  {
4223             DEBUG_PRINT1 ("EXECUTING set_number_at.\n");
4224 
4225             EXTRACT_NUMBER_AND_INCR (mcnt, p);
4226             p1 = p + mcnt;
4227             EXTRACT_NUMBER_AND_INCR (mcnt, p);
4228             DEBUG_PRINT3 ("  Setting 0x%x to %d.\n", p1, mcnt);
4229 	    STORE_NUMBER (p1, mcnt);
4230             break;
4231           }
4232 
4233         case wordbound:
4234           DEBUG_PRINT1 ("EXECUTING wordbound.\n");
4235           if (AT_WORD_BOUNDARY (d))
4236 	    break;
4237           goto fail;
4238 
4239 	case notwordbound:
4240           DEBUG_PRINT1 ("EXECUTING notwordbound.\n");
4241 	  if (AT_WORD_BOUNDARY (d))
4242 	    goto fail;
4243           break;
4244 
4245 	case wordbeg:
4246           DEBUG_PRINT1 ("EXECUTING wordbeg.\n");
4247 	  if (WORDCHAR_P (d) && (AT_STRINGS_BEG (d) || !WORDCHAR_P (d - 1)))
4248 	    break;
4249           goto fail;
4250 
4251 	case wordend:
4252           DEBUG_PRINT1 ("EXECUTING wordend.\n");
4253 	  if (!AT_STRINGS_BEG (d) && WORDCHAR_P (d - 1)
4254               && (!WORDCHAR_P (d) || AT_STRINGS_END (d)))
4255 	    break;
4256           goto fail;
4257 
4258 #ifdef emacs
4259   	case before_dot:
4260           DEBUG_PRINT1 ("EXECUTING before_dot.\n");
4261  	  if (PTR_CHAR_POS ((unsigned char *) d) >= point)
4262   	    goto fail;
4263   	  break;
4264 
4265   	case at_dot:
4266           DEBUG_PRINT1 ("EXECUTING at_dot.\n");
4267  	  if (PTR_CHAR_POS ((unsigned char *) d) != point)
4268   	    goto fail;
4269   	  break;
4270 
4271   	case after_dot:
4272           DEBUG_PRINT1 ("EXECUTING after_dot.\n");
4273           if (PTR_CHAR_POS ((unsigned char *) d) <= point)
4274   	    goto fail;
4275   	  break;
4276 
4277 	case syntaxspec:
4278           DEBUG_PRINT2 ("EXECUTING syntaxspec %d.\n", mcnt);
4279 	  mcnt = *p++;
4280 	  goto matchsyntax;
4281 
4282         case wordchar:
4283           DEBUG_PRINT1 ("EXECUTING Emacs wordchar.\n");
4284 	  mcnt = (int) Sword;
4285         matchsyntax:
4286 	  PREFETCH ();
4287 	  if (SYNTAX (*d++) != (enum syntaxcode) mcnt)
4288             goto fail;
4289 	  /* Can't use *d++ here; SYNTAX may be an unsafe macro.  */
4290 	  d++;
4291 	  if (SYNTAX (d[-1]) != (enum syntaxcode) mcnt)
4292 	    goto fail;
4293           SET_REGS_MATCHED ();
4294 	  break;
4295 
4296 	case notsyntaxspec:
4297           DEBUG_PRINT2 ("EXECUTING notsyntaxspec %d.\n", mcnt);
4298 	  mcnt = *p++;
4299 	  goto matchnotsyntax;
4300 
4301         case notwordchar:
4302           DEBUG_PRINT1 ("EXECUTING Emacs notwordchar.\n");
4303 	  mcnt = (int) Sword;
4304         matchnotsyntax:
4305 	  PREFETCH ();
4306 	  /* Can't use *d++ here; SYNTAX may be an unsafe macro.  */
4307 	  d++;
4308 	  if (SYNTAX (d[-1]) == (enum syntaxcode) mcnt)
4309 	    goto fail;
4310 	  SET_REGS_MATCHED ();
4311           break;
4312 
4313 #else /* not emacs */
4314 	case wordchar:
4315           DEBUG_PRINT1 ("EXECUTING non-Emacs wordchar.\n");
4316 	  PREFETCH ();
4317           if (!WORDCHAR_P (d))
4318             goto fail;
4319 	  SET_REGS_MATCHED ();
4320           d++;
4321 	  break;
4322 
4323 	case notwordchar:
4324           DEBUG_PRINT1 ("EXECUTING non-Emacs notwordchar.\n");
4325 	  PREFETCH ();
4326 	  if (WORDCHAR_P (d))
4327             goto fail;
4328           SET_REGS_MATCHED ();
4329           d++;
4330 	  break;
4331 #endif /* not emacs */
4332 
4333         default:
4334           abort ();
4335 	}
4336       continue;  /* Successfully executed one pattern command; keep going.  */
4337 
4338 
4339     /* We goto here if a matching operation fails. */
4340     fail:
4341       if (!FAIL_STACK_EMPTY ())
4342 	{ /* A restart point is known.  Restore to that state.  */
4343           DEBUG_PRINT1 ("\nFAIL:\n");
4344           POP_FAILURE_POINT (d, p,
4345                              lowest_active_reg, highest_active_reg,
4346                              regstart, regend, reg_info);
4347 
4348           /* If this failure point is a dummy, try the next one.  */
4349           if (!p)
4350 	    goto fail;
4351 
4352           /* If we failed to the end of the pattern, don't examine *p.  */
4353 	  assert (p <= pend);
4354           if (p < pend)
4355             {
4356               boolean is_a_jump_n = false;
4357 
4358               /* If failed to a backwards jump that's part of a repetition
4359                  loop, need to pop this failure point and use the next one.  */
4360               switch ((re_opcode_t) *p)
4361                 {
4362                 case jump_n:
4363                   is_a_jump_n = true;
4364                 case maybe_pop_jump:
4365                 case pop_failure_jump:
4366                 case jump:
4367                   p1 = p + 1;
4368                   EXTRACT_NUMBER_AND_INCR (mcnt, p1);
4369                   p1 += mcnt;
4370 
4371                   if ((is_a_jump_n && (re_opcode_t) *p1 == succeed_n)
4372                       || (!is_a_jump_n
4373                           && (re_opcode_t) *p1 == on_failure_jump))
4374                     goto fail;
4375                   break;
4376                 default:
4377                   /* do nothing */ ;
4378                 }
4379             }
4380 
4381           if (d >= string1 && d <= end1)
4382 	    dend = end_match_1;
4383         }
4384       else
4385         break;   /* Matching at this starting point really fails.  */
4386     } /* for (;;) */
4387 
4388   if (best_regs_set)
4389     goto restore_best_regs;
4390 
4391   FREE_VARIABLES ();
4392 
4393   return -1;         			/* Failure to match.  */
4394 } /* re_match_2 */
4395 
4396 /* Subroutine definitions for re_match_2.  */
4397 
4398 
4399 /* We are passed P pointing to a register number after a start_memory.
4400 
4401    Return true if the pattern up to the corresponding stop_memory can
4402    match the empty string, and false otherwise.
4403 
4404    If we find the matching stop_memory, sets P to point to one past its number.
4405    Otherwise, sets P to an undefined byte less than or equal to END.
4406 
4407    We don't handle duplicates properly (yet).  */
4408 
4409 static boolean
4410 group_match_null_string_p (p, end, reg_info)
4411     unsigned char **p, *end;
4412     register_info_type *reg_info;
4413 {
4414   int mcnt;
4415   /* Point to after the args to the start_memory.  */
4416   unsigned char *p1 = *p + 2;
4417 
4418   while (p1 < end)
4419     {
4420       /* Skip over opcodes that can match nothing, and return true or
4421 	 false, as appropriate, when we get to one that can't, or to the
4422          matching stop_memory.  */
4423 
4424       switch ((re_opcode_t) *p1)
4425         {
4426         /* Could be either a loop or a series of alternatives.  */
4427         case on_failure_jump:
4428           p1++;
4429           EXTRACT_NUMBER_AND_INCR (mcnt, p1);
4430 
4431           /* If the next operation is not a jump backwards in the
4432 	     pattern.  */
4433 
4434 	  if (mcnt >= 0)
4435 	    {
4436               /* Go through the on_failure_jumps of the alternatives,
4437                  seeing if any of the alternatives cannot match nothing.
4438                  The last alternative starts with only a jump,
4439                  whereas the rest start with on_failure_jump and end
4440                  with a jump, e.g., here is the pattern for `a|b|c':
4441 
4442                  /on_failure_jump/0/6/exactn/1/a/jump_past_alt/0/6
4443                  /on_failure_jump/0/6/exactn/1/b/jump_past_alt/0/3
4444                  /exactn/1/c
4445 
4446                  So, we have to first go through the first (n-1)
4447                  alternatives and then deal with the last one separately.  */
4448 
4449 
4450               /* Deal with the first (n-1) alternatives, which start
4451                  with an on_failure_jump (see above) that jumps to right
4452                  past a jump_past_alt.  */
4453 
4454               while ((re_opcode_t) p1[mcnt-3] == jump_past_alt)
4455                 {
4456                   /* `mcnt' holds how many bytes long the alternative
4457                      is, including the ending `jump_past_alt' and
4458                      its number.  */
4459 
4460                   if (!alt_match_null_string_p (p1, p1 + mcnt - 3,
4461 				                      reg_info))
4462                     return false;
4463 
4464                   /* Move to right after this alternative, including the
4465 		     jump_past_alt.  */
4466                   p1 += mcnt;
4467 
4468                   /* Break if it's the beginning of an n-th alternative
4469                      that doesn't begin with an on_failure_jump.  */
4470                   if ((re_opcode_t) *p1 != on_failure_jump)
4471                     break;
4472 
4473 		  /* Still have to check that it's not an n-th
4474 		     alternative that starts with an on_failure_jump.  */
4475 		  p1++;
4476                   EXTRACT_NUMBER_AND_INCR (mcnt, p1);
4477                   if ((re_opcode_t) p1[mcnt-3] != jump_past_alt)
4478                     {
4479 		      /* Get to the beginning of the n-th alternative.  */
4480                       p1 -= 3;
4481                       break;
4482                     }
4483                 }
4484 
4485               /* Deal with the last alternative: go back and get number
4486                  of the `jump_past_alt' just before it.  `mcnt' contains
4487                  the length of the alternative.  */
4488               EXTRACT_NUMBER (mcnt, p1 - 2);
4489 
4490               if (!alt_match_null_string_p (p1, p1 + mcnt, reg_info))
4491                 return false;
4492 
4493               p1 += mcnt;	/* Get past the n-th alternative.  */
4494             } /* if mcnt > 0 */
4495           break;
4496 
4497 
4498         case stop_memory:
4499 	  assert (p1[1] == **p);
4500           *p = p1 + 2;
4501           return true;
4502 
4503 
4504         default:
4505           if (!common_op_match_null_string_p (&p1, end, reg_info))
4506             return false;
4507         }
4508     } /* while p1 < end */
4509 
4510   return false;
4511 } /* group_match_null_string_p */
4512 
4513 
4514 /* Similar to group_match_null_string_p, but doesn't deal with alternatives:
4515    It expects P to be the first byte of a single alternative and END one
4516    byte past the last. The alternative can contain groups.  */
4517 
4518 static boolean
4519 alt_match_null_string_p (p, end, reg_info)
4520     unsigned char *p, *end;
4521     register_info_type *reg_info;
4522 {
4523   int mcnt;
4524   unsigned char *p1 = p;
4525 
4526   while (p1 < end)
4527     {
4528       /* Skip over opcodes that can match nothing, and break when we get
4529          to one that can't.  */
4530 
4531       switch ((re_opcode_t) *p1)
4532         {
4533 	/* It's a loop.  */
4534         case on_failure_jump:
4535           p1++;
4536           EXTRACT_NUMBER_AND_INCR (mcnt, p1);
4537           p1 += mcnt;
4538           break;
4539 
4540 	default:
4541           if (!common_op_match_null_string_p (&p1, end, reg_info))
4542             return false;
4543         }
4544     }  /* while p1 < end */
4545 
4546   return true;
4547 } /* alt_match_null_string_p */
4548 
4549 
4550 /* Deals with the ops common to group_match_null_string_p and
4551    alt_match_null_string_p.
4552 
4553    Sets P to one after the op and its arguments, if any.  */
4554 
4555 static boolean
4556 common_op_match_null_string_p (p, end, reg_info)
4557     unsigned char **p, *end;
4558     register_info_type *reg_info;
4559 {
4560   int mcnt;
4561   boolean ret;
4562   int reg_no;
4563   unsigned char *p1 = *p;
4564 
4565   switch ((re_opcode_t) *p1++)
4566     {
4567     case no_op:
4568     case begline:
4569     case endline:
4570     case begbuf:
4571     case endbuf:
4572     case wordbeg:
4573     case wordend:
4574     case wordbound:
4575     case notwordbound:
4576 #ifdef emacs
4577     case before_dot:
4578     case at_dot:
4579     case after_dot:
4580 #endif
4581       break;
4582 
4583     case start_memory:
4584       reg_no = *p1;
4585       assert (reg_no > 0 && reg_no <= MAX_REGNUM);
4586       ret = group_match_null_string_p (&p1, end, reg_info);
4587 
4588       /* Have to set this here in case we're checking a group which
4589          contains a group and a back reference to it.  */
4590 
4591       if (REG_MATCH_NULL_STRING_P (reg_info[reg_no]) == MATCH_NULL_UNSET_VALUE)
4592         REG_MATCH_NULL_STRING_P (reg_info[reg_no]) = ret;
4593 
4594       if (!ret)
4595         return false;
4596       break;
4597 
4598     /* If this is an optimized succeed_n for zero times, make the jump.  */
4599     case jump:
4600       EXTRACT_NUMBER_AND_INCR (mcnt, p1);
4601       if (mcnt >= 0)
4602         p1 += mcnt;
4603       else
4604         return false;
4605       break;
4606 
4607     case succeed_n:
4608       /* Get to the number of times to succeed.  */
4609       p1 += 2;
4610       EXTRACT_NUMBER_AND_INCR (mcnt, p1);
4611 
4612       if (mcnt == 0)
4613         {
4614           p1 -= 4;
4615           EXTRACT_NUMBER_AND_INCR (mcnt, p1);
4616           p1 += mcnt;
4617         }
4618       else
4619         return false;
4620       break;
4621 
4622     case duplicate:
4623       if (!REG_MATCH_NULL_STRING_P (reg_info[*p1]))
4624         return false;
4625       break;
4626 
4627     case set_number_at:
4628       p1 += 4;
4629 
4630     default:
4631       /* All other opcodes mean we cannot match the empty string.  */
4632       return false;
4633   }
4634 
4635   *p = p1;
4636   return true;
4637 } /* common_op_match_null_string_p */
4638 
4639 
4640 /* Return zero if TRANSLATE[S1] and TRANSLATE[S2] are identical for LEN
4641    bytes; nonzero otherwise.  */
4642 
4643 static int
4644 bcmp_translate (s1, s2, len, translate)
4645      unsigned char *s1, *s2;
4646      register int len;
4647      char *translate;
4648 {
4649   register unsigned char *p1 = s1, *p2 = s2;
4650   while (len)
4651     {
4652       if (translate[*p1++] != translate[*p2++]) return 1;
4653       len--;
4654     }
4655   return 0;
4656 }
4657 
4658 /* Entry points for GNU code.  */
4659 
4660 /* re_compile_pattern is the GNU regular expression compiler: it
4661    compiles PATTERN (of length SIZE) and puts the result in BUFP.
4662    Returns 0 if the pattern was valid, otherwise an error string.
4663 
4664    Assumes the `allocated' (and perhaps `buffer') and `translate' fields
4665    are set in BUFP on entry.
4666 
4667    We call regex_compile to do the actual compilation.  */
4668 
4669 const char *
4670 re_compile_pattern (pattern, length, bufp)
4671      const char *pattern;
4672      int length;
4673      struct re_pattern_buffer *bufp;
4674 {
4675   reg_errcode_t ret;
4676 
4677   /* GNU code is written to assume at least RE_NREGS registers will be set
4678      (and at least one extra will be -1).  */
4679   bufp->regs_allocated = REGS_UNALLOCATED;
4680 
4681   /* And GNU code determines whether or not to get register information
4682      by passing null for the REGS argument to re_match, etc., not by
4683      setting no_sub.  */
4684   bufp->no_sub = 0;
4685 
4686   /* Match anchors at newline.  */
4687   bufp->newline_anchor = 1;
4688 
4689   ret = regex_compile (pattern, length, re_syntax_options, bufp);
4690 
4691   return re_error_msg[(int) ret];
4692 }
4693 
4694 /* Entry points compatible with 4.2 BSD regex library.  We don't define
4695    them if this is an Emacs or POSIX compilation.  */
4696 
4697 #if !defined (emacs)
4698 
4699 /* BSD has one and only one pattern buffer.  */
4700 static struct re_pattern_buffer re_comp_buf;
4701 
4702 char *
4703 re_comp (s)
4704     const char *s;
4705 {
4706   reg_errcode_t ret;
4707 
4708   if (!s)
4709     {
4710       if (!re_comp_buf.buffer)
4711 	return "No previous regular expression";
4712       return 0;
4713     }
4714 
4715   if (!re_comp_buf.buffer)
4716     {
4717       re_comp_buf.buffer = (unsigned char *) malloc (200);
4718       if (re_comp_buf.buffer == NULL)
4719         return "Memory exhausted";
4720       re_comp_buf.allocated = 200;
4721 
4722       re_comp_buf.fastmap = (char *) malloc (1 << BYTEWIDTH);
4723       if (re_comp_buf.fastmap == NULL)
4724 	return "Memory exhausted";
4725     }
4726 
4727   /* Since `re_exec' always passes NULL for the `regs' argument, we
4728      don't need to initialize the pattern buffer fields which affect it.  */
4729 
4730   /* Match anchors at newlines.  */
4731   re_comp_buf.newline_anchor = 1;
4732 
4733   ret = regex_compile (s, strlen (s), re_syntax_options, &re_comp_buf);
4734 
4735   /* Yes, we're discarding `const' here.  */
4736   return (char *) re_error_msg[(int) ret];
4737 }
4738 
4739 
4740 int
4741 re_exec (s)
4742     const char *s;
4743 {
4744   const int len = strlen (s);
4745   return
4746     0 <= re_search (&re_comp_buf, s, len, 0, len, (struct re_registers *) 0);
4747 }
4748 #endif /* not emacs and not _POSIX_SOURCE */
4749 
4750 /* POSIX.2 functions.  Don't define these for Emacs.  */
4751 
4752 #ifndef emacs
4753 
4754 /* regcomp takes a regular expression as a string and compiles it.
4755 
4756    PREG is a regex_t *.  We do not expect any fields to be initialized,
4757    since POSIX says we shouldn't.  Thus, we set
4758 
4759      `buffer' to the compiled pattern;
4760      `used' to the length of the compiled pattern;
4761      `syntax' to RE_SYNTAX_POSIX_EXTENDED if the
4762        REG_EXTENDED bit in CFLAGS is set; otherwise, to
4763        RE_SYNTAX_POSIX_BASIC;
4764      `newline_anchor' to REG_NEWLINE being set in CFLAGS;
4765      `fastmap' and `fastmap_accurate' to zero;
4766      `re_nsub' to the number of subexpressions in PATTERN.
4767 
4768    PATTERN is the address of the pattern string.
4769 
4770    CFLAGS is a series of bits which affect compilation.
4771 
4772      If REG_EXTENDED is set, we use POSIX extended syntax; otherwise, we
4773      use POSIX basic syntax.
4774 
4775      If REG_NEWLINE is set, then . and [^...] don't match newline.
4776      Also, regexec will try a match beginning after every newline.
4777 
4778      If REG_ICASE is set, then we considers upper- and lowercase
4779      versions of letters to be equivalent when matching.
4780 
4781      If REG_NOSUB is set, then when PREG is passed to regexec, that
4782      routine will report only success or failure, and nothing about the
4783      registers.
4784 
4785    It returns 0 if it succeeds, nonzero if it doesn't.  (See regex.h for
4786    the return codes and their meanings.)  */
4787 
4788 int
4789 regcomp (preg, pattern, cflags)
4790     regex_t *preg;
4791     const char *pattern;
4792     int cflags;
4793 {
4794   reg_errcode_t ret;
4795   unsigned syntax
4796     = (cflags & REG_EXTENDED) ?
4797       RE_SYNTAX_POSIX_EXTENDED : RE_SYNTAX_POSIX_BASIC;
4798 
4799   /* regex_compile will allocate the space for the compiled pattern.  */
4800   preg->buffer = 0;
4801   preg->allocated = 0;
4802 
4803   /* Don't bother to use a fastmap when searching.  This simplifies the
4804      REG_NEWLINE case: if we used a fastmap, we'd have to put all the
4805      characters after newlines into the fastmap.  This way, we just try
4806      every character.  */
4807   preg->fastmap = 0;
4808 
4809   if (cflags & REG_ICASE)
4810     {
4811       unsigned i;
4812 
4813       preg->translate = (char *) malloc (CHAR_SET_SIZE);
4814       if (preg->translate == NULL)
4815         return (int) REG_ESPACE;
4816 
4817       /* Map uppercase characters to corresponding lowercase ones.  */
4818       for (i = 0; i < CHAR_SET_SIZE; i++)
4819         preg->translate[i] = ISUPPER (i) ? tolower (i) : i;
4820     }
4821   else
4822     preg->translate = NULL;
4823 
4824   /* If REG_NEWLINE is set, newlines are treated differently.  */
4825   if (cflags & REG_NEWLINE)
4826     { /* REG_NEWLINE implies neither . nor [^...] match newline.  */
4827       syntax &= ~RE_DOT_NEWLINE;
4828       syntax |= RE_HAT_LISTS_NOT_NEWLINE;
4829       /* It also changes the matching behavior.  */
4830       preg->newline_anchor = 1;
4831     }
4832   else
4833     preg->newline_anchor = 0;
4834 
4835   preg->no_sub = !!(cflags & REG_NOSUB);
4836 
4837   /* POSIX says a null character in the pattern terminates it, so we
4838      can use strlen here in compiling the pattern.  */
4839   ret = regex_compile (pattern, strlen (pattern), syntax, preg);
4840 
4841   /* POSIX doesn't distinguish between an unmatched open-group and an
4842      unmatched close-group: both are REG_EPAREN.  */
4843   if (ret == REG_ERPAREN) ret = REG_EPAREN;
4844 
4845   return (int) ret;
4846 }
4847 
4848 
4849 /* regexec searches for a given pattern, specified by PREG, in the
4850    string STRING.
4851 
4852    If NMATCH is zero or REG_NOSUB was set in the cflags argument to
4853    `regcomp', we ignore PMATCH.  Otherwise, we assume PMATCH has at
4854    least NMATCH elements, and we set them to the offsets of the
4855    corresponding matched substrings.
4856 
4857    EFLAGS specifies `execution flags' which affect matching: if
4858    REG_NOTBOL is set, then ^ does not match at the beginning of the
4859    string; if REG_NOTEOL is set, then $ does not match at the end.
4860 
4861    We return 0 if we find a match and REG_NOMATCH if not.  */
4862 
4863 int
4864 regexec (preg, string, nmatch, pmatch, eflags)
4865     const regex_t *preg;
4866     const char *string;
4867     size_t nmatch;
4868     regmatch_t pmatch[];
4869     int eflags;
4870 {
4871   int ret;
4872   struct re_registers regs;
4873   regex_t private_preg;
4874   int len = strlen (string);
4875   boolean want_reg_info = !preg->no_sub && nmatch > 0;
4876 
4877   private_preg = *preg;
4878 
4879   private_preg.not_bol = !!(eflags & REG_NOTBOL);
4880   private_preg.not_eol = !!(eflags & REG_NOTEOL);
4881 
4882   /* The user has told us exactly how many registers to return
4883      information about, via `nmatch'.  We have to pass that on to the
4884      matching routines.  */
4885   private_preg.regs_allocated = REGS_FIXED;
4886 
4887   if (want_reg_info)
4888     {
4889       regs.num_regs = nmatch;
4890       regs.start = TALLOC (nmatch, regoff_t);
4891       regs.end = TALLOC (nmatch, regoff_t);
4892       if (regs.start == NULL || regs.end == NULL)
4893         return (int) REG_NOMATCH;
4894     }
4895 
4896   /* Perform the searching operation.  */
4897   ret = re_search (&private_preg, string, len,
4898                    /* start: */ 0, /* range: */ len,
4899                    want_reg_info ? &regs : (struct re_registers *) 0);
4900 
4901   /* Copy the register information to the POSIX structure.  */
4902   if (want_reg_info)
4903     {
4904       if (ret >= 0)
4905         {
4906           unsigned r;
4907 
4908           for (r = 0; r < nmatch; r++)
4909             {
4910               pmatch[r].rm_so = regs.start[r];
4911               pmatch[r].rm_eo = regs.end[r];
4912             }
4913         }
4914 
4915       /* If we needed the temporary register info, free the space now.  */
4916       free (regs.start);
4917       free (regs.end);
4918     }
4919 
4920   /* We want zero return to mean success, unlike `re_search'.  */
4921   return ret >= 0 ? (int) REG_NOERROR : (int) REG_NOMATCH;
4922 }
4923 
4924 
4925 /* Returns a message corresponding to an error code, ERRCODE, returned
4926    from either regcomp or regexec.   We don't use PREG here.  */
4927 
4928 size_t
4929 regerror (errcode, preg, errbuf, errbuf_size)
4930     int errcode;
4931     const regex_t *preg;
4932     char *errbuf;
4933     size_t errbuf_size;
4934 {
4935   const char *msg;
4936   size_t msg_size;
4937 
4938   if (errcode < 0
4939       || errcode >= (sizeof (re_error_msg) / sizeof (re_error_msg[0])))
4940     /* Only error codes returned by the rest of the code should be passed
4941        to this routine.  If we are given anything else, or if other regex
4942        code generates an invalid error code, then the program has a bug.
4943        Dump core so we can fix it.  */
4944     abort ();
4945 
4946   msg = re_error_msg[errcode];
4947 
4948   /* POSIX doesn't require that we do anything in this case, but why
4949      not be nice.  */
4950   if (! msg)
4951     msg = "Success";
4952 
4953   msg_size = strlen (msg) + 1; /* Includes the null.  */
4954 
4955   if (errbuf_size != 0)
4956     {
4957       if (msg_size > errbuf_size)
4958         {
4959           strncpy (errbuf, msg, errbuf_size - 1);
4960           errbuf[errbuf_size - 1] = 0;
4961         }
4962       else
4963         strcpy (errbuf, msg);
4964     }
4965 
4966   return msg_size;
4967 }
4968 
4969 
4970 /* Free dynamically allocated space used by PREG.  */
4971 
4972 void
4973 regfree (preg)
4974     regex_t *preg;
4975 {
4976   if (preg->buffer != NULL)
4977     free (preg->buffer);
4978   preg->buffer = NULL;
4979 
4980   preg->allocated = 0;
4981   preg->used = 0;
4982 
4983   if (preg->fastmap != NULL)
4984     free (preg->fastmap);
4985   preg->fastmap = NULL;
4986   preg->fastmap_accurate = 0;
4987 
4988   if (preg->translate != NULL)
4989     free (preg->translate);
4990   preg->translate = NULL;
4991 }
4992 
4993 #endif /* not emacs  */
4994