xref: /openbsd-src/gnu/usr.bin/cvs/lib/regex.c (revision c26070a5a87b8b908afc23542b77914040a7b4e9)
1 /* Extended regular expression matching and search library,
2    version 0.12.
3    (Implements POSIX draft P10003.2/D11.2, except for
4    internationalization features.)
5 
6    Copyright (C) 1993 Free Software Foundation, Inc.
7 
8    This program is free software; you can redistribute it and/or modify
9    it under the terms of the GNU General Public License as published by
10    the Free Software Foundation; either version 2, or (at your option)
11    any later version.
12 
13    This program is distributed in the hope that it will be useful,
14    but WITHOUT ANY WARRANTY; without even the implied warranty of
15    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16    GNU General Public License for more details.
17 
18    You should have received a copy of the GNU General Public License
19    along with this program; if not, write to the Free Software
20    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.  */
21 
22 /* Trying to define this in the makefile would get hairy, unless we can
23    more gracefully do it for NT, OS/2, unix, etc.  */
24 #define REGEX_MALLOC 1
25 
26 /* AIX requires this to be the first thing in the file. */
27 #if defined (_AIX) && !defined (REGEX_MALLOC)
28   #pragma alloca
29 #endif
30 
31 #define _GNU_SOURCE
32 
33 /* We need this for `regex.h', and perhaps for the Emacs include files.  */
34 #include <sys/types.h>
35 
36 #ifdef HAVE_CONFIG_H
37 #include "config.h"
38 #endif
39 
40 /* The `emacs' switch turns on certain matching commands
41    that make sense only in Emacs. */
42 #ifdef emacs
43 
44 #include "lisp.h"
45 #include "buffer.h"
46 #include "syntax.h"
47 
48 /* Emacs uses `NULL' as a predicate.  */
49 #undef NULL
50 
51 #else  /* not emacs */
52 
53 /* We used to test for `BSTRING' here, but only GCC and Emacs define
54    `BSTRING', as far as I know, and neither of them use this code.  */
55 #if HAVE_STRING_H || STDC_HEADERS
56 #include <string.h>
57 #ifndef bcmp
58 #define bcmp(s1, s2, n)	memcmp ((s1), (s2), (n))
59 #endif
60 #ifndef bcopy
61 #define bcopy(s, d, n)	memcpy ((d), (s), (n))
62 #endif
63 #ifndef bzero
64 #define bzero(s, n)	memset ((s), 0, (n))
65 #endif
66 #else
67 #include <strings.h>
68 #endif
69 
70 #ifdef STDC_HEADERS
71 #include <stdlib.h>
72 #else
73 char *malloc ();
74 char *realloc ();
75 #endif
76 
77 
78 /* Define the syntax stuff for \<, \>, etc.  */
79 
80 /* This must be nonzero for the wordchar and notwordchar pattern
81    commands in re_match_2.  */
82 #ifndef Sword
83 #define Sword 1
84 #endif
85 
86 #ifdef SYNTAX_TABLE
87 
88 extern char *re_syntax_table;
89 
90 #else /* not SYNTAX_TABLE */
91 
92 /* How many characters in the character set.  */
93 #define CHAR_SET_SIZE 256
94 
95 static char re_syntax_table[CHAR_SET_SIZE];
96 
97 static void
98 init_syntax_once ()
99 {
100    register int c;
101    static int done = 0;
102 
103    if (done)
104      return;
105 
106    bzero (re_syntax_table, sizeof re_syntax_table);
107 
108    for (c = 'a'; c <= 'z'; c++)
109      re_syntax_table[c] = Sword;
110 
111    for (c = 'A'; c <= 'Z'; c++)
112      re_syntax_table[c] = Sword;
113 
114    for (c = '0'; c <= '9'; c++)
115      re_syntax_table[c] = Sword;
116 
117    re_syntax_table['_'] = Sword;
118 
119    done = 1;
120 }
121 
122 #endif /* not SYNTAX_TABLE */
123 
124 #define SYNTAX(c) re_syntax_table[c]
125 
126 #endif /* not emacs */
127 
128 /* Get the interface, including the syntax bits.  */
129 #include "regex.h"
130 
131 /* isalpha etc. are used for the character classes.  */
132 #include <ctype.h>
133 
134 #ifndef isascii
135 #define isascii(c) 1
136 #endif
137 
138 #ifdef isblank
139 #define ISBLANK(c) (isascii (c) && isblank (c))
140 #else
141 #define ISBLANK(c) ((c) == ' ' || (c) == '\t')
142 #endif
143 #ifdef isgraph
144 #define ISGRAPH(c) (isascii (c) && isgraph (c))
145 #else
146 #define ISGRAPH(c) (isascii (c) && isprint (c) && !isspace (c))
147 #endif
148 
149 #define ISPRINT(c) (isascii (c) && isprint (c))
150 #define ISDIGIT(c) (isascii (c) && isdigit (c))
151 #define ISALNUM(c) (isascii (c) && isalnum (c))
152 #define ISALPHA(c) (isascii (c) && isalpha (c))
153 #define ISCNTRL(c) (isascii (c) && iscntrl (c))
154 #define ISLOWER(c) (isascii (c) && islower (c))
155 #define ISPUNCT(c) (isascii (c) && ispunct (c))
156 #define ISSPACE(c) (isascii (c) && isspace (c))
157 #define ISUPPER(c) (isascii (c) && isupper (c))
158 #define ISXDIGIT(c) (isascii (c) && isxdigit (c))
159 
160 #ifndef NULL
161 #define NULL 0
162 #endif
163 
164 /* We remove any previous definition of `SIGN_EXTEND_CHAR',
165    since ours (we hope) works properly with all combinations of
166    machines, compilers, `char' and `unsigned char' argument types.
167    (Per Bothner suggested the basic approach.)  */
168 #undef SIGN_EXTEND_CHAR
169 #if __STDC__
170 #define SIGN_EXTEND_CHAR(c) ((signed char) (c))
171 #else  /* not __STDC__ */
172 /* As in Harbison and Steele.  */
173 #define SIGN_EXTEND_CHAR(c) ((((unsigned char) (c)) ^ 128) - 128)
174 #endif
175 
176 /* Should we use malloc or alloca?  If REGEX_MALLOC is not defined, we
177    use `alloca' instead of `malloc'.  This is because using malloc in
178    re_search* or re_match* could cause memory leaks when C-g is used in
179    Emacs; also, malloc is slower and causes storage fragmentation.  On
180    the other hand, malloc is more portable, and easier to debug.
181 
182    Because we sometimes use alloca, some routines have to be macros,
183    not functions -- `alloca'-allocated space disappears at the end of the
184    function it is called in.  */
185 
186 #ifdef REGEX_MALLOC
187 
188 #define REGEX_ALLOCATE malloc
189 #define REGEX_REALLOCATE(source, osize, nsize) realloc (source, nsize)
190 
191 #else /* not REGEX_MALLOC  */
192 
193 /* Emacs already defines alloca, sometimes.  */
194 #ifndef alloca
195 
196 /* Make alloca work the best possible way.  */
197 #ifdef __GNUC__
198 #define alloca __builtin_alloca
199 #else /* not __GNUC__ */
200 #if HAVE_ALLOCA_H
201 #include <alloca.h>
202 #else /* not __GNUC__ or HAVE_ALLOCA_H */
203 #ifndef _AIX /* Already did AIX, up at the top.  */
204 char *alloca ();
205 #endif /* not _AIX */
206 #endif /* not HAVE_ALLOCA_H */
207 #endif /* not __GNUC__ */
208 
209 #endif /* not alloca */
210 
211 #define REGEX_ALLOCATE alloca
212 
213 /* Assumes a `char *destination' variable.  */
214 #define REGEX_REALLOCATE(source, osize, nsize)				\
215   (destination = (char *) alloca (nsize),				\
216    bcopy (source, destination, osize),					\
217    destination)
218 
219 #endif /* not REGEX_MALLOC */
220 
221 
222 /* True if `size1' is non-NULL and PTR is pointing anywhere inside
223    `string1' or just past its end.  This works if PTR is NULL, which is
224    a good thing.  */
225 #define FIRST_STRING_P(ptr) 					\
226   (size1 && string1 <= (ptr) && (ptr) <= string1 + size1)
227 
228 /* (Re)Allocate N items of type T using malloc, or fail.  */
229 #define TALLOC(n, t) ((t *) malloc ((n) * sizeof (t)))
230 #define RETALLOC(addr, n, t) ((addr) = (t *) realloc (addr, (n) * sizeof (t)))
231 #define REGEX_TALLOC(n, t) ((t *) REGEX_ALLOCATE ((n) * sizeof (t)))
232 
233 #define BYTEWIDTH 8 /* In bits.  */
234 
235 #define STREQ(s1, s2) ((strcmp (s1, s2) == 0))
236 
237 #define MAX(a, b) ((a) > (b) ? (a) : (b))
238 #define MIN(a, b) ((a) < (b) ? (a) : (b))
239 
240 typedef char boolean;
241 #define false 0
242 #define true 1
243 
244 /* These are the command codes that appear in compiled regular
245    expressions.  Some opcodes are followed by argument bytes.  A
246    command code can specify any interpretation whatsoever for its
247    arguments.  Zero bytes may appear in the compiled regular expression.
248 
249    The value of `exactn' is needed in search.c (search_buffer) in Emacs.
250    So regex.h defines a symbol `RE_EXACTN_VALUE' to be 1; the value of
251    `exactn' we use here must also be 1.  */
252 
253 typedef enum
254 {
255   no_op = 0,
256 
257         /* Followed by one byte giving n, then by n literal bytes.  */
258   exactn = 1,
259 
260         /* Matches any (more or less) character.  */
261   anychar,
262 
263         /* Matches any one char belonging to specified set.  First
264            following byte is number of bitmap bytes.  Then come bytes
265            for a bitmap saying which chars are in.  Bits in each byte
266            are ordered low-bit-first.  A character is in the set if its
267            bit is 1.  A character too large to have a bit in the map is
268            automatically not in the set.  */
269   charset,
270 
271         /* Same parameters as charset, but match any character that is
272            not one of those specified.  */
273   charset_not,
274 
275         /* Start remembering the text that is matched, for storing in a
276            register.  Followed by one byte with the register number, in
277            the range 0 to one less than the pattern buffer's re_nsub
278            field.  Then followed by one byte with the number of groups
279            inner to this one.  (This last has to be part of the
280            start_memory only because we need it in the on_failure_jump
281            of re_match_2.)  */
282   start_memory,
283 
284         /* Stop remembering the text that is matched and store it in a
285            memory register.  Followed by one byte with the register
286            number, in the range 0 to one less than `re_nsub' in the
287            pattern buffer, and one byte with the number of inner groups,
288            just like `start_memory'.  (We need the number of inner
289            groups here because we don't have any easy way of finding the
290            corresponding start_memory when we're at a stop_memory.)  */
291   stop_memory,
292 
293         /* Match a duplicate of something remembered. Followed by one
294            byte containing the register number.  */
295   duplicate,
296 
297         /* Fail unless at beginning of line.  */
298   begline,
299 
300         /* Fail unless at end of line.  */
301   endline,
302 
303         /* Succeeds if at beginning of buffer (if emacs) or at beginning
304            of string to be matched (if not).  */
305   begbuf,
306 
307         /* Analogously, for end of buffer/string.  */
308   endbuf,
309 
310         /* Followed by two byte relative address to which to jump.  */
311   jump,
312 
313 	/* Same as jump, but marks the end of an alternative.  */
314   jump_past_alt,
315 
316         /* Followed by two-byte relative address of place to resume at
317            in case of failure.  */
318   on_failure_jump,
319 
320         /* Like on_failure_jump, but pushes a placeholder instead of the
321            current string position when executed.  */
322   on_failure_keep_string_jump,
323 
324         /* Throw away latest failure point and then jump to following
325            two-byte relative address.  */
326   pop_failure_jump,
327 
328         /* Change to pop_failure_jump if know won't have to backtrack to
329            match; otherwise change to jump.  This is used to jump
330            back to the beginning of a repeat.  If what follows this jump
331            clearly won't match what the repeat does, such that we can be
332            sure that there is no use backtracking out of repetitions
333            already matched, then we change it to a pop_failure_jump.
334            Followed by two-byte address.  */
335   maybe_pop_jump,
336 
337         /* Jump to following two-byte address, and push a dummy failure
338            point. This failure point will be thrown away if an attempt
339            is made to use it for a failure.  A `+' construct makes this
340            before the first repeat.  Also used as an intermediary kind
341            of jump when compiling an alternative.  */
342   dummy_failure_jump,
343 
344 	/* Push a dummy failure point and continue.  Used at the end of
345 	   alternatives.  */
346   push_dummy_failure,
347 
348         /* Followed by two-byte relative address and two-byte number n.
349            After matching N times, jump to the address upon failure.  */
350   succeed_n,
351 
352         /* Followed by two-byte relative address, and two-byte number n.
353            Jump to the address N times, then fail.  */
354   jump_n,
355 
356         /* Set the following two-byte relative address to the
357            subsequent two-byte number.  The address *includes* the two
358            bytes of number.  */
359   set_number_at,
360 
361   wordchar,	/* Matches any word-constituent character.  */
362   notwordchar,	/* Matches any char that is not a word-constituent.  */
363 
364   wordbeg,	/* Succeeds if at word beginning.  */
365   wordend,	/* Succeeds if at word end.  */
366 
367   wordbound,	/* Succeeds if at a word boundary.  */
368   notwordbound	/* Succeeds if not at a word boundary.  */
369 
370 #ifdef emacs
371   ,before_dot,	/* Succeeds if before point.  */
372   at_dot,	/* Succeeds if at point.  */
373   after_dot,	/* Succeeds if after point.  */
374 
375 	/* Matches any character whose syntax is specified.  Followed by
376            a byte which contains a syntax code, e.g., Sword.  */
377   syntaxspec,
378 
379 	/* Matches any character whose syntax is not that specified.  */
380   notsyntaxspec
381 #endif /* emacs */
382 } re_opcode_t;
383 
384 /* Common operations on the compiled pattern.  */
385 
386 /* Store NUMBER in two contiguous bytes starting at DESTINATION.  */
387 
388 #define STORE_NUMBER(destination, number)				\
389   do {									\
390     (destination)[0] = (number) & 0377;					\
391     (destination)[1] = (number) >> 8;					\
392   } while (0)
393 
394 /* Same as STORE_NUMBER, except increment DESTINATION to
395    the byte after where the number is stored.  Therefore, DESTINATION
396    must be an lvalue.  */
397 
398 #define STORE_NUMBER_AND_INCR(destination, number)			\
399   do {									\
400     STORE_NUMBER (destination, number);					\
401     (destination) += 2;							\
402   } while (0)
403 
404 /* Put into DESTINATION a number stored in two contiguous bytes starting
405    at SOURCE.  */
406 
407 #define EXTRACT_NUMBER(destination, source)				\
408   do {									\
409     (destination) = *(source) & 0377;					\
410     (destination) += SIGN_EXTEND_CHAR (*((source) + 1)) << 8;		\
411   } while (0)
412 
413 #ifdef DEBUG
414 static void
415 extract_number (dest, source)
416     int *dest;
417     unsigned char *source;
418 {
419   int temp = SIGN_EXTEND_CHAR (*(source + 1));
420   *dest = *source & 0377;
421   *dest += temp << 8;
422 }
423 
424 #ifndef EXTRACT_MACROS /* To debug the macros.  */
425 #undef EXTRACT_NUMBER
426 #define EXTRACT_NUMBER(dest, src) extract_number (&dest, src)
427 #endif /* not EXTRACT_MACROS */
428 
429 #endif /* DEBUG */
430 
431 /* Same as EXTRACT_NUMBER, except increment SOURCE to after the number.
432    SOURCE must be an lvalue.  */
433 
434 #define EXTRACT_NUMBER_AND_INCR(destination, source)			\
435   do {									\
436     EXTRACT_NUMBER (destination, source);				\
437     (source) += 2; 							\
438   } while (0)
439 
440 #ifdef DEBUG
441 static void
442 extract_number_and_incr (destination, source)
443     int *destination;
444     unsigned char **source;
445 {
446   extract_number (destination, *source);
447   *source += 2;
448 }
449 
450 #ifndef EXTRACT_MACROS
451 #undef EXTRACT_NUMBER_AND_INCR
452 #define EXTRACT_NUMBER_AND_INCR(dest, src) \
453   extract_number_and_incr (&dest, &src)
454 #endif /* not EXTRACT_MACROS */
455 
456 #endif /* DEBUG */
457 
458 /* If DEBUG is defined, Regex prints many voluminous messages about what
459    it is doing (if the variable `debug' is nonzero).  If linked with the
460    main program in `iregex.c', you can enter patterns and strings
461    interactively.  And if linked with the main program in `main.c' and
462    the other test files, you can run the already-written tests.  */
463 
464 #ifdef DEBUG
465 
466 /* We use standard I/O for debugging.  */
467 #include <stdio.h>
468 
469 /* It is useful to test things that ``must'' be true when debugging.  */
470 #include <assert.h>
471 
472 static int debug = 0;
473 
474 #define DEBUG_STATEMENT(e) e
475 #define DEBUG_PRINT1(x) if (debug) printf (x)
476 #define DEBUG_PRINT2(x1, x2) if (debug) printf (x1, x2)
477 #define DEBUG_PRINT3(x1, x2, x3) if (debug) printf (x1, x2, x3)
478 #define DEBUG_PRINT4(x1, x2, x3, x4) if (debug) printf (x1, x2, x3, x4)
479 #define DEBUG_PRINT_COMPILED_PATTERN(p, s, e) 				\
480   if (debug) print_partial_compiled_pattern (s, e)
481 #define DEBUG_PRINT_DOUBLE_STRING(w, s1, sz1, s2, sz2)			\
482   if (debug) print_double_string (w, s1, sz1, s2, sz2)
483 
484 
485 extern void printchar ();
486 
487 /* Print the fastmap in human-readable form.  */
488 
489 void
490 print_fastmap (fastmap)
491     char *fastmap;
492 {
493   unsigned was_a_range = 0;
494   unsigned i = 0;
495 
496   while (i < (1 << BYTEWIDTH))
497     {
498       if (fastmap[i++])
499 	{
500 	  was_a_range = 0;
501           printchar (i - 1);
502           while (i < (1 << BYTEWIDTH)  &&  fastmap[i])
503             {
504               was_a_range = 1;
505               i++;
506             }
507 	  if (was_a_range)
508             {
509               printf ("-");
510               printchar (i - 1);
511             }
512         }
513     }
514   putchar ('\n');
515 }
516 
517 
518 /* Print a compiled pattern string in human-readable form, starting at
519    the START pointer into it and ending just before the pointer END.  */
520 
521 void
522 print_partial_compiled_pattern (start, end)
523     unsigned char *start;
524     unsigned char *end;
525 {
526   int mcnt, mcnt2;
527   unsigned char *p = start;
528   unsigned char *pend = end;
529 
530   if (start == NULL)
531     {
532       printf ("(null)\n");
533       return;
534     }
535 
536   /* Loop over pattern commands.  */
537   while (p < pend)
538     {
539       switch ((re_opcode_t) *p++)
540 	{
541         case no_op:
542           printf ("/no_op");
543           break;
544 
545 	case exactn:
546 	  mcnt = *p++;
547           printf ("/exactn/%d", mcnt);
548           do
549 	    {
550               putchar ('/');
551 	      printchar (*p++);
552             }
553           while (--mcnt);
554           break;
555 
556 	case start_memory:
557           mcnt = *p++;
558           printf ("/start_memory/%d/%d", mcnt, *p++);
559           break;
560 
561 	case stop_memory:
562           mcnt = *p++;
563 	  printf ("/stop_memory/%d/%d", mcnt, *p++);
564           break;
565 
566 	case duplicate:
567 	  printf ("/duplicate/%d", *p++);
568 	  break;
569 
570 	case anychar:
571 	  printf ("/anychar");
572 	  break;
573 
574 	case charset:
575         case charset_not:
576           {
577             register int c;
578 
579             printf ("/charset%s",
580 	            (re_opcode_t) *(p - 1) == charset_not ? "_not" : "");
581 
582             assert (p + *p < pend);
583 
584             for (c = 0; c < *p; c++)
585               {
586                 unsigned bit;
587                 unsigned char map_byte = p[1 + c];
588 
589                 putchar ('/');
590 
591 		for (bit = 0; bit < BYTEWIDTH; bit++)
592                   if (map_byte & (1 << bit))
593                     printchar (c * BYTEWIDTH + bit);
594               }
595 	    p += 1 + *p;
596 	    break;
597 	  }
598 
599 	case begline:
600 	  printf ("/begline");
601           break;
602 
603 	case endline:
604           printf ("/endline");
605           break;
606 
607 	case on_failure_jump:
608           extract_number_and_incr (&mcnt, &p);
609   	  printf ("/on_failure_jump/0/%d", mcnt);
610           break;
611 
612 	case on_failure_keep_string_jump:
613           extract_number_and_incr (&mcnt, &p);
614   	  printf ("/on_failure_keep_string_jump/0/%d", mcnt);
615           break;
616 
617 	case dummy_failure_jump:
618           extract_number_and_incr (&mcnt, &p);
619   	  printf ("/dummy_failure_jump/0/%d", mcnt);
620           break;
621 
622 	case push_dummy_failure:
623           printf ("/push_dummy_failure");
624           break;
625 
626         case maybe_pop_jump:
627           extract_number_and_incr (&mcnt, &p);
628   	  printf ("/maybe_pop_jump/0/%d", mcnt);
629 	  break;
630 
631         case pop_failure_jump:
632 	  extract_number_and_incr (&mcnt, &p);
633   	  printf ("/pop_failure_jump/0/%d", mcnt);
634 	  break;
635 
636         case jump_past_alt:
637 	  extract_number_and_incr (&mcnt, &p);
638   	  printf ("/jump_past_alt/0/%d", mcnt);
639 	  break;
640 
641         case jump:
642 	  extract_number_and_incr (&mcnt, &p);
643   	  printf ("/jump/0/%d", mcnt);
644 	  break;
645 
646         case succeed_n:
647           extract_number_and_incr (&mcnt, &p);
648           extract_number_and_incr (&mcnt2, &p);
649  	  printf ("/succeed_n/0/%d/0/%d", mcnt, mcnt2);
650           break;
651 
652         case jump_n:
653           extract_number_and_incr (&mcnt, &p);
654           extract_number_and_incr (&mcnt2, &p);
655  	  printf ("/jump_n/0/%d/0/%d", mcnt, mcnt2);
656           break;
657 
658         case set_number_at:
659           extract_number_and_incr (&mcnt, &p);
660           extract_number_and_incr (&mcnt2, &p);
661  	  printf ("/set_number_at/0/%d/0/%d", mcnt, mcnt2);
662           break;
663 
664         case wordbound:
665 	  printf ("/wordbound");
666 	  break;
667 
668 	case notwordbound:
669 	  printf ("/notwordbound");
670           break;
671 
672 	case wordbeg:
673 	  printf ("/wordbeg");
674 	  break;
675 
676 	case wordend:
677 	  printf ("/wordend");
678 
679 #ifdef emacs
680 	case before_dot:
681 	  printf ("/before_dot");
682           break;
683 
684 	case at_dot:
685 	  printf ("/at_dot");
686           break;
687 
688 	case after_dot:
689 	  printf ("/after_dot");
690           break;
691 
692 	case syntaxspec:
693           printf ("/syntaxspec");
694 	  mcnt = *p++;
695 	  printf ("/%d", mcnt);
696           break;
697 
698 	case notsyntaxspec:
699           printf ("/notsyntaxspec");
700 	  mcnt = *p++;
701 	  printf ("/%d", mcnt);
702 	  break;
703 #endif /* emacs */
704 
705 	case wordchar:
706 	  printf ("/wordchar");
707           break;
708 
709 	case notwordchar:
710 	  printf ("/notwordchar");
711           break;
712 
713 	case begbuf:
714 	  printf ("/begbuf");
715           break;
716 
717 	case endbuf:
718 	  printf ("/endbuf");
719           break;
720 
721         default:
722           printf ("?%d", *(p-1));
723 	}
724     }
725   printf ("/\n");
726 }
727 
728 
729 void
730 print_compiled_pattern (bufp)
731     struct re_pattern_buffer *bufp;
732 {
733   unsigned char *buffer = bufp->buffer;
734 
735   print_partial_compiled_pattern (buffer, buffer + bufp->used);
736   printf ("%d bytes used/%d bytes allocated.\n", bufp->used, bufp->allocated);
737 
738   if (bufp->fastmap_accurate && bufp->fastmap)
739     {
740       printf ("fastmap: ");
741       print_fastmap (bufp->fastmap);
742     }
743 
744   printf ("re_nsub: %d\t", bufp->re_nsub);
745   printf ("regs_alloc: %d\t", bufp->regs_allocated);
746   printf ("can_be_null: %d\t", bufp->can_be_null);
747   printf ("newline_anchor: %d\n", bufp->newline_anchor);
748   printf ("no_sub: %d\t", bufp->no_sub);
749   printf ("not_bol: %d\t", bufp->not_bol);
750   printf ("not_eol: %d\t", bufp->not_eol);
751   printf ("syntax: %d\n", bufp->syntax);
752   /* Perhaps we should print the translate table?  */
753 }
754 
755 
756 void
757 print_double_string (where, string1, size1, string2, size2)
758     const char *where;
759     const char *string1;
760     const char *string2;
761     int size1;
762     int size2;
763 {
764   unsigned this_char;
765 
766   if (where == NULL)
767     printf ("(null)");
768   else
769     {
770       if (FIRST_STRING_P (where))
771         {
772           for (this_char = where - string1; this_char < size1; this_char++)
773             printchar (string1[this_char]);
774 
775           where = string2;
776         }
777 
778       for (this_char = where - string2; this_char < size2; this_char++)
779         printchar (string2[this_char]);
780     }
781 }
782 
783 #else /* not DEBUG */
784 
785 #undef assert
786 #define assert(e)
787 
788 #define DEBUG_STATEMENT(e)
789 #define DEBUG_PRINT1(x)
790 #define DEBUG_PRINT2(x1, x2)
791 #define DEBUG_PRINT3(x1, x2, x3)
792 #define DEBUG_PRINT4(x1, x2, x3, x4)
793 #define DEBUG_PRINT_COMPILED_PATTERN(p, s, e)
794 #define DEBUG_PRINT_DOUBLE_STRING(w, s1, sz1, s2, sz2)
795 
796 #endif /* not DEBUG */
797 
798 /* Set by `re_set_syntax' to the current regexp syntax to recognize.  Can
799    also be assigned to arbitrarily: each pattern buffer stores its own
800    syntax, so it can be changed between regex compilations.  */
801 reg_syntax_t re_syntax_options = RE_SYNTAX_EMACS;
802 
803 
804 /* Specify the precise syntax of regexps for compilation.  This provides
805    for compatibility for various utilities which historically have
806    different, incompatible syntaxes.
807 
808    The argument SYNTAX is a bit mask comprised of the various bits
809    defined in regex.h.  We return the old syntax.  */
810 
811 reg_syntax_t
812 re_set_syntax (syntax)
813     reg_syntax_t syntax;
814 {
815   reg_syntax_t ret = re_syntax_options;
816 
817   re_syntax_options = syntax;
818   return ret;
819 }
820 
821 /* This table gives an error message for each of the error codes listed
822    in regex.h.  Obviously the order here has to be same as there.  */
823 
824 static const char *re_error_msg[] =
825   { NULL,					/* REG_NOERROR */
826     "No match",					/* REG_NOMATCH */
827     "Invalid regular expression",		/* REG_BADPAT */
828     "Invalid collation character",		/* REG_ECOLLATE */
829     "Invalid character class name",		/* REG_ECTYPE */
830     "Trailing backslash",			/* REG_EESCAPE */
831     "Invalid back reference",			/* REG_ESUBREG */
832     "Unmatched [ or [^",			/* REG_EBRACK */
833     "Unmatched ( or \\(",			/* REG_EPAREN */
834     "Unmatched \\{",				/* REG_EBRACE */
835     "Invalid content of \\{\\}",		/* REG_BADBR */
836     "Invalid range end",			/* REG_ERANGE */
837     "Memory exhausted",				/* REG_ESPACE */
838     "Invalid preceding regular expression",	/* REG_BADRPT */
839     "Premature end of regular expression",	/* REG_EEND */
840     "Regular expression too big",		/* REG_ESIZE */
841     "Unmatched ) or \\)",			/* REG_ERPAREN */
842   };
843 
844 /* Subroutine declarations and macros for regex_compile.  */
845 
846 static void store_op1 (), store_op2 ();
847 static void insert_op1 (), insert_op2 ();
848 static boolean at_begline_loc_p (), at_endline_loc_p ();
849 static boolean group_in_compile_stack ();
850 static reg_errcode_t compile_range ();
851 
852 /* Fetch the next character in the uncompiled pattern---translating it
853    if necessary.  Also cast from a signed character in the constant
854    string passed to us by the user to an unsigned char that we can use
855    as an array index (in, e.g., `translate').  */
856 #define PATFETCH(c)							\
857   do {if (p == pend) return REG_EEND;					\
858     c = (unsigned char) *p++;						\
859     if (translate) c = translate[c]; 					\
860   } while (0)
861 
862 /* Fetch the next character in the uncompiled pattern, with no
863    translation.  */
864 #define PATFETCH_RAW(c)							\
865   do {if (p == pend) return REG_EEND;					\
866     c = (unsigned char) *p++; 						\
867   } while (0)
868 
869 /* Go backwards one character in the pattern.  */
870 #define PATUNFETCH p--
871 
872 
873 /* If `translate' is non-null, return translate[D], else just D.  We
874    cast the subscript to translate because some data is declared as
875    `char *', to avoid warnings when a string constant is passed.  But
876    when we use a character as a subscript we must make it unsigned.  */
877 #define TRANSLATE(d) (translate ? translate[(unsigned char) (d)] : (d))
878 
879 
880 /* Macros for outputting the compiled pattern into `buffer'.  */
881 
882 /* If the buffer isn't allocated when it comes in, use this.  */
883 #define INIT_BUF_SIZE  32
884 
885 /* Make sure we have at least N more bytes of space in buffer.  */
886 #define GET_BUFFER_SPACE(n)						\
887     while (b - bufp->buffer + (n) > bufp->allocated)			\
888       EXTEND_BUFFER ()
889 
890 /* Make sure we have one more byte of buffer space and then add C to it.  */
891 #define BUF_PUSH(c)							\
892   do {									\
893     GET_BUFFER_SPACE (1);						\
894     *b++ = (unsigned char) (c);						\
895   } while (0)
896 
897 
898 /* Ensure we have two more bytes of buffer space and then append C1 and C2.  */
899 #define BUF_PUSH_2(c1, c2)						\
900   do {									\
901     GET_BUFFER_SPACE (2);						\
902     *b++ = (unsigned char) (c1);					\
903     *b++ = (unsigned char) (c2);					\
904   } while (0)
905 
906 
907 /* As with BUF_PUSH_2, except for three bytes.  */
908 #define BUF_PUSH_3(c1, c2, c3)						\
909   do {									\
910     GET_BUFFER_SPACE (3);						\
911     *b++ = (unsigned char) (c1);					\
912     *b++ = (unsigned char) (c2);					\
913     *b++ = (unsigned char) (c3);					\
914   } while (0)
915 
916 
917 /* Store a jump with opcode OP at LOC to location TO.  We store a
918    relative address offset by the three bytes the jump itself occupies.  */
919 #define STORE_JUMP(op, loc, to) \
920   store_op1 (op, loc, (to) - (loc) - 3)
921 
922 /* Likewise, for a two-argument jump.  */
923 #define STORE_JUMP2(op, loc, to, arg) \
924   store_op2 (op, loc, (to) - (loc) - 3, arg)
925 
926 /* Like `STORE_JUMP', but for inserting.  Assume `b' is the buffer end.  */
927 #define INSERT_JUMP(op, loc, to) \
928   insert_op1 (op, loc, (to) - (loc) - 3, b)
929 
930 /* Like `STORE_JUMP2', but for inserting.  Assume `b' is the buffer end.  */
931 #define INSERT_JUMP2(op, loc, to, arg) \
932   insert_op2 (op, loc, (to) - (loc) - 3, arg, b)
933 
934 
935 /* This is not an arbitrary limit: the arguments which represent offsets
936    into the pattern are two bytes long.  So if 2^16 bytes turns out to
937    be too small, many things would have to change.  */
938 #define MAX_BUF_SIZE (1L << 16)
939 
940 
941 /* Extend the buffer by twice its current size via realloc and
942    reset the pointers that pointed into the old block to point to the
943    correct places in the new one.  If extending the buffer results in it
944    being larger than MAX_BUF_SIZE, then flag memory exhausted.  */
945 #define EXTEND_BUFFER()							\
946   do { 									\
947     unsigned char *old_buffer = bufp->buffer;				\
948     if (bufp->allocated == MAX_BUF_SIZE) 				\
949       return REG_ESIZE;							\
950     bufp->allocated <<= 1;						\
951     if (bufp->allocated > MAX_BUF_SIZE)					\
952       bufp->allocated = MAX_BUF_SIZE; 					\
953     bufp->buffer = (unsigned char *) realloc (bufp->buffer, bufp->allocated);\
954     if (bufp->buffer == NULL)						\
955       return REG_ESPACE;						\
956     /* If the buffer moved, move all the pointers into it.  */		\
957     if (old_buffer != bufp->buffer)					\
958       {									\
959         b = (b - old_buffer) + bufp->buffer;				\
960         begalt = (begalt - old_buffer) + bufp->buffer;			\
961         if (fixup_alt_jump)						\
962           fixup_alt_jump = (fixup_alt_jump - old_buffer) + bufp->buffer;\
963         if (laststart)							\
964           laststart = (laststart - old_buffer) + bufp->buffer;		\
965         if (pending_exact)						\
966           pending_exact = (pending_exact - old_buffer) + bufp->buffer;	\
967       }									\
968   } while (0)
969 
970 
971 /* Since we have one byte reserved for the register number argument to
972    {start,stop}_memory, the maximum number of groups we can report
973    things about is what fits in that byte.  */
974 #define MAX_REGNUM 255
975 
976 /* But patterns can have more than `MAX_REGNUM' registers.  We just
977    ignore the excess.  */
978 typedef unsigned regnum_t;
979 
980 
981 /* Macros for the compile stack.  */
982 
983 /* Since offsets can go either forwards or backwards, this type needs to
984    be able to hold values from -(MAX_BUF_SIZE - 1) to MAX_BUF_SIZE - 1.  */
985 typedef int pattern_offset_t;
986 
987 typedef struct
988 {
989   pattern_offset_t begalt_offset;
990   pattern_offset_t fixup_alt_jump;
991   pattern_offset_t inner_group_offset;
992   pattern_offset_t laststart_offset;
993   regnum_t regnum;
994 } compile_stack_elt_t;
995 
996 
997 typedef struct
998 {
999   compile_stack_elt_t *stack;
1000   unsigned size;
1001   unsigned avail;			/* Offset of next open position.  */
1002 } compile_stack_type;
1003 
1004 
1005 #define INIT_COMPILE_STACK_SIZE 32
1006 
1007 #define COMPILE_STACK_EMPTY  (compile_stack.avail == 0)
1008 #define COMPILE_STACK_FULL  (compile_stack.avail == compile_stack.size)
1009 
1010 /* The next available element.  */
1011 #define COMPILE_STACK_TOP (compile_stack.stack[compile_stack.avail])
1012 
1013 
1014 /* Set the bit for character C in a list.  */
1015 #define SET_LIST_BIT(c)                               \
1016   (b[((unsigned char) (c)) / BYTEWIDTH]               \
1017    |= 1 << (((unsigned char) c) % BYTEWIDTH))
1018 
1019 
1020 /* Get the next unsigned number in the uncompiled pattern.  */
1021 #define GET_UNSIGNED_NUMBER(num) 					\
1022   { if (p != pend)							\
1023      {									\
1024        PATFETCH (c); 							\
1025        while (ISDIGIT (c)) 						\
1026          { 								\
1027            if (num < 0)							\
1028               num = 0;							\
1029            num = num * 10 + c - '0'; 					\
1030            if (p == pend) 						\
1031               break; 							\
1032            PATFETCH (c);						\
1033          } 								\
1034        } 								\
1035     }
1036 
1037 #define CHAR_CLASS_MAX_LENGTH  6 /* Namely, `xdigit'.  */
1038 
1039 #define IS_CHAR_CLASS(string)						\
1040    (STREQ (string, "alpha") || STREQ (string, "upper")			\
1041     || STREQ (string, "lower") || STREQ (string, "digit")		\
1042     || STREQ (string, "alnum") || STREQ (string, "xdigit")		\
1043     || STREQ (string, "space") || STREQ (string, "print")		\
1044     || STREQ (string, "punct") || STREQ (string, "graph")		\
1045     || STREQ (string, "cntrl") || STREQ (string, "blank"))
1046 
1047 /* `regex_compile' compiles PATTERN (of length SIZE) according to SYNTAX.
1048    Returns one of error codes defined in `regex.h', or zero for success.
1049 
1050    Assumes the `allocated' (and perhaps `buffer') and `translate'
1051    fields are set in BUFP on entry.
1052 
1053    If it succeeds, results are put in BUFP (if it returns an error, the
1054    contents of BUFP are undefined):
1055      `buffer' is the compiled pattern;
1056      `syntax' is set to SYNTAX;
1057      `used' is set to the length of the compiled pattern;
1058      `fastmap_accurate' is zero;
1059      `re_nsub' is the number of subexpressions in PATTERN;
1060      `not_bol' and `not_eol' are zero;
1061 
1062    The `fastmap' and `newline_anchor' fields are neither
1063    examined nor set.  */
1064 
1065 static reg_errcode_t
1066 regex_compile (pattern, size, syntax, bufp)
1067      const char *pattern;
1068      int size;
1069      reg_syntax_t syntax;
1070      struct re_pattern_buffer *bufp;
1071 {
1072   /* We fetch characters from PATTERN here.  Even though PATTERN is
1073      `char *' (i.e., signed), we declare these variables as unsigned, so
1074      they can be reliably used as array indices.  */
1075   register unsigned char c, c1;
1076 
1077   /* A random tempory spot in PATTERN.  */
1078   const char *p1;
1079 
1080   /* Points to the end of the buffer, where we should append.  */
1081   register unsigned char *b;
1082 
1083   /* Keeps track of unclosed groups.  */
1084   compile_stack_type compile_stack;
1085 
1086   /* Points to the current (ending) position in the pattern.  */
1087   const char *p = pattern;
1088   const char *pend = pattern + size;
1089 
1090   /* How to translate the characters in the pattern.  */
1091   char *translate = bufp->translate;
1092 
1093   /* Address of the count-byte of the most recently inserted `exactn'
1094      command.  This makes it possible to tell if a new exact-match
1095      character can be added to that command or if the character requires
1096      a new `exactn' command.  */
1097   unsigned char *pending_exact = 0;
1098 
1099   /* Address of start of the most recently finished expression.
1100      This tells, e.g., postfix * where to find the start of its
1101      operand.  Reset at the beginning of groups and alternatives.  */
1102   unsigned char *laststart = 0;
1103 
1104   /* Address of beginning of regexp, or inside of last group.  */
1105   unsigned char *begalt;
1106 
1107   /* Place in the uncompiled pattern (i.e., the {) to
1108      which to go back if the interval is invalid.  */
1109   const char *beg_interval;
1110 
1111   /* Address of the place where a forward jump should go to the end of
1112      the containing expression.  Each alternative of an `or' -- except the
1113      last -- ends with a forward jump of this sort.  */
1114   unsigned char *fixup_alt_jump = 0;
1115 
1116   /* Counts open-groups as they are encountered.  Remembered for the
1117      matching close-group on the compile stack, so the same register
1118      number is put in the stop_memory as the start_memory.  */
1119   regnum_t regnum = 0;
1120 
1121 #ifdef DEBUG
1122   DEBUG_PRINT1 ("\nCompiling pattern: ");
1123   if (debug)
1124     {
1125       unsigned debug_count;
1126 
1127       for (debug_count = 0; debug_count < size; debug_count++)
1128         printchar (pattern[debug_count]);
1129       putchar ('\n');
1130     }
1131 #endif /* DEBUG */
1132 
1133   /* Initialize the compile stack.  */
1134   compile_stack.stack = TALLOC (INIT_COMPILE_STACK_SIZE, compile_stack_elt_t);
1135   if (compile_stack.stack == NULL)
1136     return REG_ESPACE;
1137 
1138   compile_stack.size = INIT_COMPILE_STACK_SIZE;
1139   compile_stack.avail = 0;
1140 
1141   /* Initialize the pattern buffer.  */
1142   bufp->syntax = syntax;
1143   bufp->fastmap_accurate = 0;
1144   bufp->not_bol = bufp->not_eol = 0;
1145 
1146   /* Set `used' to zero, so that if we return an error, the pattern
1147      printer (for debugging) will think there's no pattern.  We reset it
1148      at the end.  */
1149   bufp->used = 0;
1150 
1151   /* Always count groups, whether or not bufp->no_sub is set.  */
1152   bufp->re_nsub = 0;
1153 
1154 #if !defined (emacs) && !defined (SYNTAX_TABLE)
1155   /* Initialize the syntax table.  */
1156    init_syntax_once ();
1157 #endif
1158 
1159   if (bufp->allocated == 0)
1160     {
1161       if (bufp->buffer)
1162 	{ /* If zero allocated, but buffer is non-null, try to realloc
1163              enough space.  This loses if buffer's address is bogus, but
1164              that is the user's responsibility.  */
1165           RETALLOC (bufp->buffer, INIT_BUF_SIZE, unsigned char);
1166         }
1167       else
1168         { /* Caller did not allocate a buffer.  Do it for them.  */
1169           bufp->buffer = TALLOC (INIT_BUF_SIZE, unsigned char);
1170         }
1171       if (!bufp->buffer) return REG_ESPACE;
1172 
1173       bufp->allocated = INIT_BUF_SIZE;
1174     }
1175 
1176   begalt = b = bufp->buffer;
1177 
1178   /* Loop through the uncompiled pattern until we're at the end.  */
1179   while (p != pend)
1180     {
1181       PATFETCH (c);
1182 
1183       switch (c)
1184         {
1185         case '^':
1186           {
1187             if (   /* If at start of pattern, it's an operator.  */
1188                    p == pattern + 1
1189                    /* If context independent, it's an operator.  */
1190                 || syntax & RE_CONTEXT_INDEP_ANCHORS
1191                    /* Otherwise, depends on what's come before.  */
1192                 || at_begline_loc_p (pattern, p, syntax))
1193               BUF_PUSH (begline);
1194             else
1195               goto normal_char;
1196           }
1197           break;
1198 
1199 
1200         case '$':
1201           {
1202             if (   /* If at end of pattern, it's an operator.  */
1203                    p == pend
1204                    /* If context independent, it's an operator.  */
1205                 || syntax & RE_CONTEXT_INDEP_ANCHORS
1206                    /* Otherwise, depends on what's next.  */
1207                 || at_endline_loc_p (p, pend, syntax))
1208                BUF_PUSH (endline);
1209              else
1210                goto normal_char;
1211            }
1212            break;
1213 
1214 
1215 	case '+':
1216         case '?':
1217           if ((syntax & RE_BK_PLUS_QM)
1218               || (syntax & RE_LIMITED_OPS))
1219             goto normal_char;
1220         handle_plus:
1221         case '*':
1222           /* If there is no previous pattern... */
1223           if (!laststart)
1224             {
1225               if (syntax & RE_CONTEXT_INVALID_OPS)
1226                 return REG_BADRPT;
1227               else if (!(syntax & RE_CONTEXT_INDEP_OPS))
1228                 goto normal_char;
1229             }
1230 
1231           {
1232             /* Are we optimizing this jump?  */
1233             boolean keep_string_p = false;
1234 
1235             /* 1 means zero (many) matches is allowed.  */
1236             char zero_times_ok = 0, many_times_ok = 0;
1237 
1238             /* If there is a sequence of repetition chars, collapse it
1239                down to just one (the right one).  We can't combine
1240                interval operators with these because of, e.g., `a{2}*',
1241                which should only match an even number of `a's.  */
1242 
1243             for (;;)
1244               {
1245                 zero_times_ok |= c != '+';
1246                 many_times_ok |= c != '?';
1247 
1248                 if (p == pend)
1249                   break;
1250 
1251                 PATFETCH (c);
1252 
1253                 if (c == '*'
1254                     || (!(syntax & RE_BK_PLUS_QM) && (c == '+' || c == '?')))
1255                   ;
1256 
1257                 else if (syntax & RE_BK_PLUS_QM  &&  c == '\\')
1258                   {
1259                     if (p == pend) return REG_EESCAPE;
1260 
1261                     PATFETCH (c1);
1262                     if (!(c1 == '+' || c1 == '?'))
1263                       {
1264                         PATUNFETCH;
1265                         PATUNFETCH;
1266                         break;
1267                       }
1268 
1269                     c = c1;
1270                   }
1271                 else
1272                   {
1273                     PATUNFETCH;
1274                     break;
1275                   }
1276 
1277                 /* If we get here, we found another repeat character.  */
1278                }
1279 
1280             /* Star, etc. applied to an empty pattern is equivalent
1281                to an empty pattern.  */
1282             if (!laststart)
1283               break;
1284 
1285             /* Now we know whether or not zero matches is allowed
1286                and also whether or not two or more matches is allowed.  */
1287             if (many_times_ok)
1288               { /* More than one repetition is allowed, so put in at the
1289                    end a backward relative jump from `b' to before the next
1290                    jump we're going to put in below (which jumps from
1291                    laststart to after this jump).
1292 
1293                    But if we are at the `*' in the exact sequence `.*\n',
1294                    insert an unconditional jump backwards to the .,
1295                    instead of the beginning of the loop.  This way we only
1296                    push a failure point once, instead of every time
1297                    through the loop.  */
1298                 assert (p - 1 > pattern);
1299 
1300                 /* Allocate the space for the jump.  */
1301                 GET_BUFFER_SPACE (3);
1302 
1303                 /* We know we are not at the first character of the pattern,
1304                    because laststart was nonzero.  And we've already
1305                    incremented `p', by the way, to be the character after
1306                    the `*'.  Do we have to do something analogous here
1307                    for null bytes, because of RE_DOT_NOT_NULL?  */
1308                 if (TRANSLATE (*(p - 2)) == TRANSLATE ('.')
1309 		    && zero_times_ok
1310                     && p < pend && TRANSLATE (*p) == TRANSLATE ('\n')
1311                     && !(syntax & RE_DOT_NEWLINE))
1312                   { /* We have .*\n.  */
1313                     STORE_JUMP (jump, b, laststart);
1314                     keep_string_p = true;
1315                   }
1316                 else
1317                   /* Anything else.  */
1318                   STORE_JUMP (maybe_pop_jump, b, laststart - 3);
1319 
1320                 /* We've added more stuff to the buffer.  */
1321                 b += 3;
1322               }
1323 
1324             /* On failure, jump from laststart to b + 3, which will be the
1325                end of the buffer after this jump is inserted.  */
1326             GET_BUFFER_SPACE (3);
1327             INSERT_JUMP (keep_string_p ? on_failure_keep_string_jump
1328                                        : on_failure_jump,
1329                          laststart, b + 3);
1330             pending_exact = 0;
1331             b += 3;
1332 
1333             if (!zero_times_ok)
1334               {
1335                 /* At least one repetition is required, so insert a
1336                    `dummy_failure_jump' before the initial
1337                    `on_failure_jump' instruction of the loop. This
1338                    effects a skip over that instruction the first time
1339                    we hit that loop.  */
1340                 GET_BUFFER_SPACE (3);
1341                 INSERT_JUMP (dummy_failure_jump, laststart, laststart + 6);
1342                 b += 3;
1343               }
1344             }
1345 	  break;
1346 
1347 
1348 	case '.':
1349           laststart = b;
1350           BUF_PUSH (anychar);
1351           break;
1352 
1353 
1354         case '[':
1355           {
1356             boolean had_char_class = false;
1357 
1358             if (p == pend) return REG_EBRACK;
1359 
1360             /* Ensure that we have enough space to push a charset: the
1361                opcode, the length count, and the bitset; 34 bytes in all.  */
1362 	    GET_BUFFER_SPACE (34);
1363 
1364             laststart = b;
1365 
1366             /* We test `*p == '^' twice, instead of using an if
1367                statement, so we only need one BUF_PUSH.  */
1368             BUF_PUSH (*p == '^' ? charset_not : charset);
1369             if (*p == '^')
1370               p++;
1371 
1372             /* Remember the first position in the bracket expression.  */
1373             p1 = p;
1374 
1375             /* Push the number of bytes in the bitmap.  */
1376             BUF_PUSH ((1 << BYTEWIDTH) / BYTEWIDTH);
1377 
1378             /* Clear the whole map.  */
1379             bzero (b, (1 << BYTEWIDTH) / BYTEWIDTH);
1380 
1381             /* charset_not matches newline according to a syntax bit.  */
1382             if ((re_opcode_t) b[-2] == charset_not
1383                 && (syntax & RE_HAT_LISTS_NOT_NEWLINE))
1384               SET_LIST_BIT ('\n');
1385 
1386             /* Read in characters and ranges, setting map bits.  */
1387             for (;;)
1388               {
1389                 if (p == pend) return REG_EBRACK;
1390 
1391                 PATFETCH (c);
1392 
1393                 /* \ might escape characters inside [...] and [^...].  */
1394                 if ((syntax & RE_BACKSLASH_ESCAPE_IN_LISTS) && c == '\\')
1395                   {
1396                     if (p == pend) return REG_EESCAPE;
1397 
1398                     PATFETCH (c1);
1399                     SET_LIST_BIT (c1);
1400                     continue;
1401                   }
1402 
1403                 /* Could be the end of the bracket expression.  If it's
1404                    not (i.e., when the bracket expression is `[]' so
1405                    far), the ']' character bit gets set way below.  */
1406                 if (c == ']' && p != p1 + 1)
1407                   break;
1408 
1409                 /* Look ahead to see if it's a range when the last thing
1410                    was a character class.  */
1411                 if (had_char_class && c == '-' && *p != ']')
1412                   return REG_ERANGE;
1413 
1414                 /* Look ahead to see if it's a range when the last thing
1415                    was a character: if this is a hyphen not at the
1416                    beginning or the end of a list, then it's the range
1417                    operator.  */
1418                 if (c == '-'
1419                     && !(p - 2 >= pattern && p[-2] == '[')
1420                     && !(p - 3 >= pattern && p[-3] == '[' && p[-2] == '^')
1421                     && *p != ']')
1422                   {
1423                     reg_errcode_t ret
1424                       = compile_range (&p, pend, translate, syntax, b);
1425                     if (ret != REG_NOERROR) return ret;
1426                   }
1427 
1428                 else if (p[0] == '-' && p[1] != ']')
1429                   { /* This handles ranges made up of characters only.  */
1430                     reg_errcode_t ret;
1431 
1432 		    /* Move past the `-'.  */
1433                     PATFETCH (c1);
1434 
1435                     ret = compile_range (&p, pend, translate, syntax, b);
1436                     if (ret != REG_NOERROR) return ret;
1437                   }
1438 
1439                 /* See if we're at the beginning of a possible character
1440                    class.  */
1441 
1442                 else if (syntax & RE_CHAR_CLASSES && c == '[' && *p == ':')
1443                   { /* Leave room for the null.  */
1444                     char str[CHAR_CLASS_MAX_LENGTH + 1];
1445 
1446                     PATFETCH (c);
1447                     c1 = 0;
1448 
1449                     /* If pattern is `[[:'.  */
1450                     if (p == pend) return REG_EBRACK;
1451 
1452                     for (;;)
1453                       {
1454                         PATFETCH (c);
1455                         if (c == ':' || c == ']' || p == pend
1456                             || c1 == CHAR_CLASS_MAX_LENGTH)
1457                           break;
1458                         str[c1++] = c;
1459                       }
1460                     str[c1] = '\0';
1461 
1462                     /* If isn't a word bracketed by `[:' and:`]':
1463                        undo the ending character, the letters, and leave
1464                        the leading `:' and `[' (but set bits for them).  */
1465                     if (c == ':' && *p == ']')
1466                       {
1467                         int ch;
1468                         boolean is_alnum = STREQ (str, "alnum");
1469                         boolean is_alpha = STREQ (str, "alpha");
1470                         boolean is_blank = STREQ (str, "blank");
1471                         boolean is_cntrl = STREQ (str, "cntrl");
1472                         boolean is_digit = STREQ (str, "digit");
1473                         boolean is_graph = STREQ (str, "graph");
1474                         boolean is_lower = STREQ (str, "lower");
1475                         boolean is_print = STREQ (str, "print");
1476                         boolean is_punct = STREQ (str, "punct");
1477                         boolean is_space = STREQ (str, "space");
1478                         boolean is_upper = STREQ (str, "upper");
1479                         boolean is_xdigit = STREQ (str, "xdigit");
1480 
1481                         if (!IS_CHAR_CLASS (str)) return REG_ECTYPE;
1482 
1483                         /* Throw away the ] at the end of the character
1484                            class.  */
1485                         PATFETCH (c);
1486 
1487                         if (p == pend) return REG_EBRACK;
1488 
1489                         for (ch = 0; ch < 1 << BYTEWIDTH; ch++)
1490                           {
1491                             if (   (is_alnum  && ISALNUM (ch))
1492                                 || (is_alpha  && ISALPHA (ch))
1493                                 || (is_blank  && ISBLANK (ch))
1494                                 || (is_cntrl  && ISCNTRL (ch))
1495                                 || (is_digit  && ISDIGIT (ch))
1496                                 || (is_graph  && ISGRAPH (ch))
1497                                 || (is_lower  && ISLOWER (ch))
1498                                 || (is_print  && ISPRINT (ch))
1499                                 || (is_punct  && ISPUNCT (ch))
1500                                 || (is_space  && ISSPACE (ch))
1501                                 || (is_upper  && ISUPPER (ch))
1502                                 || (is_xdigit && ISXDIGIT (ch)))
1503                             SET_LIST_BIT (ch);
1504                           }
1505                         had_char_class = true;
1506                       }
1507                     else
1508                       {
1509                         c1++;
1510                         while (c1--)
1511                           PATUNFETCH;
1512                         SET_LIST_BIT ('[');
1513                         SET_LIST_BIT (':');
1514                         had_char_class = false;
1515                       }
1516                   }
1517                 else
1518                   {
1519                     had_char_class = false;
1520                     SET_LIST_BIT (c);
1521                   }
1522               }
1523 
1524             /* Discard any (non)matching list bytes that are all 0 at the
1525                end of the map.  Decrease the map-length byte too.  */
1526             while ((int) b[-1] > 0 && b[b[-1] - 1] == 0)
1527               b[-1]--;
1528             b += b[-1];
1529           }
1530           break;
1531 
1532 
1533 	case '(':
1534           if (syntax & RE_NO_BK_PARENS)
1535             goto handle_open;
1536           else
1537             goto normal_char;
1538 
1539 
1540         case ')':
1541           if (syntax & RE_NO_BK_PARENS)
1542             goto handle_close;
1543           else
1544             goto normal_char;
1545 
1546 
1547         case '\n':
1548           if (syntax & RE_NEWLINE_ALT)
1549             goto handle_alt;
1550           else
1551             goto normal_char;
1552 
1553 
1554 	case '|':
1555           if (syntax & RE_NO_BK_VBAR)
1556             goto handle_alt;
1557           else
1558             goto normal_char;
1559 
1560 
1561         case '{':
1562            if (syntax & RE_INTERVALS && syntax & RE_NO_BK_BRACES)
1563              goto handle_interval;
1564            else
1565              goto normal_char;
1566 
1567 
1568         case '\\':
1569           if (p == pend) return REG_EESCAPE;
1570 
1571           /* Do not translate the character after the \, so that we can
1572              distinguish, e.g., \B from \b, even if we normally would
1573              translate, e.g., B to b.  */
1574           PATFETCH_RAW (c);
1575 
1576           switch (c)
1577             {
1578             case '(':
1579               if (syntax & RE_NO_BK_PARENS)
1580                 goto normal_backslash;
1581 
1582             handle_open:
1583               bufp->re_nsub++;
1584               regnum++;
1585 
1586               if (COMPILE_STACK_FULL)
1587                 {
1588                   RETALLOC (compile_stack.stack, compile_stack.size << 1,
1589                             compile_stack_elt_t);
1590                   if (compile_stack.stack == NULL) return REG_ESPACE;
1591 
1592                   compile_stack.size <<= 1;
1593                 }
1594 
1595               /* These are the values to restore when we hit end of this
1596                  group.  They are all relative offsets, so that if the
1597                  whole pattern moves because of realloc, they will still
1598                  be valid.  */
1599               COMPILE_STACK_TOP.begalt_offset = begalt - bufp->buffer;
1600               COMPILE_STACK_TOP.fixup_alt_jump
1601                 = fixup_alt_jump ? fixup_alt_jump - bufp->buffer + 1 : 0;
1602               COMPILE_STACK_TOP.laststart_offset = b - bufp->buffer;
1603               COMPILE_STACK_TOP.regnum = regnum;
1604 
1605               /* We will eventually replace the 0 with the number of
1606                  groups inner to this one.  But do not push a
1607                  start_memory for groups beyond the last one we can
1608                  represent in the compiled pattern.  */
1609               if (regnum <= MAX_REGNUM)
1610                 {
1611                   COMPILE_STACK_TOP.inner_group_offset = b - bufp->buffer + 2;
1612                   BUF_PUSH_3 (start_memory, regnum, 0);
1613                 }
1614 
1615               compile_stack.avail++;
1616 
1617               fixup_alt_jump = 0;
1618               laststart = 0;
1619               begalt = b;
1620 	      /* If we've reached MAX_REGNUM groups, then this open
1621 		 won't actually generate any code, so we'll have to
1622 		 clear pending_exact explicitly.  */
1623 	      pending_exact = 0;
1624               break;
1625 
1626 
1627             case ')':
1628               if (syntax & RE_NO_BK_PARENS) goto normal_backslash;
1629 
1630               if (COMPILE_STACK_EMPTY)
1631                 if (syntax & RE_UNMATCHED_RIGHT_PAREN_ORD)
1632                   goto normal_backslash;
1633                 else
1634                   return REG_ERPAREN;
1635 
1636             handle_close:
1637               if (fixup_alt_jump)
1638                 { /* Push a dummy failure point at the end of the
1639                      alternative for a possible future
1640                      `pop_failure_jump' to pop.  See comments at
1641                      `push_dummy_failure' in `re_match_2'.  */
1642                   BUF_PUSH (push_dummy_failure);
1643 
1644                   /* We allocated space for this jump when we assigned
1645                      to `fixup_alt_jump', in the `handle_alt' case below.  */
1646                   STORE_JUMP (jump_past_alt, fixup_alt_jump, b - 1);
1647                 }
1648 
1649               /* See similar code for backslashed left paren above.  */
1650               if (COMPILE_STACK_EMPTY)
1651                 if (syntax & RE_UNMATCHED_RIGHT_PAREN_ORD)
1652                   goto normal_char;
1653                 else
1654                   return REG_ERPAREN;
1655 
1656               /* Since we just checked for an empty stack above, this
1657                  ``can't happen''.  */
1658               assert (compile_stack.avail != 0);
1659               {
1660                 /* We don't just want to restore into `regnum', because
1661                    later groups should continue to be numbered higher,
1662                    as in `(ab)c(de)' -- the second group is #2.  */
1663                 regnum_t this_group_regnum;
1664 
1665                 compile_stack.avail--;
1666                 begalt = bufp->buffer + COMPILE_STACK_TOP.begalt_offset;
1667                 fixup_alt_jump
1668                   = COMPILE_STACK_TOP.fixup_alt_jump
1669                     ? bufp->buffer + COMPILE_STACK_TOP.fixup_alt_jump - 1
1670                     : 0;
1671                 laststart = bufp->buffer + COMPILE_STACK_TOP.laststart_offset;
1672                 this_group_regnum = COMPILE_STACK_TOP.regnum;
1673 		/* If we've reached MAX_REGNUM groups, then this open
1674 		   won't actually generate any code, so we'll have to
1675 		   clear pending_exact explicitly.  */
1676 		pending_exact = 0;
1677 
1678                 /* We're at the end of the group, so now we know how many
1679                    groups were inside this one.  */
1680                 if (this_group_regnum <= MAX_REGNUM)
1681                   {
1682                     unsigned char *inner_group_loc
1683                       = bufp->buffer + COMPILE_STACK_TOP.inner_group_offset;
1684 
1685                     *inner_group_loc = regnum - this_group_regnum;
1686                     BUF_PUSH_3 (stop_memory, this_group_regnum,
1687                                 regnum - this_group_regnum);
1688                   }
1689               }
1690               break;
1691 
1692 
1693             case '|':					/* `\|'.  */
1694               if (syntax & RE_LIMITED_OPS || syntax & RE_NO_BK_VBAR)
1695                 goto normal_backslash;
1696             handle_alt:
1697               if (syntax & RE_LIMITED_OPS)
1698                 goto normal_char;
1699 
1700               /* Insert before the previous alternative a jump which
1701                  jumps to this alternative if the former fails.  */
1702               GET_BUFFER_SPACE (3);
1703               INSERT_JUMP (on_failure_jump, begalt, b + 6);
1704               pending_exact = 0;
1705               b += 3;
1706 
1707               /* The alternative before this one has a jump after it
1708                  which gets executed if it gets matched.  Adjust that
1709                  jump so it will jump to this alternative's analogous
1710                  jump (put in below, which in turn will jump to the next
1711                  (if any) alternative's such jump, etc.).  The last such
1712                  jump jumps to the correct final destination.  A picture:
1713                           _____ _____
1714                           |   | |   |
1715                           |   v |   v
1716                          a | b   | c
1717 
1718                  If we are at `b', then fixup_alt_jump right now points to a
1719                  three-byte space after `a'.  We'll put in the jump, set
1720                  fixup_alt_jump to right after `b', and leave behind three
1721                  bytes which we'll fill in when we get to after `c'.  */
1722 
1723               if (fixup_alt_jump)
1724                 STORE_JUMP (jump_past_alt, fixup_alt_jump, b);
1725 
1726               /* Mark and leave space for a jump after this alternative,
1727                  to be filled in later either by next alternative or
1728                  when know we're at the end of a series of alternatives.  */
1729               fixup_alt_jump = b;
1730               GET_BUFFER_SPACE (3);
1731               b += 3;
1732 
1733               laststart = 0;
1734               begalt = b;
1735               break;
1736 
1737 
1738             case '{':
1739               /* If \{ is a literal.  */
1740               if (!(syntax & RE_INTERVALS)
1741                      /* If we're at `\{' and it's not the open-interval
1742                         operator.  */
1743                   || ((syntax & RE_INTERVALS) && (syntax & RE_NO_BK_BRACES))
1744                   || (p - 2 == pattern  &&  p == pend))
1745                 goto normal_backslash;
1746 
1747             handle_interval:
1748               {
1749                 /* If got here, then the syntax allows intervals.  */
1750 
1751                 /* At least (most) this many matches must be made.  */
1752                 int lower_bound = -1, upper_bound = -1;
1753 
1754                 beg_interval = p - 1;
1755 
1756                 if (p == pend)
1757                   {
1758                     if (syntax & RE_NO_BK_BRACES)
1759                       goto unfetch_interval;
1760                     else
1761                       return REG_EBRACE;
1762                   }
1763 
1764                 GET_UNSIGNED_NUMBER (lower_bound);
1765 
1766                 if (c == ',')
1767                   {
1768                     GET_UNSIGNED_NUMBER (upper_bound);
1769                     if (upper_bound < 0) upper_bound = RE_DUP_MAX;
1770                   }
1771                 else
1772                   /* Interval such as `{1}' => match exactly once. */
1773                   upper_bound = lower_bound;
1774 
1775                 if (lower_bound < 0 || upper_bound > RE_DUP_MAX
1776                     || lower_bound > upper_bound)
1777                   {
1778                     if (syntax & RE_NO_BK_BRACES)
1779                       goto unfetch_interval;
1780                     else
1781                       return REG_BADBR;
1782                   }
1783 
1784                 if (!(syntax & RE_NO_BK_BRACES))
1785                   {
1786                     if (c != '\\') return REG_EBRACE;
1787 
1788                     PATFETCH (c);
1789                   }
1790 
1791                 if (c != '}')
1792                   {
1793                     if (syntax & RE_NO_BK_BRACES)
1794                       goto unfetch_interval;
1795                     else
1796                       return REG_BADBR;
1797                   }
1798 
1799                 /* We just parsed a valid interval.  */
1800 
1801                 /* If it's invalid to have no preceding re.  */
1802                 if (!laststart)
1803                   {
1804                     if (syntax & RE_CONTEXT_INVALID_OPS)
1805                       return REG_BADRPT;
1806                     else if (syntax & RE_CONTEXT_INDEP_OPS)
1807                       laststart = b;
1808                     else
1809                       goto unfetch_interval;
1810                   }
1811 
1812                 /* If the upper bound is zero, don't want to succeed at
1813                    all; jump from `laststart' to `b + 3', which will be
1814                    the end of the buffer after we insert the jump.  */
1815                  if (upper_bound == 0)
1816                    {
1817                      GET_BUFFER_SPACE (3);
1818                      INSERT_JUMP (jump, laststart, b + 3);
1819                      b += 3;
1820                    }
1821 
1822                  /* Otherwise, we have a nontrivial interval.  When
1823                     we're all done, the pattern will look like:
1824                       set_number_at <jump count> <upper bound>
1825                       set_number_at <succeed_n count> <lower bound>
1826                       succeed_n <after jump addr> <succed_n count>
1827                       <body of loop>
1828                       jump_n <succeed_n addr> <jump count>
1829                     (The upper bound and `jump_n' are omitted if
1830                     `upper_bound' is 1, though.)  */
1831                  else
1832                    { /* If the upper bound is > 1, we need to insert
1833                         more at the end of the loop.  */
1834                      unsigned nbytes = 10 + (upper_bound > 1) * 10;
1835 
1836                      GET_BUFFER_SPACE (nbytes);
1837 
1838                      /* Initialize lower bound of the `succeed_n', even
1839                         though it will be set during matching by its
1840                         attendant `set_number_at' (inserted next),
1841                         because `re_compile_fastmap' needs to know.
1842                         Jump to the `jump_n' we might insert below.  */
1843                      INSERT_JUMP2 (succeed_n, laststart,
1844                                    b + 5 + (upper_bound > 1) * 5,
1845                                    lower_bound);
1846                      b += 5;
1847 
1848                      /* Code to initialize the lower bound.  Insert
1849                         before the `succeed_n'.  The `5' is the last two
1850                         bytes of this `set_number_at', plus 3 bytes of
1851                         the following `succeed_n'.  */
1852                      insert_op2 (set_number_at, laststart, 5, lower_bound, b);
1853                      b += 5;
1854 
1855                      if (upper_bound > 1)
1856                        { /* More than one repetition is allowed, so
1857                             append a backward jump to the `succeed_n'
1858                             that starts this interval.
1859 
1860                             When we've reached this during matching,
1861                             we'll have matched the interval once, so
1862                             jump back only `upper_bound - 1' times.  */
1863                          STORE_JUMP2 (jump_n, b, laststart + 5,
1864                                       upper_bound - 1);
1865                          b += 5;
1866 
1867                          /* The location we want to set is the second
1868                             parameter of the `jump_n'; that is `b-2' as
1869                             an absolute address.  `laststart' will be
1870                             the `set_number_at' we're about to insert;
1871                             `laststart+3' the number to set, the source
1872                             for the relative address.  But we are
1873                             inserting into the middle of the pattern --
1874                             so everything is getting moved up by 5.
1875                             Conclusion: (b - 2) - (laststart + 3) + 5,
1876                             i.e., b - laststart.
1877 
1878                             We insert this at the beginning of the loop
1879                             so that if we fail during matching, we'll
1880                             reinitialize the bounds.  */
1881                          insert_op2 (set_number_at, laststart, b - laststart,
1882                                      upper_bound - 1, b);
1883                          b += 5;
1884                        }
1885                    }
1886                 pending_exact = 0;
1887                 beg_interval = NULL;
1888               }
1889               break;
1890 
1891             unfetch_interval:
1892               /* If an invalid interval, match the characters as literals.  */
1893                assert (beg_interval);
1894                p = beg_interval;
1895                beg_interval = NULL;
1896 
1897                /* normal_char and normal_backslash need `c'.  */
1898                PATFETCH (c);
1899 
1900                if (!(syntax & RE_NO_BK_BRACES))
1901                  {
1902                    if (p > pattern  &&  p[-1] == '\\')
1903                      goto normal_backslash;
1904                  }
1905                goto normal_char;
1906 
1907 #ifdef emacs
1908             /* There is no way to specify the before_dot and after_dot
1909                operators.  rms says this is ok.  --karl  */
1910             case '=':
1911               BUF_PUSH (at_dot);
1912               break;
1913 
1914             case 's':
1915               laststart = b;
1916               PATFETCH (c);
1917               BUF_PUSH_2 (syntaxspec, syntax_spec_code[c]);
1918               break;
1919 
1920             case 'S':
1921               laststart = b;
1922               PATFETCH (c);
1923               BUF_PUSH_2 (notsyntaxspec, syntax_spec_code[c]);
1924               break;
1925 #endif /* emacs */
1926 
1927 
1928             case 'w':
1929               laststart = b;
1930               BUF_PUSH (wordchar);
1931               break;
1932 
1933 
1934             case 'W':
1935               laststart = b;
1936               BUF_PUSH (notwordchar);
1937               break;
1938 
1939 
1940             case '<':
1941               BUF_PUSH (wordbeg);
1942               break;
1943 
1944             case '>':
1945               BUF_PUSH (wordend);
1946               break;
1947 
1948             case 'b':
1949               BUF_PUSH (wordbound);
1950               break;
1951 
1952             case 'B':
1953               BUF_PUSH (notwordbound);
1954               break;
1955 
1956             case '`':
1957               BUF_PUSH (begbuf);
1958               break;
1959 
1960             case '\'':
1961               BUF_PUSH (endbuf);
1962               break;
1963 
1964             case '1': case '2': case '3': case '4': case '5':
1965             case '6': case '7': case '8': case '9':
1966               if (syntax & RE_NO_BK_REFS)
1967                 goto normal_char;
1968 
1969               c1 = c - '0';
1970 
1971               if (c1 > regnum)
1972                 return REG_ESUBREG;
1973 
1974               /* Can't back reference to a subexpression if inside of it.  */
1975               if (group_in_compile_stack (compile_stack, c1))
1976                 goto normal_char;
1977 
1978               laststart = b;
1979               BUF_PUSH_2 (duplicate, c1);
1980               break;
1981 
1982 
1983             case '+':
1984             case '?':
1985               if (syntax & RE_BK_PLUS_QM)
1986                 goto handle_plus;
1987               else
1988                 goto normal_backslash;
1989 
1990             default:
1991             normal_backslash:
1992               /* You might think it would be useful for \ to mean
1993                  not to translate; but if we don't translate it
1994                  it will never match anything.  */
1995               c = TRANSLATE (c);
1996               goto normal_char;
1997             }
1998           break;
1999 
2000 
2001 	default:
2002         /* Expects the character in `c'.  */
2003 	normal_char:
2004 	      /* If no exactn currently being built.  */
2005           if (!pending_exact
2006 
2007               /* If last exactn not at current position.  */
2008               || pending_exact + *pending_exact + 1 != b
2009 
2010               /* We have only one byte following the exactn for the count.  */
2011 	      || *pending_exact == (1 << BYTEWIDTH) - 1
2012 
2013               /* If followed by a repetition operator.  */
2014               || *p == '*' || *p == '^'
2015 	      || ((syntax & RE_BK_PLUS_QM)
2016 		  ? *p == '\\' && (p[1] == '+' || p[1] == '?')
2017 		  : (*p == '+' || *p == '?'))
2018 	      || ((syntax & RE_INTERVALS)
2019                   && ((syntax & RE_NO_BK_BRACES)
2020 		      ? *p == '{'
2021                       : (p[0] == '\\' && p[1] == '{'))))
2022 	    {
2023 	      /* Start building a new exactn.  */
2024 
2025               laststart = b;
2026 
2027 	      BUF_PUSH_2 (exactn, 0);
2028 	      pending_exact = b - 1;
2029             }
2030 
2031 	  BUF_PUSH (c);
2032           (*pending_exact)++;
2033 	  break;
2034         } /* switch (c) */
2035     } /* while p != pend */
2036 
2037 
2038   /* Through the pattern now.  */
2039 
2040   if (fixup_alt_jump)
2041     STORE_JUMP (jump_past_alt, fixup_alt_jump, b);
2042 
2043   if (!COMPILE_STACK_EMPTY)
2044     return REG_EPAREN;
2045 
2046   free (compile_stack.stack);
2047 
2048   /* We have succeeded; set the length of the buffer.  */
2049   bufp->used = b - bufp->buffer;
2050 
2051 #ifdef DEBUG
2052   if (debug)
2053     {
2054       DEBUG_PRINT1 ("\nCompiled pattern: ");
2055       print_compiled_pattern (bufp);
2056     }
2057 #endif /* DEBUG */
2058 
2059   return REG_NOERROR;
2060 } /* regex_compile */
2061 
2062 /* Subroutines for `regex_compile'.  */
2063 
2064 /* Store OP at LOC followed by two-byte integer parameter ARG.  */
2065 
2066 static void
2067 store_op1 (op, loc, arg)
2068     re_opcode_t op;
2069     unsigned char *loc;
2070     int arg;
2071 {
2072   *loc = (unsigned char) op;
2073   STORE_NUMBER (loc + 1, arg);
2074 }
2075 
2076 
2077 /* Like `store_op1', but for two two-byte parameters ARG1 and ARG2.  */
2078 
2079 static void
2080 store_op2 (op, loc, arg1, arg2)
2081     re_opcode_t op;
2082     unsigned char *loc;
2083     int arg1, arg2;
2084 {
2085   *loc = (unsigned char) op;
2086   STORE_NUMBER (loc + 1, arg1);
2087   STORE_NUMBER (loc + 3, arg2);
2088 }
2089 
2090 
2091 /* Copy the bytes from LOC to END to open up three bytes of space at LOC
2092    for OP followed by two-byte integer parameter ARG.  */
2093 
2094 static void
2095 insert_op1 (op, loc, arg, end)
2096     re_opcode_t op;
2097     unsigned char *loc;
2098     int arg;
2099     unsigned char *end;
2100 {
2101   register unsigned char *pfrom = end;
2102   register unsigned char *pto = end + 3;
2103 
2104   while (pfrom != loc)
2105     *--pto = *--pfrom;
2106 
2107   store_op1 (op, loc, arg);
2108 }
2109 
2110 
2111 /* Like `insert_op1', but for two two-byte parameters ARG1 and ARG2.  */
2112 
2113 static void
2114 insert_op2 (op, loc, arg1, arg2, end)
2115     re_opcode_t op;
2116     unsigned char *loc;
2117     int arg1, arg2;
2118     unsigned char *end;
2119 {
2120   register unsigned char *pfrom = end;
2121   register unsigned char *pto = end + 5;
2122 
2123   while (pfrom != loc)
2124     *--pto = *--pfrom;
2125 
2126   store_op2 (op, loc, arg1, arg2);
2127 }
2128 
2129 
2130 /* P points to just after a ^ in PATTERN.  Return true if that ^ comes
2131    after an alternative or a begin-subexpression.  We assume there is at
2132    least one character before the ^.  */
2133 
2134 static boolean
2135 at_begline_loc_p (pattern, p, syntax)
2136     const char *pattern, *p;
2137     reg_syntax_t syntax;
2138 {
2139   const char *prev = p - 2;
2140   boolean prev_prev_backslash = prev > pattern && prev[-1] == '\\';
2141 
2142   return
2143        /* After a subexpression?  */
2144        (*prev == '(' && (syntax & RE_NO_BK_PARENS || prev_prev_backslash))
2145        /* After an alternative?  */
2146     || (*prev == '|' && (syntax & RE_NO_BK_VBAR || prev_prev_backslash));
2147 }
2148 
2149 
2150 /* The dual of at_begline_loc_p.  This one is for $.  We assume there is
2151    at least one character after the $, i.e., `P < PEND'.  */
2152 
2153 static boolean
2154 at_endline_loc_p (p, pend, syntax)
2155     const char *p, *pend;
2156     int syntax;
2157 {
2158   const char *next = p;
2159   boolean next_backslash = *next == '\\';
2160   const char *next_next = p + 1 < pend ? p + 1 : NULL;
2161 
2162   return
2163        /* Before a subexpression?  */
2164        (syntax & RE_NO_BK_PARENS ? *next == ')'
2165         : next_backslash && next_next && *next_next == ')')
2166        /* Before an alternative?  */
2167     || (syntax & RE_NO_BK_VBAR ? *next == '|'
2168         : next_backslash && next_next && *next_next == '|');
2169 }
2170 
2171 
2172 /* Returns true if REGNUM is in one of COMPILE_STACK's elements and
2173    false if it's not.  */
2174 
2175 static boolean
2176 group_in_compile_stack (compile_stack, regnum)
2177     compile_stack_type compile_stack;
2178     regnum_t regnum;
2179 {
2180   int this_element;
2181 
2182   for (this_element = compile_stack.avail - 1;
2183        this_element >= 0;
2184        this_element--)
2185     if (compile_stack.stack[this_element].regnum == regnum)
2186       return true;
2187 
2188   return false;
2189 }
2190 
2191 
2192 /* Read the ending character of a range (in a bracket expression) from the
2193    uncompiled pattern *P_PTR (which ends at PEND).  We assume the
2194    starting character is in `P[-2]'.  (`P[-1]' is the character `-'.)
2195    Then we set the translation of all bits between the starting and
2196    ending characters (inclusive) in the compiled pattern B.
2197 
2198    Return an error code.
2199 
2200    We use these short variable names so we can use the same macros as
2201    `regex_compile' itself.  */
2202 
2203 static reg_errcode_t
2204 compile_range (p_ptr, pend, translate, syntax, b)
2205     const char **p_ptr, *pend;
2206     char *translate;
2207     reg_syntax_t syntax;
2208     unsigned char *b;
2209 {
2210   unsigned this_char;
2211 
2212   const char *p = *p_ptr;
2213   int range_start, range_end;
2214 
2215   if (p == pend)
2216     return REG_ERANGE;
2217 
2218   /* Even though the pattern is a signed `char *', we need to fetch
2219      with unsigned char *'s; if the high bit of the pattern character
2220      is set, the range endpoints will be negative if we fetch using a
2221      signed char *.
2222 
2223      We also want to fetch the endpoints without translating them; the
2224      appropriate translation is done in the bit-setting loop below.  */
2225   range_start = ((unsigned char *) p)[-2];
2226   range_end   = ((unsigned char *) p)[0];
2227 
2228   /* Have to increment the pointer into the pattern string, so the
2229      caller isn't still at the ending character.  */
2230   (*p_ptr)++;
2231 
2232   /* If the start is after the end, the range is empty.  */
2233   if (range_start > range_end)
2234     return syntax & RE_NO_EMPTY_RANGES ? REG_ERANGE : REG_NOERROR;
2235 
2236   /* Here we see why `this_char' has to be larger than an `unsigned
2237      char' -- the range is inclusive, so if `range_end' == 0xff
2238      (assuming 8-bit characters), we would otherwise go into an infinite
2239      loop, since all characters <= 0xff.  */
2240   for (this_char = range_start; this_char <= range_end; this_char++)
2241     {
2242       SET_LIST_BIT (TRANSLATE (this_char));
2243     }
2244 
2245   return REG_NOERROR;
2246 }
2247 
2248 /* Failure stack declarations and macros; both re_compile_fastmap and
2249    re_match_2 use a failure stack.  These have to be macros because of
2250    REGEX_ALLOCATE.  */
2251 
2252 
2253 /* Number of failure points for which to initially allocate space
2254    when matching.  If this number is exceeded, we allocate more
2255    space, so it is not a hard limit.  */
2256 #ifndef INIT_FAILURE_ALLOC
2257 #define INIT_FAILURE_ALLOC 5
2258 #endif
2259 
2260 /* Roughly the maximum number of failure points on the stack.  Would be
2261    exactly that if always used MAX_FAILURE_SPACE each time we failed.
2262    This is a variable only so users of regex can assign to it; we never
2263    change it ourselves.  */
2264 int re_max_failures = 2000;
2265 
2266 typedef const unsigned char *fail_stack_elt_t;
2267 
2268 typedef struct
2269 {
2270   fail_stack_elt_t *stack;
2271   unsigned size;
2272   unsigned avail;			/* Offset of next open position.  */
2273 } fail_stack_type;
2274 
2275 #define FAIL_STACK_EMPTY()     (fail_stack.avail == 0)
2276 #define FAIL_STACK_PTR_EMPTY() (fail_stack_ptr->avail == 0)
2277 #define FAIL_STACK_FULL()      (fail_stack.avail == fail_stack.size)
2278 #define FAIL_STACK_TOP()       (fail_stack.stack[fail_stack.avail])
2279 
2280 
2281 /* Initialize `fail_stack'.  Do `return -2' if the alloc fails.  */
2282 
2283 #define INIT_FAIL_STACK()						\
2284   do {									\
2285     fail_stack.stack = (fail_stack_elt_t *)				\
2286       REGEX_ALLOCATE (INIT_FAILURE_ALLOC * sizeof (fail_stack_elt_t));	\
2287 									\
2288     if (fail_stack.stack == NULL)					\
2289       return -2;							\
2290 									\
2291     fail_stack.size = INIT_FAILURE_ALLOC;				\
2292     fail_stack.avail = 0;						\
2293   } while (0)
2294 
2295 
2296 /* Double the size of FAIL_STACK, up to approximately `re_max_failures' items.
2297 
2298    Return 1 if succeeds, and 0 if either ran out of memory
2299    allocating space for it or it was already too large.
2300 
2301    REGEX_REALLOCATE requires `destination' be declared.   */
2302 
2303 #define DOUBLE_FAIL_STACK(fail_stack)					\
2304   ((fail_stack).size > re_max_failures * MAX_FAILURE_ITEMS		\
2305    ? 0									\
2306    : ((fail_stack).stack = (fail_stack_elt_t *)				\
2307         REGEX_REALLOCATE ((fail_stack).stack, 				\
2308           (fail_stack).size * sizeof (fail_stack_elt_t),		\
2309           ((fail_stack).size << 1) * sizeof (fail_stack_elt_t)),	\
2310 									\
2311       (fail_stack).stack == NULL					\
2312       ? 0								\
2313       : ((fail_stack).size <<= 1, 					\
2314          1)))
2315 
2316 
2317 /* Push PATTERN_OP on FAIL_STACK.
2318 
2319    Return 1 if was able to do so and 0 if ran out of memory allocating
2320    space to do so.  */
2321 #define PUSH_PATTERN_OP(pattern_op, fail_stack)				\
2322   ((FAIL_STACK_FULL ()							\
2323     && !DOUBLE_FAIL_STACK (fail_stack))					\
2324     ? 0									\
2325     : ((fail_stack).stack[(fail_stack).avail++] = pattern_op,		\
2326        1))
2327 
2328 /* This pushes an item onto the failure stack.  Must be a four-byte
2329    value.  Assumes the variable `fail_stack'.  Probably should only
2330    be called from within `PUSH_FAILURE_POINT'.  */
2331 #define PUSH_FAILURE_ITEM(item)						\
2332   fail_stack.stack[fail_stack.avail++] = (fail_stack_elt_t) item
2333 
2334 /* The complement operation.  Assumes `fail_stack' is nonempty.  */
2335 #define POP_FAILURE_ITEM() fail_stack.stack[--fail_stack.avail]
2336 
2337 /* Used to omit pushing failure point id's when we're not debugging.  */
2338 #ifdef DEBUG
2339 #define DEBUG_PUSH PUSH_FAILURE_ITEM
2340 #define DEBUG_POP(item_addr) *(item_addr) = POP_FAILURE_ITEM ()
2341 #else
2342 #define DEBUG_PUSH(item)
2343 #define DEBUG_POP(item_addr)
2344 #endif
2345 
2346 
2347 /* Push the information about the state we will need
2348    if we ever fail back to it.
2349 
2350    Requires variables fail_stack, regstart, regend, reg_info, and
2351    num_regs be declared.  DOUBLE_FAIL_STACK requires `destination' be
2352    declared.
2353 
2354    Does `return FAILURE_CODE' if runs out of memory.  */
2355 
2356 #define PUSH_FAILURE_POINT(pattern_place, string_place, failure_code)	\
2357   do {									\
2358     char *destination;							\
2359     /* Must be int, so when we don't save any registers, the arithmetic	\
2360        of 0 + -1 isn't done as unsigned.  */				\
2361     int this_reg;							\
2362     									\
2363     DEBUG_STATEMENT (failure_id++);					\
2364     DEBUG_STATEMENT (nfailure_points_pushed++);				\
2365     DEBUG_PRINT2 ("\nPUSH_FAILURE_POINT #%u:\n", failure_id);		\
2366     DEBUG_PRINT2 ("  Before push, next avail: %d\n", (fail_stack).avail);\
2367     DEBUG_PRINT2 ("                     size: %d\n", (fail_stack).size);\
2368 									\
2369     DEBUG_PRINT2 ("  slots needed: %d\n", NUM_FAILURE_ITEMS);		\
2370     DEBUG_PRINT2 ("     available: %d\n", REMAINING_AVAIL_SLOTS);	\
2371 									\
2372     /* Ensure we have enough space allocated for what we will push.  */	\
2373     while (REMAINING_AVAIL_SLOTS < NUM_FAILURE_ITEMS)			\
2374       {									\
2375         if (!DOUBLE_FAIL_STACK (fail_stack))			\
2376           return failure_code;						\
2377 									\
2378         DEBUG_PRINT2 ("\n  Doubled stack; size now: %d\n",		\
2379 		       (fail_stack).size);				\
2380         DEBUG_PRINT2 ("  slots available: %d\n", REMAINING_AVAIL_SLOTS);\
2381       }									\
2382 									\
2383     /* Push the info, starting with the registers.  */			\
2384     DEBUG_PRINT1 ("\n");						\
2385 									\
2386     for (this_reg = lowest_active_reg; this_reg <= highest_active_reg;	\
2387          this_reg++)							\
2388       {									\
2389 	DEBUG_PRINT2 ("  Pushing reg: %d\n", this_reg);			\
2390         DEBUG_STATEMENT (num_regs_pushed++);				\
2391 									\
2392 	DEBUG_PRINT2 ("    start: 0x%x\n", regstart[this_reg]);		\
2393         PUSH_FAILURE_ITEM (regstart[this_reg]);				\
2394                                                                         \
2395 	DEBUG_PRINT2 ("    end: 0x%x\n", regend[this_reg]);		\
2396         PUSH_FAILURE_ITEM (regend[this_reg]);				\
2397 									\
2398 	DEBUG_PRINT2 ("    info: 0x%x\n      ", reg_info[this_reg]);	\
2399         DEBUG_PRINT2 (" match_null=%d",					\
2400                       REG_MATCH_NULL_STRING_P (reg_info[this_reg]));	\
2401         DEBUG_PRINT2 (" active=%d", IS_ACTIVE (reg_info[this_reg]));	\
2402         DEBUG_PRINT2 (" matched_something=%d",				\
2403                       MATCHED_SOMETHING (reg_info[this_reg]));		\
2404         DEBUG_PRINT2 (" ever_matched=%d",				\
2405                       EVER_MATCHED_SOMETHING (reg_info[this_reg]));	\
2406 	DEBUG_PRINT1 ("\n");						\
2407         PUSH_FAILURE_ITEM (reg_info[this_reg].word);			\
2408       }									\
2409 									\
2410     DEBUG_PRINT2 ("  Pushing  low active reg: %d\n", lowest_active_reg);\
2411     PUSH_FAILURE_ITEM (lowest_active_reg);				\
2412 									\
2413     DEBUG_PRINT2 ("  Pushing high active reg: %d\n", highest_active_reg);\
2414     PUSH_FAILURE_ITEM (highest_active_reg);				\
2415 									\
2416     DEBUG_PRINT2 ("  Pushing pattern 0x%x: ", pattern_place);		\
2417     DEBUG_PRINT_COMPILED_PATTERN (bufp, pattern_place, pend);		\
2418     PUSH_FAILURE_ITEM (pattern_place);					\
2419 									\
2420     DEBUG_PRINT2 ("  Pushing string 0x%x: `", string_place);		\
2421     DEBUG_PRINT_DOUBLE_STRING (string_place, string1, size1, string2,   \
2422 				 size2);				\
2423     DEBUG_PRINT1 ("'\n");						\
2424     PUSH_FAILURE_ITEM (string_place);					\
2425 									\
2426     DEBUG_PRINT2 ("  Pushing failure id: %u\n", failure_id);		\
2427     DEBUG_PUSH (failure_id);						\
2428   } while (0)
2429 
2430 /* This is the number of items that are pushed and popped on the stack
2431    for each register.  */
2432 #define NUM_REG_ITEMS  3
2433 
2434 /* Individual items aside from the registers.  */
2435 #ifdef DEBUG
2436 #define NUM_NONREG_ITEMS 5 /* Includes failure point id.  */
2437 #else
2438 #define NUM_NONREG_ITEMS 4
2439 #endif
2440 
2441 /* We push at most this many items on the stack.  */
2442 #define MAX_FAILURE_ITEMS ((num_regs - 1) * NUM_REG_ITEMS + NUM_NONREG_ITEMS)
2443 
2444 /* We actually push this many items.  */
2445 #define NUM_FAILURE_ITEMS						\
2446   ((highest_active_reg - lowest_active_reg + 1) * NUM_REG_ITEMS 	\
2447     + NUM_NONREG_ITEMS)
2448 
2449 /* How many items can still be added to the stack without overflowing it.  */
2450 #define REMAINING_AVAIL_SLOTS ((fail_stack).size - (fail_stack).avail)
2451 
2452 
2453 /* Pops what PUSH_FAIL_STACK pushes.
2454 
2455    We restore into the parameters, all of which should be lvalues:
2456      STR -- the saved data position.
2457      PAT -- the saved pattern position.
2458      LOW_REG, HIGH_REG -- the highest and lowest active registers.
2459      REGSTART, REGEND -- arrays of string positions.
2460      REG_INFO -- array of information about each subexpression.
2461 
2462    Also assumes the variables `fail_stack' and (if debugging), `bufp',
2463    `pend', `string1', `size1', `string2', and `size2'.  */
2464 
2465 #define POP_FAILURE_POINT(str, pat, low_reg, high_reg, regstart, regend, reg_info)\
2466 {									\
2467   DEBUG_STATEMENT (fail_stack_elt_t failure_id;)			\
2468   int this_reg;								\
2469   const unsigned char *string_temp;					\
2470 									\
2471   assert (!FAIL_STACK_EMPTY ());					\
2472 									\
2473   /* Remove failure points and point to how many regs pushed.  */	\
2474   DEBUG_PRINT1 ("POP_FAILURE_POINT:\n");				\
2475   DEBUG_PRINT2 ("  Before pop, next avail: %d\n", fail_stack.avail);	\
2476   DEBUG_PRINT2 ("                    size: %d\n", fail_stack.size);	\
2477 									\
2478   assert (fail_stack.avail >= NUM_NONREG_ITEMS);			\
2479 									\
2480   DEBUG_POP (&failure_id);						\
2481   DEBUG_PRINT2 ("  Popping failure id: %u\n", failure_id);		\
2482 									\
2483   /* If the saved string location is NULL, it came from an		\
2484      on_failure_keep_string_jump opcode, and we want to throw away the	\
2485      saved NULL, thus retaining our current position in the string.  */	\
2486   string_temp = POP_FAILURE_ITEM ();					\
2487   if (string_temp != NULL)						\
2488     str = (const char *) string_temp;					\
2489 									\
2490   DEBUG_PRINT2 ("  Popping string 0x%x: `", str);			\
2491   DEBUG_PRINT_DOUBLE_STRING (str, string1, size1, string2, size2);	\
2492   DEBUG_PRINT1 ("'\n");							\
2493 									\
2494   pat = (unsigned char *) POP_FAILURE_ITEM ();				\
2495   DEBUG_PRINT2 ("  Popping pattern 0x%x: ", pat);			\
2496   DEBUG_PRINT_COMPILED_PATTERN (bufp, pat, pend);			\
2497 									\
2498   /* Restore register info.  */						\
2499   high_reg = (unsigned) POP_FAILURE_ITEM ();				\
2500   DEBUG_PRINT2 ("  Popping high active reg: %d\n", high_reg);		\
2501 									\
2502   low_reg = (unsigned) POP_FAILURE_ITEM ();				\
2503   DEBUG_PRINT2 ("  Popping  low active reg: %d\n", low_reg);		\
2504 									\
2505   for (this_reg = high_reg; this_reg >= low_reg; this_reg--)		\
2506     {									\
2507       DEBUG_PRINT2 ("    Popping reg: %d\n", this_reg);			\
2508 									\
2509       reg_info[this_reg].word = POP_FAILURE_ITEM ();			\
2510       DEBUG_PRINT2 ("      info: 0x%x\n", reg_info[this_reg]);		\
2511 									\
2512       regend[this_reg] = (const char *) POP_FAILURE_ITEM ();		\
2513       DEBUG_PRINT2 ("      end: 0x%x\n", regend[this_reg]);		\
2514 									\
2515       regstart[this_reg] = (const char *) POP_FAILURE_ITEM ();		\
2516       DEBUG_PRINT2 ("      start: 0x%x\n", regstart[this_reg]);		\
2517     }									\
2518 									\
2519   DEBUG_STATEMENT (nfailure_points_popped++);				\
2520 } /* POP_FAILURE_POINT */
2521 
2522 /* re_compile_fastmap computes a ``fastmap'' for the compiled pattern in
2523    BUFP.  A fastmap records which of the (1 << BYTEWIDTH) possible
2524    characters can start a string that matches the pattern.  This fastmap
2525    is used by re_search to skip quickly over impossible starting points.
2526 
2527    The caller must supply the address of a (1 << BYTEWIDTH)-byte data
2528    area as BUFP->fastmap.
2529 
2530    We set the `fastmap', `fastmap_accurate', and `can_be_null' fields in
2531    the pattern buffer.
2532 
2533    Returns 0 if we succeed, -2 if an internal error.   */
2534 
2535 int
2536 re_compile_fastmap (bufp)
2537      struct re_pattern_buffer *bufp;
2538 {
2539   int j, k;
2540   fail_stack_type fail_stack;
2541 #ifndef REGEX_MALLOC
2542   char *destination;
2543 #endif
2544   /* We don't push any register information onto the failure stack.  */
2545   unsigned num_regs = 0;
2546 
2547   register char *fastmap = bufp->fastmap;
2548   unsigned char *pattern = bufp->buffer;
2549   unsigned long size = bufp->used;
2550   const unsigned char *p = pattern;
2551   register unsigned char *pend = pattern + size;
2552 
2553   /* Assume that each path through the pattern can be null until
2554      proven otherwise.  We set this false at the bottom of switch
2555      statement, to which we get only if a particular path doesn't
2556      match the empty string.  */
2557   boolean path_can_be_null = true;
2558 
2559   /* We aren't doing a `succeed_n' to begin with.  */
2560   boolean succeed_n_p = false;
2561 
2562   assert (fastmap != NULL && p != NULL);
2563 
2564   INIT_FAIL_STACK ();
2565   bzero (fastmap, 1 << BYTEWIDTH);  /* Assume nothing's valid.  */
2566   bufp->fastmap_accurate = 1;	    /* It will be when we're done.  */
2567   bufp->can_be_null = 0;
2568 
2569   while (p != pend || !FAIL_STACK_EMPTY ())
2570     {
2571       if (p == pend)
2572         {
2573           bufp->can_be_null |= path_can_be_null;
2574 
2575           /* Reset for next path.  */
2576           path_can_be_null = true;
2577 
2578           p = fail_stack.stack[--fail_stack.avail];
2579 	}
2580 
2581       /* We should never be about to go beyond the end of the pattern.  */
2582       assert (p < pend);
2583 
2584 #ifdef SWITCH_ENUM_BUG
2585       switch ((int) ((re_opcode_t) *p++))
2586 #else
2587       switch ((re_opcode_t) *p++)
2588 #endif
2589 	{
2590 
2591         /* I guess the idea here is to simply not bother with a fastmap
2592            if a backreference is used, since it's too hard to figure out
2593            the fastmap for the corresponding group.  Setting
2594            `can_be_null' stops `re_search_2' from using the fastmap, so
2595            that is all we do.  */
2596 	case duplicate:
2597 	  bufp->can_be_null = 1;
2598           return 0;
2599 
2600 
2601       /* Following are the cases which match a character.  These end
2602          with `break'.  */
2603 
2604 	case exactn:
2605           fastmap[p[1]] = 1;
2606 	  break;
2607 
2608 
2609         case charset:
2610           for (j = *p++ * BYTEWIDTH - 1; j >= 0; j--)
2611 	    if (p[j / BYTEWIDTH] & (1 << (j % BYTEWIDTH)))
2612               fastmap[j] = 1;
2613 	  break;
2614 
2615 
2616 	case charset_not:
2617 	  /* Chars beyond end of map must be allowed.  */
2618 	  for (j = *p * BYTEWIDTH; j < (1 << BYTEWIDTH); j++)
2619             fastmap[j] = 1;
2620 
2621 	  for (j = *p++ * BYTEWIDTH - 1; j >= 0; j--)
2622 	    if (!(p[j / BYTEWIDTH] & (1 << (j % BYTEWIDTH))))
2623               fastmap[j] = 1;
2624           break;
2625 
2626 
2627 	case wordchar:
2628 	  for (j = 0; j < (1 << BYTEWIDTH); j++)
2629 	    if (SYNTAX (j) == Sword)
2630 	      fastmap[j] = 1;
2631 	  break;
2632 
2633 
2634 	case notwordchar:
2635 	  for (j = 0; j < (1 << BYTEWIDTH); j++)
2636 	    if (SYNTAX (j) != Sword)
2637 	      fastmap[j] = 1;
2638 	  break;
2639 
2640 
2641         case anychar:
2642           /* `.' matches anything ...  */
2643 	  for (j = 0; j < (1 << BYTEWIDTH); j++)
2644             fastmap[j] = 1;
2645 
2646           /* ... except perhaps newline.  */
2647           if (!(bufp->syntax & RE_DOT_NEWLINE))
2648             fastmap['\n'] = 0;
2649 
2650           /* Return if we have already set `can_be_null'; if we have,
2651              then the fastmap is irrelevant.  Something's wrong here.  */
2652 	  else if (bufp->can_be_null)
2653 	    return 0;
2654 
2655           /* Otherwise, have to check alternative paths.  */
2656 	  break;
2657 
2658 
2659 #ifdef emacs
2660         case syntaxspec:
2661 	  k = *p++;
2662 	  for (j = 0; j < (1 << BYTEWIDTH); j++)
2663 	    if (SYNTAX (j) == (enum syntaxcode) k)
2664 	      fastmap[j] = 1;
2665 	  break;
2666 
2667 
2668 	case notsyntaxspec:
2669 	  k = *p++;
2670 	  for (j = 0; j < (1 << BYTEWIDTH); j++)
2671 	    if (SYNTAX (j) != (enum syntaxcode) k)
2672 	      fastmap[j] = 1;
2673 	  break;
2674 
2675 
2676       /* All cases after this match the empty string.  These end with
2677          `continue'.  */
2678 
2679 
2680 	case before_dot:
2681 	case at_dot:
2682 	case after_dot:
2683           continue;
2684 #endif /* not emacs */
2685 
2686 
2687         case no_op:
2688         case begline:
2689         case endline:
2690 	case begbuf:
2691 	case endbuf:
2692 	case wordbound:
2693 	case notwordbound:
2694 	case wordbeg:
2695 	case wordend:
2696         case push_dummy_failure:
2697           continue;
2698 
2699 
2700 	case jump_n:
2701         case pop_failure_jump:
2702 	case maybe_pop_jump:
2703 	case jump:
2704         case jump_past_alt:
2705 	case dummy_failure_jump:
2706           EXTRACT_NUMBER_AND_INCR (j, p);
2707 	  p += j;
2708 	  if (j > 0)
2709 	    continue;
2710 
2711           /* Jump backward implies we just went through the body of a
2712              loop and matched nothing.  Opcode jumped to should be
2713              `on_failure_jump' or `succeed_n'.  Just treat it like an
2714              ordinary jump.  For a * loop, it has pushed its failure
2715              point already; if so, discard that as redundant.  */
2716           if ((re_opcode_t) *p != on_failure_jump
2717 	      && (re_opcode_t) *p != succeed_n)
2718 	    continue;
2719 
2720           p++;
2721           EXTRACT_NUMBER_AND_INCR (j, p);
2722           p += j;
2723 
2724           /* If what's on the stack is where we are now, pop it.  */
2725           if (!FAIL_STACK_EMPTY ()
2726 	      && fail_stack.stack[fail_stack.avail - 1] == p)
2727             fail_stack.avail--;
2728 
2729           continue;
2730 
2731 
2732         case on_failure_jump:
2733         case on_failure_keep_string_jump:
2734 	handle_on_failure_jump:
2735           EXTRACT_NUMBER_AND_INCR (j, p);
2736 
2737           /* For some patterns, e.g., `(a?)?', `p+j' here points to the
2738              end of the pattern.  We don't want to push such a point,
2739              since when we restore it above, entering the switch will
2740              increment `p' past the end of the pattern.  We don't need
2741              to push such a point since we obviously won't find any more
2742              fastmap entries beyond `pend'.  Such a pattern can match
2743              the null string, though.  */
2744           if (p + j < pend)
2745             {
2746               if (!PUSH_PATTERN_OP (p + j, fail_stack))
2747                 return -2;
2748             }
2749           else
2750             bufp->can_be_null = 1;
2751 
2752           if (succeed_n_p)
2753             {
2754               EXTRACT_NUMBER_AND_INCR (k, p);	/* Skip the n.  */
2755               succeed_n_p = false;
2756 	    }
2757 
2758           continue;
2759 
2760 
2761 	case succeed_n:
2762           /* Get to the number of times to succeed.  */
2763           p += 2;
2764 
2765           /* Increment p past the n for when k != 0.  */
2766           EXTRACT_NUMBER_AND_INCR (k, p);
2767           if (k == 0)
2768 	    {
2769               p -= 4;
2770   	      succeed_n_p = true;  /* Spaghetti code alert.  */
2771               goto handle_on_failure_jump;
2772             }
2773           continue;
2774 
2775 
2776 	case set_number_at:
2777           p += 4;
2778           continue;
2779 
2780 
2781 	case start_memory:
2782         case stop_memory:
2783 	  p += 2;
2784 	  continue;
2785 
2786 
2787 	default:
2788           abort (); /* We have listed all the cases.  */
2789         } /* switch *p++ */
2790 
2791       /* Getting here means we have found the possible starting
2792          characters for one path of the pattern -- and that the empty
2793          string does not match.  We need not follow this path further.
2794          Instead, look at the next alternative (remembered on the
2795          stack), or quit if no more.  The test at the top of the loop
2796          does these things.  */
2797       path_can_be_null = false;
2798       p = pend;
2799     } /* while p */
2800 
2801   /* Set `can_be_null' for the last path (also the first path, if the
2802      pattern is empty).  */
2803   bufp->can_be_null |= path_can_be_null;
2804   return 0;
2805 } /* re_compile_fastmap */
2806 
2807 /* Set REGS to hold NUM_REGS registers, storing them in STARTS and
2808    ENDS.  Subsequent matches using PATTERN_BUFFER and REGS will use
2809    this memory for recording register information.  STARTS and ENDS
2810    must be allocated using the malloc library routine, and must each
2811    be at least NUM_REGS * sizeof (regoff_t) bytes long.
2812 
2813    If NUM_REGS == 0, then subsequent matches should allocate their own
2814    register data.
2815 
2816    Unless this function is called, the first search or match using
2817    PATTERN_BUFFER will allocate its own register data, without
2818    freeing the old data.  */
2819 
2820 void
2821 re_set_registers (bufp, regs, num_regs, starts, ends)
2822     struct re_pattern_buffer *bufp;
2823     struct re_registers *regs;
2824     unsigned num_regs;
2825     regoff_t *starts, *ends;
2826 {
2827   if (num_regs)
2828     {
2829       bufp->regs_allocated = REGS_REALLOCATE;
2830       regs->num_regs = num_regs;
2831       regs->start = starts;
2832       regs->end = ends;
2833     }
2834   else
2835     {
2836       bufp->regs_allocated = REGS_UNALLOCATED;
2837       regs->num_regs = 0;
2838       regs->start = regs->end = (regoff_t *) 0;
2839     }
2840 }
2841 
2842 /* Searching routines.  */
2843 
2844 /* Like re_search_2, below, but only one string is specified, and
2845    doesn't let you say where to stop matching. */
2846 
2847 int
2848 re_search (bufp, string, size, startpos, range, regs)
2849      struct re_pattern_buffer *bufp;
2850      const char *string;
2851      int size, startpos, range;
2852      struct re_registers *regs;
2853 {
2854   return re_search_2 (bufp, NULL, 0, string, size, startpos, range,
2855 		      regs, size);
2856 }
2857 
2858 
2859 /* Using the compiled pattern in BUFP->buffer, first tries to match the
2860    virtual concatenation of STRING1 and STRING2, starting first at index
2861    STARTPOS, then at STARTPOS + 1, and so on.
2862 
2863    STRING1 and STRING2 have length SIZE1 and SIZE2, respectively.
2864 
2865    RANGE is how far to scan while trying to match.  RANGE = 0 means try
2866    only at STARTPOS; in general, the last start tried is STARTPOS +
2867    RANGE.
2868 
2869    In REGS, return the indices of the virtual concatenation of STRING1
2870    and STRING2 that matched the entire BUFP->buffer and its contained
2871    subexpressions.
2872 
2873    Do not consider matching one past the index STOP in the virtual
2874    concatenation of STRING1 and STRING2.
2875 
2876    We return either the position in the strings at which the match was
2877    found, -1 if no match, or -2 if error (such as failure
2878    stack overflow).  */
2879 
2880 int
2881 re_search_2 (bufp, string1, size1, string2, size2, startpos, range, regs, stop)
2882      struct re_pattern_buffer *bufp;
2883      const char *string1, *string2;
2884      int size1, size2;
2885      int startpos;
2886      int range;
2887      struct re_registers *regs;
2888      int stop;
2889 {
2890   int val;
2891   register char *fastmap = bufp->fastmap;
2892   register char *translate = bufp->translate;
2893   int total_size = size1 + size2;
2894   int endpos = startpos + range;
2895 
2896   /* Check for out-of-range STARTPOS.  */
2897   if (startpos < 0 || startpos > total_size)
2898     return -1;
2899 
2900   /* Fix up RANGE if it might eventually take us outside
2901      the virtual concatenation of STRING1 and STRING2.  */
2902   if (endpos < -1)
2903     range = -1 - startpos;
2904   else if (endpos > total_size)
2905     range = total_size - startpos;
2906 
2907   /* If the search isn't to be a backwards one, don't waste time in a
2908      search for a pattern that must be anchored.  */
2909   if (bufp->used > 0 && (re_opcode_t) bufp->buffer[0] == begbuf && range > 0)
2910     {
2911       if (startpos > 0)
2912 	return -1;
2913       else
2914 	range = 1;
2915     }
2916 
2917   /* Update the fastmap now if not correct already.  */
2918   if (fastmap && !bufp->fastmap_accurate)
2919     if (re_compile_fastmap (bufp) == -2)
2920       return -2;
2921 
2922   /* Loop through the string, looking for a place to start matching.  */
2923   for (;;)
2924     {
2925       /* If a fastmap is supplied, skip quickly over characters that
2926          cannot be the start of a match.  If the pattern can match the
2927          null string, however, we don't need to skip characters; we want
2928          the first null string.  */
2929       if (fastmap && startpos < total_size && !bufp->can_be_null)
2930 	{
2931 	  if (range > 0)	/* Searching forwards.  */
2932 	    {
2933 	      register const char *d;
2934 	      register int lim = 0;
2935 	      int irange = range;
2936 
2937               if (startpos < size1 && startpos + range >= size1)
2938                 lim = range - (size1 - startpos);
2939 
2940 	      d = (startpos >= size1 ? string2 - size1 : string1) + startpos;
2941 
2942               /* Written out as an if-else to avoid testing `translate'
2943                  inside the loop.  */
2944 	      if (translate)
2945                 while (range > lim
2946                        && !fastmap[(unsigned char)
2947 				   translate[(unsigned char) *d++]])
2948                   range--;
2949 	      else
2950                 while (range > lim && !fastmap[(unsigned char) *d++])
2951                   range--;
2952 
2953 	      startpos += irange - range;
2954 	    }
2955 	  else				/* Searching backwards.  */
2956 	    {
2957 	      register char c = (size1 == 0 || startpos >= size1
2958                                  ? string2[startpos - size1]
2959                                  : string1[startpos]);
2960 
2961 	      if (!fastmap[(unsigned char) TRANSLATE (c)])
2962 		goto advance;
2963 	    }
2964 	}
2965 
2966       /* If can't match the null string, and that's all we have left, fail.  */
2967       if (range >= 0 && startpos == total_size && fastmap
2968           && !bufp->can_be_null)
2969 	return -1;
2970 
2971       val = re_match_2 (bufp, string1, size1, string2, size2,
2972 	                startpos, regs, stop);
2973       if (val >= 0)
2974 	return startpos;
2975 
2976       if (val == -2)
2977 	return -2;
2978 
2979     advance:
2980       if (!range)
2981         break;
2982       else if (range > 0)
2983         {
2984           range--;
2985           startpos++;
2986         }
2987       else
2988         {
2989           range++;
2990           startpos--;
2991         }
2992     }
2993   return -1;
2994 } /* re_search_2 */
2995 
2996 /* Declarations and macros for re_match_2.  */
2997 
2998 static int bcmp_translate ();
2999 static boolean alt_match_null_string_p (),
3000                common_op_match_null_string_p (),
3001                group_match_null_string_p ();
3002 
3003 /* Structure for per-register (a.k.a. per-group) information.
3004    This must not be longer than one word, because we push this value
3005    onto the failure stack.  Other register information, such as the
3006    starting and ending positions (which are addresses), and the list of
3007    inner groups (which is a bits list) are maintained in separate
3008    variables.
3009 
3010    We are making a (strictly speaking) nonportable assumption here: that
3011    the compiler will pack our bit fields into something that fits into
3012    the type of `word', i.e., is something that fits into one item on the
3013    failure stack.  */
3014 typedef union
3015 {
3016   fail_stack_elt_t word;
3017   struct
3018   {
3019       /* This field is one if this group can match the empty string,
3020          zero if not.  If not yet determined,  `MATCH_NULL_UNSET_VALUE'.  */
3021 #define MATCH_NULL_UNSET_VALUE 3
3022     unsigned match_null_string_p : 2;
3023     unsigned is_active : 1;
3024     unsigned matched_something : 1;
3025     unsigned ever_matched_something : 1;
3026   } bits;
3027 } register_info_type;
3028 
3029 #define REG_MATCH_NULL_STRING_P(R)  ((R).bits.match_null_string_p)
3030 #define IS_ACTIVE(R)  ((R).bits.is_active)
3031 #define MATCHED_SOMETHING(R)  ((R).bits.matched_something)
3032 #define EVER_MATCHED_SOMETHING(R)  ((R).bits.ever_matched_something)
3033 
3034 
3035 /* Call this when have matched a real character; it sets `matched' flags
3036    for the subexpressions which we are currently inside.  Also records
3037    that those subexprs have matched.  */
3038 #define SET_REGS_MATCHED()						\
3039   do									\
3040     {									\
3041       unsigned r;							\
3042       for (r = lowest_active_reg; r <= highest_active_reg; r++)		\
3043         {								\
3044           MATCHED_SOMETHING (reg_info[r])				\
3045             = EVER_MATCHED_SOMETHING (reg_info[r])			\
3046             = 1;							\
3047         }								\
3048     }									\
3049   while (0)
3050 
3051 
3052 /* This converts PTR, a pointer into one of the search strings `string1'
3053    and `string2' into an offset from the beginning of that string.  */
3054 #define POINTER_TO_OFFSET(ptr)						\
3055   (FIRST_STRING_P (ptr) ? (ptr) - string1 : (ptr) - string2 + size1)
3056 
3057 /* Registers are set to a sentinel when they haven't yet matched.  */
3058 #define REG_UNSET_VALUE ((char *) -1)
3059 #define REG_UNSET(e) ((e) == REG_UNSET_VALUE)
3060 
3061 
3062 /* Macros for dealing with the split strings in re_match_2.  */
3063 
3064 #define MATCHING_IN_FIRST_STRING  (dend == end_match_1)
3065 
3066 /* Call before fetching a character with *d.  This switches over to
3067    string2 if necessary.  */
3068 #define PREFETCH()							\
3069   while (d == dend)						    	\
3070     {									\
3071       /* End of string2 => fail.  */					\
3072       if (dend == end_match_2) 						\
3073         goto fail;							\
3074       /* End of string1 => advance to string2.  */ 			\
3075       d = string2;						        \
3076       dend = end_match_2;						\
3077     }
3078 
3079 
3080 /* Test if at very beginning or at very end of the virtual concatenation
3081    of `string1' and `string2'.  If only one string, it's `string2'.  */
3082 #define AT_STRINGS_BEG(d) ((d) == (size1 ? string1 : string2) || !size2)
3083 #define AT_STRINGS_END(d) ((d) == end2)
3084 
3085 
3086 /* Test if D points to a character which is word-constituent.  We have
3087    two special cases to check for: if past the end of string1, look at
3088    the first character in string2; and if before the beginning of
3089    string2, look at the last character in string1.  */
3090 #define WORDCHAR_P(d)							\
3091   (SYNTAX ((d) == end1 ? *string2					\
3092            : (d) == string2 - 1 ? *(end1 - 1) : *(d))			\
3093    == Sword)
3094 
3095 /* Test if the character before D and the one at D differ with respect
3096    to being word-constituent.  */
3097 #define AT_WORD_BOUNDARY(d)						\
3098   (AT_STRINGS_BEG (d) || AT_STRINGS_END (d)				\
3099    || WORDCHAR_P (d - 1) != WORDCHAR_P (d))
3100 
3101 
3102 /* Free everything we malloc.  */
3103 #ifdef REGEX_MALLOC
3104 #define FREE_VAR(var) if (var) free (var); var = NULL
3105 #define FREE_VARIABLES()						\
3106   do {									\
3107     FREE_VAR (fail_stack.stack);					\
3108     FREE_VAR (regstart);						\
3109     FREE_VAR (regend);							\
3110     FREE_VAR (old_regstart);						\
3111     FREE_VAR (old_regend);						\
3112     FREE_VAR (best_regstart);						\
3113     FREE_VAR (best_regend);						\
3114     FREE_VAR (reg_info);						\
3115     FREE_VAR (reg_dummy);						\
3116     FREE_VAR (reg_info_dummy);						\
3117   } while (0)
3118 #else /* not REGEX_MALLOC */
3119 /* Some MIPS systems (at least) want this to free alloca'd storage.  */
3120 #define FREE_VARIABLES() alloca (0)
3121 #endif /* not REGEX_MALLOC */
3122 
3123 
3124 /* These values must meet several constraints.  They must not be valid
3125    register values; since we have a limit of 255 registers (because
3126    we use only one byte in the pattern for the register number), we can
3127    use numbers larger than 255.  They must differ by 1, because of
3128    NUM_FAILURE_ITEMS above.  And the value for the lowest register must
3129    be larger than the value for the highest register, so we do not try
3130    to actually save any registers when none are active.  */
3131 #define NO_HIGHEST_ACTIVE_REG (1 << BYTEWIDTH)
3132 #define NO_LOWEST_ACTIVE_REG (NO_HIGHEST_ACTIVE_REG + 1)
3133 
3134 /* Matching routines.  */
3135 
3136 #ifndef emacs   /* Emacs never uses this.  */
3137 /* re_match is like re_match_2 except it takes only a single string.  */
3138 
3139 int
3140 re_match (bufp, string, size, pos, regs)
3141      struct re_pattern_buffer *bufp;
3142      const char *string;
3143      int size, pos;
3144      struct re_registers *regs;
3145  {
3146   return re_match_2 (bufp, NULL, 0, string, size, pos, regs, size);
3147 }
3148 #endif /* not emacs */
3149 
3150 
3151 /* re_match_2 matches the compiled pattern in BUFP against the
3152    the (virtual) concatenation of STRING1 and STRING2 (of length SIZE1
3153    and SIZE2, respectively).  We start matching at POS, and stop
3154    matching at STOP.
3155 
3156    If REGS is non-null and the `no_sub' field of BUFP is nonzero, we
3157    store offsets for the substring each group matched in REGS.  See the
3158    documentation for exactly how many groups we fill.
3159 
3160    We return -1 if no match, -2 if an internal error (such as the
3161    failure stack overflowing).  Otherwise, we return the length of the
3162    matched substring.  */
3163 
3164 int
3165 re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop)
3166      struct re_pattern_buffer *bufp;
3167      const char *string1, *string2;
3168      int size1, size2;
3169      int pos;
3170      struct re_registers *regs;
3171      int stop;
3172 {
3173   /* General temporaries.  */
3174   int mcnt;
3175   unsigned char *p1;
3176 
3177   /* Just past the end of the corresponding string.  */
3178   const char *end1, *end2;
3179 
3180   /* Pointers into string1 and string2, just past the last characters in
3181      each to consider matching.  */
3182   const char *end_match_1, *end_match_2;
3183 
3184   /* Where we are in the data, and the end of the current string.  */
3185   const char *d, *dend;
3186 
3187   /* Where we are in the pattern, and the end of the pattern.  */
3188   unsigned char *p = bufp->buffer;
3189   register unsigned char *pend = p + bufp->used;
3190 
3191   /* We use this to map every character in the string.  */
3192   char *translate = bufp->translate;
3193 
3194   /* Failure point stack.  Each place that can handle a failure further
3195      down the line pushes a failure point on this stack.  It consists of
3196      restart, regend, and reg_info for all registers corresponding to
3197      the subexpressions we're currently inside, plus the number of such
3198      registers, and, finally, two char *'s.  The first char * is where
3199      to resume scanning the pattern; the second one is where to resume
3200      scanning the strings.  If the latter is zero, the failure point is
3201      a ``dummy''; if a failure happens and the failure point is a dummy,
3202      it gets discarded and the next next one is tried.  */
3203   fail_stack_type fail_stack;
3204 #ifdef DEBUG
3205   static unsigned failure_id = 0;
3206   unsigned nfailure_points_pushed = 0, nfailure_points_popped = 0;
3207 #endif
3208 
3209   /* We fill all the registers internally, independent of what we
3210      return, for use in backreferences.  The number here includes
3211      an element for register zero.  */
3212   unsigned num_regs = bufp->re_nsub + 1;
3213 
3214   /* The currently active registers.  */
3215   unsigned lowest_active_reg = NO_LOWEST_ACTIVE_REG;
3216   unsigned highest_active_reg = NO_HIGHEST_ACTIVE_REG;
3217 
3218   /* Information on the contents of registers. These are pointers into
3219      the input strings; they record just what was matched (on this
3220      attempt) by a subexpression part of the pattern, that is, the
3221      regnum-th regstart pointer points to where in the pattern we began
3222      matching and the regnum-th regend points to right after where we
3223      stopped matching the regnum-th subexpression.  (The zeroth register
3224      keeps track of what the whole pattern matches.)  */
3225   const char **regstart, **regend;
3226 
3227   /* If a group that's operated upon by a repetition operator fails to
3228      match anything, then the register for its start will need to be
3229      restored because it will have been set to wherever in the string we
3230      are when we last see its open-group operator.  Similarly for a
3231      register's end.  */
3232   const char **old_regstart, **old_regend;
3233 
3234   /* The is_active field of reg_info helps us keep track of which (possibly
3235      nested) subexpressions we are currently in. The matched_something
3236      field of reg_info[reg_num] helps us tell whether or not we have
3237      matched any of the pattern so far this time through the reg_num-th
3238      subexpression.  These two fields get reset each time through any
3239      loop their register is in.  */
3240   register_info_type *reg_info;
3241 
3242   /* The following record the register info as found in the above
3243      variables when we find a match better than any we've seen before.
3244      This happens as we backtrack through the failure points, which in
3245      turn happens only if we have not yet matched the entire string. */
3246   unsigned best_regs_set = false;
3247   const char **best_regstart, **best_regend;
3248 
3249   /* Logically, this is `best_regend[0]'.  But we don't want to have to
3250      allocate space for that if we're not allocating space for anything
3251      else (see below).  Also, we never need info about register 0 for
3252      any of the other register vectors, and it seems rather a kludge to
3253      treat `best_regend' differently than the rest.  So we keep track of
3254      the end of the best match so far in a separate variable.  We
3255      initialize this to NULL so that when we backtrack the first time
3256      and need to test it, it's not garbage.  */
3257   const char *match_end = NULL;
3258 
3259   /* Used when we pop values we don't care about.  */
3260   const char **reg_dummy;
3261   register_info_type *reg_info_dummy;
3262 
3263 #ifdef DEBUG
3264   /* Counts the total number of registers pushed.  */
3265   unsigned num_regs_pushed = 0;
3266 #endif
3267 
3268   DEBUG_PRINT1 ("\n\nEntering re_match_2.\n");
3269 
3270   INIT_FAIL_STACK ();
3271 
3272   /* Do not bother to initialize all the register variables if there are
3273      no groups in the pattern, as it takes a fair amount of time.  If
3274      there are groups, we include space for register 0 (the whole
3275      pattern), even though we never use it, since it simplifies the
3276      array indexing.  We should fix this.  */
3277   if (bufp->re_nsub)
3278     {
3279       regstart = REGEX_TALLOC (num_regs, const char *);
3280       regend = REGEX_TALLOC (num_regs, const char *);
3281       old_regstart = REGEX_TALLOC (num_regs, const char *);
3282       old_regend = REGEX_TALLOC (num_regs, const char *);
3283       best_regstart = REGEX_TALLOC (num_regs, const char *);
3284       best_regend = REGEX_TALLOC (num_regs, const char *);
3285       reg_info = REGEX_TALLOC (num_regs, register_info_type);
3286       reg_dummy = REGEX_TALLOC (num_regs, const char *);
3287       reg_info_dummy = REGEX_TALLOC (num_regs, register_info_type);
3288 
3289       if (!(regstart && regend && old_regstart && old_regend && reg_info
3290             && best_regstart && best_regend && reg_dummy && reg_info_dummy))
3291         {
3292           FREE_VARIABLES ();
3293           return -2;
3294         }
3295     }
3296 #ifdef REGEX_MALLOC
3297   else
3298     {
3299       /* We must initialize all our variables to NULL, so that
3300          `FREE_VARIABLES' doesn't try to free them.  */
3301       regstart = regend = old_regstart = old_regend = best_regstart
3302         = best_regend = reg_dummy = NULL;
3303       reg_info = reg_info_dummy = (register_info_type *) NULL;
3304     }
3305 #endif /* REGEX_MALLOC */
3306 
3307   /* The starting position is bogus.  */
3308   if (pos < 0 || pos > size1 + size2)
3309     {
3310       FREE_VARIABLES ();
3311       return -1;
3312     }
3313 
3314   /* Initialize subexpression text positions to -1 to mark ones that no
3315      start_memory/stop_memory has been seen for. Also initialize the
3316      register information struct.  */
3317   for (mcnt = 1; mcnt < num_regs; mcnt++)
3318     {
3319       regstart[mcnt] = regend[mcnt]
3320         = old_regstart[mcnt] = old_regend[mcnt] = REG_UNSET_VALUE;
3321 
3322       REG_MATCH_NULL_STRING_P (reg_info[mcnt]) = MATCH_NULL_UNSET_VALUE;
3323       IS_ACTIVE (reg_info[mcnt]) = 0;
3324       MATCHED_SOMETHING (reg_info[mcnt]) = 0;
3325       EVER_MATCHED_SOMETHING (reg_info[mcnt]) = 0;
3326     }
3327 
3328   /* We move `string1' into `string2' if the latter's empty -- but not if
3329      `string1' is null.  */
3330   if (size2 == 0 && string1 != NULL)
3331     {
3332       string2 = string1;
3333       size2 = size1;
3334       string1 = 0;
3335       size1 = 0;
3336     }
3337   end1 = string1 + size1;
3338   end2 = string2 + size2;
3339 
3340   /* Compute where to stop matching, within the two strings.  */
3341   if (stop <= size1)
3342     {
3343       end_match_1 = string1 + stop;
3344       end_match_2 = string2;
3345     }
3346   else
3347     {
3348       end_match_1 = end1;
3349       end_match_2 = string2 + stop - size1;
3350     }
3351 
3352   /* `p' scans through the pattern as `d' scans through the data.
3353      `dend' is the end of the input string that `d' points within.  `d'
3354      is advanced into the following input string whenever necessary, but
3355      this happens before fetching; therefore, at the beginning of the
3356      loop, `d' can be pointing at the end of a string, but it cannot
3357      equal `string2'.  */
3358   if (size1 > 0 && pos <= size1)
3359     {
3360       d = string1 + pos;
3361       dend = end_match_1;
3362     }
3363   else
3364     {
3365       d = string2 + pos - size1;
3366       dend = end_match_2;
3367     }
3368 
3369   DEBUG_PRINT1 ("The compiled pattern is: ");
3370   DEBUG_PRINT_COMPILED_PATTERN (bufp, p, pend);
3371   DEBUG_PRINT1 ("The string to match is: `");
3372   DEBUG_PRINT_DOUBLE_STRING (d, string1, size1, string2, size2);
3373   DEBUG_PRINT1 ("'\n");
3374 
3375   /* This loops over pattern commands.  It exits by returning from the
3376      function if the match is complete, or it drops through if the match
3377      fails at this starting point in the input data.  */
3378   for (;;)
3379     {
3380       DEBUG_PRINT2 ("\n0x%x: ", p);
3381 
3382       if (p == pend)
3383 	{ /* End of pattern means we might have succeeded.  */
3384           DEBUG_PRINT1 ("end of pattern ... ");
3385 
3386 	  /* If we haven't matched the entire string, and we want the
3387              longest match, try backtracking.  */
3388           if (d != end_match_2)
3389 	    {
3390               DEBUG_PRINT1 ("backtracking.\n");
3391 
3392               if (!FAIL_STACK_EMPTY ())
3393                 { /* More failure points to try.  */
3394                   boolean same_str_p = (FIRST_STRING_P (match_end)
3395 	        	                == MATCHING_IN_FIRST_STRING);
3396 
3397                   /* If exceeds best match so far, save it.  */
3398                   if (!best_regs_set
3399                       || (same_str_p && d > match_end)
3400                       || (!same_str_p && !MATCHING_IN_FIRST_STRING))
3401                     {
3402                       best_regs_set = true;
3403                       match_end = d;
3404 
3405                       DEBUG_PRINT1 ("\nSAVING match as best so far.\n");
3406 
3407                       for (mcnt = 1; mcnt < num_regs; mcnt++)
3408                         {
3409                           best_regstart[mcnt] = regstart[mcnt];
3410                           best_regend[mcnt] = regend[mcnt];
3411                         }
3412                     }
3413                   goto fail;
3414                 }
3415 
3416               /* If no failure points, don't restore garbage.  */
3417               else if (best_regs_set)
3418                 {
3419   	        restore_best_regs:
3420                   /* Restore best match.  It may happen that `dend ==
3421                      end_match_1' while the restored d is in string2.
3422                      For example, the pattern `x.*y.*z' against the
3423                      strings `x-' and `y-z-', if the two strings are
3424                      not consecutive in memory.  */
3425                   DEBUG_PRINT1 ("Restoring best registers.\n");
3426 
3427                   d = match_end;
3428                   dend = ((d >= string1 && d <= end1)
3429 		           ? end_match_1 : end_match_2);
3430 
3431 		  for (mcnt = 1; mcnt < num_regs; mcnt++)
3432 		    {
3433 		      regstart[mcnt] = best_regstart[mcnt];
3434 		      regend[mcnt] = best_regend[mcnt];
3435 		    }
3436                 }
3437             } /* d != end_match_2 */
3438 
3439           DEBUG_PRINT1 ("Accepting match.\n");
3440 
3441           /* If caller wants register contents data back, do it.  */
3442           if (regs && !bufp->no_sub)
3443 	    {
3444               /* Have the register data arrays been allocated?  */
3445               if (bufp->regs_allocated == REGS_UNALLOCATED)
3446                 { /* No.  So allocate them with malloc.  We need one
3447                      extra element beyond `num_regs' for the `-1' marker
3448                      GNU code uses.  */
3449                   regs->num_regs = MAX (RE_NREGS, num_regs + 1);
3450                   regs->start = TALLOC (regs->num_regs, regoff_t);
3451                   regs->end = TALLOC (regs->num_regs, regoff_t);
3452                   if (regs->start == NULL || regs->end == NULL)
3453                     return -2;
3454                   bufp->regs_allocated = REGS_REALLOCATE;
3455                 }
3456               else if (bufp->regs_allocated == REGS_REALLOCATE)
3457                 { /* Yes.  If we need more elements than were already
3458                      allocated, reallocate them.  If we need fewer, just
3459                      leave it alone.  */
3460                   if (regs->num_regs < num_regs + 1)
3461                     {
3462                       regs->num_regs = num_regs + 1;
3463                       RETALLOC (regs->start, regs->num_regs, regoff_t);
3464                       RETALLOC (regs->end, regs->num_regs, regoff_t);
3465                       if (regs->start == NULL || regs->end == NULL)
3466                         return -2;
3467                     }
3468                 }
3469               else
3470                 assert (bufp->regs_allocated == REGS_FIXED);
3471 
3472               /* Convert the pointer data in `regstart' and `regend' to
3473                  indices.  Register zero has to be set differently,
3474                  since we haven't kept track of any info for it.  */
3475               if (regs->num_regs > 0)
3476                 {
3477                   regs->start[0] = pos;
3478                   regs->end[0] = (MATCHING_IN_FIRST_STRING ? d - string1
3479 			          : d - string2 + size1);
3480                 }
3481 
3482               /* Go through the first `min (num_regs, regs->num_regs)'
3483                  registers, since that is all we initialized.  */
3484 	      for (mcnt = 1; mcnt < MIN (num_regs, regs->num_regs); mcnt++)
3485 		{
3486                   if (REG_UNSET (regstart[mcnt]) || REG_UNSET (regend[mcnt]))
3487                     regs->start[mcnt] = regs->end[mcnt] = -1;
3488                   else
3489                     {
3490 		      regs->start[mcnt] = POINTER_TO_OFFSET (regstart[mcnt]);
3491                       regs->end[mcnt] = POINTER_TO_OFFSET (regend[mcnt]);
3492                     }
3493 		}
3494 
3495               /* If the regs structure we return has more elements than
3496                  were in the pattern, set the extra elements to -1.  If
3497                  we (re)allocated the registers, this is the case,
3498                  because we always allocate enough to have at least one
3499                  -1 at the end.  */
3500               for (mcnt = num_regs; mcnt < regs->num_regs; mcnt++)
3501                 regs->start[mcnt] = regs->end[mcnt] = -1;
3502 	    } /* regs && !bufp->no_sub */
3503 
3504           FREE_VARIABLES ();
3505           DEBUG_PRINT4 ("%u failure points pushed, %u popped (%u remain).\n",
3506                         nfailure_points_pushed, nfailure_points_popped,
3507                         nfailure_points_pushed - nfailure_points_popped);
3508           DEBUG_PRINT2 ("%u registers pushed.\n", num_regs_pushed);
3509 
3510           mcnt = d - pos - (MATCHING_IN_FIRST_STRING
3511 			    ? string1
3512 			    : string2 - size1);
3513 
3514           DEBUG_PRINT2 ("Returning %d from re_match_2.\n", mcnt);
3515 
3516           return mcnt;
3517         }
3518 
3519       /* Otherwise match next pattern command.  */
3520 #ifdef SWITCH_ENUM_BUG
3521       switch ((int) ((re_opcode_t) *p++))
3522 #else
3523       switch ((re_opcode_t) *p++)
3524 #endif
3525 	{
3526         /* Ignore these.  Used to ignore the n of succeed_n's which
3527            currently have n == 0.  */
3528         case no_op:
3529           DEBUG_PRINT1 ("EXECUTING no_op.\n");
3530           break;
3531 
3532 
3533         /* Match the next n pattern characters exactly.  The following
3534            byte in the pattern defines n, and the n bytes after that
3535            are the characters to match.  */
3536 	case exactn:
3537 	  mcnt = *p++;
3538           DEBUG_PRINT2 ("EXECUTING exactn %d.\n", mcnt);
3539 
3540           /* This is written out as an if-else so we don't waste time
3541              testing `translate' inside the loop.  */
3542           if (translate)
3543 	    {
3544 	      do
3545 		{
3546 		  PREFETCH ();
3547 		  if (translate[(unsigned char) *d++] != (char) *p++)
3548                     goto fail;
3549 		}
3550 	      while (--mcnt);
3551 	    }
3552 	  else
3553 	    {
3554 	      do
3555 		{
3556 		  PREFETCH ();
3557 		  if (*d++ != (char) *p++) goto fail;
3558 		}
3559 	      while (--mcnt);
3560 	    }
3561 	  SET_REGS_MATCHED ();
3562           break;
3563 
3564 
3565         /* Match any character except possibly a newline or a null.  */
3566 	case anychar:
3567           DEBUG_PRINT1 ("EXECUTING anychar.\n");
3568 
3569           PREFETCH ();
3570 
3571           if ((!(bufp->syntax & RE_DOT_NEWLINE) && TRANSLATE (*d) == '\n')
3572               || (bufp->syntax & RE_DOT_NOT_NULL && TRANSLATE (*d) == '\000'))
3573 	    goto fail;
3574 
3575           SET_REGS_MATCHED ();
3576           DEBUG_PRINT2 ("  Matched `%d'.\n", *d);
3577           d++;
3578 	  break;
3579 
3580 
3581 	case charset:
3582 	case charset_not:
3583 	  {
3584 	    register unsigned char c;
3585 	    boolean not = (re_opcode_t) *(p - 1) == charset_not;
3586 
3587             DEBUG_PRINT2 ("EXECUTING charset%s.\n", not ? "_not" : "");
3588 
3589 	    PREFETCH ();
3590 	    c = TRANSLATE (*d); /* The character to match.  */
3591 
3592             /* Cast to `unsigned' instead of `unsigned char' in case the
3593                bit list is a full 32 bytes long.  */
3594 	    if (c < (unsigned) (*p * BYTEWIDTH)
3595 		&& p[1 + c / BYTEWIDTH] & (1 << (c % BYTEWIDTH)))
3596 	      not = !not;
3597 
3598 	    p += 1 + *p;
3599 
3600 	    if (!not) goto fail;
3601 
3602 	    SET_REGS_MATCHED ();
3603             d++;
3604 	    break;
3605 	  }
3606 
3607 
3608         /* The beginning of a group is represented by start_memory.
3609            The arguments are the register number in the next byte, and the
3610            number of groups inner to this one in the next.  The text
3611            matched within the group is recorded (in the internal
3612            registers data structure) under the register number.  */
3613         case start_memory:
3614 	  DEBUG_PRINT3 ("EXECUTING start_memory %d (%d):\n", *p, p[1]);
3615 
3616           /* Find out if this group can match the empty string.  */
3617 	  p1 = p;		/* To send to group_match_null_string_p.  */
3618 
3619           if (REG_MATCH_NULL_STRING_P (reg_info[*p]) == MATCH_NULL_UNSET_VALUE)
3620             REG_MATCH_NULL_STRING_P (reg_info[*p])
3621               = group_match_null_string_p (&p1, pend, reg_info);
3622 
3623           /* Save the position in the string where we were the last time
3624              we were at this open-group operator in case the group is
3625              operated upon by a repetition operator, e.g., with `(a*)*b'
3626              against `ab'; then we want to ignore where we are now in
3627              the string in case this attempt to match fails.  */
3628           old_regstart[*p] = REG_MATCH_NULL_STRING_P (reg_info[*p])
3629                              ? REG_UNSET (regstart[*p]) ? d : regstart[*p]
3630                              : regstart[*p];
3631 	  DEBUG_PRINT2 ("  old_regstart: %d\n",
3632 			 POINTER_TO_OFFSET (old_regstart[*p]));
3633 
3634           regstart[*p] = d;
3635 	  DEBUG_PRINT2 ("  regstart: %d\n", POINTER_TO_OFFSET (regstart[*p]));
3636 
3637           IS_ACTIVE (reg_info[*p]) = 1;
3638           MATCHED_SOMETHING (reg_info[*p]) = 0;
3639 
3640           /* This is the new highest active register.  */
3641           highest_active_reg = *p;
3642 
3643           /* If nothing was active before, this is the new lowest active
3644              register.  */
3645           if (lowest_active_reg == NO_LOWEST_ACTIVE_REG)
3646             lowest_active_reg = *p;
3647 
3648           /* Move past the register number and inner group count.  */
3649           p += 2;
3650           break;
3651 
3652 
3653         /* The stop_memory opcode represents the end of a group.  Its
3654            arguments are the same as start_memory's: the register
3655            number, and the number of inner groups.  */
3656 	case stop_memory:
3657 	  DEBUG_PRINT3 ("EXECUTING stop_memory %d (%d):\n", *p, p[1]);
3658 
3659           /* We need to save the string position the last time we were at
3660              this close-group operator in case the group is operated
3661              upon by a repetition operator, e.g., with `((a*)*(b*)*)*'
3662              against `aba'; then we want to ignore where we are now in
3663              the string in case this attempt to match fails.  */
3664           old_regend[*p] = REG_MATCH_NULL_STRING_P (reg_info[*p])
3665                            ? REG_UNSET (regend[*p]) ? d : regend[*p]
3666 			   : regend[*p];
3667 	  DEBUG_PRINT2 ("      old_regend: %d\n",
3668 			 POINTER_TO_OFFSET (old_regend[*p]));
3669 
3670           regend[*p] = d;
3671 	  DEBUG_PRINT2 ("      regend: %d\n", POINTER_TO_OFFSET (regend[*p]));
3672 
3673           /* This register isn't active anymore.  */
3674           IS_ACTIVE (reg_info[*p]) = 0;
3675 
3676           /* If this was the only register active, nothing is active
3677              anymore.  */
3678           if (lowest_active_reg == highest_active_reg)
3679             {
3680               lowest_active_reg = NO_LOWEST_ACTIVE_REG;
3681               highest_active_reg = NO_HIGHEST_ACTIVE_REG;
3682             }
3683           else
3684             { /* We must scan for the new highest active register, since
3685                  it isn't necessarily one less than now: consider
3686                  (a(b)c(d(e)f)g).  When group 3 ends, after the f), the
3687                  new highest active register is 1.  */
3688               unsigned char r = *p - 1;
3689               while (r > 0 && !IS_ACTIVE (reg_info[r]))
3690                 r--;
3691 
3692               /* If we end up at register zero, that means that we saved
3693                  the registers as the result of an `on_failure_jump', not
3694                  a `start_memory', and we jumped to past the innermost
3695                  `stop_memory'.  For example, in ((.)*) we save
3696                  registers 1 and 2 as a result of the *, but when we pop
3697                  back to the second ), we are at the stop_memory 1.
3698                  Thus, nothing is active.  */
3699 	      if (r == 0)
3700                 {
3701                   lowest_active_reg = NO_LOWEST_ACTIVE_REG;
3702                   highest_active_reg = NO_HIGHEST_ACTIVE_REG;
3703                 }
3704               else
3705                 highest_active_reg = r;
3706             }
3707 
3708           /* If just failed to match something this time around with a
3709              group that's operated on by a repetition operator, try to
3710              force exit from the ``loop'', and restore the register
3711              information for this group that we had before trying this
3712              last match.  */
3713           if ((!MATCHED_SOMETHING (reg_info[*p])
3714                || (re_opcode_t) p[-3] == start_memory)
3715 	      && (p + 2) < pend)
3716             {
3717               boolean is_a_jump_n = false;
3718 
3719               p1 = p + 2;
3720               mcnt = 0;
3721               switch ((re_opcode_t) *p1++)
3722                 {
3723                   case jump_n:
3724 		    is_a_jump_n = true;
3725                   case pop_failure_jump:
3726 		  case maybe_pop_jump:
3727 		  case jump:
3728 		  case dummy_failure_jump:
3729                     EXTRACT_NUMBER_AND_INCR (mcnt, p1);
3730 		    if (is_a_jump_n)
3731 		      p1 += 2;
3732                     break;
3733 
3734                   default:
3735                     /* do nothing */ ;
3736                 }
3737 	      p1 += mcnt;
3738 
3739               /* If the next operation is a jump backwards in the pattern
3740 	         to an on_failure_jump right before the start_memory
3741                  corresponding to this stop_memory, exit from the loop
3742                  by forcing a failure after pushing on the stack the
3743                  on_failure_jump's jump in the pattern, and d.  */
3744               if (mcnt < 0 && (re_opcode_t) *p1 == on_failure_jump
3745                   && (re_opcode_t) p1[3] == start_memory && p1[4] == *p)
3746 		{
3747                   /* If this group ever matched anything, then restore
3748                      what its registers were before trying this last
3749                      failed match, e.g., with `(a*)*b' against `ab' for
3750                      regstart[1], and, e.g., with `((a*)*(b*)*)*'
3751                      against `aba' for regend[3].
3752 
3753                      Also restore the registers for inner groups for,
3754                      e.g., `((a*)(b*))*' against `aba' (register 3 would
3755                      otherwise get trashed).  */
3756 
3757                   if (EVER_MATCHED_SOMETHING (reg_info[*p]))
3758 		    {
3759 		      unsigned r;
3760 
3761                       EVER_MATCHED_SOMETHING (reg_info[*p]) = 0;
3762 
3763 		      /* Restore this and inner groups' (if any) registers.  */
3764                       for (r = *p; r < *p + *(p + 1); r++)
3765                         {
3766                           regstart[r] = old_regstart[r];
3767 
3768                           /* xx why this test?  */
3769                           if ((int) old_regend[r] >= (int) regstart[r])
3770                             regend[r] = old_regend[r];
3771                         }
3772                     }
3773 		  p1++;
3774                   EXTRACT_NUMBER_AND_INCR (mcnt, p1);
3775                   PUSH_FAILURE_POINT (p1 + mcnt, d, -2);
3776 
3777                   goto fail;
3778                 }
3779             }
3780 
3781           /* Move past the register number and the inner group count.  */
3782           p += 2;
3783           break;
3784 
3785 
3786 	/* \<digit> has been turned into a `duplicate' command which is
3787            followed by the numeric value of <digit> as the register number.  */
3788         case duplicate:
3789 	  {
3790 	    register const char *d2, *dend2;
3791 	    int regno = *p++;   /* Get which register to match against.  */
3792 	    DEBUG_PRINT2 ("EXECUTING duplicate %d.\n", regno);
3793 
3794 	    /* Can't back reference a group which we've never matched.  */
3795             if (REG_UNSET (regstart[regno]) || REG_UNSET (regend[regno]))
3796               goto fail;
3797 
3798             /* Where in input to try to start matching.  */
3799             d2 = regstart[regno];
3800 
3801             /* Where to stop matching; if both the place to start and
3802                the place to stop matching are in the same string, then
3803                set to the place to stop, otherwise, for now have to use
3804                the end of the first string.  */
3805 
3806             dend2 = ((FIRST_STRING_P (regstart[regno])
3807 		      == FIRST_STRING_P (regend[regno]))
3808 		     ? regend[regno] : end_match_1);
3809 	    for (;;)
3810 	      {
3811 		/* If necessary, advance to next segment in register
3812                    contents.  */
3813 		while (d2 == dend2)
3814 		  {
3815 		    if (dend2 == end_match_2) break;
3816 		    if (dend2 == regend[regno]) break;
3817 
3818                     /* End of string1 => advance to string2. */
3819                     d2 = string2;
3820                     dend2 = regend[regno];
3821 		  }
3822 		/* At end of register contents => success */
3823 		if (d2 == dend2) break;
3824 
3825 		/* If necessary, advance to next segment in data.  */
3826 		PREFETCH ();
3827 
3828 		/* How many characters left in this segment to match.  */
3829 		mcnt = dend - d;
3830 
3831 		/* Want how many consecutive characters we can match in
3832                    one shot, so, if necessary, adjust the count.  */
3833                 if (mcnt > dend2 - d2)
3834 		  mcnt = dend2 - d2;
3835 
3836 		/* Compare that many; failure if mismatch, else move
3837                    past them.  */
3838 		if (translate
3839                     ? bcmp_translate (d, d2, mcnt, translate)
3840                     : bcmp (d, d2, mcnt))
3841 		  goto fail;
3842 		d += mcnt, d2 += mcnt;
3843 	      }
3844 	  }
3845 	  break;
3846 
3847 
3848         /* begline matches the empty string at the beginning of the string
3849            (unless `not_bol' is set in `bufp'), and, if
3850            `newline_anchor' is set, after newlines.  */
3851 	case begline:
3852           DEBUG_PRINT1 ("EXECUTING begline.\n");
3853 
3854           if (AT_STRINGS_BEG (d))
3855             {
3856               if (!bufp->not_bol) break;
3857             }
3858           else if (d[-1] == '\n' && bufp->newline_anchor)
3859             {
3860               break;
3861             }
3862           /* In all other cases, we fail.  */
3863           goto fail;
3864 
3865 
3866         /* endline is the dual of begline.  */
3867 	case endline:
3868           DEBUG_PRINT1 ("EXECUTING endline.\n");
3869 
3870           if (AT_STRINGS_END (d))
3871             {
3872               if (!bufp->not_eol) break;
3873             }
3874 
3875           /* We have to ``prefetch'' the next character.  */
3876           else if ((d == end1 ? *string2 : *d) == '\n'
3877                    && bufp->newline_anchor)
3878             {
3879               break;
3880             }
3881           goto fail;
3882 
3883 
3884 	/* Match at the very beginning of the data.  */
3885         case begbuf:
3886           DEBUG_PRINT1 ("EXECUTING begbuf.\n");
3887           if (AT_STRINGS_BEG (d))
3888             break;
3889           goto fail;
3890 
3891 
3892 	/* Match at the very end of the data.  */
3893         case endbuf:
3894           DEBUG_PRINT1 ("EXECUTING endbuf.\n");
3895 	  if (AT_STRINGS_END (d))
3896 	    break;
3897           goto fail;
3898 
3899 
3900         /* on_failure_keep_string_jump is used to optimize `.*\n'.  It
3901            pushes NULL as the value for the string on the stack.  Then
3902            `pop_failure_point' will keep the current value for the
3903            string, instead of restoring it.  To see why, consider
3904            matching `foo\nbar' against `.*\n'.  The .* matches the foo;
3905            then the . fails against the \n.  But the next thing we want
3906            to do is match the \n against the \n; if we restored the
3907            string value, we would be back at the foo.
3908 
3909            Because this is used only in specific cases, we don't need to
3910            check all the things that `on_failure_jump' does, to make
3911            sure the right things get saved on the stack.  Hence we don't
3912            share its code.  The only reason to push anything on the
3913            stack at all is that otherwise we would have to change
3914            `anychar's code to do something besides goto fail in this
3915            case; that seems worse than this.  */
3916         case on_failure_keep_string_jump:
3917           DEBUG_PRINT1 ("EXECUTING on_failure_keep_string_jump");
3918 
3919           EXTRACT_NUMBER_AND_INCR (mcnt, p);
3920           DEBUG_PRINT3 (" %d (to 0x%x):\n", mcnt, p + mcnt);
3921 
3922           PUSH_FAILURE_POINT (p + mcnt, NULL, -2);
3923           break;
3924 
3925 
3926 	/* Uses of on_failure_jump:
3927 
3928            Each alternative starts with an on_failure_jump that points
3929            to the beginning of the next alternative.  Each alternative
3930            except the last ends with a jump that in effect jumps past
3931            the rest of the alternatives.  (They really jump to the
3932            ending jump of the following alternative, because tensioning
3933            these jumps is a hassle.)
3934 
3935            Repeats start with an on_failure_jump that points past both
3936            the repetition text and either the following jump or
3937            pop_failure_jump back to this on_failure_jump.  */
3938 	case on_failure_jump:
3939         on_failure:
3940           DEBUG_PRINT1 ("EXECUTING on_failure_jump");
3941 
3942           EXTRACT_NUMBER_AND_INCR (mcnt, p);
3943           DEBUG_PRINT3 (" %d (to 0x%x)", mcnt, p + mcnt);
3944 
3945           /* If this on_failure_jump comes right before a group (i.e.,
3946              the original * applied to a group), save the information
3947              for that group and all inner ones, so that if we fail back
3948              to this point, the group's information will be correct.
3949              For example, in \(a*\)*\1, we need the preceding group,
3950              and in \(\(a*\)b*\)\2, we need the inner group.  */
3951 
3952           /* We can't use `p' to check ahead because we push
3953              a failure point to `p + mcnt' after we do this.  */
3954           p1 = p;
3955 
3956           /* We need to skip no_op's before we look for the
3957              start_memory in case this on_failure_jump is happening as
3958              the result of a completed succeed_n, as in \(a\)\{1,3\}b\1
3959              against aba.  */
3960           while (p1 < pend && (re_opcode_t) *p1 == no_op)
3961             p1++;
3962 
3963           if (p1 < pend && (re_opcode_t) *p1 == start_memory)
3964             {
3965               /* We have a new highest active register now.  This will
3966                  get reset at the start_memory we are about to get to,
3967                  but we will have saved all the registers relevant to
3968                  this repetition op, as described above.  */
3969               highest_active_reg = *(p1 + 1) + *(p1 + 2);
3970               if (lowest_active_reg == NO_LOWEST_ACTIVE_REG)
3971                 lowest_active_reg = *(p1 + 1);
3972             }
3973 
3974           DEBUG_PRINT1 (":\n");
3975           PUSH_FAILURE_POINT (p + mcnt, d, -2);
3976           break;
3977 
3978 
3979         /* A smart repeat ends with `maybe_pop_jump'.
3980 	   We change it to either `pop_failure_jump' or `jump'.  */
3981         case maybe_pop_jump:
3982           EXTRACT_NUMBER_AND_INCR (mcnt, p);
3983           DEBUG_PRINT2 ("EXECUTING maybe_pop_jump %d.\n", mcnt);
3984           {
3985 	    register unsigned char *p2 = p;
3986 
3987             /* Compare the beginning of the repeat with what in the
3988                pattern follows its end. If we can establish that there
3989                is nothing that they would both match, i.e., that we
3990                would have to backtrack because of (as in, e.g., `a*a')
3991                then we can change to pop_failure_jump, because we'll
3992                never have to backtrack.
3993 
3994                This is not true in the case of alternatives: in
3995                `(a|ab)*' we do need to backtrack to the `ab' alternative
3996                (e.g., if the string was `ab').  But instead of trying to
3997                detect that here, the alternative has put on a dummy
3998                failure point which is what we will end up popping.  */
3999 
4000 	    /* Skip over open/close-group commands.  */
4001 	    while (p2 + 2 < pend
4002 		   && ((re_opcode_t) *p2 == stop_memory
4003 		       || (re_opcode_t) *p2 == start_memory))
4004 	      p2 += 3;			/* Skip over args, too.  */
4005 
4006             /* If we're at the end of the pattern, we can change.  */
4007             if (p2 == pend)
4008 	      {
4009 		/* Consider what happens when matching ":\(.*\)"
4010 		   against ":/".  I don't really understand this code
4011 		   yet.  */
4012   	        p[-3] = (unsigned char) pop_failure_jump;
4013                 DEBUG_PRINT1
4014                   ("  End of pattern: change to `pop_failure_jump'.\n");
4015               }
4016 
4017             else if ((re_opcode_t) *p2 == exactn
4018 		     || (bufp->newline_anchor && (re_opcode_t) *p2 == endline))
4019 	      {
4020 		register unsigned char c
4021                   = *p2 == (unsigned char) endline ? '\n' : p2[2];
4022 		p1 = p + mcnt;
4023 
4024                 /* p1[0] ... p1[2] are the `on_failure_jump' corresponding
4025                    to the `maybe_finalize_jump' of this case.  Examine what
4026                    follows.  */
4027                 if ((re_opcode_t) p1[3] == exactn && p1[5] != c)
4028                   {
4029   		    p[-3] = (unsigned char) pop_failure_jump;
4030                     DEBUG_PRINT3 ("  %c != %c => pop_failure_jump.\n",
4031                                   c, p1[5]);
4032                   }
4033 
4034 		else if ((re_opcode_t) p1[3] == charset
4035 			 || (re_opcode_t) p1[3] == charset_not)
4036 		  {
4037 		    int not = (re_opcode_t) p1[3] == charset_not;
4038 
4039 		    if (c < (unsigned char) (p1[4] * BYTEWIDTH)
4040 			&& p1[5 + c / BYTEWIDTH] & (1 << (c % BYTEWIDTH)))
4041 		      not = !not;
4042 
4043                     /* `not' is equal to 1 if c would match, which means
4044                         that we can't change to pop_failure_jump.  */
4045 		    if (!not)
4046                       {
4047   		        p[-3] = (unsigned char) pop_failure_jump;
4048                         DEBUG_PRINT1 ("  No match => pop_failure_jump.\n");
4049                       }
4050 		  }
4051 	      }
4052 	  }
4053 	  p -= 2;		/* Point at relative address again.  */
4054 	  if ((re_opcode_t) p[-1] != pop_failure_jump)
4055 	    {
4056 	      p[-1] = (unsigned char) jump;
4057               DEBUG_PRINT1 ("  Match => jump.\n");
4058 	      goto unconditional_jump;
4059 	    }
4060         /* Note fall through.  */
4061 
4062 
4063 	/* The end of a simple repeat has a pop_failure_jump back to
4064            its matching on_failure_jump, where the latter will push a
4065            failure point.  The pop_failure_jump takes off failure
4066            points put on by this pop_failure_jump's matching
4067            on_failure_jump; we got through the pattern to here from the
4068            matching on_failure_jump, so didn't fail.  */
4069         case pop_failure_jump:
4070           {
4071             /* We need to pass separate storage for the lowest and
4072                highest registers, even though we don't care about the
4073                actual values.  Otherwise, we will restore only one
4074                register from the stack, since lowest will == highest in
4075                `pop_failure_point'.  */
4076             unsigned dummy_low_reg, dummy_high_reg;
4077             unsigned char *pdummy;
4078             const char *sdummy;
4079 
4080             DEBUG_PRINT1 ("EXECUTING pop_failure_jump.\n");
4081             POP_FAILURE_POINT (sdummy, pdummy,
4082                                dummy_low_reg, dummy_high_reg,
4083                                reg_dummy, reg_dummy, reg_info_dummy);
4084           }
4085           /* Note fall through.  */
4086 
4087 
4088         /* Unconditionally jump (without popping any failure points).  */
4089         case jump:
4090 	unconditional_jump:
4091 	  EXTRACT_NUMBER_AND_INCR (mcnt, p);	/* Get the amount to jump.  */
4092           DEBUG_PRINT2 ("EXECUTING jump %d ", mcnt);
4093 	  p += mcnt;				/* Do the jump.  */
4094           DEBUG_PRINT2 ("(to 0x%x).\n", p);
4095 	  break;
4096 
4097 
4098         /* We need this opcode so we can detect where alternatives end
4099            in `group_match_null_string_p' et al.  */
4100         case jump_past_alt:
4101           DEBUG_PRINT1 ("EXECUTING jump_past_alt.\n");
4102           goto unconditional_jump;
4103 
4104 
4105         /* Normally, the on_failure_jump pushes a failure point, which
4106            then gets popped at pop_failure_jump.  We will end up at
4107            pop_failure_jump, also, and with a pattern of, say, `a+', we
4108            are skipping over the on_failure_jump, so we have to push
4109            something meaningless for pop_failure_jump to pop.  */
4110         case dummy_failure_jump:
4111           DEBUG_PRINT1 ("EXECUTING dummy_failure_jump.\n");
4112           /* It doesn't matter what we push for the string here.  What
4113              the code at `fail' tests is the value for the pattern.  */
4114           PUSH_FAILURE_POINT (0, 0, -2);
4115           goto unconditional_jump;
4116 
4117 
4118         /* At the end of an alternative, we need to push a dummy failure
4119            point in case we are followed by a `pop_failure_jump', because
4120            we don't want the failure point for the alternative to be
4121            popped.  For example, matching `(a|ab)*' against `aab'
4122            requires that we match the `ab' alternative.  */
4123         case push_dummy_failure:
4124           DEBUG_PRINT1 ("EXECUTING push_dummy_failure.\n");
4125           /* See comments just above at `dummy_failure_jump' about the
4126              two zeroes.  */
4127           PUSH_FAILURE_POINT (0, 0, -2);
4128           break;
4129 
4130         /* Have to succeed matching what follows at least n times.
4131            After that, handle like `on_failure_jump'.  */
4132         case succeed_n:
4133           EXTRACT_NUMBER (mcnt, p + 2);
4134           DEBUG_PRINT2 ("EXECUTING succeed_n %d.\n", mcnt);
4135 
4136           assert (mcnt >= 0);
4137           /* Originally, this is how many times we HAVE to succeed.  */
4138           if (mcnt > 0)
4139             {
4140                mcnt--;
4141 	       p += 2;
4142                STORE_NUMBER_AND_INCR (p, mcnt);
4143                DEBUG_PRINT3 ("  Setting 0x%x to %d.\n", p, mcnt);
4144             }
4145 	  else if (mcnt == 0)
4146             {
4147               DEBUG_PRINT2 ("  Setting two bytes from 0x%x to no_op.\n", p+2);
4148 	      p[2] = (unsigned char) no_op;
4149               p[3] = (unsigned char) no_op;
4150               goto on_failure;
4151             }
4152           break;
4153 
4154         case jump_n:
4155           EXTRACT_NUMBER (mcnt, p + 2);
4156           DEBUG_PRINT2 ("EXECUTING jump_n %d.\n", mcnt);
4157 
4158           /* Originally, this is how many times we CAN jump.  */
4159           if (mcnt)
4160             {
4161                mcnt--;
4162                STORE_NUMBER (p + 2, mcnt);
4163 	       goto unconditional_jump;
4164             }
4165           /* If don't have to jump any more, skip over the rest of command.  */
4166 	  else
4167 	    p += 4;
4168           break;
4169 
4170 	case set_number_at:
4171 	  {
4172             DEBUG_PRINT1 ("EXECUTING set_number_at.\n");
4173 
4174             EXTRACT_NUMBER_AND_INCR (mcnt, p);
4175             p1 = p + mcnt;
4176             EXTRACT_NUMBER_AND_INCR (mcnt, p);
4177             DEBUG_PRINT3 ("  Setting 0x%x to %d.\n", p1, mcnt);
4178 	    STORE_NUMBER (p1, mcnt);
4179             break;
4180           }
4181 
4182         case wordbound:
4183           DEBUG_PRINT1 ("EXECUTING wordbound.\n");
4184           if (AT_WORD_BOUNDARY (d))
4185 	    break;
4186           goto fail;
4187 
4188 	case notwordbound:
4189           DEBUG_PRINT1 ("EXECUTING notwordbound.\n");
4190 	  if (AT_WORD_BOUNDARY (d))
4191 	    goto fail;
4192           break;
4193 
4194 	case wordbeg:
4195           DEBUG_PRINT1 ("EXECUTING wordbeg.\n");
4196 	  if (WORDCHAR_P (d) && (AT_STRINGS_BEG (d) || !WORDCHAR_P (d - 1)))
4197 	    break;
4198           goto fail;
4199 
4200 	case wordend:
4201           DEBUG_PRINT1 ("EXECUTING wordend.\n");
4202 	  if (!AT_STRINGS_BEG (d) && WORDCHAR_P (d - 1)
4203               && (!WORDCHAR_P (d) || AT_STRINGS_END (d)))
4204 	    break;
4205           goto fail;
4206 
4207 #ifdef emacs
4208 #ifdef emacs19
4209   	case before_dot:
4210           DEBUG_PRINT1 ("EXECUTING before_dot.\n");
4211  	  if (PTR_CHAR_POS ((unsigned char *) d) >= point)
4212   	    goto fail;
4213   	  break;
4214 
4215   	case at_dot:
4216           DEBUG_PRINT1 ("EXECUTING at_dot.\n");
4217  	  if (PTR_CHAR_POS ((unsigned char *) d) != point)
4218   	    goto fail;
4219   	  break;
4220 
4221   	case after_dot:
4222           DEBUG_PRINT1 ("EXECUTING after_dot.\n");
4223           if (PTR_CHAR_POS ((unsigned char *) d) <= point)
4224   	    goto fail;
4225   	  break;
4226 #else /* not emacs19 */
4227 	case at_dot:
4228           DEBUG_PRINT1 ("EXECUTING at_dot.\n");
4229 	  if (PTR_CHAR_POS ((unsigned char *) d) + 1 != point)
4230 	    goto fail;
4231 	  break;
4232 #endif /* not emacs19 */
4233 
4234 	case syntaxspec:
4235           DEBUG_PRINT2 ("EXECUTING syntaxspec %d.\n", mcnt);
4236 	  mcnt = *p++;
4237 	  goto matchsyntax;
4238 
4239         case wordchar:
4240           DEBUG_PRINT1 ("EXECUTING Emacs wordchar.\n");
4241 	  mcnt = (int) Sword;
4242         matchsyntax:
4243 	  PREFETCH ();
4244 	  if (SYNTAX (*d++) != (enum syntaxcode) mcnt)
4245             goto fail;
4246           SET_REGS_MATCHED ();
4247 	  break;
4248 
4249 	case notsyntaxspec:
4250           DEBUG_PRINT2 ("EXECUTING notsyntaxspec %d.\n", mcnt);
4251 	  mcnt = *p++;
4252 	  goto matchnotsyntax;
4253 
4254         case notwordchar:
4255           DEBUG_PRINT1 ("EXECUTING Emacs notwordchar.\n");
4256 	  mcnt = (int) Sword;
4257         matchnotsyntax:
4258 	  PREFETCH ();
4259 	  if (SYNTAX (*d++) == (enum syntaxcode) mcnt)
4260             goto fail;
4261 	  SET_REGS_MATCHED ();
4262           break;
4263 
4264 #else /* not emacs */
4265 	case wordchar:
4266           DEBUG_PRINT1 ("EXECUTING non-Emacs wordchar.\n");
4267 	  PREFETCH ();
4268           if (!WORDCHAR_P (d))
4269             goto fail;
4270 	  SET_REGS_MATCHED ();
4271           d++;
4272 	  break;
4273 
4274 	case notwordchar:
4275           DEBUG_PRINT1 ("EXECUTING non-Emacs notwordchar.\n");
4276 	  PREFETCH ();
4277 	  if (WORDCHAR_P (d))
4278             goto fail;
4279           SET_REGS_MATCHED ();
4280           d++;
4281 	  break;
4282 #endif /* not emacs */
4283 
4284         default:
4285           abort ();
4286 	}
4287       continue;  /* Successfully executed one pattern command; keep going.  */
4288 
4289 
4290     /* We goto here if a matching operation fails. */
4291     fail:
4292       if (!FAIL_STACK_EMPTY ())
4293 	{ /* A restart point is known.  Restore to that state.  */
4294           DEBUG_PRINT1 ("\nFAIL:\n");
4295           POP_FAILURE_POINT (d, p,
4296                              lowest_active_reg, highest_active_reg,
4297                              regstart, regend, reg_info);
4298 
4299           /* If this failure point is a dummy, try the next one.  */
4300           if (!p)
4301 	    goto fail;
4302 
4303           /* If we failed to the end of the pattern, don't examine *p.  */
4304 	  assert (p <= pend);
4305           if (p < pend)
4306             {
4307               boolean is_a_jump_n = false;
4308 
4309               /* If failed to a backwards jump that's part of a repetition
4310                  loop, need to pop this failure point and use the next one.  */
4311               switch ((re_opcode_t) *p)
4312                 {
4313                 case jump_n:
4314                   is_a_jump_n = true;
4315                 case maybe_pop_jump:
4316                 case pop_failure_jump:
4317                 case jump:
4318                   p1 = p + 1;
4319                   EXTRACT_NUMBER_AND_INCR (mcnt, p1);
4320                   p1 += mcnt;
4321 
4322                   if ((is_a_jump_n && (re_opcode_t) *p1 == succeed_n)
4323                       || (!is_a_jump_n
4324                           && (re_opcode_t) *p1 == on_failure_jump))
4325                     goto fail;
4326                   break;
4327                 default:
4328                   /* do nothing */ ;
4329                 }
4330             }
4331 
4332           if (d >= string1 && d <= end1)
4333 	    dend = end_match_1;
4334         }
4335       else
4336         break;   /* Matching at this starting point really fails.  */
4337     } /* for (;;) */
4338 
4339   if (best_regs_set)
4340     goto restore_best_regs;
4341 
4342   FREE_VARIABLES ();
4343 
4344   return -1;         			/* Failure to match.  */
4345 } /* re_match_2 */
4346 
4347 /* Subroutine definitions for re_match_2.  */
4348 
4349 
4350 /* We are passed P pointing to a register number after a start_memory.
4351 
4352    Return true if the pattern up to the corresponding stop_memory can
4353    match the empty string, and false otherwise.
4354 
4355    If we find the matching stop_memory, sets P to point to one past its number.
4356    Otherwise, sets P to an undefined byte less than or equal to END.
4357 
4358    We don't handle duplicates properly (yet).  */
4359 
4360 static boolean
4361 group_match_null_string_p (p, end, reg_info)
4362     unsigned char **p, *end;
4363     register_info_type *reg_info;
4364 {
4365   int mcnt;
4366   /* Point to after the args to the start_memory.  */
4367   unsigned char *p1 = *p + 2;
4368 
4369   while (p1 < end)
4370     {
4371       /* Skip over opcodes that can match nothing, and return true or
4372 	 false, as appropriate, when we get to one that can't, or to the
4373          matching stop_memory.  */
4374 
4375       switch ((re_opcode_t) *p1)
4376         {
4377         /* Could be either a loop or a series of alternatives.  */
4378         case on_failure_jump:
4379           p1++;
4380           EXTRACT_NUMBER_AND_INCR (mcnt, p1);
4381 
4382           /* If the next operation is not a jump backwards in the
4383 	     pattern.  */
4384 
4385 	  if (mcnt >= 0)
4386 	    {
4387               /* Go through the on_failure_jumps of the alternatives,
4388                  seeing if any of the alternatives cannot match nothing.
4389                  The last alternative starts with only a jump,
4390                  whereas the rest start with on_failure_jump and end
4391                  with a jump, e.g., here is the pattern for `a|b|c':
4392 
4393                  /on_failure_jump/0/6/exactn/1/a/jump_past_alt/0/6
4394                  /on_failure_jump/0/6/exactn/1/b/jump_past_alt/0/3
4395                  /exactn/1/c
4396 
4397                  So, we have to first go through the first (n-1)
4398                  alternatives and then deal with the last one separately.  */
4399 
4400 
4401               /* Deal with the first (n-1) alternatives, which start
4402                  with an on_failure_jump (see above) that jumps to right
4403                  past a jump_past_alt.  */
4404 
4405               while ((re_opcode_t) p1[mcnt-3] == jump_past_alt)
4406                 {
4407                   /* `mcnt' holds how many bytes long the alternative
4408                      is, including the ending `jump_past_alt' and
4409                      its number.  */
4410 
4411                   if (!alt_match_null_string_p (p1, p1 + mcnt - 3,
4412 				                      reg_info))
4413                     return false;
4414 
4415                   /* Move to right after this alternative, including the
4416 		     jump_past_alt.  */
4417                   p1 += mcnt;
4418 
4419                   /* Break if it's the beginning of an n-th alternative
4420                      that doesn't begin with an on_failure_jump.  */
4421                   if ((re_opcode_t) *p1 != on_failure_jump)
4422                     break;
4423 
4424 		  /* Still have to check that it's not an n-th
4425 		     alternative that starts with an on_failure_jump.  */
4426 		  p1++;
4427                   EXTRACT_NUMBER_AND_INCR (mcnt, p1);
4428                   if ((re_opcode_t) p1[mcnt-3] != jump_past_alt)
4429                     {
4430 		      /* Get to the beginning of the n-th alternative.  */
4431                       p1 -= 3;
4432                       break;
4433                     }
4434                 }
4435 
4436               /* Deal with the last alternative: go back and get number
4437                  of the `jump_past_alt' just before it.  `mcnt' contains
4438                  the length of the alternative.  */
4439               EXTRACT_NUMBER (mcnt, p1 - 2);
4440 
4441               if (!alt_match_null_string_p (p1, p1 + mcnt, reg_info))
4442                 return false;
4443 
4444               p1 += mcnt;	/* Get past the n-th alternative.  */
4445             } /* if mcnt > 0 */
4446           break;
4447 
4448 
4449         case stop_memory:
4450 	  assert (p1[1] == **p);
4451           *p = p1 + 2;
4452           return true;
4453 
4454 
4455         default:
4456           if (!common_op_match_null_string_p (&p1, end, reg_info))
4457             return false;
4458         }
4459     } /* while p1 < end */
4460 
4461   return false;
4462 } /* group_match_null_string_p */
4463 
4464 
4465 /* Similar to group_match_null_string_p, but doesn't deal with alternatives:
4466    It expects P to be the first byte of a single alternative and END one
4467    byte past the last. The alternative can contain groups.  */
4468 
4469 static boolean
4470 alt_match_null_string_p (p, end, reg_info)
4471     unsigned char *p, *end;
4472     register_info_type *reg_info;
4473 {
4474   int mcnt;
4475   unsigned char *p1 = p;
4476 
4477   while (p1 < end)
4478     {
4479       /* Skip over opcodes that can match nothing, and break when we get
4480          to one that can't.  */
4481 
4482       switch ((re_opcode_t) *p1)
4483         {
4484 	/* It's a loop.  */
4485         case on_failure_jump:
4486           p1++;
4487           EXTRACT_NUMBER_AND_INCR (mcnt, p1);
4488           p1 += mcnt;
4489           break;
4490 
4491 	default:
4492           if (!common_op_match_null_string_p (&p1, end, reg_info))
4493             return false;
4494         }
4495     }  /* while p1 < end */
4496 
4497   return true;
4498 } /* alt_match_null_string_p */
4499 
4500 
4501 /* Deals with the ops common to group_match_null_string_p and
4502    alt_match_null_string_p.
4503 
4504    Sets P to one after the op and its arguments, if any.  */
4505 
4506 static boolean
4507 common_op_match_null_string_p (p, end, reg_info)
4508     unsigned char **p, *end;
4509     register_info_type *reg_info;
4510 {
4511   int mcnt;
4512   boolean ret;
4513   int reg_no;
4514   unsigned char *p1 = *p;
4515 
4516   switch ((re_opcode_t) *p1++)
4517     {
4518     case no_op:
4519     case begline:
4520     case endline:
4521     case begbuf:
4522     case endbuf:
4523     case wordbeg:
4524     case wordend:
4525     case wordbound:
4526     case notwordbound:
4527 #ifdef emacs
4528     case before_dot:
4529     case at_dot:
4530     case after_dot:
4531 #endif
4532       break;
4533 
4534     case start_memory:
4535       reg_no = *p1;
4536       assert (reg_no > 0 && reg_no <= MAX_REGNUM);
4537       ret = group_match_null_string_p (&p1, end, reg_info);
4538 
4539       /* Have to set this here in case we're checking a group which
4540          contains a group and a back reference to it.  */
4541 
4542       if (REG_MATCH_NULL_STRING_P (reg_info[reg_no]) == MATCH_NULL_UNSET_VALUE)
4543         REG_MATCH_NULL_STRING_P (reg_info[reg_no]) = ret;
4544 
4545       if (!ret)
4546         return false;
4547       break;
4548 
4549     /* If this is an optimized succeed_n for zero times, make the jump.  */
4550     case jump:
4551       EXTRACT_NUMBER_AND_INCR (mcnt, p1);
4552       if (mcnt >= 0)
4553         p1 += mcnt;
4554       else
4555         return false;
4556       break;
4557 
4558     case succeed_n:
4559       /* Get to the number of times to succeed.  */
4560       p1 += 2;
4561       EXTRACT_NUMBER_AND_INCR (mcnt, p1);
4562 
4563       if (mcnt == 0)
4564         {
4565           p1 -= 4;
4566           EXTRACT_NUMBER_AND_INCR (mcnt, p1);
4567           p1 += mcnt;
4568         }
4569       else
4570         return false;
4571       break;
4572 
4573     case duplicate:
4574       if (!REG_MATCH_NULL_STRING_P (reg_info[*p1]))
4575         return false;
4576       break;
4577 
4578     case set_number_at:
4579       p1 += 4;
4580 
4581     default:
4582       /* All other opcodes mean we cannot match the empty string.  */
4583       return false;
4584   }
4585 
4586   *p = p1;
4587   return true;
4588 } /* common_op_match_null_string_p */
4589 
4590 
4591 /* Return zero if TRANSLATE[S1] and TRANSLATE[S2] are identical for LEN
4592    bytes; nonzero otherwise.  */
4593 
4594 static int
4595 bcmp_translate (s1, s2, len, translate)
4596      unsigned char *s1, *s2;
4597      register int len;
4598      char *translate;
4599 {
4600   register unsigned char *p1 = s1, *p2 = s2;
4601   while (len)
4602     {
4603       if (translate[*p1++] != translate[*p2++]) return 1;
4604       len--;
4605     }
4606   return 0;
4607 }
4608 
4609 /* Entry points for GNU code.  */
4610 
4611 /* re_compile_pattern is the GNU regular expression compiler: it
4612    compiles PATTERN (of length SIZE) and puts the result in BUFP.
4613    Returns 0 if the pattern was valid, otherwise an error string.
4614 
4615    Assumes the `allocated' (and perhaps `buffer') and `translate' fields
4616    are set in BUFP on entry.
4617 
4618    We call regex_compile to do the actual compilation.  */
4619 
4620 const char *
4621 re_compile_pattern (pattern, length, bufp)
4622      const char *pattern;
4623      int length;
4624      struct re_pattern_buffer *bufp;
4625 {
4626   reg_errcode_t ret;
4627 
4628   /* GNU code is written to assume at least RE_NREGS registers will be set
4629      (and at least one extra will be -1).  */
4630   bufp->regs_allocated = REGS_UNALLOCATED;
4631 
4632   /* And GNU code determines whether or not to get register information
4633      by passing null for the REGS argument to re_match, etc., not by
4634      setting no_sub.  */
4635   bufp->no_sub = 0;
4636 
4637   /* Match anchors at newline.  */
4638   bufp->newline_anchor = 1;
4639 
4640   ret = regex_compile (pattern, length, re_syntax_options, bufp);
4641 
4642   return re_error_msg[(int) ret];
4643 }
4644 
4645 /* Entry points compatible with 4.2 BSD regex library.  We don't define
4646    them if this is an Emacs or POSIX compilation.  */
4647 
4648 #if !defined (emacs)
4649 
4650 /* BSD has one and only one pattern buffer.  */
4651 static struct re_pattern_buffer re_comp_buf;
4652 
4653 char *
4654 re_comp (s)
4655     const char *s;
4656 {
4657   reg_errcode_t ret;
4658 
4659   if (!s)
4660     {
4661       if (!re_comp_buf.buffer)
4662 	return "No previous regular expression";
4663       return 0;
4664     }
4665 
4666   if (!re_comp_buf.buffer)
4667     {
4668       re_comp_buf.buffer = (unsigned char *) malloc (200);
4669       if (re_comp_buf.buffer == NULL)
4670         return "Memory exhausted";
4671       re_comp_buf.allocated = 200;
4672 
4673       re_comp_buf.fastmap = (char *) malloc (1 << BYTEWIDTH);
4674       if (re_comp_buf.fastmap == NULL)
4675 	return "Memory exhausted";
4676     }
4677 
4678   /* Since `re_exec' always passes NULL for the `regs' argument, we
4679      don't need to initialize the pattern buffer fields which affect it.  */
4680 
4681   /* Match anchors at newlines.  */
4682   re_comp_buf.newline_anchor = 1;
4683 
4684   ret = regex_compile (s, strlen (s), re_syntax_options, &re_comp_buf);
4685 
4686   /* Yes, we're discarding `const' here.  */
4687   return (char *) re_error_msg[(int) ret];
4688 }
4689 
4690 
4691 int
4692 re_exec (s)
4693     const char *s;
4694 {
4695   const int len = strlen (s);
4696   return
4697     0 <= re_search (&re_comp_buf, s, len, 0, len, (struct re_registers *) 0);
4698 }
4699 #endif /* not emacs and not _POSIX_SOURCE */
4700 
4701 /* POSIX.2 functions.  Don't define these for Emacs.  */
4702 
4703 #ifndef emacs
4704 
4705 /* regcomp takes a regular expression as a string and compiles it.
4706 
4707    PREG is a regex_t *.  We do not expect any fields to be initialized,
4708    since POSIX says we shouldn't.  Thus, we set
4709 
4710      `buffer' to the compiled pattern;
4711      `used' to the length of the compiled pattern;
4712      `syntax' to RE_SYNTAX_POSIX_EXTENDED if the
4713        REG_EXTENDED bit in CFLAGS is set; otherwise, to
4714        RE_SYNTAX_POSIX_BASIC;
4715      `newline_anchor' to REG_NEWLINE being set in CFLAGS;
4716      `fastmap' and `fastmap_accurate' to zero;
4717      `re_nsub' to the number of subexpressions in PATTERN.
4718 
4719    PATTERN is the address of the pattern string.
4720 
4721    CFLAGS is a series of bits which affect compilation.
4722 
4723      If REG_EXTENDED is set, we use POSIX extended syntax; otherwise, we
4724      use POSIX basic syntax.
4725 
4726      If REG_NEWLINE is set, then . and [^...] don't match newline.
4727      Also, regexec will try a match beginning after every newline.
4728 
4729      If REG_ICASE is set, then we considers upper- and lowercase
4730      versions of letters to be equivalent when matching.
4731 
4732      If REG_NOSUB is set, then when PREG is passed to regexec, that
4733      routine will report only success or failure, and nothing about the
4734      registers.
4735 
4736    It returns 0 if it succeeds, nonzero if it doesn't.  (See regex.h for
4737    the return codes and their meanings.)  */
4738 
4739 int
4740 regcomp (preg, pattern, cflags)
4741     regex_t *preg;
4742     const char *pattern;
4743     int cflags;
4744 {
4745   reg_errcode_t ret;
4746   unsigned syntax
4747     = (cflags & REG_EXTENDED) ?
4748       RE_SYNTAX_POSIX_EXTENDED : RE_SYNTAX_POSIX_BASIC;
4749 
4750   /* regex_compile will allocate the space for the compiled pattern.  */
4751   preg->buffer = 0;
4752   preg->allocated = 0;
4753 
4754   /* Don't bother to use a fastmap when searching.  This simplifies the
4755      REG_NEWLINE case: if we used a fastmap, we'd have to put all the
4756      characters after newlines into the fastmap.  This way, we just try
4757      every character.  */
4758   preg->fastmap = 0;
4759 
4760   if (cflags & REG_ICASE)
4761     {
4762       unsigned i;
4763 
4764       preg->translate = (char *) malloc (CHAR_SET_SIZE);
4765       if (preg->translate == NULL)
4766         return (int) REG_ESPACE;
4767 
4768       /* Map uppercase characters to corresponding lowercase ones.  */
4769       for (i = 0; i < CHAR_SET_SIZE; i++)
4770         preg->translate[i] = ISUPPER (i) ? tolower (i) : i;
4771     }
4772   else
4773     preg->translate = NULL;
4774 
4775   /* If REG_NEWLINE is set, newlines are treated differently.  */
4776   if (cflags & REG_NEWLINE)
4777     { /* REG_NEWLINE implies neither . nor [^...] match newline.  */
4778       syntax &= ~RE_DOT_NEWLINE;
4779       syntax |= RE_HAT_LISTS_NOT_NEWLINE;
4780       /* It also changes the matching behavior.  */
4781       preg->newline_anchor = 1;
4782     }
4783   else
4784     preg->newline_anchor = 0;
4785 
4786   preg->no_sub = !!(cflags & REG_NOSUB);
4787 
4788   /* POSIX says a null character in the pattern terminates it, so we
4789      can use strlen here in compiling the pattern.  */
4790   ret = regex_compile (pattern, strlen (pattern), syntax, preg);
4791 
4792   /* POSIX doesn't distinguish between an unmatched open-group and an
4793      unmatched close-group: both are REG_EPAREN.  */
4794   if (ret == REG_ERPAREN) ret = REG_EPAREN;
4795 
4796   return (int) ret;
4797 }
4798 
4799 
4800 /* regexec searches for a given pattern, specified by PREG, in the
4801    string STRING.
4802 
4803    If NMATCH is zero or REG_NOSUB was set in the cflags argument to
4804    `regcomp', we ignore PMATCH.  Otherwise, we assume PMATCH has at
4805    least NMATCH elements, and we set them to the offsets of the
4806    corresponding matched substrings.
4807 
4808    EFLAGS specifies `execution flags' which affect matching: if
4809    REG_NOTBOL is set, then ^ does not match at the beginning of the
4810    string; if REG_NOTEOL is set, then $ does not match at the end.
4811 
4812    We return 0 if we find a match and REG_NOMATCH if not.  */
4813 
4814 int
4815 regexec (preg, string, nmatch, pmatch, eflags)
4816     const regex_t *preg;
4817     const char *string;
4818     size_t nmatch;
4819     regmatch_t pmatch[];
4820     int eflags;
4821 {
4822   int ret;
4823   struct re_registers regs;
4824   regex_t private_preg;
4825   int len = strlen (string);
4826   boolean want_reg_info = !preg->no_sub && nmatch > 0;
4827 
4828   private_preg = *preg;
4829 
4830   private_preg.not_bol = !!(eflags & REG_NOTBOL);
4831   private_preg.not_eol = !!(eflags & REG_NOTEOL);
4832 
4833   /* The user has told us exactly how many registers to return
4834      information about, via `nmatch'.  We have to pass that on to the
4835      matching routines.  */
4836   private_preg.regs_allocated = REGS_FIXED;
4837 
4838   if (want_reg_info)
4839     {
4840       regs.num_regs = nmatch;
4841       regs.start = TALLOC (nmatch, regoff_t);
4842       regs.end = TALLOC (nmatch, regoff_t);
4843       if (regs.start == NULL || regs.end == NULL)
4844         return (int) REG_NOMATCH;
4845     }
4846 
4847   /* Perform the searching operation.  */
4848   ret = re_search (&private_preg, string, len,
4849                    /* start: */ 0, /* range: */ len,
4850                    want_reg_info ? &regs : (struct re_registers *) 0);
4851 
4852   /* Copy the register information to the POSIX structure.  */
4853   if (want_reg_info)
4854     {
4855       if (ret >= 0)
4856         {
4857           unsigned r;
4858 
4859           for (r = 0; r < nmatch; r++)
4860             {
4861               pmatch[r].rm_so = regs.start[r];
4862               pmatch[r].rm_eo = regs.end[r];
4863             }
4864         }
4865 
4866       /* If we needed the temporary register info, free the space now.  */
4867       free (regs.start);
4868       free (regs.end);
4869     }
4870 
4871   /* We want zero return to mean success, unlike `re_search'.  */
4872   return ret >= 0 ? (int) REG_NOERROR : (int) REG_NOMATCH;
4873 }
4874 
4875 
4876 /* Returns a message corresponding to an error code, ERRCODE, returned
4877    from either regcomp or regexec.   We don't use PREG here.  */
4878 
4879 size_t
4880 regerror (errcode, preg, errbuf, errbuf_size)
4881     int errcode;
4882     const regex_t *preg;
4883     char *errbuf;
4884     size_t errbuf_size;
4885 {
4886   const char *msg;
4887   size_t msg_size;
4888 
4889   if (errcode < 0
4890       || errcode >= (sizeof (re_error_msg) / sizeof (re_error_msg[0])))
4891     /* Only error codes returned by the rest of the code should be passed
4892        to this routine.  If we are given anything else, or if other regex
4893        code generates an invalid error code, then the program has a bug.
4894        Dump core so we can fix it.  */
4895     abort ();
4896 
4897   msg = re_error_msg[errcode];
4898 
4899   /* POSIX doesn't require that we do anything in this case, but why
4900      not be nice.  */
4901   if (! msg)
4902     msg = "Success";
4903 
4904   msg_size = strlen (msg) + 1; /* Includes the null.  */
4905 
4906   if (errbuf_size != 0)
4907     {
4908       if (msg_size > errbuf_size)
4909         {
4910           strncpy (errbuf, msg, errbuf_size - 1);
4911           errbuf[errbuf_size - 1] = 0;
4912         }
4913       else
4914         strcpy (errbuf, msg);
4915     }
4916 
4917   return msg_size;
4918 }
4919 
4920 
4921 /* Free dynamically allocated space used by PREG.  */
4922 
4923 void
4924 regfree (preg)
4925     regex_t *preg;
4926 {
4927   if (preg->buffer != NULL)
4928     free (preg->buffer);
4929   preg->buffer = NULL;
4930 
4931   preg->allocated = 0;
4932   preg->used = 0;
4933 
4934   if (preg->fastmap != NULL)
4935     free (preg->fastmap);
4936   preg->fastmap = NULL;
4937   preg->fastmap_accurate = 0;
4938 
4939   if (preg->translate != NULL)
4940     free (preg->translate);
4941   preg->translate = NULL;
4942 }
4943 
4944 #endif /* not emacs  */
4945 
4946 /*
4947 Local variables:
4948 make-backup-files: t
4949 version-control: t
4950 trim-versions-without-asking: nil
4951 End:
4952 */
4953