xref: /openbsd-src/gnu/usr.bin/perl/regcomp.c (revision 8500990981f885cbe5e6a4958549cacc238b5ae6)
1 /*    regcomp.c
2  */
3 
4 /*
5  * "A fair jaw-cracker dwarf-language must be."  --Samwise Gamgee
6  */
7 
8 /* NOTE: this is derived from Henry Spencer's regexp code, and should not
9  * confused with the original package (see point 3 below).  Thanks, Henry!
10  */
11 
12 /* Additional note: this code is very heavily munged from Henry's version
13  * in places.  In some spots I've traded clarity for efficiency, so don't
14  * blame Henry for some of the lack of readability.
15  */
16 
17 /* The names of the functions have been changed from regcomp and
18  * regexec to  pregcomp and pregexec in order to avoid conflicts
19  * with the POSIX routines of the same names.
20 */
21 
22 #ifdef PERL_EXT_RE_BUILD
23 /* need to replace pregcomp et al, so enable that */
24 #  ifndef PERL_IN_XSUB_RE
25 #    define PERL_IN_XSUB_RE
26 #  endif
27 /* need access to debugger hooks */
28 #  if defined(PERL_EXT_RE_DEBUG) && !defined(DEBUGGING)
29 #    define DEBUGGING
30 #  endif
31 #endif
32 
33 #ifdef PERL_IN_XSUB_RE
34 /* We *really* need to overwrite these symbols: */
35 #  define Perl_pregcomp my_regcomp
36 #  define Perl_regdump my_regdump
37 #  define Perl_regprop my_regprop
38 #  define Perl_pregfree my_regfree
39 #  define Perl_re_intuit_string my_re_intuit_string
40 /* *These* symbols are masked to allow static link. */
41 #  define Perl_regnext my_regnext
42 #  define Perl_save_re_context my_save_re_context
43 #  define Perl_reginitcolors my_reginitcolors
44 
45 #  define PERL_NO_GET_CONTEXT
46 #endif
47 
48 /*SUPPRESS 112*/
49 /*
50  * pregcomp and pregexec -- regsub and regerror are not used in perl
51  *
52  *	Copyright (c) 1986 by University of Toronto.
53  *	Written by Henry Spencer.  Not derived from licensed software.
54  *
55  *	Permission is granted to anyone to use this software for any
56  *	purpose on any computer system, and to redistribute it freely,
57  *	subject to the following restrictions:
58  *
59  *	1. The author is not responsible for the consequences of use of
60  *		this software, no matter how awful, even if they arise
61  *		from defects in it.
62  *
63  *	2. The origin of this software must not be misrepresented, either
64  *		by explicit claim or by omission.
65  *
66  *	3. Altered versions must be plainly marked as such, and must not
67  *		be misrepresented as being the original software.
68  *
69  *
70  ****    Alterations to Henry's code are...
71  ****
72  ****    Copyright (c) 1991-2002, Larry Wall
73  ****
74  ****    You may distribute under the terms of either the GNU General Public
75  ****    License or the Artistic License, as specified in the README file.
76 
77  *
78  * Beware that some of this code is subtly aware of the way operator
79  * precedence is structured in regular expressions.  Serious changes in
80  * regular-expression syntax might require a total rethink.
81  */
82 #include "EXTERN.h"
83 #define PERL_IN_REGCOMP_C
84 #include "perl.h"
85 
86 #ifndef PERL_IN_XSUB_RE
87 #  include "INTERN.h"
88 #endif
89 
90 #define REG_COMP_C
91 #include "regcomp.h"
92 
93 #ifdef op
94 #undef op
95 #endif /* op */
96 
97 #ifdef MSDOS
98 # if defined(BUGGY_MSC6)
99  /* MSC 6.00A breaks on op/regexp.t test 85 unless we turn this off */
100  # pragma optimize("a",off)
101  /* But MSC 6.00A is happy with 'w', for aliases only across function calls*/
102  # pragma optimize("w",on )
103 # endif /* BUGGY_MSC6 */
104 #endif /* MSDOS */
105 
106 #ifndef STATIC
107 #define	STATIC	static
108 #endif
109 
110 typedef struct RExC_state_t {
111     U32		flags;			/* are we folding, multilining? */
112     char	*precomp;		/* uncompiled string. */
113     regexp	*rx;
114     char	*start;			/* Start of input for compile */
115     char	*end;			/* End of input for compile */
116     char	*parse;			/* Input-scan pointer. */
117     I32		whilem_seen;		/* number of WHILEM in this expr */
118     regnode	*emit_start;		/* Start of emitted-code area */
119     regnode	*emit;			/* Code-emit pointer; &regdummy = don't = compiling */
120     I32		naughty;		/* How bad is this pattern? */
121     I32		sawback;		/* Did we see \1, ...? */
122     U32		seen;
123     I32		size;			/* Code size. */
124     I32		npar;			/* () count. */
125     I32		extralen;
126     I32		seen_zerolen;
127     I32		seen_evals;
128     I32		utf8;
129 #if ADD_TO_REGEXEC
130     char 	*starttry;		/* -Dr: where regtry was called. */
131 #define RExC_starttry	(pRExC_state->starttry)
132 #endif
133 } RExC_state_t;
134 
135 #define RExC_flags	(pRExC_state->flags)
136 #define RExC_precomp	(pRExC_state->precomp)
137 #define RExC_rx		(pRExC_state->rx)
138 #define RExC_start	(pRExC_state->start)
139 #define RExC_end	(pRExC_state->end)
140 #define RExC_parse	(pRExC_state->parse)
141 #define RExC_whilem_seen	(pRExC_state->whilem_seen)
142 #define RExC_offsets	(pRExC_state->rx->offsets) /* I am not like the others */
143 #define RExC_emit	(pRExC_state->emit)
144 #define RExC_emit_start	(pRExC_state->emit_start)
145 #define RExC_naughty	(pRExC_state->naughty)
146 #define RExC_sawback	(pRExC_state->sawback)
147 #define RExC_seen	(pRExC_state->seen)
148 #define RExC_size	(pRExC_state->size)
149 #define RExC_npar	(pRExC_state->npar)
150 #define RExC_extralen	(pRExC_state->extralen)
151 #define RExC_seen_zerolen	(pRExC_state->seen_zerolen)
152 #define RExC_seen_evals	(pRExC_state->seen_evals)
153 #define RExC_utf8	(pRExC_state->utf8)
154 
155 #define	ISMULT1(c)	((c) == '*' || (c) == '+' || (c) == '?')
156 #define	ISMULT2(s)	((*s) == '*' || (*s) == '+' || (*s) == '?' || \
157 	((*s) == '{' && regcurly(s)))
158 
159 #ifdef SPSTART
160 #undef SPSTART		/* dratted cpp namespace... */
161 #endif
162 /*
163  * Flags to be passed up and down.
164  */
165 #define	WORST		0	/* Worst case. */
166 #define	HASWIDTH	0x1	/* Known to match non-null strings. */
167 #define	SIMPLE		0x2	/* Simple enough to be STAR/PLUS operand. */
168 #define	SPSTART		0x4	/* Starts with * or +. */
169 #define TRYAGAIN	0x8	/* Weeded out a declaration. */
170 
171 /* Length of a variant. */
172 
173 typedef struct scan_data_t {
174     I32 len_min;
175     I32 len_delta;
176     I32 pos_min;
177     I32 pos_delta;
178     SV *last_found;
179     I32 last_end;			/* min value, <0 unless valid. */
180     I32 last_start_min;
181     I32 last_start_max;
182     SV **longest;			/* Either &l_fixed, or &l_float. */
183     SV *longest_fixed;
184     I32 offset_fixed;
185     SV *longest_float;
186     I32 offset_float_min;
187     I32 offset_float_max;
188     I32 flags;
189     I32 whilem_c;
190     I32 *last_closep;
191     struct regnode_charclass_class *start_class;
192 } scan_data_t;
193 
194 /*
195  * Forward declarations for pregcomp()'s friends.
196  */
197 
198 static scan_data_t zero_scan_data = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
199 				      0, 0, 0, 0, 0, 0};
200 
201 #define SF_BEFORE_EOL		(SF_BEFORE_SEOL|SF_BEFORE_MEOL)
202 #define SF_BEFORE_SEOL		0x1
203 #define SF_BEFORE_MEOL		0x2
204 #define SF_FIX_BEFORE_EOL	(SF_FIX_BEFORE_SEOL|SF_FIX_BEFORE_MEOL)
205 #define SF_FL_BEFORE_EOL	(SF_FL_BEFORE_SEOL|SF_FL_BEFORE_MEOL)
206 
207 #ifdef NO_UNARY_PLUS
208 #  define SF_FIX_SHIFT_EOL	(0+2)
209 #  define SF_FL_SHIFT_EOL		(0+4)
210 #else
211 #  define SF_FIX_SHIFT_EOL	(+2)
212 #  define SF_FL_SHIFT_EOL		(+4)
213 #endif
214 
215 #define SF_FIX_BEFORE_SEOL	(SF_BEFORE_SEOL << SF_FIX_SHIFT_EOL)
216 #define SF_FIX_BEFORE_MEOL	(SF_BEFORE_MEOL << SF_FIX_SHIFT_EOL)
217 
218 #define SF_FL_BEFORE_SEOL	(SF_BEFORE_SEOL << SF_FL_SHIFT_EOL)
219 #define SF_FL_BEFORE_MEOL	(SF_BEFORE_MEOL << SF_FL_SHIFT_EOL) /* 0x20 */
220 #define SF_IS_INF		0x40
221 #define SF_HAS_PAR		0x80
222 #define SF_IN_PAR		0x100
223 #define SF_HAS_EVAL		0x200
224 #define SCF_DO_SUBSTR		0x400
225 #define SCF_DO_STCLASS_AND	0x0800
226 #define SCF_DO_STCLASS_OR	0x1000
227 #define SCF_DO_STCLASS		(SCF_DO_STCLASS_AND|SCF_DO_STCLASS_OR)
228 #define SCF_WHILEM_VISITED_POS	0x2000
229 
230 #define UTF (RExC_utf8 != 0)
231 #define LOC ((RExC_flags & PMf_LOCALE) != 0)
232 #define FOLD ((RExC_flags & PMf_FOLD) != 0)
233 
234 #define OOB_UNICODE		12345678
235 #define OOB_NAMEDCLASS		-1
236 
237 #define CHR_SVLEN(sv) (UTF ? sv_len_utf8(sv) : SvCUR(sv))
238 #define CHR_DIST(a,b) (UTF ? utf8_distance(a,b) : a - b)
239 
240 
241 /* length of regex to show in messages that don't mark a position within */
242 #define RegexLengthToShowInErrorMessages 127
243 
244 /*
245  * If MARKER[12] are adjusted, be sure to adjust the constants at the top
246  * of t/op/regmesg.t, the tests in t/op/re_tests, and those in
247  * op/pragma/warn/regcomp.
248  */
249 #define MARKER1 "<-- HERE"    /* marker as it appears in the description */
250 #define MARKER2 " <-- HERE "  /* marker as it appears within the regex */
251 
252 #define REPORT_LOCATION " in regex; marked by " MARKER1 " in m/%.*s" MARKER2 "%s/"
253 
254 /*
255  * Calls SAVEDESTRUCTOR_X if needed, then calls Perl_croak with the given
256  * arg. Show regex, up to a maximum length. If it's too long, chop and add
257  * "...".
258  */
259 #define	FAIL(msg)                                                             \
260     STMT_START {                                                             \
261         char *ellipses = "";                                                 \
262         IV len = RExC_end - RExC_precomp;                                \
263                                                                              \
264 	if (!SIZE_ONLY)                                                      \
265 	    SAVEDESTRUCTOR_X(clear_re,(void*)RExC_rx);                 \
266                                                                              \
267 	if (len > RegexLengthToShowInErrorMessages) {                        \
268             /* chop 10 shorter than the max, to ensure meaning of "..." */   \
269 	    len = RegexLengthToShowInErrorMessages - 10;                     \
270 	    ellipses = "...";                                                \
271 	}                                                                    \
272 	Perl_croak(aTHX_ "%s in regex m/%.*s%s/",                            \
273 		   msg, (int)len, RExC_precomp, ellipses);                  \
274     } STMT_END
275 
276 /*
277  * Calls SAVEDESTRUCTOR_X if needed, then calls Perl_croak with the given
278  * args. Show regex, up to a maximum length. If it's too long, chop and add
279  * "...".
280  */
281 #define	FAIL2(pat,msg)                                                        \
282     STMT_START {                                                             \
283         char *ellipses = "";                                                 \
284         IV len = RExC_end - RExC_precomp;                                \
285                                                                              \
286 	if (!SIZE_ONLY)                                                      \
287 	    SAVEDESTRUCTOR_X(clear_re,(void*)RExC_rx);                 \
288                                                                              \
289 	if (len > RegexLengthToShowInErrorMessages) {                        \
290             /* chop 10 shorter than the max, to ensure meaning of "..." */   \
291 	    len = RegexLengthToShowInErrorMessages - 10;                     \
292 	    ellipses = "...";                                                \
293 	}                                                                    \
294 	S_re_croak2(aTHX_ pat, " in regex m/%.*s%s/",                        \
295 		    msg, (int)len, RExC_precomp, ellipses);                \
296     } STMT_END
297 
298 
299 /*
300  * Simple_vFAIL -- like FAIL, but marks the current location in the scan
301  */
302 #define	Simple_vFAIL(m)                                                      \
303     STMT_START {                                                             \
304       IV offset = RExC_parse - RExC_precomp; \
305                                                                              \
306       Perl_croak(aTHX_ "%s" REPORT_LOCATION,               \
307 		 m, (int)offset, RExC_precomp, RExC_precomp + offset);     \
308     } STMT_END
309 
310 /*
311  * Calls SAVEDESTRUCTOR_X if needed, then Simple_vFAIL()
312  */
313 #define	vFAIL(m)                                                             \
314     STMT_START {                                                             \
315       if (!SIZE_ONLY)                                                        \
316 	    SAVEDESTRUCTOR_X(clear_re,(void*)RExC_rx);                 \
317       Simple_vFAIL(m);                                                       \
318     } STMT_END
319 
320 /*
321  * Like Simple_vFAIL(), but accepts two arguments.
322  */
323 #define	Simple_vFAIL2(m,a1)                                                  \
324     STMT_START {                                                             \
325       IV offset = RExC_parse - RExC_precomp; \
326                                                                              \
327       S_re_croak2(aTHX_ m, REPORT_LOCATION, a1,       \
328 		  (int)offset, RExC_precomp, RExC_precomp + offset);       \
329     } STMT_END
330 
331 /*
332  * Calls SAVEDESTRUCTOR_X if needed, then Simple_vFAIL2().
333  */
334 #define	vFAIL2(m,a1)                                                         \
335     STMT_START {                                                             \
336       if (!SIZE_ONLY)                                                        \
337 	    SAVEDESTRUCTOR_X(clear_re,(void*)RExC_rx);                 \
338       Simple_vFAIL2(m, a1);                                                  \
339     } STMT_END
340 
341 
342 /*
343  * Like Simple_vFAIL(), but accepts three arguments.
344  */
345 #define	Simple_vFAIL3(m, a1, a2)                                             \
346     STMT_START {                                                             \
347       IV offset = RExC_parse - RExC_precomp; \
348                                                                              \
349       S_re_croak2(aTHX_ m, REPORT_LOCATION, a1, a2,   \
350 		  (int)offset, RExC_precomp, RExC_precomp + offset);       \
351     } STMT_END
352 
353 /*
354  * Calls SAVEDESTRUCTOR_X if needed, then Simple_vFAIL3().
355  */
356 #define	vFAIL3(m,a1,a2)                                                      \
357     STMT_START {                                                             \
358       if (!SIZE_ONLY)                                                        \
359 	    SAVEDESTRUCTOR_X(clear_re,(void*)RExC_rx);                 \
360       Simple_vFAIL3(m, a1, a2);                                              \
361     } STMT_END
362 
363 /*
364  * Like Simple_vFAIL(), but accepts four arguments.
365  */
366 #define	Simple_vFAIL4(m, a1, a2, a3)                                         \
367     STMT_START {                                                             \
368       IV offset = RExC_parse - RExC_precomp; \
369                                                                              \
370       S_re_croak2(aTHX_ m, REPORT_LOCATION, a1, a2, a3,\
371 		  (int)offset, RExC_precomp, RExC_precomp + offset);       \
372     } STMT_END
373 
374 /*
375  * Like Simple_vFAIL(), but accepts five arguments.
376  */
377 #define	Simple_vFAIL5(m, a1, a2, a3, a4)                                     \
378     STMT_START {                                                             \
379       IV offset = RExC_parse - RExC_precomp; \
380       S_re_croak2(aTHX_ m, REPORT_LOCATION, a1, a2, a3, a4,\
381 		  (int)offset, RExC_precomp, RExC_precomp + offset);       \
382     } STMT_END
383 
384 
385 #define	vWARN(loc,m)                                                         \
386     STMT_START {                                                             \
387         IV offset = loc - RExC_precomp;          \
388 	Perl_warner(aTHX_ packWARN(WARN_REGEXP), "%s" REPORT_LOCATION,\
389 		 m, (int)offset, RExC_precomp, RExC_precomp + offset);          \
390     } STMT_END                                                               \
391 
392 #define	vWARNdep(loc,m)                                                         \
393     STMT_START {                                                             \
394         IV offset = loc - RExC_precomp;          \
395 	Perl_warner(aTHX_ packWARN2(WARN_DEPRECATED, WARN_REGEXP), "%s" REPORT_LOCATION,\
396 		 m, (int)offset, RExC_precomp, RExC_precomp + offset);          \
397     } STMT_END                                                               \
398 
399 
400 #define	vWARN2(loc, m, a1)                                                   \
401     STMT_START {                                                             \
402         IV offset = loc - RExC_precomp;          \
403 	Perl_warner(aTHX_ packWARN(WARN_REGEXP), m REPORT_LOCATION,\
404                  a1,                                                         \
405 		 (int)offset, RExC_precomp, RExC_precomp + offset);        \
406     } STMT_END
407 
408 #define	vWARN3(loc, m, a1, a2)                                               \
409     STMT_START {                                                             \
410       IV offset = loc - RExC_precomp;        \
411 	Perl_warner(aTHX_ packWARN(WARN_REGEXP), m REPORT_LOCATION,                    \
412                  a1, a2,                                                     \
413 		 (int)offset, RExC_precomp, RExC_precomp + offset);        \
414     } STMT_END
415 
416 #define	vWARN4(loc, m, a1, a2, a3)                                           \
417     STMT_START {                                                             \
418       IV offset = loc - RExC_precomp;            \
419 	Perl_warner(aTHX_ packWARN(WARN_REGEXP), m REPORT_LOCATION,\
420                  a1, a2, a3,                                                 \
421 		 (int)offset, RExC_precomp, RExC_precomp + offset);        \
422     } STMT_END
423 
424 /* used for the parse_flags section for (?c) -- japhy */
425 #define	vWARN5(loc, m, a1, a2, a3, a4)                                       \
426   STMT_START {                                                   \
427       IV offset = loc - RExC_precomp;   \
428         Perl_warner(aTHX_ packWARN(WARN_REGEXP), m REPORT_LOCATION,      \
429                  a1, a2, a3, a4,                                 \
430                  (int)offset, RExC_precomp, RExC_precomp + offset);  \
431     } STMT_END
432 
433 
434 /* Allow for side effects in s */
435 #define REGC(c,s) STMT_START { if (!SIZE_ONLY) *(s) = (c); else (void)(s);} STMT_END
436 
437 /* Macros for recording node offsets.   20001227 mjd@plover.com
438  * Nodes are numbered 1, 2, 3, 4.  Node #n's position is recorded in
439  * element 2*n-1 of the array.  Element #2n holds the byte length node #n.
440  * Element 0 holds the number n.
441  */
442 
443 #define MJD_OFFSET_DEBUG(x)
444 /* #define MJD_OFFSET_DEBUG(x) fprintf x */
445 
446 
447 #  define Set_Node_Offset_To_R(node,byte)                           \
448    STMT_START {                                        \
449      if (! SIZE_ONLY) {                                  \
450        if((node) < 0) {                   \
451          Perl_croak(aTHX_ "value of node is %d in Offset macro", node); \
452        } else {                                                        \
453          RExC_offsets[2*(node)-1] = (byte);                               \
454        }                                                               \
455      }                                                                 \
456    } STMT_END
457 
458 #  define Set_Node_Offset(node,byte) Set_Node_Offset_To_R((node)-RExC_emit_start, (byte)-RExC_start)
459 #  define Set_Cur_Node_Offset Set_Node_Offset(RExC_emit, RExC_parse)
460 
461 #  define Set_Node_Length_To_R(node,len)                            \
462    STMT_START {                                        \
463      if (! SIZE_ONLY) {                                  \
464        MJD_OFFSET_DEBUG((stderr, "** (%d) size of node %d is %d.\n", __LINE__, (node), (len))); \
465        if((node) < 0) {                   \
466          Perl_croak(aTHX_ "value of node is %d in Length macro", node); \
467        } else {                                                        \
468          RExC_offsets[2*(node)] = (len);                               \
469        }                                                               \
470      }                                                                 \
471    } STMT_END
472 
473 #  define Set_Node_Length(node,len) Set_Node_Length_To_R((node)-RExC_emit_start, len)
474 #  define Set_Cur_Node_Length(len)  Set_Node_Length(RExC_emit, len)
475 #  define Set_Node_Cur_Length(node)   Set_Node_Length(node, RExC_parse - parse_start)
476 
477 /* Get offsets and lengths */
478 #define Node_Offset(n) (RExC_offsets[2*((n)-RExC_emit_start)-1])
479 #define Node_Length(n) (RExC_offsets[2*((n)-RExC_emit_start)])
480 
481 static void clear_re(pTHX_ void *r);
482 
483 /* Mark that we cannot extend a found fixed substring at this point.
484    Updata the longest found anchored substring and the longest found
485    floating substrings if needed. */
486 
487 STATIC void
488 S_scan_commit(pTHX_ RExC_state_t *pRExC_state, scan_data_t *data)
489 {
490     STRLEN l = CHR_SVLEN(data->last_found);
491     STRLEN old_l = CHR_SVLEN(*data->longest);
492 
493     if ((l >= old_l) && ((l > old_l) || (data->flags & SF_BEFORE_EOL))) {
494 	sv_setsv(*data->longest, data->last_found);
495 	if (*data->longest == data->longest_fixed) {
496 	    data->offset_fixed = l ? data->last_start_min : data->pos_min;
497 	    if (data->flags & SF_BEFORE_EOL)
498 		data->flags
499 		    |= ((data->flags & SF_BEFORE_EOL) << SF_FIX_SHIFT_EOL);
500 	    else
501 		data->flags &= ~SF_FIX_BEFORE_EOL;
502 	}
503 	else {
504 	    data->offset_float_min = l ? data->last_start_min : data->pos_min;
505 	    data->offset_float_max = (l
506 				      ? data->last_start_max
507 				      : data->pos_min + data->pos_delta);
508 	    if ((U32)data->offset_float_max > (U32)I32_MAX)
509 		data->offset_float_max = I32_MAX;
510 	    if (data->flags & SF_BEFORE_EOL)
511 		data->flags
512 		    |= ((data->flags & SF_BEFORE_EOL) << SF_FL_SHIFT_EOL);
513 	    else
514 		data->flags &= ~SF_FL_BEFORE_EOL;
515 	}
516     }
517     SvCUR_set(data->last_found, 0);
518     data->last_end = -1;
519     data->flags &= ~SF_BEFORE_EOL;
520 }
521 
522 /* Can match anything (initialization) */
523 STATIC void
524 S_cl_anything(pTHX_ RExC_state_t *pRExC_state, struct regnode_charclass_class *cl)
525 {
526     ANYOF_CLASS_ZERO(cl);
527     ANYOF_BITMAP_SETALL(cl);
528     cl->flags = ANYOF_EOS|ANYOF_UNICODE_ALL;
529     if (LOC)
530 	cl->flags |= ANYOF_LOCALE;
531 }
532 
533 /* Can match anything (initialization) */
534 STATIC int
535 S_cl_is_anything(pTHX_ struct regnode_charclass_class *cl)
536 {
537     int value;
538 
539     for (value = 0; value <= ANYOF_MAX; value += 2)
540 	if (ANYOF_CLASS_TEST(cl, value) && ANYOF_CLASS_TEST(cl, value + 1))
541 	    return 1;
542     if (!(cl->flags & ANYOF_UNICODE_ALL))
543 	return 0;
544     if (!ANYOF_BITMAP_TESTALLSET(cl))
545 	return 0;
546     return 1;
547 }
548 
549 /* Can match anything (initialization) */
550 STATIC void
551 S_cl_init(pTHX_ RExC_state_t *pRExC_state, struct regnode_charclass_class *cl)
552 {
553     Zero(cl, 1, struct regnode_charclass_class);
554     cl->type = ANYOF;
555     cl_anything(pRExC_state, cl);
556 }
557 
558 STATIC void
559 S_cl_init_zero(pTHX_ RExC_state_t *pRExC_state, struct regnode_charclass_class *cl)
560 {
561     Zero(cl, 1, struct regnode_charclass_class);
562     cl->type = ANYOF;
563     cl_anything(pRExC_state, cl);
564     if (LOC)
565 	cl->flags |= ANYOF_LOCALE;
566 }
567 
568 /* 'And' a given class with another one.  Can create false positives */
569 /* We assume that cl is not inverted */
570 STATIC void
571 S_cl_and(pTHX_ struct regnode_charclass_class *cl,
572 	 struct regnode_charclass_class *and_with)
573 {
574     if (!(and_with->flags & ANYOF_CLASS)
575 	&& !(cl->flags & ANYOF_CLASS)
576 	&& (and_with->flags & ANYOF_LOCALE) == (cl->flags & ANYOF_LOCALE)
577 	&& !(and_with->flags & ANYOF_FOLD)
578 	&& !(cl->flags & ANYOF_FOLD)) {
579 	int i;
580 
581 	if (and_with->flags & ANYOF_INVERT)
582 	    for (i = 0; i < ANYOF_BITMAP_SIZE; i++)
583 		cl->bitmap[i] &= ~and_with->bitmap[i];
584 	else
585 	    for (i = 0; i < ANYOF_BITMAP_SIZE; i++)
586 		cl->bitmap[i] &= and_with->bitmap[i];
587     } /* XXXX: logic is complicated otherwise, leave it along for a moment. */
588     if (!(and_with->flags & ANYOF_EOS))
589 	cl->flags &= ~ANYOF_EOS;
590 
591     if (cl->flags & ANYOF_UNICODE_ALL && and_with->flags & ANYOF_UNICODE) {
592 	cl->flags &= ~ANYOF_UNICODE_ALL;
593 	cl->flags |= ANYOF_UNICODE;
594 	ARG_SET(cl, ARG(and_with));
595     }
596     if (!(and_with->flags & ANYOF_UNICODE_ALL))
597 	cl->flags &= ~ANYOF_UNICODE_ALL;
598     if (!(and_with->flags & (ANYOF_UNICODE|ANYOF_UNICODE_ALL)))
599 	cl->flags &= ~ANYOF_UNICODE;
600 }
601 
602 /* 'OR' a given class with another one.  Can create false positives */
603 /* We assume that cl is not inverted */
604 STATIC void
605 S_cl_or(pTHX_ RExC_state_t *pRExC_state, struct regnode_charclass_class *cl, struct regnode_charclass_class *or_with)
606 {
607     if (or_with->flags & ANYOF_INVERT) {
608 	/* We do not use
609 	 * (B1 | CL1) | (!B2 & !CL2) = (B1 | !B2 & !CL2) | (CL1 | (!B2 & !CL2))
610 	 *   <= (B1 | !B2) | (CL1 | !CL2)
611 	 * which is wasteful if CL2 is small, but we ignore CL2:
612 	 *   (B1 | CL1) | (!B2 & !CL2) <= (B1 | CL1) | !B2 = (B1 | !B2) | CL1
613 	 * XXXX Can we handle case-fold?  Unclear:
614 	 *   (OK1(i) | OK1(i')) | !(OK1(i) | OK1(i')) =
615 	 *   (OK1(i) | OK1(i')) | (!OK1(i) & !OK1(i'))
616 	 */
617 	if ( (or_with->flags & ANYOF_LOCALE) == (cl->flags & ANYOF_LOCALE)
618 	     && !(or_with->flags & ANYOF_FOLD)
619 	     && !(cl->flags & ANYOF_FOLD) ) {
620 	    int i;
621 
622 	    for (i = 0; i < ANYOF_BITMAP_SIZE; i++)
623 		cl->bitmap[i] |= ~or_with->bitmap[i];
624 	} /* XXXX: logic is complicated otherwise */
625 	else {
626 	    cl_anything(pRExC_state, cl);
627 	}
628     } else {
629 	/* (B1 | CL1) | (B2 | CL2) = (B1 | B2) | (CL1 | CL2)) */
630 	if ( (or_with->flags & ANYOF_LOCALE) == (cl->flags & ANYOF_LOCALE)
631 	     && (!(or_with->flags & ANYOF_FOLD)
632 		 || (cl->flags & ANYOF_FOLD)) ) {
633 	    int i;
634 
635 	    /* OR char bitmap and class bitmap separately */
636 	    for (i = 0; i < ANYOF_BITMAP_SIZE; i++)
637 		cl->bitmap[i] |= or_with->bitmap[i];
638 	    if (or_with->flags & ANYOF_CLASS) {
639 		for (i = 0; i < ANYOF_CLASSBITMAP_SIZE; i++)
640 		    cl->classflags[i] |= or_with->classflags[i];
641 		cl->flags |= ANYOF_CLASS;
642 	    }
643 	}
644 	else { /* XXXX: logic is complicated, leave it along for a moment. */
645 	    cl_anything(pRExC_state, cl);
646 	}
647     }
648     if (or_with->flags & ANYOF_EOS)
649 	cl->flags |= ANYOF_EOS;
650 
651     if (cl->flags & ANYOF_UNICODE && or_with->flags & ANYOF_UNICODE &&
652 	ARG(cl) != ARG(or_with)) {
653 	cl->flags |= ANYOF_UNICODE_ALL;
654 	cl->flags &= ~ANYOF_UNICODE;
655     }
656     if (or_with->flags & ANYOF_UNICODE_ALL) {
657 	cl->flags |= ANYOF_UNICODE_ALL;
658 	cl->flags &= ~ANYOF_UNICODE;
659     }
660 }
661 
662 /*
663  * There are strange code-generation bugs caused on sparc64 by gcc-2.95.2.
664  * These need to be revisited when a newer toolchain becomes available.
665  */
666 #if defined(__sparc64__) && defined(__GNUC__)
667 #   if __GNUC__ < 2 || (__GNUC__ == 2 && __GNUC_MINOR__ < 96)
668 #       undef  SPARC64_GCC_WORKAROUND
669 #       define SPARC64_GCC_WORKAROUND 1
670 #   endif
671 #endif
672 
673 /* REx optimizer.  Converts nodes into quickier variants "in place".
674    Finds fixed substrings.  */
675 
676 /* Stops at toplevel WHILEM as well as at `last'. At end *scanp is set
677    to the position after last scanned or to NULL. */
678 
679 STATIC I32
680 S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp, I32 *deltap, regnode *last, scan_data_t *data, U32 flags)
681 			/* scanp: Start here (read-write). */
682 			/* deltap: Write maxlen-minlen here. */
683 			/* last: Stop before this one. */
684 {
685     I32 min = 0, pars = 0, code;
686     regnode *scan = *scanp, *next;
687     I32 delta = 0;
688     int is_inf = (flags & SCF_DO_SUBSTR) && (data->flags & SF_IS_INF);
689     int is_inf_internal = 0;		/* The studied chunk is infinite */
690     I32 is_par = OP(scan) == OPEN ? ARG(scan) : 0;
691     scan_data_t data_fake;
692     struct regnode_charclass_class and_with; /* Valid if flags & SCF_DO_STCLASS_OR */
693 
694     while (scan && OP(scan) != END && scan < last) {
695 	/* Peephole optimizer: */
696 
697 	if (PL_regkind[(U8)OP(scan)] == EXACT) {
698 	    /* Merge several consecutive EXACTish nodes into one. */
699 	    regnode *n = regnext(scan);
700 	    U32 stringok = 1;
701 #ifdef DEBUGGING
702 	    regnode *stop = scan;
703 #endif
704 
705 	    next = scan + NODE_SZ_STR(scan);
706 	    /* Skip NOTHING, merge EXACT*. */
707 	    while (n &&
708 		   ( PL_regkind[(U8)OP(n)] == NOTHING ||
709 		     (stringok && (OP(n) == OP(scan))))
710 		   && NEXT_OFF(n)
711 		   && NEXT_OFF(scan) + NEXT_OFF(n) < I16_MAX) {
712 		if (OP(n) == TAIL || n > next)
713 		    stringok = 0;
714 		if (PL_regkind[(U8)OP(n)] == NOTHING) {
715 		    NEXT_OFF(scan) += NEXT_OFF(n);
716 		    next = n + NODE_STEP_REGNODE;
717 #ifdef DEBUGGING
718 		    if (stringok)
719 			stop = n;
720 #endif
721 		    n = regnext(n);
722 		}
723 		else if (stringok) {
724 		    int oldl = STR_LEN(scan);
725 		    regnode *nnext = regnext(n);
726 
727 		    if (oldl + STR_LEN(n) > U8_MAX)
728 			break;
729 		    NEXT_OFF(scan) += NEXT_OFF(n);
730 		    STR_LEN(scan) += STR_LEN(n);
731 		    next = n + NODE_SZ_STR(n);
732 		    /* Now we can overwrite *n : */
733 		    Move(STRING(n), STRING(scan) + oldl, STR_LEN(n), char);
734 #ifdef DEBUGGING
735 		    stop = next - 1;
736 #endif
737 		    n = nnext;
738 		}
739 	    }
740 
741 	    if (UTF && OP(scan) == EXACTF && STR_LEN(scan) >= 6) {
742 /*
743   Two problematic code points in Unicode casefolding of EXACT nodes:
744 
745    U+0390 - GREEK SMALL LETTER IOTA WITH DIALYTIKA AND TONOS
746    U+03B0 - GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND TONOS
747 
748    which casefold to
749 
750    Unicode			UTF-8
751 
752    U+03B9 U+0308 U+0301		0xCE 0xB9 0xCC 0x88 0xCC 0x81
753    U+03C5 U+0308 U+0301		0xCF 0x85 0xCC 0x88 0xCC 0x81
754 
755    This means that in case-insensitive matching (or "loose matching",
756    as Unicode calls it), an EXACTF of length six (the UTF-8 encoded byte
757    length of the above casefolded versions) can match a target string
758    of length two (the byte length of UTF-8 encoded U+0390 or U+03B0).
759    This would rather mess up the minimum length computation.
760 
761    What we'll do is to look for the tail four bytes, and then peek
762    at the preceding two bytes to see whether we need to decrease
763    the minimum length by four (six minus two).
764 
765    Thanks to the design of UTF-8, there cannot be false matches:
766    A sequence of valid UTF-8 bytes cannot be a subsequence of
767    another valid sequence of UTF-8 bytes.
768 
769 */
770 		 char *s0 = STRING(scan), *s, *t;
771 		 char *s1 = s0 + STR_LEN(scan) - 1, *s2 = s1 - 4;
772 		 char *t0 = "\xcc\x88\xcc\x81";
773 		 char *t1 = t0 + 3;
774 
775 		 for (s = s0 + 2;
776 		      s < s2 && (t = ninstr(s, s1, t0, t1));
777 		      s = t + 4) {
778 		      if (((U8)t[-1] == 0xB9 && (U8)t[-2] == 0xCE) ||
779 			  ((U8)t[-1] == 0x85 && (U8)t[-2] == 0xCF))
780 			   min -= 4;
781 		 }
782 	    }
783 
784 #ifdef DEBUGGING
785 	    /* Allow dumping */
786 	    n = scan + NODE_SZ_STR(scan);
787 	    while (n <= stop) {
788 		if (PL_regkind[(U8)OP(n)] != NOTHING || OP(n) == NOTHING) {
789 		    OP(n) = OPTIMIZED;
790 		    NEXT_OFF(n) = 0;
791 		}
792 		n++;
793 	    }
794 #endif
795 	}
796 	/* Follow the next-chain of the current node and optimize
797 	   away all the NOTHINGs from it.  */
798 	if (OP(scan) != CURLYX) {
799 	    int max = (reg_off_by_arg[OP(scan)]
800 		       ? I32_MAX
801 		       /* I32 may be smaller than U16 on CRAYs! */
802 		       : (I32_MAX < U16_MAX ? I32_MAX : U16_MAX));
803 	    int off = (reg_off_by_arg[OP(scan)] ? ARG(scan) : NEXT_OFF(scan));
804 	    int noff;
805 	    regnode *n = scan;
806 
807 	    /* Skip NOTHING and LONGJMP. */
808 	    while ((n = regnext(n))
809 		   && ((PL_regkind[(U8)OP(n)] == NOTHING && (noff = NEXT_OFF(n)))
810 		       || ((OP(n) == LONGJMP) && (noff = ARG(n))))
811 		   && off + noff < max)
812 		off += noff;
813 	    if (reg_off_by_arg[OP(scan)])
814 		ARG(scan) = off;
815 	    else
816 		NEXT_OFF(scan) = off;
817 	}
818 	/* The principal pseudo-switch.  Cannot be a switch, since we
819 	   look into several different things.  */
820 	if (OP(scan) == BRANCH || OP(scan) == BRANCHJ
821 		   || OP(scan) == IFTHEN || OP(scan) == SUSPEND) {
822 	    next = regnext(scan);
823 	    code = OP(scan);
824 
825 	    if (OP(next) == code || code == IFTHEN || code == SUSPEND) {
826 		I32 max1 = 0, min1 = I32_MAX, num = 0;
827 		struct regnode_charclass_class accum;
828 
829 		if (flags & SCF_DO_SUBSTR) /* XXXX Add !SUSPEND? */
830 		    scan_commit(pRExC_state, data); /* Cannot merge strings after this. */
831 		if (flags & SCF_DO_STCLASS)
832 		    cl_init_zero(pRExC_state, &accum);
833 		while (OP(scan) == code) {
834 		    I32 deltanext, minnext, f = 0, fake;
835 		    struct regnode_charclass_class this_class;
836 
837 		    num++;
838 		    data_fake.flags = 0;
839 		    if (data) {
840 			data_fake.whilem_c = data->whilem_c;
841 			data_fake.last_closep = data->last_closep;
842 		    }
843 		    else
844 			data_fake.last_closep = &fake;
845 		    next = regnext(scan);
846 		    scan = NEXTOPER(scan);
847 		    if (code != BRANCH)
848 			scan = NEXTOPER(scan);
849 		    if (flags & SCF_DO_STCLASS) {
850 			cl_init(pRExC_state, &this_class);
851 			data_fake.start_class = &this_class;
852 			f = SCF_DO_STCLASS_AND;
853 		    }
854 		    if (flags & SCF_WHILEM_VISITED_POS)
855 			f |= SCF_WHILEM_VISITED_POS;
856 		    /* we suppose the run is continuous, last=next...*/
857 		    minnext = study_chunk(pRExC_state, &scan, &deltanext,
858 					  next, &data_fake, f);
859 		    if (min1 > minnext)
860 			min1 = minnext;
861 		    if (max1 < minnext + deltanext)
862 			max1 = minnext + deltanext;
863 		    if (deltanext == I32_MAX)
864 			is_inf = is_inf_internal = 1;
865 		    scan = next;
866 		    if (data_fake.flags & (SF_HAS_PAR|SF_IN_PAR))
867 			pars++;
868 		    if (data && (data_fake.flags & SF_HAS_EVAL))
869 			data->flags |= SF_HAS_EVAL;
870 		    if (data)
871 			data->whilem_c = data_fake.whilem_c;
872 		    if (flags & SCF_DO_STCLASS)
873 			cl_or(pRExC_state, &accum, &this_class);
874 		    if (code == SUSPEND)
875 			break;
876 		}
877 		if (code == IFTHEN && num < 2) /* Empty ELSE branch */
878 		    min1 = 0;
879 		if (flags & SCF_DO_SUBSTR) {
880 		    data->pos_min += min1;
881 		    data->pos_delta += max1 - min1;
882 		    if (max1 != min1 || is_inf)
883 			data->longest = &(data->longest_float);
884 		}
885 		min += min1;
886 		delta += max1 - min1;
887 		if (flags & SCF_DO_STCLASS_OR) {
888 		    cl_or(pRExC_state, data->start_class, &accum);
889 		    if (min1) {
890 			cl_and(data->start_class, &and_with);
891 			flags &= ~SCF_DO_STCLASS;
892 		    }
893 		}
894 		else if (flags & SCF_DO_STCLASS_AND) {
895 		    if (min1) {
896 			cl_and(data->start_class, &accum);
897 			flags &= ~SCF_DO_STCLASS;
898 		    }
899 		    else {
900 			/* Switch to OR mode: cache the old value of
901 			 * data->start_class */
902 			StructCopy(data->start_class, &and_with,
903 				   struct regnode_charclass_class);
904 			flags &= ~SCF_DO_STCLASS_AND;
905 			StructCopy(&accum, data->start_class,
906 				   struct regnode_charclass_class);
907 			flags |= SCF_DO_STCLASS_OR;
908 			data->start_class->flags |= ANYOF_EOS;
909 		    }
910 		}
911 	    }
912 	    else if (code == BRANCHJ)	/* single branch is optimized. */
913 		scan = NEXTOPER(NEXTOPER(scan));
914 	    else			/* single branch is optimized. */
915 		scan = NEXTOPER(scan);
916 	    continue;
917 	}
918 	else if (OP(scan) == EXACT) {
919 	    I32 l = STR_LEN(scan);
920 	    UV uc = *((U8*)STRING(scan));
921 	    if (UTF) {
922 		U8 *s = (U8*)STRING(scan);
923 		l = utf8_length(s, s + l);
924 		uc = utf8_to_uvchr(s, NULL);
925 	    }
926 	    min += l;
927 	    if (flags & SCF_DO_SUBSTR) { /* Update longest substr. */
928 		/* The code below prefers earlier match for fixed
929 		   offset, later match for variable offset.  */
930 		if (data->last_end == -1) { /* Update the start info. */
931 		    data->last_start_min = data->pos_min;
932  		    data->last_start_max = is_inf
933  			? I32_MAX : data->pos_min + data->pos_delta;
934 		}
935 		sv_catpvn(data->last_found, STRING(scan), STR_LEN(scan));
936 		if (UTF)
937 		    SvUTF8_on(data->last_found);
938 		data->last_end = data->pos_min + l;
939 		data->pos_min += l; /* As in the first entry. */
940 		data->flags &= ~SF_BEFORE_EOL;
941 	    }
942 	    if (flags & SCF_DO_STCLASS_AND) {
943 		/* Check whether it is compatible with what we know already! */
944 		int compat = 1;
945 
946 		if (uc >= 0x100 ||
947 		    (!(data->start_class->flags & (ANYOF_CLASS | ANYOF_LOCALE))
948 		    && !ANYOF_BITMAP_TEST(data->start_class, uc)
949 		    && (!(data->start_class->flags & ANYOF_FOLD)
950 			|| !ANYOF_BITMAP_TEST(data->start_class, PL_fold[uc])))
951                     )
952 		    compat = 0;
953 		ANYOF_CLASS_ZERO(data->start_class);
954 		ANYOF_BITMAP_ZERO(data->start_class);
955 		if (compat)
956 		    ANYOF_BITMAP_SET(data->start_class, uc);
957 		data->start_class->flags &= ~ANYOF_EOS;
958 		if (uc < 0x100)
959 		  data->start_class->flags &= ~ANYOF_UNICODE_ALL;
960 	    }
961 	    else if (flags & SCF_DO_STCLASS_OR) {
962 		/* false positive possible if the class is case-folded */
963 		if (uc < 0x100)
964 		    ANYOF_BITMAP_SET(data->start_class, uc);
965 		else
966 		    data->start_class->flags |= ANYOF_UNICODE_ALL;
967 		data->start_class->flags &= ~ANYOF_EOS;
968 		cl_and(data->start_class, &and_with);
969 	    }
970 	    flags &= ~SCF_DO_STCLASS;
971 	}
972 	else if (PL_regkind[(U8)OP(scan)] == EXACT) { /* But OP != EXACT! */
973 	    I32 l = STR_LEN(scan);
974 	    UV uc = *((U8*)STRING(scan));
975 
976 	    /* Search for fixed substrings supports EXACT only. */
977 	    if (flags & SCF_DO_SUBSTR)
978 		scan_commit(pRExC_state, data);
979 	    if (UTF) {
980 		U8 *s = (U8 *)STRING(scan);
981 		l = utf8_length(s, s + l);
982 		uc = utf8_to_uvchr(s, NULL);
983 	    }
984 	    min += l;
985 	    if (data && (flags & SCF_DO_SUBSTR))
986 		data->pos_min += l;
987 	    if (flags & SCF_DO_STCLASS_AND) {
988 		/* Check whether it is compatible with what we know already! */
989 		int compat = 1;
990 
991 		if (uc >= 0x100 ||
992 		    (!(data->start_class->flags & (ANYOF_CLASS | ANYOF_LOCALE))
993 		    && !ANYOF_BITMAP_TEST(data->start_class, uc)
994 		     && !ANYOF_BITMAP_TEST(data->start_class, PL_fold[uc])))
995 		    compat = 0;
996 		ANYOF_CLASS_ZERO(data->start_class);
997 		ANYOF_BITMAP_ZERO(data->start_class);
998 		if (compat) {
999 		    ANYOF_BITMAP_SET(data->start_class, uc);
1000 		    data->start_class->flags &= ~ANYOF_EOS;
1001 		    data->start_class->flags |= ANYOF_FOLD;
1002 		    if (OP(scan) == EXACTFL)
1003 			data->start_class->flags |= ANYOF_LOCALE;
1004 		}
1005 	    }
1006 	    else if (flags & SCF_DO_STCLASS_OR) {
1007 		if (data->start_class->flags & ANYOF_FOLD) {
1008 		    /* false positive possible if the class is case-folded.
1009 		       Assume that the locale settings are the same... */
1010 		    if (uc < 0x100)
1011 			ANYOF_BITMAP_SET(data->start_class, uc);
1012 		    data->start_class->flags &= ~ANYOF_EOS;
1013 		}
1014 		cl_and(data->start_class, &and_with);
1015 	    }
1016 	    flags &= ~SCF_DO_STCLASS;
1017 	}
1018 	else if (strchr((char*)PL_varies,OP(scan))) {
1019 	    I32 mincount, maxcount, minnext, deltanext, fl = 0;
1020 	    I32 f = flags, pos_before = 0;
1021 	    regnode *oscan = scan;
1022 	    struct regnode_charclass_class this_class;
1023 	    struct regnode_charclass_class *oclass = NULL;
1024 	    I32 next_is_eval = 0;
1025 
1026 	    switch (PL_regkind[(U8)OP(scan)]) {
1027 	    case WHILEM:		/* End of (?:...)* . */
1028 		scan = NEXTOPER(scan);
1029 		goto finish;
1030 	    case PLUS:
1031 		if (flags & (SCF_DO_SUBSTR | SCF_DO_STCLASS)) {
1032 		    next = NEXTOPER(scan);
1033 		    if (OP(next) == EXACT || (flags & SCF_DO_STCLASS)) {
1034 			mincount = 1;
1035 			maxcount = REG_INFTY;
1036 			next = regnext(scan);
1037 			scan = NEXTOPER(scan);
1038 			goto do_curly;
1039 		    }
1040 		}
1041 		if (flags & SCF_DO_SUBSTR)
1042 		    data->pos_min++;
1043 		min++;
1044 		/* Fall through. */
1045 	    case STAR:
1046 		if (flags & SCF_DO_STCLASS) {
1047 		    mincount = 0;
1048 		    maxcount = REG_INFTY;
1049 		    next = regnext(scan);
1050 		    scan = NEXTOPER(scan);
1051 		    goto do_curly;
1052 		}
1053 		is_inf = is_inf_internal = 1;
1054 		scan = regnext(scan);
1055 		if (flags & SCF_DO_SUBSTR) {
1056 		    scan_commit(pRExC_state, data); /* Cannot extend fixed substrings */
1057 		    data->longest = &(data->longest_float);
1058 		}
1059 		goto optimize_curly_tail;
1060 	    case CURLY:
1061 		mincount = ARG1(scan);
1062 		maxcount = ARG2(scan);
1063 		next = regnext(scan);
1064 		if (OP(scan) == CURLYX) {
1065 		    I32 lp = (data ? *(data->last_closep) : 0);
1066 
1067 		    scan->flags = ((lp <= U8_MAX) ? lp : U8_MAX);
1068 		}
1069 		scan = NEXTOPER(scan) + EXTRA_STEP_2ARGS;
1070 		next_is_eval = (OP(scan) == EVAL);
1071 	      do_curly:
1072 		if (flags & SCF_DO_SUBSTR) {
1073 		    if (mincount == 0) scan_commit(pRExC_state,data); /* Cannot extend fixed substrings */
1074 		    pos_before = data->pos_min;
1075 		}
1076 		if (data) {
1077 		    fl = data->flags;
1078 		    data->flags &= ~(SF_HAS_PAR|SF_IN_PAR|SF_HAS_EVAL);
1079 		    if (is_inf)
1080 			data->flags |= SF_IS_INF;
1081 		}
1082 		if (flags & SCF_DO_STCLASS) {
1083 		    cl_init(pRExC_state, &this_class);
1084 		    oclass = data->start_class;
1085 		    data->start_class = &this_class;
1086 		    f |= SCF_DO_STCLASS_AND;
1087 		    f &= ~SCF_DO_STCLASS_OR;
1088 		}
1089 		/* These are the cases when once a subexpression
1090 		   fails at a particular position, it cannot succeed
1091 		   even after backtracking at the enclosing scope.
1092 
1093 		   XXXX what if minimal match and we are at the
1094 		        initial run of {n,m}? */
1095 		if ((mincount != maxcount - 1) && (maxcount != REG_INFTY))
1096 		    f &= ~SCF_WHILEM_VISITED_POS;
1097 
1098 		/* This will finish on WHILEM, setting scan, or on NULL: */
1099 		minnext = study_chunk(pRExC_state, &scan, &deltanext, last, data,
1100 				      mincount == 0
1101 					? (f & ~SCF_DO_SUBSTR) : f);
1102 
1103 		if (flags & SCF_DO_STCLASS)
1104 		    data->start_class = oclass;
1105 		if (mincount == 0 || minnext == 0) {
1106 		    if (flags & SCF_DO_STCLASS_OR) {
1107 			cl_or(pRExC_state, data->start_class, &this_class);
1108 		    }
1109 		    else if (flags & SCF_DO_STCLASS_AND) {
1110 			/* Switch to OR mode: cache the old value of
1111 			 * data->start_class */
1112 			StructCopy(data->start_class, &and_with,
1113 				   struct regnode_charclass_class);
1114 			flags &= ~SCF_DO_STCLASS_AND;
1115 			StructCopy(&this_class, data->start_class,
1116 				   struct regnode_charclass_class);
1117 			flags |= SCF_DO_STCLASS_OR;
1118 			data->start_class->flags |= ANYOF_EOS;
1119 		    }
1120 		} else {		/* Non-zero len */
1121 		    if (flags & SCF_DO_STCLASS_OR) {
1122 			cl_or(pRExC_state, data->start_class, &this_class);
1123 			cl_and(data->start_class, &and_with);
1124 		    }
1125 		    else if (flags & SCF_DO_STCLASS_AND)
1126 			cl_and(data->start_class, &this_class);
1127 		    flags &= ~SCF_DO_STCLASS;
1128 		}
1129 		if (!scan) 		/* It was not CURLYX, but CURLY. */
1130 		    scan = next;
1131 		if (ckWARN(WARN_REGEXP)
1132 		       /* ? quantifier ok, except for (?{ ... }) */
1133 		    && (next_is_eval || !(mincount == 0 && maxcount == 1))
1134 		    && (minnext == 0) && (deltanext == 0)
1135 		    && data && !(data->flags & (SF_HAS_PAR|SF_IN_PAR))
1136 		    && maxcount <= REG_INFTY/3) /* Complement check for big count */
1137 		{
1138 		    vWARN(RExC_parse,
1139 			  "Quantifier unexpected on zero-length expression");
1140 		}
1141 
1142 		min += minnext * mincount;
1143 		is_inf_internal |= ((maxcount == REG_INFTY
1144 				     && (minnext + deltanext) > 0)
1145 				    || deltanext == I32_MAX);
1146 		is_inf |= is_inf_internal;
1147 		delta += (minnext + deltanext) * maxcount - minnext * mincount;
1148 
1149 		/* Try powerful optimization CURLYX => CURLYN. */
1150 		if (  OP(oscan) == CURLYX && data
1151 		      && data->flags & SF_IN_PAR
1152 		      && !(data->flags & SF_HAS_EVAL)
1153 		      && !deltanext && minnext == 1 ) {
1154 		    /* Try to optimize to CURLYN.  */
1155 		    regnode *nxt = NEXTOPER(oscan) + EXTRA_STEP_2ARGS;
1156 		    regnode *nxt1 = nxt;
1157 #ifdef DEBUGGING
1158 		    regnode *nxt2;
1159 #endif
1160 
1161 		    /* Skip open. */
1162 		    nxt = regnext(nxt);
1163 		    if (!strchr((char*)PL_simple,OP(nxt))
1164 			&& !(PL_regkind[(U8)OP(nxt)] == EXACT
1165 			     && STR_LEN(nxt) == 1))
1166 			goto nogo;
1167 #ifdef DEBUGGING
1168 		    nxt2 = nxt;
1169 #endif
1170 		    nxt = regnext(nxt);
1171 		    if (OP(nxt) != CLOSE)
1172 			goto nogo;
1173 		    /* Now we know that nxt2 is the only contents: */
1174 		    oscan->flags = (U8)ARG(nxt);
1175 		    OP(oscan) = CURLYN;
1176 		    OP(nxt1) = NOTHING;	/* was OPEN. */
1177 #ifdef DEBUGGING
1178 		    OP(nxt1 + 1) = OPTIMIZED; /* was count. */
1179 		    NEXT_OFF(nxt1+ 1) = 0; /* just for consistancy. */
1180 		    NEXT_OFF(nxt2) = 0;	/* just for consistancy with CURLY. */
1181 		    OP(nxt) = OPTIMIZED;	/* was CLOSE. */
1182 		    OP(nxt + 1) = OPTIMIZED; /* was count. */
1183 		    NEXT_OFF(nxt+ 1) = 0; /* just for consistancy. */
1184 #endif
1185 		}
1186 	      nogo:
1187 
1188 		/* Try optimization CURLYX => CURLYM. */
1189 		if (  OP(oscan) == CURLYX && data
1190 		      && !(data->flags & SF_HAS_PAR)
1191 		      && !(data->flags & SF_HAS_EVAL)
1192 		      && !deltanext  ) {
1193 		    /* XXXX How to optimize if data == 0? */
1194 		    /* Optimize to a simpler form.  */
1195 		    regnode *nxt = NEXTOPER(oscan) + EXTRA_STEP_2ARGS; /* OPEN */
1196 		    regnode *nxt2;
1197 
1198 		    OP(oscan) = CURLYM;
1199 		    while ( (nxt2 = regnext(nxt)) /* skip over embedded stuff*/
1200 			    && (OP(nxt2) != WHILEM))
1201 			nxt = nxt2;
1202 		    OP(nxt2)  = SUCCEED; /* Whas WHILEM */
1203 		    /* Need to optimize away parenths. */
1204 		    if (data->flags & SF_IN_PAR) {
1205 			/* Set the parenth number.  */
1206 			regnode *nxt1 = NEXTOPER(oscan) + EXTRA_STEP_2ARGS; /* OPEN*/
1207 
1208 			if (OP(nxt) != CLOSE)
1209 			    FAIL("Panic opt close");
1210 			oscan->flags = (U8)ARG(nxt);
1211 			OP(nxt1) = OPTIMIZED;	/* was OPEN. */
1212 			OP(nxt) = OPTIMIZED;	/* was CLOSE. */
1213 #ifdef DEBUGGING
1214 			OP(nxt1 + 1) = OPTIMIZED; /* was count. */
1215 			OP(nxt + 1) = OPTIMIZED; /* was count. */
1216 			NEXT_OFF(nxt1 + 1) = 0; /* just for consistancy. */
1217 			NEXT_OFF(nxt + 1) = 0; /* just for consistancy. */
1218 #endif
1219 #if 0
1220 			while ( nxt1 && (OP(nxt1) != WHILEM)) {
1221 			    regnode *nnxt = regnext(nxt1);
1222 
1223 			    if (nnxt == nxt) {
1224 				if (reg_off_by_arg[OP(nxt1)])
1225 				    ARG_SET(nxt1, nxt2 - nxt1);
1226 				else if (nxt2 - nxt1 < U16_MAX)
1227 				    NEXT_OFF(nxt1) = nxt2 - nxt1;
1228 				else
1229 				    OP(nxt) = NOTHING;	/* Cannot beautify */
1230 			    }
1231 			    nxt1 = nnxt;
1232 			}
1233 #endif
1234 			/* Optimize again: */
1235 			study_chunk(pRExC_state, &nxt1, &deltanext, nxt,
1236 				    NULL, 0);
1237 		    }
1238 		    else
1239 			oscan->flags = 0;
1240 		}
1241 		else if ((OP(oscan) == CURLYX)
1242 			 && (flags & SCF_WHILEM_VISITED_POS)
1243 			 /* See the comment on a similar expression above.
1244 			    However, this time it not a subexpression
1245 			    we care about, but the expression itself. */
1246 			 && (maxcount == REG_INFTY)
1247 			 && data && ++data->whilem_c < 16) {
1248 		    /* This stays as CURLYX, we can put the count/of pair. */
1249 		    /* Find WHILEM (as in regexec.c) */
1250 		    regnode *nxt = oscan + NEXT_OFF(oscan);
1251 
1252 		    if (OP(PREVOPER(nxt)) == NOTHING) /* LONGJMP */
1253 			nxt += ARG(nxt);
1254 		    PREVOPER(nxt)->flags = (U8)(data->whilem_c
1255 			| (RExC_whilem_seen << 4)); /* On WHILEM */
1256 		}
1257 		if (data && fl & (SF_HAS_PAR|SF_IN_PAR))
1258 		    pars++;
1259 		if (flags & SCF_DO_SUBSTR) {
1260 		    SV *last_str = Nullsv;
1261 		    int counted = mincount != 0;
1262 
1263 		    if (data->last_end > 0 && mincount != 0) { /* Ends with a string. */
1264 #if defined(SPARC64_GCC_WORKAROUND)
1265 			I32 b = 0;
1266 			STRLEN l = 0;
1267 			char *s = NULL;
1268 			I32 old = 0;
1269 
1270 			if (pos_before >= data->last_start_min)
1271 			    b = pos_before;
1272 			else
1273 			    b = data->last_start_min;
1274 
1275 			l = 0;
1276 			s = SvPV(data->last_found, l);
1277 			old = b - data->last_start_min;
1278 
1279 #else
1280 			I32 b = pos_before >= data->last_start_min
1281 			    ? pos_before : data->last_start_min;
1282 			STRLEN l;
1283 			char *s = SvPV(data->last_found, l);
1284 			I32 old = b - data->last_start_min;
1285 #endif
1286 
1287 			if (UTF)
1288 			    old = utf8_hop((U8*)s, old) - (U8*)s;
1289 
1290 			l -= old;
1291 			/* Get the added string: */
1292 			last_str = newSVpvn(s  + old, l);
1293 			if (deltanext == 0 && pos_before == b) {
1294 			    /* What was added is a constant string */
1295 			    if (mincount > 1) {
1296 				SvGROW(last_str, (mincount * l) + 1);
1297 				repeatcpy(SvPVX(last_str) + l,
1298 					  SvPVX(last_str), l, mincount - 1);
1299 				SvCUR(last_str) *= mincount;
1300 				/* Add additional parts. */
1301 				SvCUR_set(data->last_found,
1302 					  SvCUR(data->last_found) - l);
1303 				sv_catsv(data->last_found, last_str);
1304 				data->last_end += l * (mincount - 1);
1305 			    }
1306 			} else {
1307 			    /* start offset must point into the last copy */
1308 			    data->last_start_min += minnext * (mincount - 1);
1309 			    data->last_start_max += is_inf ? 0 : (maxcount - 1)
1310 				* (minnext + data->pos_delta);
1311 			}
1312 		    }
1313 		    /* It is counted once already... */
1314 		    data->pos_min += minnext * (mincount - counted);
1315 		    data->pos_delta += - counted * deltanext +
1316 			(minnext + deltanext) * maxcount - minnext * mincount;
1317 		    if (mincount != maxcount) {
1318 			 /* Cannot extend fixed substrings found inside
1319 			    the group.  */
1320 			scan_commit(pRExC_state,data);
1321 			if (mincount && last_str) {
1322 			    sv_setsv(data->last_found, last_str);
1323 			    data->last_end = data->pos_min;
1324 			    data->last_start_min =
1325 				data->pos_min - CHR_SVLEN(last_str);
1326 			    data->last_start_max = is_inf
1327 				? I32_MAX
1328 				: data->pos_min + data->pos_delta
1329 				- CHR_SVLEN(last_str);
1330 			}
1331 			data->longest = &(data->longest_float);
1332 		    }
1333 		    SvREFCNT_dec(last_str);
1334 		}
1335 		if (data && (fl & SF_HAS_EVAL))
1336 		    data->flags |= SF_HAS_EVAL;
1337 	      optimize_curly_tail:
1338 		if (OP(oscan) != CURLYX) {
1339 		    while (PL_regkind[(U8)OP(next = regnext(oscan))] == NOTHING
1340 			   && NEXT_OFF(next))
1341 			NEXT_OFF(oscan) += NEXT_OFF(next);
1342 		}
1343 		continue;
1344 	    default:			/* REF and CLUMP only? */
1345 		if (flags & SCF_DO_SUBSTR) {
1346 		    scan_commit(pRExC_state,data);	/* Cannot expect anything... */
1347 		    data->longest = &(data->longest_float);
1348 		}
1349 		is_inf = is_inf_internal = 1;
1350 		if (flags & SCF_DO_STCLASS_OR)
1351 		    cl_anything(pRExC_state, data->start_class);
1352 		flags &= ~SCF_DO_STCLASS;
1353 		break;
1354 	    }
1355 	}
1356 	else if (strchr((char*)PL_simple,OP(scan))) {
1357 	    int value = 0;
1358 
1359 	    if (flags & SCF_DO_SUBSTR) {
1360 		scan_commit(pRExC_state,data);
1361 		data->pos_min++;
1362 	    }
1363 	    min++;
1364 	    if (flags & SCF_DO_STCLASS) {
1365 		data->start_class->flags &= ~ANYOF_EOS;	/* No match on empty */
1366 
1367 		/* Some of the logic below assumes that switching
1368 		   locale on will only add false positives. */
1369 		switch (PL_regkind[(U8)OP(scan)]) {
1370 		case SANY:
1371 		default:
1372 		  do_default:
1373 		    /* Perl_croak(aTHX_ "panic: unexpected simple REx opcode %d", OP(scan)); */
1374 		    if (flags & SCF_DO_STCLASS_OR) /* Allow everything */
1375 			cl_anything(pRExC_state, data->start_class);
1376 		    break;
1377 		case REG_ANY:
1378 		    if (OP(scan) == SANY)
1379 			goto do_default;
1380 		    if (flags & SCF_DO_STCLASS_OR) { /* Everything but \n */
1381 			value = (ANYOF_BITMAP_TEST(data->start_class,'\n')
1382 				 || (data->start_class->flags & ANYOF_CLASS));
1383 			cl_anything(pRExC_state, data->start_class);
1384 		    }
1385 		    if (flags & SCF_DO_STCLASS_AND || !value)
1386 			ANYOF_BITMAP_CLEAR(data->start_class,'\n');
1387 		    break;
1388 		case ANYOF:
1389 		    if (flags & SCF_DO_STCLASS_AND)
1390 			cl_and(data->start_class,
1391 			       (struct regnode_charclass_class*)scan);
1392 		    else
1393 			cl_or(pRExC_state, data->start_class,
1394 			      (struct regnode_charclass_class*)scan);
1395 		    break;
1396 		case ALNUM:
1397 		    if (flags & SCF_DO_STCLASS_AND) {
1398 			if (!(data->start_class->flags & ANYOF_LOCALE)) {
1399 			    ANYOF_CLASS_CLEAR(data->start_class,ANYOF_NALNUM);
1400 			    for (value = 0; value < 256; value++)
1401 				if (!isALNUM(value))
1402 				    ANYOF_BITMAP_CLEAR(data->start_class, value);
1403 			}
1404 		    }
1405 		    else {
1406 			if (data->start_class->flags & ANYOF_LOCALE)
1407 			    ANYOF_CLASS_SET(data->start_class,ANYOF_ALNUM);
1408 			else {
1409 			    for (value = 0; value < 256; value++)
1410 				if (isALNUM(value))
1411 				    ANYOF_BITMAP_SET(data->start_class, value);
1412 			}
1413 		    }
1414 		    break;
1415 		case ALNUML:
1416 		    if (flags & SCF_DO_STCLASS_AND) {
1417 			if (data->start_class->flags & ANYOF_LOCALE)
1418 			    ANYOF_CLASS_CLEAR(data->start_class,ANYOF_NALNUM);
1419 		    }
1420 		    else {
1421 			ANYOF_CLASS_SET(data->start_class,ANYOF_ALNUM);
1422 			data->start_class->flags |= ANYOF_LOCALE;
1423 		    }
1424 		    break;
1425 		case NALNUM:
1426 		    if (flags & SCF_DO_STCLASS_AND) {
1427 			if (!(data->start_class->flags & ANYOF_LOCALE)) {
1428 			    ANYOF_CLASS_CLEAR(data->start_class,ANYOF_ALNUM);
1429 			    for (value = 0; value < 256; value++)
1430 				if (isALNUM(value))
1431 				    ANYOF_BITMAP_CLEAR(data->start_class, value);
1432 			}
1433 		    }
1434 		    else {
1435 			if (data->start_class->flags & ANYOF_LOCALE)
1436 			    ANYOF_CLASS_SET(data->start_class,ANYOF_NALNUM);
1437 			else {
1438 			    for (value = 0; value < 256; value++)
1439 				if (!isALNUM(value))
1440 				    ANYOF_BITMAP_SET(data->start_class, value);
1441 			}
1442 		    }
1443 		    break;
1444 		case NALNUML:
1445 		    if (flags & SCF_DO_STCLASS_AND) {
1446 			if (data->start_class->flags & ANYOF_LOCALE)
1447 			    ANYOF_CLASS_CLEAR(data->start_class,ANYOF_ALNUM);
1448 		    }
1449 		    else {
1450 			data->start_class->flags |= ANYOF_LOCALE;
1451 			ANYOF_CLASS_SET(data->start_class,ANYOF_NALNUM);
1452 		    }
1453 		    break;
1454 		case SPACE:
1455 		    if (flags & SCF_DO_STCLASS_AND) {
1456 			if (!(data->start_class->flags & ANYOF_LOCALE)) {
1457 			    ANYOF_CLASS_CLEAR(data->start_class,ANYOF_NSPACE);
1458 			    for (value = 0; value < 256; value++)
1459 				if (!isSPACE(value))
1460 				    ANYOF_BITMAP_CLEAR(data->start_class, value);
1461 			}
1462 		    }
1463 		    else {
1464 			if (data->start_class->flags & ANYOF_LOCALE)
1465 			    ANYOF_CLASS_SET(data->start_class,ANYOF_SPACE);
1466 			else {
1467 			    for (value = 0; value < 256; value++)
1468 				if (isSPACE(value))
1469 				    ANYOF_BITMAP_SET(data->start_class, value);
1470 			}
1471 		    }
1472 		    break;
1473 		case SPACEL:
1474 		    if (flags & SCF_DO_STCLASS_AND) {
1475 			if (data->start_class->flags & ANYOF_LOCALE)
1476 			    ANYOF_CLASS_CLEAR(data->start_class,ANYOF_NSPACE);
1477 		    }
1478 		    else {
1479 			data->start_class->flags |= ANYOF_LOCALE;
1480 			ANYOF_CLASS_SET(data->start_class,ANYOF_SPACE);
1481 		    }
1482 		    break;
1483 		case NSPACE:
1484 		    if (flags & SCF_DO_STCLASS_AND) {
1485 			if (!(data->start_class->flags & ANYOF_LOCALE)) {
1486 			    ANYOF_CLASS_CLEAR(data->start_class,ANYOF_SPACE);
1487 			    for (value = 0; value < 256; value++)
1488 				if (isSPACE(value))
1489 				    ANYOF_BITMAP_CLEAR(data->start_class, value);
1490 			}
1491 		    }
1492 		    else {
1493 			if (data->start_class->flags & ANYOF_LOCALE)
1494 			    ANYOF_CLASS_SET(data->start_class,ANYOF_NSPACE);
1495 			else {
1496 			    for (value = 0; value < 256; value++)
1497 				if (!isSPACE(value))
1498 				    ANYOF_BITMAP_SET(data->start_class, value);
1499 			}
1500 		    }
1501 		    break;
1502 		case NSPACEL:
1503 		    if (flags & SCF_DO_STCLASS_AND) {
1504 			if (data->start_class->flags & ANYOF_LOCALE) {
1505 			    ANYOF_CLASS_CLEAR(data->start_class,ANYOF_SPACE);
1506 			    for (value = 0; value < 256; value++)
1507 				if (!isSPACE(value))
1508 				    ANYOF_BITMAP_CLEAR(data->start_class, value);
1509 			}
1510 		    }
1511 		    else {
1512 			data->start_class->flags |= ANYOF_LOCALE;
1513 			ANYOF_CLASS_SET(data->start_class,ANYOF_NSPACE);
1514 		    }
1515 		    break;
1516 		case DIGIT:
1517 		    if (flags & SCF_DO_STCLASS_AND) {
1518 			ANYOF_CLASS_CLEAR(data->start_class,ANYOF_NDIGIT);
1519 			for (value = 0; value < 256; value++)
1520 			    if (!isDIGIT(value))
1521 				ANYOF_BITMAP_CLEAR(data->start_class, value);
1522 		    }
1523 		    else {
1524 			if (data->start_class->flags & ANYOF_LOCALE)
1525 			    ANYOF_CLASS_SET(data->start_class,ANYOF_DIGIT);
1526 			else {
1527 			    for (value = 0; value < 256; value++)
1528 				if (isDIGIT(value))
1529 				    ANYOF_BITMAP_SET(data->start_class, value);
1530 			}
1531 		    }
1532 		    break;
1533 		case NDIGIT:
1534 		    if (flags & SCF_DO_STCLASS_AND) {
1535 			ANYOF_CLASS_CLEAR(data->start_class,ANYOF_DIGIT);
1536 			for (value = 0; value < 256; value++)
1537 			    if (isDIGIT(value))
1538 				ANYOF_BITMAP_CLEAR(data->start_class, value);
1539 		    }
1540 		    else {
1541 			if (data->start_class->flags & ANYOF_LOCALE)
1542 			    ANYOF_CLASS_SET(data->start_class,ANYOF_NDIGIT);
1543 			else {
1544 			    for (value = 0; value < 256; value++)
1545 				if (!isDIGIT(value))
1546 				    ANYOF_BITMAP_SET(data->start_class, value);
1547 			}
1548 		    }
1549 		    break;
1550 		}
1551 		if (flags & SCF_DO_STCLASS_OR)
1552 		    cl_and(data->start_class, &and_with);
1553 		flags &= ~SCF_DO_STCLASS;
1554 	    }
1555 	}
1556 	else if (PL_regkind[(U8)OP(scan)] == EOL && flags & SCF_DO_SUBSTR) {
1557 	    data->flags |= (OP(scan) == MEOL
1558 			    ? SF_BEFORE_MEOL
1559 			    : SF_BEFORE_SEOL);
1560 	}
1561 	else if (  PL_regkind[(U8)OP(scan)] == BRANCHJ
1562 		 /* Lookbehind, or need to calculate parens/evals/stclass: */
1563 		   && (scan->flags || data || (flags & SCF_DO_STCLASS))
1564 		   && (OP(scan) == IFMATCH || OP(scan) == UNLESSM)) {
1565 	    /* Lookahead/lookbehind */
1566 	    I32 deltanext, minnext, fake = 0;
1567 	    regnode *nscan;
1568 	    struct regnode_charclass_class intrnl;
1569 	    int f = 0;
1570 
1571 	    data_fake.flags = 0;
1572 	    if (data) {
1573 		data_fake.whilem_c = data->whilem_c;
1574 		data_fake.last_closep = data->last_closep;
1575 	    }
1576 	    else
1577 		data_fake.last_closep = &fake;
1578 	    if ( flags & SCF_DO_STCLASS && !scan->flags
1579 		 && OP(scan) == IFMATCH ) { /* Lookahead */
1580 		cl_init(pRExC_state, &intrnl);
1581 		data_fake.start_class = &intrnl;
1582 		f |= SCF_DO_STCLASS_AND;
1583 	    }
1584 	    if (flags & SCF_WHILEM_VISITED_POS)
1585 		f |= SCF_WHILEM_VISITED_POS;
1586 	    next = regnext(scan);
1587 	    nscan = NEXTOPER(NEXTOPER(scan));
1588 	    minnext = study_chunk(pRExC_state, &nscan, &deltanext, last, &data_fake, f);
1589 	    if (scan->flags) {
1590 		if (deltanext) {
1591 		    vFAIL("Variable length lookbehind not implemented");
1592 		}
1593 		else if (minnext > U8_MAX) {
1594 		    vFAIL2("Lookbehind longer than %"UVuf" not implemented", (UV)U8_MAX);
1595 		}
1596 		scan->flags = (U8)minnext;
1597 	    }
1598 	    if (data && data_fake.flags & (SF_HAS_PAR|SF_IN_PAR))
1599 		pars++;
1600 	    if (data && (data_fake.flags & SF_HAS_EVAL))
1601 		data->flags |= SF_HAS_EVAL;
1602 	    if (data)
1603 		data->whilem_c = data_fake.whilem_c;
1604 	    if (f & SCF_DO_STCLASS_AND) {
1605 		int was = (data->start_class->flags & ANYOF_EOS);
1606 
1607 		cl_and(data->start_class, &intrnl);
1608 		if (was)
1609 		    data->start_class->flags |= ANYOF_EOS;
1610 	    }
1611 	}
1612 	else if (OP(scan) == OPEN) {
1613 	    pars++;
1614 	}
1615 	else if (OP(scan) == CLOSE) {
1616 	    if ((I32)ARG(scan) == is_par) {
1617 		next = regnext(scan);
1618 
1619 		if ( next && (OP(next) != WHILEM) && next < last)
1620 		    is_par = 0;		/* Disable optimization */
1621 	    }
1622 	    if (data)
1623 		*(data->last_closep) = ARG(scan);
1624 	}
1625 	else if (OP(scan) == EVAL) {
1626 		if (data)
1627 		    data->flags |= SF_HAS_EVAL;
1628 	}
1629 	else if (OP(scan) == LOGICAL && scan->flags == 2) { /* Embedded follows */
1630 		if (flags & SCF_DO_SUBSTR) {
1631 		    scan_commit(pRExC_state,data);
1632 		    data->longest = &(data->longest_float);
1633 		}
1634 		is_inf = is_inf_internal = 1;
1635 		if (flags & SCF_DO_STCLASS_OR) /* Allow everything */
1636 		    cl_anything(pRExC_state, data->start_class);
1637 		flags &= ~SCF_DO_STCLASS;
1638 	}
1639 	/* Else: zero-length, ignore. */
1640 	scan = regnext(scan);
1641     }
1642 
1643   finish:
1644     *scanp = scan;
1645     *deltap = is_inf_internal ? I32_MAX : delta;
1646     if (flags & SCF_DO_SUBSTR && is_inf)
1647 	data->pos_delta = I32_MAX - data->pos_min;
1648     if (is_par > U8_MAX)
1649 	is_par = 0;
1650     if (is_par && pars==1 && data) {
1651 	data->flags |= SF_IN_PAR;
1652 	data->flags &= ~SF_HAS_PAR;
1653     }
1654     else if (pars && data) {
1655 	data->flags |= SF_HAS_PAR;
1656 	data->flags &= ~SF_IN_PAR;
1657     }
1658     if (flags & SCF_DO_STCLASS_OR)
1659 	cl_and(data->start_class, &and_with);
1660     return min;
1661 }
1662 
1663 STATIC I32
1664 S_add_data(pTHX_ RExC_state_t *pRExC_state, I32 n, char *s)
1665 {
1666     if (RExC_rx->data) {
1667 	Renewc(RExC_rx->data,
1668 	       sizeof(*RExC_rx->data) + sizeof(void*) * (RExC_rx->data->count + n - 1),
1669 	       char, struct reg_data);
1670 	Renew(RExC_rx->data->what, RExC_rx->data->count + n, U8);
1671 	RExC_rx->data->count += n;
1672     }
1673     else {
1674 	Newc(1207, RExC_rx->data, sizeof(*RExC_rx->data) + sizeof(void*) * (n - 1),
1675 	     char, struct reg_data);
1676 	New(1208, RExC_rx->data->what, n, U8);
1677 	RExC_rx->data->count = n;
1678     }
1679     Copy(s, RExC_rx->data->what + RExC_rx->data->count - n, n, U8);
1680     return RExC_rx->data->count - n;
1681 }
1682 
1683 void
1684 Perl_reginitcolors(pTHX)
1685 {
1686     int i = 0;
1687     char *s = PerlEnv_getenv("PERL_RE_COLORS");
1688 
1689     if (s) {
1690 	PL_colors[0] = s = savepv(s);
1691 	while (++i < 6) {
1692 	    s = strchr(s, '\t');
1693 	    if (s) {
1694 		*s = '\0';
1695 		PL_colors[i] = ++s;
1696 	    }
1697 	    else
1698 		PL_colors[i] = s = "";
1699 	}
1700     } else {
1701 	while (i < 6)
1702 	    PL_colors[i++] = "";
1703     }
1704     PL_colorset = 1;
1705 }
1706 
1707 
1708 /*
1709  - pregcomp - compile a regular expression into internal code
1710  *
1711  * We can't allocate space until we know how big the compiled form will be,
1712  * but we can't compile it (and thus know how big it is) until we've got a
1713  * place to put the code.  So we cheat:  we compile it twice, once with code
1714  * generation turned off and size counting turned on, and once "for real".
1715  * This also means that we don't allocate space until we are sure that the
1716  * thing really will compile successfully, and we never have to move the
1717  * code and thus invalidate pointers into it.  (Note that it has to be in
1718  * one piece because free() must be able to free it all.) [NB: not true in perl]
1719  *
1720  * Beware that the optimization-preparation code in here knows about some
1721  * of the structure of the compiled regexp.  [I'll say.]
1722  */
1723 regexp *
1724 Perl_pregcomp(pTHX_ char *exp, char *xend, PMOP *pm)
1725 {
1726     register regexp *r;
1727     regnode *scan;
1728     regnode *first;
1729     I32 flags;
1730     I32 minlen = 0;
1731     I32 sawplus = 0;
1732     I32 sawopen = 0;
1733     scan_data_t data;
1734     RExC_state_t RExC_state;
1735     RExC_state_t *pRExC_state = &RExC_state;
1736 
1737     if (exp == NULL)
1738 	FAIL("NULL regexp argument");
1739 
1740     RExC_utf8 = pm->op_pmdynflags & PMdf_CMP_UTF8;
1741 
1742     RExC_precomp = exp;
1743     DEBUG_r({
1744 	 if (!PL_colorset) reginitcolors();
1745 	 PerlIO_printf(Perl_debug_log, "%sCompiling REx%s `%s%*s%s'\n",
1746 		       PL_colors[4],PL_colors[5],PL_colors[0],
1747 		       (int)(xend - exp), RExC_precomp, PL_colors[1]);
1748     });
1749     RExC_flags = pm->op_pmflags;
1750     RExC_sawback = 0;
1751 
1752     RExC_seen = 0;
1753     RExC_seen_zerolen = *exp == '^' ? -1 : 0;
1754     RExC_seen_evals = 0;
1755     RExC_extralen = 0;
1756 
1757     /* First pass: determine size, legality. */
1758     RExC_parse = exp;
1759     RExC_start = exp;
1760     RExC_end = xend;
1761     RExC_naughty = 0;
1762     RExC_npar = 1;
1763     RExC_size = 0L;
1764     RExC_emit = &PL_regdummy;
1765     RExC_whilem_seen = 0;
1766 #if 0 /* REGC() is (currently) a NOP at the first pass.
1767        * Clever compilers notice this and complain. --jhi */
1768     REGC((U8)REG_MAGIC, (char*)RExC_emit);
1769 #endif
1770     if (reg(pRExC_state, 0, &flags) == NULL) {
1771 	RExC_precomp = Nullch;
1772 	return(NULL);
1773     }
1774     DEBUG_r(PerlIO_printf(Perl_debug_log, "size %"IVdf" ", (IV)RExC_size));
1775 
1776     /* Small enough for pointer-storage convention?
1777        If extralen==0, this means that we will not need long jumps. */
1778     if (RExC_size >= 0x10000L && RExC_extralen)
1779         RExC_size += RExC_extralen;
1780     else
1781 	RExC_extralen = 0;
1782     if (RExC_whilem_seen > 15)
1783 	RExC_whilem_seen = 15;
1784 
1785     /* Allocate space and initialize. */
1786     Newc(1001, r, sizeof(regexp) + (unsigned)RExC_size * sizeof(regnode),
1787 	 char, regexp);
1788     if (r == NULL)
1789 	FAIL("Regexp out of space");
1790 
1791 #ifdef DEBUGGING
1792     /* avoid reading uninitialized memory in DEBUGGING code in study_chunk() */
1793     Zero(r, sizeof(regexp) + (unsigned)RExC_size * sizeof(regnode), char);
1794 #endif
1795     r->refcnt = 1;
1796     r->prelen = xend - exp;
1797     r->precomp = savepvn(RExC_precomp, r->prelen);
1798     r->subbeg = NULL;
1799     r->reganch = pm->op_pmflags & PMf_COMPILETIME;
1800     r->nparens = RExC_npar - 1;	/* set early to validate backrefs */
1801 
1802     r->substrs = 0;			/* Useful during FAIL. */
1803     r->startp = 0;			/* Useful during FAIL. */
1804     r->endp = 0;			/* Useful during FAIL. */
1805 
1806     Newz(1304, r->offsets, 2*RExC_size+1, U32); /* MJD 20001228 */
1807     if (r->offsets) {
1808       r->offsets[0] = RExC_size;
1809     }
1810     DEBUG_r(PerlIO_printf(Perl_debug_log,
1811                           "%s %"UVuf" bytes for offset annotations.\n",
1812                           r->offsets ? "Got" : "Couldn't get",
1813                           (UV)((2*RExC_size+1) * sizeof(U32))));
1814 
1815     RExC_rx = r;
1816 
1817     /* Second pass: emit code. */
1818     RExC_flags = pm->op_pmflags;	/* don't let top level (?i) bleed */
1819     RExC_parse = exp;
1820     RExC_end = xend;
1821     RExC_naughty = 0;
1822     RExC_npar = 1;
1823     RExC_emit_start = r->program;
1824     RExC_emit = r->program;
1825     /* Store the count of eval-groups for security checks: */
1826     RExC_emit->next_off = (U16)((RExC_seen_evals > U16_MAX) ? U16_MAX : RExC_seen_evals);
1827     REGC((U8)REG_MAGIC, (char*) RExC_emit++);
1828     r->data = 0;
1829     if (reg(pRExC_state, 0, &flags) == NULL)
1830 	return(NULL);
1831 
1832     /* Dig out information for optimizations. */
1833     r->reganch = pm->op_pmflags & PMf_COMPILETIME; /* Again? */
1834     pm->op_pmflags = RExC_flags;
1835     if (UTF)
1836         r->reganch |= ROPT_UTF8;	/* Unicode in it? */
1837     r->regstclass = NULL;
1838     if (RExC_naughty >= 10)	/* Probably an expensive pattern. */
1839 	r->reganch |= ROPT_NAUGHTY;
1840     scan = r->program + 1;		/* First BRANCH. */
1841 
1842     /* XXXX To minimize changes to RE engine we always allocate
1843        3-units-long substrs field. */
1844     Newz(1004, r->substrs, 1, struct reg_substr_data);
1845 
1846     StructCopy(&zero_scan_data, &data, scan_data_t);
1847     /* XXXX Should not we check for something else?  Usually it is OPEN1... */
1848     if (OP(scan) != BRANCH) {	/* Only one top-level choice. */
1849 	I32 fake;
1850 	STRLEN longest_float_length, longest_fixed_length;
1851 	struct regnode_charclass_class ch_class;
1852 	int stclass_flag;
1853 	I32 last_close = 0;
1854 
1855 	first = scan;
1856 	/* Skip introductions and multiplicators >= 1. */
1857 	while ((OP(first) == OPEN && (sawopen = 1)) ||
1858 	       /* An OR of *one* alternative - should not happen now. */
1859 	    (OP(first) == BRANCH && OP(regnext(first)) != BRANCH) ||
1860 	    (OP(first) == PLUS) ||
1861 	    (OP(first) == MINMOD) ||
1862 	       /* An {n,m} with n>0 */
1863 	    (PL_regkind[(U8)OP(first)] == CURLY && ARG1(first) > 0) ) {
1864 		if (OP(first) == PLUS)
1865 		    sawplus = 1;
1866 		else
1867 		    first += regarglen[(U8)OP(first)];
1868 		first = NEXTOPER(first);
1869 	}
1870 
1871 	/* Starting-point info. */
1872       again:
1873 	if (PL_regkind[(U8)OP(first)] == EXACT) {
1874 	    if (OP(first) == EXACT)
1875 	        ;	/* Empty, get anchored substr later. */
1876 	    else if ((OP(first) == EXACTF || OP(first) == EXACTFL))
1877 		r->regstclass = first;
1878 	}
1879 	else if (strchr((char*)PL_simple,OP(first)))
1880 	    r->regstclass = first;
1881 	else if (PL_regkind[(U8)OP(first)] == BOUND ||
1882 		 PL_regkind[(U8)OP(first)] == NBOUND)
1883 	    r->regstclass = first;
1884 	else if (PL_regkind[(U8)OP(first)] == BOL) {
1885 	    r->reganch |= (OP(first) == MBOL
1886 			   ? ROPT_ANCH_MBOL
1887 			   : (OP(first) == SBOL
1888 			      ? ROPT_ANCH_SBOL
1889 			      : ROPT_ANCH_BOL));
1890 	    first = NEXTOPER(first);
1891 	    goto again;
1892 	}
1893 	else if (OP(first) == GPOS) {
1894 	    r->reganch |= ROPT_ANCH_GPOS;
1895 	    first = NEXTOPER(first);
1896 	    goto again;
1897 	}
1898 	else if (!sawopen && (OP(first) == STAR &&
1899 	    PL_regkind[(U8)OP(NEXTOPER(first))] == REG_ANY) &&
1900 	    !(r->reganch & ROPT_ANCH) )
1901 	{
1902 	    /* turn .* into ^.* with an implied $*=1 */
1903 	    int type = OP(NEXTOPER(first));
1904 
1905 	    if (type == REG_ANY)
1906 		type = ROPT_ANCH_MBOL;
1907 	    else
1908 		type = ROPT_ANCH_SBOL;
1909 
1910 	    r->reganch |= type | ROPT_IMPLICIT;
1911 	    first = NEXTOPER(first);
1912 	    goto again;
1913 	}
1914 	if (sawplus && (!sawopen || !RExC_sawback)
1915 	    && !(RExC_seen & REG_SEEN_EVAL)) /* May examine pos and $& */
1916 	    /* x+ must match at the 1st pos of run of x's */
1917 	    r->reganch |= ROPT_SKIP;
1918 
1919 	/* Scan is after the zeroth branch, first is atomic matcher. */
1920 	DEBUG_r(PerlIO_printf(Perl_debug_log, "first at %"IVdf"\n",
1921 			      (IV)(first - scan + 1)));
1922 	/*
1923 	* If there's something expensive in the r.e., find the
1924 	* longest literal string that must appear and make it the
1925 	* regmust.  Resolve ties in favor of later strings, since
1926 	* the regstart check works with the beginning of the r.e.
1927 	* and avoiding duplication strengthens checking.  Not a
1928 	* strong reason, but sufficient in the absence of others.
1929 	* [Now we resolve ties in favor of the earlier string if
1930 	* it happens that c_offset_min has been invalidated, since the
1931 	* earlier string may buy us something the later one won't.]
1932 	*/
1933 	minlen = 0;
1934 
1935 	data.longest_fixed = newSVpvn("",0);
1936 	data.longest_float = newSVpvn("",0);
1937 	data.last_found = newSVpvn("",0);
1938 	data.longest = &(data.longest_fixed);
1939 	first = scan;
1940 	if (!r->regstclass) {
1941 	    cl_init(pRExC_state, &ch_class);
1942 	    data.start_class = &ch_class;
1943 	    stclass_flag = SCF_DO_STCLASS_AND;
1944 	} else				/* XXXX Check for BOUND? */
1945 	    stclass_flag = 0;
1946 	data.last_closep = &last_close;
1947 
1948 	minlen = study_chunk(pRExC_state, &first, &fake, scan + RExC_size, /* Up to end */
1949 			     &data, SCF_DO_SUBSTR | SCF_WHILEM_VISITED_POS | stclass_flag);
1950 	if ( RExC_npar == 1 && data.longest == &(data.longest_fixed)
1951 	     && data.last_start_min == 0 && data.last_end > 0
1952 	     && !RExC_seen_zerolen
1953 	     && (!(RExC_seen & REG_SEEN_GPOS) || (r->reganch & ROPT_ANCH_GPOS)))
1954 	    r->reganch |= ROPT_CHECK_ALL;
1955 	scan_commit(pRExC_state, &data);
1956 	SvREFCNT_dec(data.last_found);
1957 
1958 	longest_float_length = CHR_SVLEN(data.longest_float);
1959 	if (longest_float_length
1960 	    || (data.flags & SF_FL_BEFORE_EOL
1961 		&& (!(data.flags & SF_FL_BEFORE_MEOL)
1962 		    || (RExC_flags & PMf_MULTILINE)))) {
1963 	    int t;
1964 
1965 	    if (SvCUR(data.longest_fixed) 			/* ok to leave SvCUR */
1966 		&& data.offset_fixed == data.offset_float_min
1967 		&& SvCUR(data.longest_fixed) == SvCUR(data.longest_float))
1968 		    goto remove_float;		/* As in (a)+. */
1969 
1970 	    if (SvUTF8(data.longest_float)) {
1971 		r->float_utf8 = data.longest_float;
1972 		r->float_substr = Nullsv;
1973 	    } else {
1974 		r->float_substr = data.longest_float;
1975 		r->float_utf8 = Nullsv;
1976 	    }
1977 	    r->float_min_offset = data.offset_float_min;
1978 	    r->float_max_offset = data.offset_float_max;
1979 	    t = (data.flags & SF_FL_BEFORE_EOL /* Can't have SEOL and MULTI */
1980 		       && (!(data.flags & SF_FL_BEFORE_MEOL)
1981 			   || (RExC_flags & PMf_MULTILINE)));
1982 	    fbm_compile(data.longest_float, t ? FBMcf_TAIL : 0);
1983 	}
1984 	else {
1985 	  remove_float:
1986 	    r->float_substr = r->float_utf8 = Nullsv;
1987 	    SvREFCNT_dec(data.longest_float);
1988 	    longest_float_length = 0;
1989 	}
1990 
1991 	longest_fixed_length = CHR_SVLEN(data.longest_fixed);
1992 	if (longest_fixed_length
1993 	    || (data.flags & SF_FIX_BEFORE_EOL /* Cannot have SEOL and MULTI */
1994 		&& (!(data.flags & SF_FIX_BEFORE_MEOL)
1995 		    || (RExC_flags & PMf_MULTILINE)))) {
1996 	    int t;
1997 
1998 	    if (SvUTF8(data.longest_fixed)) {
1999 		r->anchored_utf8 = data.longest_fixed;
2000 		r->anchored_substr = Nullsv;
2001 	    } else {
2002 		r->anchored_substr = data.longest_fixed;
2003 		r->anchored_utf8 = Nullsv;
2004 	    }
2005 	    r->anchored_offset = data.offset_fixed;
2006 	    t = (data.flags & SF_FIX_BEFORE_EOL /* Can't have SEOL and MULTI */
2007 		 && (!(data.flags & SF_FIX_BEFORE_MEOL)
2008 		     || (RExC_flags & PMf_MULTILINE)));
2009 	    fbm_compile(data.longest_fixed, t ? FBMcf_TAIL : 0);
2010 	}
2011 	else {
2012 	    r->anchored_substr = r->anchored_utf8 = Nullsv;
2013 	    SvREFCNT_dec(data.longest_fixed);
2014 	    longest_fixed_length = 0;
2015 	}
2016 	if (r->regstclass
2017 	    && (OP(r->regstclass) == REG_ANY || OP(r->regstclass) == SANY))
2018 	    r->regstclass = NULL;
2019 	if ((!(r->anchored_substr || r->anchored_utf8) || r->anchored_offset)
2020 	    && stclass_flag
2021 	    && !(data.start_class->flags & ANYOF_EOS)
2022 	    && !cl_is_anything(data.start_class))
2023 	{
2024 	    I32 n = add_data(pRExC_state, 1, "f");
2025 
2026 	    New(1006, RExC_rx->data->data[n], 1,
2027 		struct regnode_charclass_class);
2028 	    StructCopy(data.start_class,
2029 		       (struct regnode_charclass_class*)RExC_rx->data->data[n],
2030 		       struct regnode_charclass_class);
2031 	    r->regstclass = (regnode*)RExC_rx->data->data[n];
2032 	    r->reganch &= ~ROPT_SKIP;	/* Used in find_byclass(). */
2033 	    PL_regdata = r->data; /* for regprop() */
2034 	    DEBUG_r({ SV *sv = sv_newmortal();
2035 	              regprop(sv, (regnode*)data.start_class);
2036 		      PerlIO_printf(Perl_debug_log,
2037 				    "synthetic stclass `%s'.\n",
2038 				    SvPVX(sv));});
2039 	}
2040 
2041 	/* A temporary algorithm prefers floated substr to fixed one to dig more info. */
2042 	if (longest_fixed_length > longest_float_length) {
2043 	    r->check_substr = r->anchored_substr;
2044 	    r->check_utf8 = r->anchored_utf8;
2045 	    r->check_offset_min = r->check_offset_max = r->anchored_offset;
2046 	    if (r->reganch & ROPT_ANCH_SINGLE)
2047 		r->reganch |= ROPT_NOSCAN;
2048 	}
2049 	else {
2050 	    r->check_substr = r->float_substr;
2051 	    r->check_utf8 = r->float_utf8;
2052 	    r->check_offset_min = data.offset_float_min;
2053 	    r->check_offset_max = data.offset_float_max;
2054 	}
2055 	/* XXXX Currently intuiting is not compatible with ANCH_GPOS.
2056 	   This should be changed ASAP!  */
2057 	if ((r->check_substr || r->check_utf8) && !(r->reganch & ROPT_ANCH_GPOS)) {
2058 	    r->reganch |= RE_USE_INTUIT;
2059 	    if (SvTAIL(r->check_substr ? r->check_substr : r->check_utf8))
2060 		r->reganch |= RE_INTUIT_TAIL;
2061 	}
2062     }
2063     else {
2064 	/* Several toplevels. Best we can is to set minlen. */
2065 	I32 fake;
2066 	struct regnode_charclass_class ch_class;
2067 	I32 last_close = 0;
2068 
2069 	DEBUG_r(PerlIO_printf(Perl_debug_log, "\n"));
2070 	scan = r->program + 1;
2071 	cl_init(pRExC_state, &ch_class);
2072 	data.start_class = &ch_class;
2073 	data.last_closep = &last_close;
2074 	minlen = study_chunk(pRExC_state, &scan, &fake, scan + RExC_size, &data, SCF_DO_STCLASS_AND|SCF_WHILEM_VISITED_POS);
2075 	r->check_substr = r->check_utf8 = r->anchored_substr = r->anchored_utf8
2076 		= r->float_substr = r->float_utf8 = Nullsv;
2077 	if (!(data.start_class->flags & ANYOF_EOS)
2078 	    && !cl_is_anything(data.start_class))
2079 	{
2080 	    I32 n = add_data(pRExC_state, 1, "f");
2081 
2082 	    New(1006, RExC_rx->data->data[n], 1,
2083 		struct regnode_charclass_class);
2084 	    StructCopy(data.start_class,
2085 		       (struct regnode_charclass_class*)RExC_rx->data->data[n],
2086 		       struct regnode_charclass_class);
2087 	    r->regstclass = (regnode*)RExC_rx->data->data[n];
2088 	    r->reganch &= ~ROPT_SKIP;	/* Used in find_byclass(). */
2089 	    DEBUG_r({ SV* sv = sv_newmortal();
2090 	              regprop(sv, (regnode*)data.start_class);
2091 		      PerlIO_printf(Perl_debug_log,
2092 				    "synthetic stclass `%s'.\n",
2093 				    SvPVX(sv));});
2094 	}
2095     }
2096 
2097     r->minlen = minlen;
2098     if (RExC_seen & REG_SEEN_GPOS)
2099 	r->reganch |= ROPT_GPOS_SEEN;
2100     if (RExC_seen & REG_SEEN_LOOKBEHIND)
2101 	r->reganch |= ROPT_LOOKBEHIND_SEEN;
2102     if (RExC_seen & REG_SEEN_EVAL)
2103 	r->reganch |= ROPT_EVAL_SEEN;
2104     if (RExC_seen & REG_SEEN_CANY)
2105 	r->reganch |= ROPT_CANY_SEEN;
2106     Newz(1002, r->startp, RExC_npar, I32);
2107     Newz(1002, r->endp, RExC_npar, I32);
2108     PL_regdata = r->data; /* for regprop() */
2109     DEBUG_r(regdump(r));
2110     return(r);
2111 }
2112 
2113 /*
2114  - reg - regular expression, i.e. main body or parenthesized thing
2115  *
2116  * Caller must absorb opening parenthesis.
2117  *
2118  * Combining parenthesis handling with the base level of regular expression
2119  * is a trifle forced, but the need to tie the tails of the branches to what
2120  * follows makes it hard to avoid.
2121  */
2122 STATIC regnode *
2123 S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp)
2124     /* paren: Parenthesized? 0=top, 1=(, inside: changed to letter. */
2125 {
2126     register regnode *ret;		/* Will be the head of the group. */
2127     register regnode *br;
2128     register regnode *lastbr;
2129     register regnode *ender = 0;
2130     register I32 parno = 0;
2131     I32 flags, oregflags = RExC_flags, have_branch = 0, open = 0;
2132 
2133     /* for (?g), (?gc), and (?o) warnings; warning
2134        about (?c) will warn about (?g) -- japhy    */
2135 
2136     I32 wastedflags = 0x00,
2137         wasted_o    = 0x01,
2138         wasted_g    = 0x02,
2139         wasted_gc   = 0x02 | 0x04,
2140         wasted_c    = 0x04;
2141 
2142     char * parse_start = RExC_parse; /* MJD */
2143     char *oregcomp_parse = RExC_parse;
2144     char c;
2145 
2146     *flagp = 0;				/* Tentatively. */
2147 
2148 
2149     /* Make an OPEN node, if parenthesized. */
2150     if (paren) {
2151 	if (*RExC_parse == '?') { /* (?...) */
2152 	    U32 posflags = 0, negflags = 0;
2153 	    U32 *flagsp = &posflags;
2154 	    int logical = 0;
2155 	    char *seqstart = RExC_parse;
2156 
2157 	    RExC_parse++;
2158 	    paren = *RExC_parse++;
2159 	    ret = NULL;			/* For look-ahead/behind. */
2160 	    switch (paren) {
2161 	    case '<':           /* (?<...) */
2162 		RExC_seen |= REG_SEEN_LOOKBEHIND;
2163 		if (*RExC_parse == '!')
2164 		    paren = ',';
2165 		if (*RExC_parse != '=' && *RExC_parse != '!')
2166 		    goto unknown;
2167 		RExC_parse++;
2168 	    case '=':           /* (?=...) */
2169 	    case '!':           /* (?!...) */
2170 		RExC_seen_zerolen++;
2171 	    case ':':           /* (?:...) */
2172 	    case '>':           /* (?>...) */
2173 		break;
2174 	    case '$':           /* (?$...) */
2175 	    case '@':           /* (?@...) */
2176 		vFAIL2("Sequence (?%c...) not implemented", (int)paren);
2177 		break;
2178 	    case '#':           /* (?#...) */
2179 		while (*RExC_parse && *RExC_parse != ')')
2180 		    RExC_parse++;
2181 		if (*RExC_parse != ')')
2182 		    FAIL("Sequence (?#... not terminated");
2183 		nextchar(pRExC_state);
2184 		*flagp = TRYAGAIN;
2185 		return NULL;
2186 	    case 'p':           /* (?p...) */
2187 		if (SIZE_ONLY && ckWARN2(WARN_DEPRECATED, WARN_REGEXP))
2188 		    vWARNdep(RExC_parse, "(?p{}) is deprecated - use (??{})");
2189 		/* FALL THROUGH*/
2190 	    case '?':           /* (??...) */
2191 		logical = 1;
2192 		if (*RExC_parse != '{')
2193 		    goto unknown;
2194 		paren = *RExC_parse++;
2195 		/* FALL THROUGH */
2196 	    case '{':           /* (?{...}) */
2197 	    {
2198 		I32 count = 1, n = 0;
2199 		char c;
2200 		char *s = RExC_parse;
2201 		SV *sv;
2202 		OP_4tree *sop, *rop;
2203 
2204 		RExC_seen_zerolen++;
2205 		RExC_seen |= REG_SEEN_EVAL;
2206 		while (count && (c = *RExC_parse)) {
2207 		    if (c == '\\' && RExC_parse[1])
2208 			RExC_parse++;
2209 		    else if (c == '{')
2210 			count++;
2211 		    else if (c == '}')
2212 			count--;
2213 		    RExC_parse++;
2214 		}
2215 		if (*RExC_parse != ')')
2216 		{
2217 		    RExC_parse = s;
2218 		    vFAIL("Sequence (?{...}) not terminated or not {}-balanced");
2219 		}
2220 		if (!SIZE_ONLY) {
2221 		    AV *av;
2222 
2223 		    if (RExC_parse - 1 - s)
2224 			sv = newSVpvn(s, RExC_parse - 1 - s);
2225 		    else
2226 			sv = newSVpvn("", 0);
2227 
2228 		    ENTER;
2229 		    Perl_save_re_context(aTHX);
2230 		    rop = sv_compile_2op(sv, &sop, "re", &av);
2231 		    sop->op_private |= OPpREFCOUNTED;
2232 		    /* re_dup will OpREFCNT_inc */
2233 		    OpREFCNT_set(sop, 1);
2234 		    LEAVE;
2235 
2236 		    n = add_data(pRExC_state, 3, "nop");
2237 		    RExC_rx->data->data[n] = (void*)rop;
2238 		    RExC_rx->data->data[n+1] = (void*)sop;
2239 		    RExC_rx->data->data[n+2] = (void*)av;
2240 		    SvREFCNT_dec(sv);
2241 		}
2242 		else {						/* First pass */
2243 		    if (PL_reginterp_cnt < ++RExC_seen_evals
2244 			&& PL_curcop != &PL_compiling)
2245 			/* No compiled RE interpolated, has runtime
2246 			   components ===> unsafe.  */
2247 			FAIL("Eval-group not allowed at runtime, use re 'eval'");
2248 		    if (PL_tainting && PL_tainted)
2249 			FAIL("Eval-group in insecure regular expression");
2250 		}
2251 
2252 		nextchar(pRExC_state);
2253 		if (logical) {
2254 		    ret = reg_node(pRExC_state, LOGICAL);
2255 		    if (!SIZE_ONLY)
2256 			ret->flags = 2;
2257 		    regtail(pRExC_state, ret, reganode(pRExC_state, EVAL, n));
2258                     /* deal with the length of this later - MJD */
2259 		    return ret;
2260 		}
2261 		return reganode(pRExC_state, EVAL, n);
2262 	    }
2263 	    case '(':           /* (?(?{...})...) and (?(?=...)...) */
2264 	    {
2265 		if (RExC_parse[0] == '?') {        /* (?(?...)) */
2266 		    if (RExC_parse[1] == '=' || RExC_parse[1] == '!'
2267 			|| RExC_parse[1] == '<'
2268 			|| RExC_parse[1] == '{') { /* Lookahead or eval. */
2269 			I32 flag;
2270 
2271 			ret = reg_node(pRExC_state, LOGICAL);
2272 			if (!SIZE_ONLY)
2273 			    ret->flags = 1;
2274 			regtail(pRExC_state, ret, reg(pRExC_state, 1, &flag));
2275 			goto insert_if;
2276 		    }
2277 		}
2278 		else if (RExC_parse[0] >= '1' && RExC_parse[0] <= '9' ) {
2279                     /* (?(1)...) */
2280 		    parno = atoi(RExC_parse++);
2281 
2282 		    while (isDIGIT(*RExC_parse))
2283 			RExC_parse++;
2284                     ret = reganode(pRExC_state, GROUPP, parno);
2285 
2286 		    if ((c = *nextchar(pRExC_state)) != ')')
2287 			vFAIL("Switch condition not recognized");
2288 		  insert_if:
2289 		    regtail(pRExC_state, ret, reganode(pRExC_state, IFTHEN, 0));
2290 		    br = regbranch(pRExC_state, &flags, 1);
2291 		    if (br == NULL)
2292 			br = reganode(pRExC_state, LONGJMP, 0);
2293 		    else
2294 			regtail(pRExC_state, br, reganode(pRExC_state, LONGJMP, 0));
2295 		    c = *nextchar(pRExC_state);
2296 		    if (flags&HASWIDTH)
2297 			*flagp |= HASWIDTH;
2298 		    if (c == '|') {
2299 			lastbr = reganode(pRExC_state, IFTHEN, 0); /* Fake one for optimizer. */
2300 			regbranch(pRExC_state, &flags, 1);
2301 			regtail(pRExC_state, ret, lastbr);
2302 		 	if (flags&HASWIDTH)
2303 			    *flagp |= HASWIDTH;
2304 			c = *nextchar(pRExC_state);
2305 		    }
2306 		    else
2307 			lastbr = NULL;
2308 		    if (c != ')')
2309 			vFAIL("Switch (?(condition)... contains too many branches");
2310 		    ender = reg_node(pRExC_state, TAIL);
2311 		    regtail(pRExC_state, br, ender);
2312 		    if (lastbr) {
2313 			regtail(pRExC_state, lastbr, ender);
2314 			regtail(pRExC_state, NEXTOPER(NEXTOPER(lastbr)), ender);
2315 		    }
2316 		    else
2317 			regtail(pRExC_state, ret, ender);
2318 		    return ret;
2319 		}
2320 		else {
2321 		    vFAIL2("Unknown switch condition (?(%.2s", RExC_parse);
2322 		}
2323 	    }
2324             case 0:
2325 		RExC_parse--; /* for vFAIL to print correctly */
2326                 vFAIL("Sequence (? incomplete");
2327                 break;
2328 	    default:
2329 		--RExC_parse;
2330 	      parse_flags:      /* (?i) */
2331 		while (*RExC_parse && strchr("iogcmsx", *RExC_parse)) {
2332 		    /* (?g), (?gc) and (?o) are useless here
2333 		       and must be globally applied -- japhy */
2334 
2335 		    if (*RExC_parse == 'o' || *RExC_parse == 'g') {
2336 			if (SIZE_ONLY && ckWARN(WARN_REGEXP)) {
2337 			    I32 wflagbit = *RExC_parse == 'o' ? wasted_o : wasted_g;
2338 			    if (! (wastedflags & wflagbit) ) {
2339 				wastedflags |= wflagbit;
2340 				vWARN5(
2341 				    RExC_parse + 1,
2342 				    "Useless (%s%c) - %suse /%c modifier",
2343 				    flagsp == &negflags ? "?-" : "?",
2344 				    *RExC_parse,
2345 				    flagsp == &negflags ? "don't " : "",
2346 				    *RExC_parse
2347 				);
2348 			    }
2349 			}
2350 		    }
2351 		    else if (*RExC_parse == 'c') {
2352 			if (SIZE_ONLY && ckWARN(WARN_REGEXP)) {
2353 			    if (! (wastedflags & wasted_c) ) {
2354 				wastedflags |= wasted_gc;
2355 				vWARN3(
2356 				    RExC_parse + 1,
2357 				    "Useless (%sc) - %suse /gc modifier",
2358 				    flagsp == &negflags ? "?-" : "?",
2359 				    flagsp == &negflags ? "don't " : ""
2360 				);
2361 			    }
2362 			}
2363 		    }
2364 		    else { pmflag(flagsp, *RExC_parse); }
2365 
2366 		    ++RExC_parse;
2367 		}
2368 		if (*RExC_parse == '-') {
2369 		    flagsp = &negflags;
2370 		    wastedflags = 0;  /* reset so (?g-c) warns twice */
2371 		    ++RExC_parse;
2372 		    goto parse_flags;
2373 		}
2374 		RExC_flags |= posflags;
2375 		RExC_flags &= ~negflags;
2376 		if (*RExC_parse == ':') {
2377 		    RExC_parse++;
2378 		    paren = ':';
2379 		    break;
2380 		}
2381 	      unknown:
2382 		if (*RExC_parse != ')') {
2383 		    RExC_parse++;
2384 		    vFAIL3("Sequence (%.*s...) not recognized", RExC_parse-seqstart, seqstart);
2385 		}
2386 		nextchar(pRExC_state);
2387 		*flagp = TRYAGAIN;
2388 		return NULL;
2389 	    }
2390 	}
2391 	else {                  /* (...) */
2392 	    parno = RExC_npar;
2393 	    RExC_npar++;
2394 	    ret = reganode(pRExC_state, OPEN, parno);
2395             Set_Node_Length(ret, 1); /* MJD */
2396             Set_Node_Offset(ret, RExC_parse); /* MJD */
2397 	    open = 1;
2398 	}
2399     }
2400     else                        /* ! paren */
2401 	ret = NULL;
2402 
2403     /* Pick up the branches, linking them together. */
2404     parse_start = RExC_parse;   /* MJD */
2405     br = regbranch(pRExC_state, &flags, 1);
2406     /*     branch_len = (paren != 0); */
2407 
2408     if (br == NULL)
2409 	return(NULL);
2410     if (*RExC_parse == '|') {
2411 	if (!SIZE_ONLY && RExC_extralen) {
2412 	    reginsert(pRExC_state, BRANCHJ, br);
2413 	}
2414 	else {                  /* MJD */
2415 	    reginsert(pRExC_state, BRANCH, br);
2416             Set_Node_Length(br, paren != 0);
2417             Set_Node_Offset_To_R(br-RExC_emit_start, parse_start-RExC_start);
2418         }
2419 	have_branch = 1;
2420 	if (SIZE_ONLY)
2421 	    RExC_extralen += 1;		/* For BRANCHJ-BRANCH. */
2422     }
2423     else if (paren == ':') {
2424 	*flagp |= flags&SIMPLE;
2425     }
2426     if (open) {				/* Starts with OPEN. */
2427 	regtail(pRExC_state, ret, br);		/* OPEN -> first. */
2428     }
2429     else if (paren != '?')		/* Not Conditional */
2430 	ret = br;
2431     *flagp |= flags & (SPSTART | HASWIDTH);
2432     lastbr = br;
2433     while (*RExC_parse == '|') {
2434 	if (!SIZE_ONLY && RExC_extralen) {
2435 	    ender = reganode(pRExC_state, LONGJMP,0);
2436 	    regtail(pRExC_state, NEXTOPER(NEXTOPER(lastbr)), ender); /* Append to the previous. */
2437 	}
2438 	if (SIZE_ONLY)
2439 	    RExC_extralen += 2;		/* Account for LONGJMP. */
2440 	nextchar(pRExC_state);
2441 	br = regbranch(pRExC_state, &flags, 0);
2442 
2443 	if (br == NULL)
2444 	    return(NULL);
2445 	regtail(pRExC_state, lastbr, br);		/* BRANCH -> BRANCH. */
2446 	lastbr = br;
2447 	if (flags&HASWIDTH)
2448 	    *flagp |= HASWIDTH;
2449 	*flagp |= flags&SPSTART;
2450     }
2451 
2452     if (have_branch || paren != ':') {
2453 	/* Make a closing node, and hook it on the end. */
2454 	switch (paren) {
2455 	case ':':
2456 	    ender = reg_node(pRExC_state, TAIL);
2457 	    break;
2458 	case 1:
2459 	    ender = reganode(pRExC_state, CLOSE, parno);
2460             Set_Node_Offset(ender,RExC_parse+1); /* MJD */
2461             Set_Node_Length(ender,1); /* MJD */
2462 	    break;
2463 	case '<':
2464 	case ',':
2465 	case '=':
2466 	case '!':
2467 	    *flagp &= ~HASWIDTH;
2468 	    /* FALL THROUGH */
2469 	case '>':
2470 	    ender = reg_node(pRExC_state, SUCCEED);
2471 	    break;
2472 	case 0:
2473 	    ender = reg_node(pRExC_state, END);
2474 	    break;
2475 	}
2476 	regtail(pRExC_state, lastbr, ender);
2477 
2478 	if (have_branch) {
2479 	    /* Hook the tails of the branches to the closing node. */
2480 	    for (br = ret; br != NULL; br = regnext(br)) {
2481 		regoptail(pRExC_state, br, ender);
2482 	    }
2483 	}
2484     }
2485 
2486     {
2487 	char *p;
2488 	static char parens[] = "=!<,>";
2489 
2490 	if (paren && (p = strchr(parens, paren))) {
2491 	    U8 node = ((p - parens) % 2) ? UNLESSM : IFMATCH;
2492 	    int flag = (p - parens) > 1;
2493 
2494 	    if (paren == '>')
2495 		node = SUSPEND, flag = 0;
2496 	    reginsert(pRExC_state, node,ret);
2497 	    ret->flags = flag;
2498 	    regtail(pRExC_state, ret, reg_node(pRExC_state, TAIL));
2499 	}
2500     }
2501 
2502     /* Check for proper termination. */
2503     if (paren) {
2504 	RExC_flags = oregflags;
2505 	if (RExC_parse >= RExC_end || *nextchar(pRExC_state) != ')') {
2506 	    RExC_parse = oregcomp_parse;
2507 	    vFAIL("Unmatched (");
2508 	}
2509     }
2510     else if (!paren && RExC_parse < RExC_end) {
2511 	if (*RExC_parse == ')') {
2512 	    RExC_parse++;
2513 	    vFAIL("Unmatched )");
2514 	}
2515 	else
2516 	    FAIL("Junk on end of regexp");	/* "Can't happen". */
2517 	/* NOTREACHED */
2518     }
2519 
2520     return(ret);
2521 }
2522 
2523 /*
2524  - regbranch - one alternative of an | operator
2525  *
2526  * Implements the concatenation operator.
2527  */
2528 STATIC regnode *
2529 S_regbranch(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, I32 first)
2530 {
2531     register regnode *ret;
2532     register regnode *chain = NULL;
2533     register regnode *latest;
2534     I32 flags = 0, c = 0;
2535 
2536     if (first)
2537 	ret = NULL;
2538     else {
2539 	if (!SIZE_ONLY && RExC_extralen)
2540 	    ret = reganode(pRExC_state, BRANCHJ,0);
2541 	else {
2542 	    ret = reg_node(pRExC_state, BRANCH);
2543             Set_Node_Length(ret, 1);
2544         }
2545     }
2546 
2547     if (!first && SIZE_ONLY)
2548 	RExC_extralen += 1;			/* BRANCHJ */
2549 
2550     *flagp = WORST;			/* Tentatively. */
2551 
2552     RExC_parse--;
2553     nextchar(pRExC_state);
2554     while (RExC_parse < RExC_end && *RExC_parse != '|' && *RExC_parse != ')') {
2555 	flags &= ~TRYAGAIN;
2556 	latest = regpiece(pRExC_state, &flags);
2557 	if (latest == NULL) {
2558 	    if (flags & TRYAGAIN)
2559 		continue;
2560 	    return(NULL);
2561 	}
2562 	else if (ret == NULL)
2563 	    ret = latest;
2564 	*flagp |= flags&HASWIDTH;
2565 	if (chain == NULL) 	/* First piece. */
2566 	    *flagp |= flags&SPSTART;
2567 	else {
2568 	    RExC_naughty++;
2569 	    regtail(pRExC_state, chain, latest);
2570 	}
2571 	chain = latest;
2572 	c++;
2573     }
2574     if (chain == NULL) {	/* Loop ran zero times. */
2575 	chain = reg_node(pRExC_state, NOTHING);
2576 	if (ret == NULL)
2577 	    ret = chain;
2578     }
2579     if (c == 1) {
2580 	*flagp |= flags&SIMPLE;
2581     }
2582 
2583     return(ret);
2584 }
2585 
2586 /*
2587  - regpiece - something followed by possible [*+?]
2588  *
2589  * Note that the branching code sequences used for ? and the general cases
2590  * of * and + are somewhat optimized:  they use the same NOTHING node as
2591  * both the endmarker for their branch list and the body of the last branch.
2592  * It might seem that this node could be dispensed with entirely, but the
2593  * endmarker role is not redundant.
2594  */
2595 STATIC regnode *
2596 S_regpiece(pTHX_ RExC_state_t *pRExC_state, I32 *flagp)
2597 {
2598     register regnode *ret;
2599     register char op;
2600     register char *next;
2601     I32 flags;
2602     char *origparse = RExC_parse;
2603     char *maxpos;
2604     I32 min;
2605     I32 max = REG_INFTY;
2606     char *parse_start;
2607 
2608     ret = regatom(pRExC_state, &flags);
2609     if (ret == NULL) {
2610 	if (flags & TRYAGAIN)
2611 	    *flagp |= TRYAGAIN;
2612 	return(NULL);
2613     }
2614 
2615     op = *RExC_parse;
2616 
2617     if (op == '{' && regcurly(RExC_parse)) {
2618         parse_start = RExC_parse; /* MJD */
2619 	next = RExC_parse + 1;
2620 	maxpos = Nullch;
2621 	while (isDIGIT(*next) || *next == ',') {
2622 	    if (*next == ',') {
2623 		if (maxpos)
2624 		    break;
2625 		else
2626 		    maxpos = next;
2627 	    }
2628 	    next++;
2629 	}
2630 	if (*next == '}') {		/* got one */
2631 	    if (!maxpos)
2632 		maxpos = next;
2633 	    RExC_parse++;
2634 	    min = atoi(RExC_parse);
2635 	    if (*maxpos == ',')
2636 		maxpos++;
2637 	    else
2638 		maxpos = RExC_parse;
2639 	    max = atoi(maxpos);
2640 	    if (!max && *maxpos != '0')
2641 		max = REG_INFTY;		/* meaning "infinity" */
2642 	    else if (max >= REG_INFTY)
2643 		vFAIL2("Quantifier in {,} bigger than %d", REG_INFTY - 1);
2644 	    RExC_parse = next;
2645 	    nextchar(pRExC_state);
2646 
2647 	do_curly:
2648 	    if ((flags&SIMPLE)) {
2649 		RExC_naughty += 2 + RExC_naughty / 2;
2650 		reginsert(pRExC_state, CURLY, ret);
2651                 Set_Node_Offset(ret, parse_start+1); /* MJD */
2652                 Set_Node_Cur_Length(ret);
2653 	    }
2654 	    else {
2655 		regnode *w = reg_node(pRExC_state, WHILEM);
2656 
2657 		w->flags = 0;
2658 		regtail(pRExC_state, ret, w);
2659 		if (!SIZE_ONLY && RExC_extralen) {
2660 		    reginsert(pRExC_state, LONGJMP,ret);
2661 		    reginsert(pRExC_state, NOTHING,ret);
2662 		    NEXT_OFF(ret) = 3;	/* Go over LONGJMP. */
2663 		}
2664 		reginsert(pRExC_state, CURLYX,ret);
2665                                 /* MJD hk */
2666                 Set_Node_Offset(ret, parse_start+1);
2667                 Set_Node_Length(ret,
2668                                 op == '{' ? (RExC_parse - parse_start) : 1);
2669 
2670 		if (!SIZE_ONLY && RExC_extralen)
2671 		    NEXT_OFF(ret) = 3;	/* Go over NOTHING to LONGJMP. */
2672 		regtail(pRExC_state, ret, reg_node(pRExC_state, NOTHING));
2673 		if (SIZE_ONLY)
2674 		    RExC_whilem_seen++, RExC_extralen += 3;
2675 		RExC_naughty += 4 + RExC_naughty;	/* compound interest */
2676 	    }
2677 	    ret->flags = 0;
2678 
2679 	    if (min > 0)
2680 		*flagp = WORST;
2681 	    if (max > 0)
2682 		*flagp |= HASWIDTH;
2683 	    if (max && max < min)
2684 		vFAIL("Can't do {n,m} with n > m");
2685 	    if (!SIZE_ONLY) {
2686 		ARG1_SET(ret, (U16)min);
2687 		ARG2_SET(ret, (U16)max);
2688 	    }
2689 
2690 	    goto nest_check;
2691 	}
2692     }
2693 
2694     if (!ISMULT1(op)) {
2695 	*flagp = flags;
2696 	return(ret);
2697     }
2698 
2699 #if 0				/* Now runtime fix should be reliable. */
2700 
2701     /* if this is reinstated, don't forget to put this back into perldiag:
2702 
2703 	    =item Regexp *+ operand could be empty at {#} in regex m/%s/
2704 
2705 	   (F) The part of the regexp subject to either the * or + quantifier
2706            could match an empty string. The {#} shows in the regular
2707            expression about where the problem was discovered.
2708 
2709     */
2710 
2711     if (!(flags&HASWIDTH) && op != '?')
2712       vFAIL("Regexp *+ operand could be empty");
2713 #endif
2714 
2715     parse_start = RExC_parse;
2716     nextchar(pRExC_state);
2717 
2718     *flagp = (op != '+') ? (WORST|SPSTART|HASWIDTH) : (WORST|HASWIDTH);
2719 
2720     if (op == '*' && (flags&SIMPLE)) {
2721 	reginsert(pRExC_state, STAR, ret);
2722 	ret->flags = 0;
2723 	RExC_naughty += 4;
2724     }
2725     else if (op == '*') {
2726 	min = 0;
2727 	goto do_curly;
2728     }
2729     else if (op == '+' && (flags&SIMPLE)) {
2730 	reginsert(pRExC_state, PLUS, ret);
2731 	ret->flags = 0;
2732 	RExC_naughty += 3;
2733     }
2734     else if (op == '+') {
2735 	min = 1;
2736 	goto do_curly;
2737     }
2738     else if (op == '?') {
2739 	min = 0; max = 1;
2740 	goto do_curly;
2741     }
2742   nest_check:
2743     if (ckWARN(WARN_REGEXP) && !SIZE_ONLY && !(flags&HASWIDTH) && max > REG_INFTY/3) {
2744 	vWARN3(RExC_parse,
2745 	       "%.*s matches null string many times",
2746 	       RExC_parse - origparse,
2747 	       origparse);
2748     }
2749 
2750     if (*RExC_parse == '?') {
2751 	nextchar(pRExC_state);
2752 	reginsert(pRExC_state, MINMOD, ret);
2753 	regtail(pRExC_state, ret, ret + NODE_STEP_REGNODE);
2754     }
2755     if (ISMULT2(RExC_parse)) {
2756 	RExC_parse++;
2757 	vFAIL("Nested quantifiers");
2758     }
2759 
2760     return(ret);
2761 }
2762 
2763 /*
2764  - regatom - the lowest level
2765  *
2766  * Optimization:  gobbles an entire sequence of ordinary characters so that
2767  * it can turn them into a single node, which is smaller to store and
2768  * faster to run.  Backslashed characters are exceptions, each becoming a
2769  * separate node; the code is simpler that way and it's not worth fixing.
2770  *
2771  * [Yes, it is worth fixing, some scripts can run twice the speed.] */
2772 STATIC regnode *
2773 S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp)
2774 {
2775     register regnode *ret = 0;
2776     I32 flags;
2777     char *parse_start = 0;
2778 
2779     *flagp = WORST;		/* Tentatively. */
2780 
2781 tryagain:
2782     switch (*RExC_parse) {
2783     case '^':
2784 	RExC_seen_zerolen++;
2785 	nextchar(pRExC_state);
2786 	if (RExC_flags & PMf_MULTILINE)
2787 	    ret = reg_node(pRExC_state, MBOL);
2788 	else if (RExC_flags & PMf_SINGLELINE)
2789 	    ret = reg_node(pRExC_state, SBOL);
2790 	else
2791 	    ret = reg_node(pRExC_state, BOL);
2792         Set_Node_Length(ret, 1); /* MJD */
2793 	break;
2794     case '$':
2795 	nextchar(pRExC_state);
2796 	if (*RExC_parse)
2797 	    RExC_seen_zerolen++;
2798 	if (RExC_flags & PMf_MULTILINE)
2799 	    ret = reg_node(pRExC_state, MEOL);
2800 	else if (RExC_flags & PMf_SINGLELINE)
2801 	    ret = reg_node(pRExC_state, SEOL);
2802 	else
2803 	    ret = reg_node(pRExC_state, EOL);
2804         Set_Node_Length(ret, 1); /* MJD */
2805 	break;
2806     case '.':
2807 	nextchar(pRExC_state);
2808 	if (RExC_flags & PMf_SINGLELINE)
2809 	    ret = reg_node(pRExC_state, SANY);
2810 	else
2811 	    ret = reg_node(pRExC_state, REG_ANY);
2812 	*flagp |= HASWIDTH|SIMPLE;
2813 	RExC_naughty++;
2814         Set_Node_Length(ret, 1); /* MJD */
2815 	break;
2816     case '[':
2817     {
2818 	char *oregcomp_parse = ++RExC_parse;
2819 	ret = regclass(pRExC_state);
2820 	if (*RExC_parse != ']') {
2821 	    RExC_parse = oregcomp_parse;
2822 	    vFAIL("Unmatched [");
2823 	}
2824 	nextchar(pRExC_state);
2825 	*flagp |= HASWIDTH|SIMPLE;
2826         Set_Node_Length(ret, RExC_parse - oregcomp_parse + 1); /* MJD */
2827 	break;
2828     }
2829     case '(':
2830 	nextchar(pRExC_state);
2831 	ret = reg(pRExC_state, 1, &flags);
2832 	if (ret == NULL) {
2833 		if (flags & TRYAGAIN) {
2834 		    if (RExC_parse == RExC_end) {
2835 			 /* Make parent create an empty node if needed. */
2836 			*flagp |= TRYAGAIN;
2837 			return(NULL);
2838 		    }
2839 		    goto tryagain;
2840 		}
2841 		return(NULL);
2842 	}
2843 	*flagp |= flags&(HASWIDTH|SPSTART|SIMPLE);
2844 	break;
2845     case '|':
2846     case ')':
2847 	if (flags & TRYAGAIN) {
2848 	    *flagp |= TRYAGAIN;
2849 	    return NULL;
2850 	}
2851 	vFAIL("Internal urp");
2852 				/* Supposed to be caught earlier. */
2853 	break;
2854     case '{':
2855 	if (!regcurly(RExC_parse)) {
2856 	    RExC_parse++;
2857 	    goto defchar;
2858 	}
2859 	/* FALL THROUGH */
2860     case '?':
2861     case '+':
2862     case '*':
2863 	RExC_parse++;
2864 	vFAIL("Quantifier follows nothing");
2865 	break;
2866     case '\\':
2867 	switch (*++RExC_parse) {
2868 	case 'A':
2869 	    RExC_seen_zerolen++;
2870 	    ret = reg_node(pRExC_state, SBOL);
2871 	    *flagp |= SIMPLE;
2872 	    nextchar(pRExC_state);
2873             Set_Node_Length(ret, 2); /* MJD */
2874 	    break;
2875 	case 'G':
2876 	    ret = reg_node(pRExC_state, GPOS);
2877 	    RExC_seen |= REG_SEEN_GPOS;
2878 	    *flagp |= SIMPLE;
2879 	    nextchar(pRExC_state);
2880             Set_Node_Length(ret, 2); /* MJD */
2881 	    break;
2882 	case 'Z':
2883 	    ret = reg_node(pRExC_state, SEOL);
2884 	    *flagp |= SIMPLE;
2885 	    RExC_seen_zerolen++;		/* Do not optimize RE away */
2886 	    nextchar(pRExC_state);
2887 	    break;
2888 	case 'z':
2889 	    ret = reg_node(pRExC_state, EOS);
2890 	    *flagp |= SIMPLE;
2891 	    RExC_seen_zerolen++;		/* Do not optimize RE away */
2892 	    nextchar(pRExC_state);
2893             Set_Node_Length(ret, 2); /* MJD */
2894 	    break;
2895 	case 'C':
2896 	    ret = reg_node(pRExC_state, CANY);
2897 	    RExC_seen |= REG_SEEN_CANY;
2898 	    *flagp |= HASWIDTH|SIMPLE;
2899 	    nextchar(pRExC_state);
2900             Set_Node_Length(ret, 2); /* MJD */
2901 	    break;
2902 	case 'X':
2903 	    ret = reg_node(pRExC_state, CLUMP);
2904 	    *flagp |= HASWIDTH;
2905 	    nextchar(pRExC_state);
2906             Set_Node_Length(ret, 2); /* MJD */
2907 	    break;
2908 	case 'w':
2909 	    ret = reg_node(pRExC_state, (U8)(LOC ? ALNUML     : ALNUM));
2910 	    *flagp |= HASWIDTH|SIMPLE;
2911 	    nextchar(pRExC_state);
2912             Set_Node_Length(ret, 2); /* MJD */
2913 	    break;
2914 	case 'W':
2915 	    ret = reg_node(pRExC_state, (U8)(LOC ? NALNUML    : NALNUM));
2916 	    *flagp |= HASWIDTH|SIMPLE;
2917 	    nextchar(pRExC_state);
2918             Set_Node_Length(ret, 2); /* MJD */
2919 	    break;
2920 	case 'b':
2921 	    RExC_seen_zerolen++;
2922 	    RExC_seen |= REG_SEEN_LOOKBEHIND;
2923 	    ret = reg_node(pRExC_state, (U8)(LOC ? BOUNDL     : BOUND));
2924 	    *flagp |= SIMPLE;
2925 	    nextchar(pRExC_state);
2926             Set_Node_Length(ret, 2); /* MJD */
2927 	    break;
2928 	case 'B':
2929 	    RExC_seen_zerolen++;
2930 	    RExC_seen |= REG_SEEN_LOOKBEHIND;
2931 	    ret = reg_node(pRExC_state, (U8)(LOC ? NBOUNDL    : NBOUND));
2932 	    *flagp |= SIMPLE;
2933 	    nextchar(pRExC_state);
2934             Set_Node_Length(ret, 2); /* MJD */
2935 	    break;
2936 	case 's':
2937 	    ret = reg_node(pRExC_state, (U8)(LOC ? SPACEL     : SPACE));
2938 	    *flagp |= HASWIDTH|SIMPLE;
2939 	    nextchar(pRExC_state);
2940             Set_Node_Length(ret, 2); /* MJD */
2941 	    break;
2942 	case 'S':
2943 	    ret = reg_node(pRExC_state, (U8)(LOC ? NSPACEL    : NSPACE));
2944 	    *flagp |= HASWIDTH|SIMPLE;
2945 	    nextchar(pRExC_state);
2946             Set_Node_Length(ret, 2); /* MJD */
2947 	    break;
2948 	case 'd':
2949 	    ret = reg_node(pRExC_state, DIGIT);
2950 	    *flagp |= HASWIDTH|SIMPLE;
2951 	    nextchar(pRExC_state);
2952             Set_Node_Length(ret, 2); /* MJD */
2953 	    break;
2954 	case 'D':
2955 	    ret = reg_node(pRExC_state, NDIGIT);
2956 	    *flagp |= HASWIDTH|SIMPLE;
2957 	    nextchar(pRExC_state);
2958             Set_Node_Length(ret, 2); /* MJD */
2959 	    break;
2960 	case 'p':
2961 	case 'P':
2962 	    {
2963 		char* oldregxend = RExC_end;
2964                 char* parse_start = RExC_parse;
2965 
2966 		if (RExC_parse[1] == '{') {
2967 		  /* a lovely hack--pretend we saw [\pX] instead */
2968 		    RExC_end = strchr(RExC_parse, '}');
2969 		    if (!RExC_end) {
2970 		        U8 c = (U8)*RExC_parse;
2971 			RExC_parse += 2;
2972 			RExC_end = oldregxend;
2973 			vFAIL2("Missing right brace on \\%c{}", c);
2974 		    }
2975 		    RExC_end++;
2976 		}
2977 		else {
2978 		    RExC_end = RExC_parse + 2;
2979 		    if (RExC_end > oldregxend)
2980 			RExC_end = oldregxend;
2981 		}
2982 		RExC_parse--;
2983 
2984 		ret = regclass(pRExC_state);
2985 
2986 		RExC_end = oldregxend;
2987 		RExC_parse--;
2988                 Set_Node_Cur_Length(ret); /* MJD */
2989 		nextchar(pRExC_state);
2990 		*flagp |= HASWIDTH|SIMPLE;
2991 	    }
2992 	    break;
2993 	case 'n':
2994 	case 'r':
2995 	case 't':
2996 	case 'f':
2997 	case 'e':
2998 	case 'a':
2999 	case 'x':
3000 	case 'c':
3001 	case '0':
3002 	    goto defchar;
3003 	case '1': case '2': case '3': case '4':
3004 	case '5': case '6': case '7': case '8': case '9':
3005 	    {
3006 		I32 num = atoi(RExC_parse);
3007 
3008 		if (num > 9 && num >= RExC_npar)
3009 		    goto defchar;
3010 		else {
3011                     char * parse_start = RExC_parse - 1; /* MJD */
3012 		    while (isDIGIT(*RExC_parse))
3013 			RExC_parse++;
3014 
3015 		    if (!SIZE_ONLY && num > (I32)RExC_rx->nparens)
3016 			vFAIL("Reference to nonexistent group");
3017 		    RExC_sawback = 1;
3018 		    ret = reganode(pRExC_state,
3019 				   (U8)(FOLD ? (LOC ? REFFL : REFF) : REF),
3020 				   num);
3021 		    *flagp |= HASWIDTH;
3022 
3023                     /* override incorrect value set in reganode MJD */
3024                     Set_Node_Offset(ret, parse_start+1);
3025                     Set_Node_Cur_Length(ret); /* MJD */
3026 		    RExC_parse--;
3027 		    nextchar(pRExC_state);
3028 		}
3029 	    }
3030 	    break;
3031 	case '\0':
3032 	    if (RExC_parse >= RExC_end)
3033 		FAIL("Trailing \\");
3034 	    /* FALL THROUGH */
3035 	default:
3036 	    /* Do not generate `unrecognized' warnings here, we fall
3037 	       back into the quick-grab loop below */
3038 	    goto defchar;
3039 	}
3040 	break;
3041 
3042     case '#':
3043 	if (RExC_flags & PMf_EXTENDED) {
3044 	    while (RExC_parse < RExC_end && *RExC_parse != '\n') RExC_parse++;
3045 	    if (RExC_parse < RExC_end)
3046 		goto tryagain;
3047 	}
3048 	/* FALL THROUGH */
3049 
3050     default: {
3051 	    register STRLEN len;
3052 	    register UV ender;
3053 	    register char *p;
3054 	    char *oldp, *s;
3055 	    STRLEN numlen;
3056 	    STRLEN foldlen;
3057 	    U8 tmpbuf[UTF8_MAXLEN_FOLD+1], *foldbuf;
3058 
3059             parse_start = RExC_parse - 1;
3060 
3061 	    RExC_parse++;
3062 
3063 	defchar:
3064 	    ender = 0;
3065 	    ret = reg_node(pRExC_state,
3066 			   (U8)(FOLD ? (LOC ? EXACTFL : EXACTF) : EXACT));
3067 	    s = STRING(ret);
3068 	    for (len = 0, p = RExC_parse - 1;
3069 	      len < 127 && p < RExC_end;
3070 	      len++)
3071 	    {
3072 		oldp = p;
3073 
3074 		if (RExC_flags & PMf_EXTENDED)
3075 		    p = regwhite(p, RExC_end);
3076 		switch (*p) {
3077 		case '^':
3078 		case '$':
3079 		case '.':
3080 		case '[':
3081 		case '(':
3082 		case ')':
3083 		case '|':
3084 		    goto loopdone;
3085 		case '\\':
3086 		    switch (*++p) {
3087 		    case 'A':
3088 		    case 'C':
3089 		    case 'X':
3090 		    case 'G':
3091 		    case 'Z':
3092 		    case 'z':
3093 		    case 'w':
3094 		    case 'W':
3095 		    case 'b':
3096 		    case 'B':
3097 		    case 's':
3098 		    case 'S':
3099 		    case 'd':
3100 		    case 'D':
3101 		    case 'p':
3102 		    case 'P':
3103 			--p;
3104 			goto loopdone;
3105 		    case 'n':
3106 			ender = '\n';
3107 			p++;
3108 			break;
3109 		    case 'r':
3110 			ender = '\r';
3111 			p++;
3112 			break;
3113 		    case 't':
3114 			ender = '\t';
3115 			p++;
3116 			break;
3117 		    case 'f':
3118 			ender = '\f';
3119 			p++;
3120 			break;
3121 		    case 'e':
3122 			  ender = ASCII_TO_NATIVE('\033');
3123 			p++;
3124 			break;
3125 		    case 'a':
3126 			  ender = ASCII_TO_NATIVE('\007');
3127 			p++;
3128 			break;
3129 		    case 'x':
3130 			if (*++p == '{') {
3131 			    char* e = strchr(p, '}');
3132 
3133 			    if (!e) {
3134 				RExC_parse = p + 1;
3135 				vFAIL("Missing right brace on \\x{}");
3136 			    }
3137 			    else {
3138                                 I32 flags = PERL_SCAN_ALLOW_UNDERSCORES
3139                                     | PERL_SCAN_DISALLOW_PREFIX;
3140                                 numlen = e - p - 1;
3141 				ender = grok_hex(p + 1, &numlen, &flags, NULL);
3142 				if (ender > 0xff)
3143 				    RExC_utf8 = 1;
3144 				/* numlen is generous */
3145 				if (numlen + len >= 127) {
3146 				    p--;
3147 				    goto loopdone;
3148 				}
3149 				p = e + 1;
3150 			    }
3151 			}
3152 			else {
3153                             I32 flags = PERL_SCAN_DISALLOW_PREFIX;
3154 			    numlen = 2;
3155 			    ender = grok_hex(p, &numlen, &flags, NULL);
3156 			    p += numlen;
3157 			}
3158 			break;
3159 		    case 'c':
3160 			p++;
3161 			ender = UCHARAT(p++);
3162 			ender = toCTRL(ender);
3163 			break;
3164 		    case '0': case '1': case '2': case '3':case '4':
3165 		    case '5': case '6': case '7': case '8':case '9':
3166 			if (*p == '0' ||
3167 			  (isDIGIT(p[1]) && atoi(p) >= RExC_npar) ) {
3168                             I32 flags = 0;
3169 			    numlen = 3;
3170 			    ender = grok_oct(p, &numlen, &flags, NULL);
3171 			    p += numlen;
3172 			}
3173 			else {
3174 			    --p;
3175 			    goto loopdone;
3176 			}
3177 			break;
3178 		    case '\0':
3179 			if (p >= RExC_end)
3180 			    FAIL("Trailing \\");
3181 			/* FALL THROUGH */
3182 		    default:
3183 			if (!SIZE_ONLY && ckWARN(WARN_REGEXP) && isALPHA(*p))
3184 			    vWARN2(p + 1, "Unrecognized escape \\%c passed through", UCHARAT(p));
3185 			goto normal_default;
3186 		    }
3187 		    break;
3188 		default:
3189 		  normal_default:
3190 		    if (UTF8_IS_START(*p) && UTF) {
3191 			ender = utf8n_to_uvchr((U8*)p, RExC_end - p,
3192 					       &numlen, 0);
3193 			p += numlen;
3194 		    }
3195 		    else
3196 			ender = *p++;
3197 		    break;
3198 		}
3199 		if (RExC_flags & PMf_EXTENDED)
3200 		    p = regwhite(p, RExC_end);
3201 		if (UTF && FOLD) {
3202 		    /* Prime the casefolded buffer. */
3203 		    ender = toFOLD_uni(ender, tmpbuf, &foldlen);
3204 		}
3205 		if (ISMULT2(p)) { /* Back off on ?+*. */
3206 		    if (len)
3207 			p = oldp;
3208 		    else if (UTF) {
3209 		         STRLEN unilen;
3210 
3211 			 if (FOLD) {
3212 			      /* Emit all the Unicode characters. */
3213 			      for (foldbuf = tmpbuf;
3214 				   foldlen;
3215 				   foldlen -= numlen) {
3216 				   ender = utf8_to_uvchr(foldbuf, &numlen);
3217 				   if (numlen > 0) {
3218 					reguni(pRExC_state, ender, s, &unilen);
3219 					s       += unilen;
3220 					len     += unilen;
3221 					/* In EBCDIC the numlen
3222 					 * and unilen can differ. */
3223 					foldbuf += numlen;
3224 					if (numlen >= foldlen)
3225 					     break;
3226 				   }
3227 				   else
3228 					break; /* "Can't happen." */
3229 			      }
3230 			 }
3231 			 else {
3232 			      reguni(pRExC_state, ender, s, &unilen);
3233 			      if (unilen > 0) {
3234 				   s   += unilen;
3235 				   len += unilen;
3236 			      }
3237 			 }
3238 		    }
3239 		    else {
3240 			len++;
3241 			REGC((char)ender, s++);
3242 		    }
3243 		    break;
3244 		}
3245 		if (UTF) {
3246 		     STRLEN unilen;
3247 
3248 		     if (FOLD) {
3249 		          /* Emit all the Unicode characters. */
3250 			  for (foldbuf = tmpbuf;
3251 			       foldlen;
3252 			       foldlen -= numlen) {
3253 			       ender = utf8_to_uvchr(foldbuf, &numlen);
3254 			       if (numlen > 0) {
3255 				    reguni(pRExC_state, ender, s, &unilen);
3256 				    len     += unilen;
3257 				    s       += unilen;
3258 				    /* In EBCDIC the numlen
3259 				     * and unilen can differ. */
3260 				    foldbuf += numlen;
3261 				    if (numlen >= foldlen)
3262 					 break;
3263 			       }
3264 			       else
3265 				    break;
3266 			  }
3267 		     }
3268 		     else {
3269 			  reguni(pRExC_state, ender, s, &unilen);
3270 			  if (unilen > 0) {
3271 			       s   += unilen;
3272 			       len += unilen;
3273 			  }
3274 		     }
3275 		     len--;
3276 		}
3277 		else
3278 		    REGC((char)ender, s++);
3279 	    }
3280 	loopdone:
3281 	    RExC_parse = p - 1;
3282             Set_Node_Cur_Length(ret); /* MJD */
3283 	    nextchar(pRExC_state);
3284 	    {
3285 		/* len is STRLEN which is unsigned, need to copy to signed */
3286 		IV iv = len;
3287 		if (iv < 0)
3288 		    vFAIL("Internal disaster");
3289 	    }
3290 	    if (len > 0)
3291 		*flagp |= HASWIDTH;
3292 	    if (len == 1)
3293 		*flagp |= SIMPLE;
3294 	    if (!SIZE_ONLY)
3295 		STR_LEN(ret) = len;
3296 	    if (SIZE_ONLY)
3297 		RExC_size += STR_SZ(len);
3298 	    else
3299 		RExC_emit += STR_SZ(len);
3300 	}
3301 	break;
3302     }
3303 
3304     /* If the encoding pragma is in effect recode the text of
3305      * any EXACT-kind nodes. */
3306     if (PL_encoding && PL_regkind[(U8)OP(ret)] == EXACT) {
3307 	 STRLEN oldlen = STR_LEN(ret);
3308 	 SV *sv        = sv_2mortal(newSVpvn(STRING(ret), oldlen));
3309 
3310 	 if (RExC_utf8)
3311 	      SvUTF8_on(sv);
3312 	 if (sv_utf8_downgrade(sv, TRUE)) {
3313 	      char *s       = sv_recode_to_utf8(sv, PL_encoding);
3314 	      STRLEN newlen = SvCUR(sv);
3315 
3316 	      if (!SIZE_ONLY) {
3317 		   DEBUG_r(PerlIO_printf(Perl_debug_log, "recode %*s to %*s\n",
3318 					 (int)oldlen, STRING(ret),
3319 					 (int)newlen, s));
3320 		   Copy(s, STRING(ret), newlen, char);
3321 		   STR_LEN(ret) += newlen - oldlen;
3322 		   RExC_emit += STR_SZ(newlen) - STR_SZ(oldlen);
3323 	      } else
3324 		   RExC_size += STR_SZ(newlen) - STR_SZ(oldlen);
3325 	 }
3326     }
3327 
3328     return(ret);
3329 }
3330 
3331 STATIC char *
3332 S_regwhite(pTHX_ char *p, char *e)
3333 {
3334     while (p < e) {
3335 	if (isSPACE(*p))
3336 	    ++p;
3337 	else if (*p == '#') {
3338 	    do {
3339 		p++;
3340 	    } while (p < e && *p != '\n');
3341 	}
3342 	else
3343 	    break;
3344     }
3345     return p;
3346 }
3347 
3348 /* Parse POSIX character classes: [[:foo:]], [[=foo=]], [[.foo.]].
3349    Character classes ([:foo:]) can also be negated ([:^foo:]).
3350    Returns a named class id (ANYOF_XXX) if successful, -1 otherwise.
3351    Equivalence classes ([=foo=]) and composites ([.foo.]) are parsed,
3352    but trigger failures because they are currently unimplemented. */
3353 
3354 #define POSIXCC_DONE(c)   ((c) == ':')
3355 #define POSIXCC_NOTYET(c) ((c) == '=' || (c) == '.')
3356 #define POSIXCC(c) (POSIXCC_DONE(c) || POSIXCC_NOTYET(c))
3357 
3358 STATIC I32
3359 S_regpposixcc(pTHX_ RExC_state_t *pRExC_state, I32 value)
3360 {
3361     char *posixcc = 0;
3362     I32 namedclass = OOB_NAMEDCLASS;
3363 
3364     if (value == '[' && RExC_parse + 1 < RExC_end &&
3365 	/* I smell either [: or [= or [. -- POSIX has been here, right? */
3366 	POSIXCC(UCHARAT(RExC_parse))) {
3367 	char  c = UCHARAT(RExC_parse);
3368 	char* s = RExC_parse++;
3369 
3370 	while (RExC_parse < RExC_end && UCHARAT(RExC_parse) != c)
3371 	    RExC_parse++;
3372 	if (RExC_parse == RExC_end)
3373 	    /* Grandfather lone [:, [=, [. */
3374 	    RExC_parse = s;
3375 	else {
3376 	    char* t = RExC_parse++; /* skip over the c */
3377 
3378   	    if (UCHARAT(RExC_parse) == ']') {
3379   		RExC_parse++; /* skip over the ending ] */
3380   		posixcc = s + 1;
3381 		if (*s == ':') {
3382 		    I32 complement = *posixcc == '^' ? *posixcc++ : 0;
3383 		    I32 skip = 5; /* the most common skip */
3384 
3385 		    switch (*posixcc) {
3386 		    case 'a':
3387 			if (strnEQ(posixcc, "alnum", 5))
3388 			    namedclass =
3389 				complement ? ANYOF_NALNUMC : ANYOF_ALNUMC;
3390 			else if (strnEQ(posixcc, "alpha", 5))
3391 			    namedclass =
3392 				complement ? ANYOF_NALPHA : ANYOF_ALPHA;
3393 			else if (strnEQ(posixcc, "ascii", 5))
3394 			    namedclass =
3395 				complement ? ANYOF_NASCII : ANYOF_ASCII;
3396 			break;
3397 		    case 'b':
3398 			if (strnEQ(posixcc, "blank", 5))
3399 			    namedclass =
3400 				complement ? ANYOF_NBLANK : ANYOF_BLANK;
3401 			break;
3402 		    case 'c':
3403 			if (strnEQ(posixcc, "cntrl", 5))
3404 			    namedclass =
3405 				complement ? ANYOF_NCNTRL : ANYOF_CNTRL;
3406 			break;
3407 		    case 'd':
3408 			if (strnEQ(posixcc, "digit", 5))
3409 			    namedclass =
3410 				complement ? ANYOF_NDIGIT : ANYOF_DIGIT;
3411 			break;
3412 		    case 'g':
3413 			if (strnEQ(posixcc, "graph", 5))
3414 			    namedclass =
3415 				complement ? ANYOF_NGRAPH : ANYOF_GRAPH;
3416 			break;
3417 		    case 'l':
3418 			if (strnEQ(posixcc, "lower", 5))
3419 			    namedclass =
3420 				complement ? ANYOF_NLOWER : ANYOF_LOWER;
3421 			break;
3422 		    case 'p':
3423 			if (strnEQ(posixcc, "print", 5))
3424 			    namedclass =
3425 				complement ? ANYOF_NPRINT : ANYOF_PRINT;
3426 			else if (strnEQ(posixcc, "punct", 5))
3427 			    namedclass =
3428 				complement ? ANYOF_NPUNCT : ANYOF_PUNCT;
3429 			break;
3430 		    case 's':
3431 			if (strnEQ(posixcc, "space", 5))
3432 			    namedclass =
3433 				complement ? ANYOF_NPSXSPC : ANYOF_PSXSPC;
3434 			break;
3435 		    case 'u':
3436 			if (strnEQ(posixcc, "upper", 5))
3437 			    namedclass =
3438 				complement ? ANYOF_NUPPER : ANYOF_UPPER;
3439  			break;
3440 		    case 'w': /* this is not POSIX, this is the Perl \w */
3441 			if (strnEQ(posixcc, "word", 4)) {
3442 			    namedclass =
3443 				complement ? ANYOF_NALNUM : ANYOF_ALNUM;
3444 			    skip = 4;
3445 			}
3446 			break;
3447 		    case 'x':
3448 			if (strnEQ(posixcc, "xdigit", 6)) {
3449 			    namedclass =
3450 				complement ? ANYOF_NXDIGIT : ANYOF_XDIGIT;
3451 			    skip = 6;
3452 			}
3453 			break;
3454 		    }
3455 		    if (namedclass == OOB_NAMEDCLASS ||
3456 			posixcc[skip] != ':' ||
3457 			posixcc[skip+1] != ']')
3458 		    {
3459 			Simple_vFAIL3("POSIX class [:%.*s:] unknown",
3460 				      t - s - 1, s + 1);
3461 		    }
3462 		} else if (!SIZE_ONLY) {
3463 		    /* [[=foo=]] and [[.foo.]] are still future. */
3464 
3465 		    /* adjust RExC_parse so the warning shows after
3466 		       the class closes */
3467 		    while (UCHARAT(RExC_parse) && UCHARAT(RExC_parse) != ']')
3468 			RExC_parse++;
3469 		    Simple_vFAIL3("POSIX syntax [%c %c] is reserved for future extensions", c, c);
3470 		}
3471 	    } else {
3472 		/* Maternal grandfather:
3473 		 * "[:" ending in ":" but not in ":]" */
3474 		RExC_parse = s;
3475 	    }
3476 	}
3477     }
3478 
3479     return namedclass;
3480 }
3481 
3482 STATIC void
3483 S_checkposixcc(pTHX_ RExC_state_t *pRExC_state)
3484 {
3485     if (!SIZE_ONLY && POSIXCC(UCHARAT(RExC_parse))) {
3486 	char *s = RExC_parse;
3487  	char  c = *s++;
3488 
3489 	while(*s && isALNUM(*s))
3490 	    s++;
3491 	if (*s && c == *s && s[1] == ']') {
3492 	    if (ckWARN(WARN_REGEXP))
3493 		vWARN3(s+2,
3494 			"POSIX syntax [%c %c] belongs inside character classes",
3495 			c, c);
3496 
3497 	    /* [[=foo=]] and [[.foo.]] are still future. */
3498 	    if (POSIXCC_NOTYET(c)) {
3499 		/* adjust RExC_parse so the error shows after
3500 		   the class closes */
3501 		while (UCHARAT(RExC_parse) && UCHARAT(RExC_parse++) != ']')
3502 		    ;
3503 		Simple_vFAIL3("POSIX syntax [%c %c] is reserved for future extensions", c, c);
3504 	    }
3505 	}
3506     }
3507 }
3508 
3509 STATIC regnode *
3510 S_regclass(pTHX_ RExC_state_t *pRExC_state)
3511 {
3512     register UV value;
3513     register UV nextvalue;
3514     register IV prevvalue = OOB_UNICODE;
3515     register IV range = 0;
3516     register regnode *ret;
3517     STRLEN numlen;
3518     IV namedclass;
3519     char *rangebegin = 0;
3520     bool need_class = 0;
3521     SV *listsv = Nullsv;
3522     register char *e;
3523     UV n;
3524     bool optimize_invert   = TRUE;
3525     AV* unicode_alternate  = 0;
3526 #ifdef EBCDIC
3527     UV literal_endpoint = 0;
3528 #endif
3529 
3530     ret = reganode(pRExC_state, ANYOF, 0);
3531 
3532     if (!SIZE_ONLY)
3533 	ANYOF_FLAGS(ret) = 0;
3534 
3535     if (UCHARAT(RExC_parse) == '^') {	/* Complement of range. */
3536 	RExC_naughty++;
3537 	RExC_parse++;
3538 	if (!SIZE_ONLY)
3539 	    ANYOF_FLAGS(ret) |= ANYOF_INVERT;
3540     }
3541 
3542     if (SIZE_ONLY)
3543 	RExC_size += ANYOF_SKIP;
3544     else {
3545  	RExC_emit += ANYOF_SKIP;
3546 	if (FOLD)
3547 	    ANYOF_FLAGS(ret) |= ANYOF_FOLD;
3548 	if (LOC)
3549 	    ANYOF_FLAGS(ret) |= ANYOF_LOCALE;
3550 	ANYOF_BITMAP_ZERO(ret);
3551 	listsv = newSVpvn("# comment\n", 10);
3552     }
3553 
3554     nextvalue = RExC_parse < RExC_end ? UCHARAT(RExC_parse) : 0;
3555 
3556     if (!SIZE_ONLY && POSIXCC(nextvalue))
3557 	checkposixcc(pRExC_state);
3558 
3559     /* allow 1st char to be ] (allowing it to be - is dealt with later) */
3560     if (UCHARAT(RExC_parse) == ']')
3561 	goto charclassloop;
3562 
3563     while (RExC_parse < RExC_end && UCHARAT(RExC_parse) != ']') {
3564 
3565     charclassloop:
3566 
3567 	namedclass = OOB_NAMEDCLASS; /* initialize as illegal */
3568 
3569 	if (!range)
3570 	    rangebegin = RExC_parse;
3571 	if (UTF) {
3572 	    value = utf8n_to_uvchr((U8*)RExC_parse,
3573 				   RExC_end - RExC_parse,
3574 				   &numlen, 0);
3575 	    RExC_parse += numlen;
3576 	}
3577 	else
3578 	    value = UCHARAT(RExC_parse++);
3579 	nextvalue = RExC_parse < RExC_end ? UCHARAT(RExC_parse) : 0;
3580 	if (value == '[' && POSIXCC(nextvalue))
3581 	    namedclass = regpposixcc(pRExC_state, value);
3582 	else if (value == '\\') {
3583 	    if (UTF) {
3584 		value = utf8n_to_uvchr((U8*)RExC_parse,
3585 				   RExC_end - RExC_parse,
3586 				   &numlen, 0);
3587 		RExC_parse += numlen;
3588 	    }
3589 	    else
3590 		value = UCHARAT(RExC_parse++);
3591 	    /* Some compilers cannot handle switching on 64-bit integer
3592 	     * values, therefore value cannot be an UV.  Yes, this will
3593 	     * be a problem later if we want switch on Unicode.
3594 	     * A similar issue a little bit later when switching on
3595 	     * namedclass. --jhi */
3596 	    switch ((I32)value) {
3597 	    case 'w':	namedclass = ANYOF_ALNUM;	break;
3598 	    case 'W':	namedclass = ANYOF_NALNUM;	break;
3599 	    case 's':	namedclass = ANYOF_SPACE;	break;
3600 	    case 'S':	namedclass = ANYOF_NSPACE;	break;
3601 	    case 'd':	namedclass = ANYOF_DIGIT;	break;
3602 	    case 'D':	namedclass = ANYOF_NDIGIT;	break;
3603 	    case 'p':
3604 	    case 'P':
3605 		if (RExC_parse >= RExC_end)
3606 		    vFAIL2("Empty \\%c{}", (U8)value);
3607 		if (*RExC_parse == '{') {
3608 		    U8 c = (U8)value;
3609 		    e = strchr(RExC_parse++, '}');
3610                     if (!e)
3611                         vFAIL2("Missing right brace on \\%c{}", c);
3612 		    while (isSPACE(UCHARAT(RExC_parse)))
3613 		        RExC_parse++;
3614                     if (e == RExC_parse)
3615                         vFAIL2("Empty \\%c{}", c);
3616 		    n = e - RExC_parse;
3617 		    while (isSPACE(UCHARAT(RExC_parse + n - 1)))
3618 		        n--;
3619 		}
3620 		else {
3621 		    e = RExC_parse;
3622 		    n = 1;
3623 		}
3624 		if (!SIZE_ONLY) {
3625 		    if (UCHARAT(RExC_parse) == '^') {
3626 			 RExC_parse++;
3627 			 n--;
3628 			 value = value == 'p' ? 'P' : 'p'; /* toggle */
3629 			 while (isSPACE(UCHARAT(RExC_parse))) {
3630 			      RExC_parse++;
3631 			      n--;
3632 			 }
3633 		    }
3634 		    if (value == 'p')
3635 			 Perl_sv_catpvf(aTHX_ listsv,
3636 					"+utf8::%.*s\n", (int)n, RExC_parse);
3637 		    else
3638 			 Perl_sv_catpvf(aTHX_ listsv,
3639 					"!utf8::%.*s\n", (int)n, RExC_parse);
3640 		}
3641 		RExC_parse = e + 1;
3642 		ANYOF_FLAGS(ret) |= ANYOF_UNICODE;
3643 		continue;
3644 	    case 'n':	value = '\n';			break;
3645 	    case 'r':	value = '\r';			break;
3646 	    case 't':	value = '\t';			break;
3647 	    case 'f':	value = '\f';			break;
3648 	    case 'b':	value = '\b';			break;
3649 	    case 'e':	value = ASCII_TO_NATIVE('\033');break;
3650 	    case 'a':	value = ASCII_TO_NATIVE('\007');break;
3651 	    case 'x':
3652 		if (*RExC_parse == '{') {
3653                     I32 flags = PERL_SCAN_ALLOW_UNDERSCORES
3654                         | PERL_SCAN_DISALLOW_PREFIX;
3655 		    e = strchr(RExC_parse++, '}');
3656                     if (!e)
3657                         vFAIL("Missing right brace on \\x{}");
3658 
3659 		    numlen = e - RExC_parse;
3660 		    value = grok_hex(RExC_parse, &numlen, &flags, NULL);
3661 		    RExC_parse = e + 1;
3662 		}
3663 		else {
3664                     I32 flags = PERL_SCAN_DISALLOW_PREFIX;
3665 		    numlen = 2;
3666 		    value = grok_hex(RExC_parse, &numlen, &flags, NULL);
3667 		    RExC_parse += numlen;
3668 		}
3669 		break;
3670 	    case 'c':
3671 		value = UCHARAT(RExC_parse++);
3672 		value = toCTRL(value);
3673 		break;
3674 	    case '0': case '1': case '2': case '3': case '4':
3675 	    case '5': case '6': case '7': case '8': case '9':
3676             {
3677                 I32 flags = 0;
3678 		numlen = 3;
3679 		value = grok_oct(--RExC_parse, &numlen, &flags, NULL);
3680 		RExC_parse += numlen;
3681 		break;
3682             }
3683 	    default:
3684 		if (!SIZE_ONLY && ckWARN(WARN_REGEXP) && isALPHA(value))
3685 		    vWARN2(RExC_parse,
3686 			   "Unrecognized escape \\%c in character class passed through",
3687 			   (int)value);
3688 		break;
3689 	    }
3690 	} /* end of \blah */
3691 #ifdef EBCDIC
3692 	else
3693 	    literal_endpoint++;
3694 #endif
3695 
3696 	if (namedclass > OOB_NAMEDCLASS) { /* this is a named class \blah */
3697 
3698 	    if (!SIZE_ONLY && !need_class)
3699 		ANYOF_CLASS_ZERO(ret);
3700 
3701 	    need_class = 1;
3702 
3703 	    /* a bad range like a-\d, a-[:digit:] ? */
3704 	    if (range) {
3705 		if (!SIZE_ONLY) {
3706 		    if (ckWARN(WARN_REGEXP))
3707 			vWARN4(RExC_parse,
3708 			       "False [] range \"%*.*s\"",
3709 			       RExC_parse - rangebegin,
3710 			       RExC_parse - rangebegin,
3711 			       rangebegin);
3712 		    if (prevvalue < 256) {
3713 			ANYOF_BITMAP_SET(ret, prevvalue);
3714 			ANYOF_BITMAP_SET(ret, '-');
3715 		    }
3716 		    else {
3717 			ANYOF_FLAGS(ret) |= ANYOF_UNICODE;
3718 			Perl_sv_catpvf(aTHX_ listsv,
3719 				       "%04"UVxf"\n%04"UVxf"\n", (UV)prevvalue, (UV) '-');
3720 		    }
3721 		}
3722 
3723 		range = 0; /* this was not a true range */
3724 	    }
3725 
3726 	    if (!SIZE_ONLY) {
3727 	        if (namedclass > OOB_NAMEDCLASS)
3728 		    optimize_invert = FALSE;
3729 		/* Possible truncation here but in some 64-bit environments
3730 		 * the compiler gets heartburn about switch on 64-bit values.
3731 		 * A similar issue a little earlier when switching on value.
3732 		 * --jhi */
3733 		switch ((I32)namedclass) {
3734 		case ANYOF_ALNUM:
3735 		    if (LOC)
3736 			ANYOF_CLASS_SET(ret, ANYOF_ALNUM);
3737 		    else {
3738 			for (value = 0; value < 256; value++)
3739 			    if (isALNUM(value))
3740 				ANYOF_BITMAP_SET(ret, value);
3741 		    }
3742 		    Perl_sv_catpvf(aTHX_ listsv, "+utf8::IsWord\n");
3743 		    break;
3744 		case ANYOF_NALNUM:
3745 		    if (LOC)
3746 			ANYOF_CLASS_SET(ret, ANYOF_NALNUM);
3747 		    else {
3748 			for (value = 0; value < 256; value++)
3749 			    if (!isALNUM(value))
3750 				ANYOF_BITMAP_SET(ret, value);
3751 		    }
3752 		    Perl_sv_catpvf(aTHX_ listsv, "!utf8::IsWord\n");
3753 		    break;
3754 		case ANYOF_ALNUMC:
3755 		    if (LOC)
3756 			ANYOF_CLASS_SET(ret, ANYOF_ALNUMC);
3757 		    else {
3758 			for (value = 0; value < 256; value++)
3759 			    if (isALNUMC(value))
3760 				ANYOF_BITMAP_SET(ret, value);
3761 		    }
3762 		    Perl_sv_catpvf(aTHX_ listsv, "+utf8::IsAlnum\n");
3763 		    break;
3764 		case ANYOF_NALNUMC:
3765 		    if (LOC)
3766 			ANYOF_CLASS_SET(ret, ANYOF_NALNUMC);
3767 		    else {
3768 			for (value = 0; value < 256; value++)
3769 			    if (!isALNUMC(value))
3770 				ANYOF_BITMAP_SET(ret, value);
3771 		    }
3772 		    Perl_sv_catpvf(aTHX_ listsv, "!utf8::IsAlnum\n");
3773 		    break;
3774 		case ANYOF_ALPHA:
3775 		    if (LOC)
3776 			ANYOF_CLASS_SET(ret, ANYOF_ALPHA);
3777 		    else {
3778 			for (value = 0; value < 256; value++)
3779 			    if (isALPHA(value))
3780 				ANYOF_BITMAP_SET(ret, value);
3781 		    }
3782 		    Perl_sv_catpvf(aTHX_ listsv, "+utf8::IsAlpha\n");
3783 		    break;
3784 		case ANYOF_NALPHA:
3785 		    if (LOC)
3786 			ANYOF_CLASS_SET(ret, ANYOF_NALPHA);
3787 		    else {
3788 			for (value = 0; value < 256; value++)
3789 			    if (!isALPHA(value))
3790 				ANYOF_BITMAP_SET(ret, value);
3791 		    }
3792 		    Perl_sv_catpvf(aTHX_ listsv, "!utf8::IsAlpha\n");
3793 		    break;
3794 		case ANYOF_ASCII:
3795 		    if (LOC)
3796 			ANYOF_CLASS_SET(ret, ANYOF_ASCII);
3797 		    else {
3798 #ifndef EBCDIC
3799 			for (value = 0; value < 128; value++)
3800 			    ANYOF_BITMAP_SET(ret, value);
3801 #else  /* EBCDIC */
3802 			for (value = 0; value < 256; value++) {
3803 			    if (isASCII(value))
3804 			        ANYOF_BITMAP_SET(ret, value);
3805 			}
3806 #endif /* EBCDIC */
3807 		    }
3808 		    Perl_sv_catpvf(aTHX_ listsv, "+utf8::IsASCII\n");
3809 		    break;
3810 		case ANYOF_NASCII:
3811 		    if (LOC)
3812 			ANYOF_CLASS_SET(ret, ANYOF_NASCII);
3813 		    else {
3814 #ifndef EBCDIC
3815 			for (value = 128; value < 256; value++)
3816 			    ANYOF_BITMAP_SET(ret, value);
3817 #else  /* EBCDIC */
3818 			for (value = 0; value < 256; value++) {
3819 			    if (!isASCII(value))
3820 			        ANYOF_BITMAP_SET(ret, value);
3821 			}
3822 #endif /* EBCDIC */
3823 		    }
3824 		    Perl_sv_catpvf(aTHX_ listsv, "!utf8::IsASCII\n");
3825 		    break;
3826 		case ANYOF_BLANK:
3827 		    if (LOC)
3828 			ANYOF_CLASS_SET(ret, ANYOF_BLANK);
3829 		    else {
3830 			for (value = 0; value < 256; value++)
3831 			    if (isBLANK(value))
3832 				ANYOF_BITMAP_SET(ret, value);
3833 		    }
3834 		    Perl_sv_catpvf(aTHX_ listsv, "+utf8::IsBlank\n");
3835 		    break;
3836 		case ANYOF_NBLANK:
3837 		    if (LOC)
3838 			ANYOF_CLASS_SET(ret, ANYOF_NBLANK);
3839 		    else {
3840 			for (value = 0; value < 256; value++)
3841 			    if (!isBLANK(value))
3842 				ANYOF_BITMAP_SET(ret, value);
3843 		    }
3844 		    Perl_sv_catpvf(aTHX_ listsv, "!utf8::IsBlank\n");
3845 		    break;
3846 		case ANYOF_CNTRL:
3847 		    if (LOC)
3848 			ANYOF_CLASS_SET(ret, ANYOF_CNTRL);
3849 		    else {
3850 			for (value = 0; value < 256; value++)
3851 			    if (isCNTRL(value))
3852 				ANYOF_BITMAP_SET(ret, value);
3853 		    }
3854 		    Perl_sv_catpvf(aTHX_ listsv, "+utf8::IsCntrl\n");
3855 		    break;
3856 		case ANYOF_NCNTRL:
3857 		    if (LOC)
3858 			ANYOF_CLASS_SET(ret, ANYOF_NCNTRL);
3859 		    else {
3860 			for (value = 0; value < 256; value++)
3861 			    if (!isCNTRL(value))
3862 				ANYOF_BITMAP_SET(ret, value);
3863 		    }
3864 		    Perl_sv_catpvf(aTHX_ listsv, "!utf8::IsCntrl\n");
3865 		    break;
3866 		case ANYOF_DIGIT:
3867 		    if (LOC)
3868 			ANYOF_CLASS_SET(ret, ANYOF_DIGIT);
3869 		    else {
3870 			/* consecutive digits assumed */
3871 			for (value = '0'; value <= '9'; value++)
3872 			    ANYOF_BITMAP_SET(ret, value);
3873 		    }
3874 		    Perl_sv_catpvf(aTHX_ listsv, "+utf8::IsDigit\n");
3875 		    break;
3876 		case ANYOF_NDIGIT:
3877 		    if (LOC)
3878 			ANYOF_CLASS_SET(ret, ANYOF_NDIGIT);
3879 		    else {
3880 			/* consecutive digits assumed */
3881 			for (value = 0; value < '0'; value++)
3882 			    ANYOF_BITMAP_SET(ret, value);
3883 			for (value = '9' + 1; value < 256; value++)
3884 			    ANYOF_BITMAP_SET(ret, value);
3885 		    }
3886 		    Perl_sv_catpvf(aTHX_ listsv, "!utf8::IsDigit\n");
3887 		    break;
3888 		case ANYOF_GRAPH:
3889 		    if (LOC)
3890 			ANYOF_CLASS_SET(ret, ANYOF_GRAPH);
3891 		    else {
3892 			for (value = 0; value < 256; value++)
3893 			    if (isGRAPH(value))
3894 				ANYOF_BITMAP_SET(ret, value);
3895 		    }
3896 		    Perl_sv_catpvf(aTHX_ listsv, "+utf8::IsGraph\n");
3897 		    break;
3898 		case ANYOF_NGRAPH:
3899 		    if (LOC)
3900 			ANYOF_CLASS_SET(ret, ANYOF_NGRAPH);
3901 		    else {
3902 			for (value = 0; value < 256; value++)
3903 			    if (!isGRAPH(value))
3904 				ANYOF_BITMAP_SET(ret, value);
3905 		    }
3906 		    Perl_sv_catpvf(aTHX_ listsv, "!utf8::IsGraph\n");
3907 		    break;
3908 		case ANYOF_LOWER:
3909 		    if (LOC)
3910 			ANYOF_CLASS_SET(ret, ANYOF_LOWER);
3911 		    else {
3912 			for (value = 0; value < 256; value++)
3913 			    if (isLOWER(value))
3914 				ANYOF_BITMAP_SET(ret, value);
3915 		    }
3916 		    Perl_sv_catpvf(aTHX_ listsv, "+utf8::IsLower\n");
3917 		    break;
3918 		case ANYOF_NLOWER:
3919 		    if (LOC)
3920 			ANYOF_CLASS_SET(ret, ANYOF_NLOWER);
3921 		    else {
3922 			for (value = 0; value < 256; value++)
3923 			    if (!isLOWER(value))
3924 				ANYOF_BITMAP_SET(ret, value);
3925 		    }
3926 		    Perl_sv_catpvf(aTHX_ listsv, "!utf8::IsLower\n");
3927 		    break;
3928 		case ANYOF_PRINT:
3929 		    if (LOC)
3930 			ANYOF_CLASS_SET(ret, ANYOF_PRINT);
3931 		    else {
3932 			for (value = 0; value < 256; value++)
3933 			    if (isPRINT(value))
3934 				ANYOF_BITMAP_SET(ret, value);
3935 		    }
3936 		    Perl_sv_catpvf(aTHX_ listsv, "+utf8::IsPrint\n");
3937 		    break;
3938 		case ANYOF_NPRINT:
3939 		    if (LOC)
3940 			ANYOF_CLASS_SET(ret, ANYOF_NPRINT);
3941 		    else {
3942 			for (value = 0; value < 256; value++)
3943 			    if (!isPRINT(value))
3944 				ANYOF_BITMAP_SET(ret, value);
3945 		    }
3946 		    Perl_sv_catpvf(aTHX_ listsv, "!utf8::IsPrint\n");
3947 		    break;
3948 		case ANYOF_PSXSPC:
3949 		    if (LOC)
3950 			ANYOF_CLASS_SET(ret, ANYOF_PSXSPC);
3951 		    else {
3952 			for (value = 0; value < 256; value++)
3953 			    if (isPSXSPC(value))
3954 				ANYOF_BITMAP_SET(ret, value);
3955 		    }
3956 		    Perl_sv_catpvf(aTHX_ listsv, "+utf8::IsSpace\n");
3957 		    break;
3958 		case ANYOF_NPSXSPC:
3959 		    if (LOC)
3960 			ANYOF_CLASS_SET(ret, ANYOF_NPSXSPC);
3961 		    else {
3962 			for (value = 0; value < 256; value++)
3963 			    if (!isPSXSPC(value))
3964 				ANYOF_BITMAP_SET(ret, value);
3965 		    }
3966 		    Perl_sv_catpvf(aTHX_ listsv, "!utf8::IsSpace\n");
3967 		    break;
3968 		case ANYOF_PUNCT:
3969 		    if (LOC)
3970 			ANYOF_CLASS_SET(ret, ANYOF_PUNCT);
3971 		    else {
3972 			for (value = 0; value < 256; value++)
3973 			    if (isPUNCT(value))
3974 				ANYOF_BITMAP_SET(ret, value);
3975 		    }
3976 		    Perl_sv_catpvf(aTHX_ listsv, "+utf8::IsPunct\n");
3977 		    break;
3978 		case ANYOF_NPUNCT:
3979 		    if (LOC)
3980 			ANYOF_CLASS_SET(ret, ANYOF_NPUNCT);
3981 		    else {
3982 			for (value = 0; value < 256; value++)
3983 			    if (!isPUNCT(value))
3984 				ANYOF_BITMAP_SET(ret, value);
3985 		    }
3986 		    Perl_sv_catpvf(aTHX_ listsv, "!utf8::IsPunct\n");
3987 		    break;
3988 		case ANYOF_SPACE:
3989 		    if (LOC)
3990 			ANYOF_CLASS_SET(ret, ANYOF_SPACE);
3991 		    else {
3992 			for (value = 0; value < 256; value++)
3993 			    if (isSPACE(value))
3994 				ANYOF_BITMAP_SET(ret, value);
3995 		    }
3996 		    Perl_sv_catpvf(aTHX_ listsv, "+utf8::IsSpacePerl\n");
3997 		    break;
3998 		case ANYOF_NSPACE:
3999 		    if (LOC)
4000 			ANYOF_CLASS_SET(ret, ANYOF_NSPACE);
4001 		    else {
4002 			for (value = 0; value < 256; value++)
4003 			    if (!isSPACE(value))
4004 				ANYOF_BITMAP_SET(ret, value);
4005 		    }
4006 		    Perl_sv_catpvf(aTHX_ listsv, "!utf8::IsSpacePerl\n");
4007 		    break;
4008 		case ANYOF_UPPER:
4009 		    if (LOC)
4010 			ANYOF_CLASS_SET(ret, ANYOF_UPPER);
4011 		    else {
4012 			for (value = 0; value < 256; value++)
4013 			    if (isUPPER(value))
4014 				ANYOF_BITMAP_SET(ret, value);
4015 		    }
4016 		    Perl_sv_catpvf(aTHX_ listsv, "+utf8::IsUpper\n");
4017 		    break;
4018 		case ANYOF_NUPPER:
4019 		    if (LOC)
4020 			ANYOF_CLASS_SET(ret, ANYOF_NUPPER);
4021 		    else {
4022 			for (value = 0; value < 256; value++)
4023 			    if (!isUPPER(value))
4024 				ANYOF_BITMAP_SET(ret, value);
4025 		    }
4026 		    Perl_sv_catpvf(aTHX_ listsv, "!utf8::IsUpper\n");
4027 		    break;
4028 		case ANYOF_XDIGIT:
4029 		    if (LOC)
4030 			ANYOF_CLASS_SET(ret, ANYOF_XDIGIT);
4031 		    else {
4032 			for (value = 0; value < 256; value++)
4033 			    if (isXDIGIT(value))
4034 				ANYOF_BITMAP_SET(ret, value);
4035 		    }
4036 		    Perl_sv_catpvf(aTHX_ listsv, "+utf8::IsXDigit\n");
4037 		    break;
4038 		case ANYOF_NXDIGIT:
4039 		    if (LOC)
4040 			ANYOF_CLASS_SET(ret, ANYOF_NXDIGIT);
4041 		    else {
4042 			for (value = 0; value < 256; value++)
4043 			    if (!isXDIGIT(value))
4044 				ANYOF_BITMAP_SET(ret, value);
4045 		    }
4046 		    Perl_sv_catpvf(aTHX_ listsv, "!utf8::IsXDigit\n");
4047 		    break;
4048 		default:
4049 		    vFAIL("Invalid [::] class");
4050 		    break;
4051 		}
4052 		if (LOC)
4053 		    ANYOF_FLAGS(ret) |= ANYOF_CLASS;
4054 		continue;
4055 	    }
4056 	} /* end of namedclass \blah */
4057 
4058 	if (range) {
4059 	    if (prevvalue > (IV)value) /* b-a */ {
4060 		Simple_vFAIL4("Invalid [] range \"%*.*s\"",
4061 			      RExC_parse - rangebegin,
4062 			      RExC_parse - rangebegin,
4063 			      rangebegin);
4064 		range = 0; /* not a valid range */
4065 	    }
4066 	}
4067 	else {
4068 	    prevvalue = value; /* save the beginning of the range */
4069 	    if (*RExC_parse == '-' && RExC_parse+1 < RExC_end &&
4070 		RExC_parse[1] != ']') {
4071 		RExC_parse++;
4072 
4073 		/* a bad range like \w-, [:word:]- ? */
4074 		if (namedclass > OOB_NAMEDCLASS) {
4075 		    if (ckWARN(WARN_REGEXP))
4076 			vWARN4(RExC_parse,
4077 			       "False [] range \"%*.*s\"",
4078 			       RExC_parse - rangebegin,
4079 			       RExC_parse - rangebegin,
4080 			       rangebegin);
4081 		    if (!SIZE_ONLY)
4082 			ANYOF_BITMAP_SET(ret, '-');
4083 		} else
4084 		    range = 1;	/* yeah, it's a range! */
4085 		continue;	/* but do it the next time */
4086 	    }
4087 	}
4088 
4089 	/* now is the next time */
4090 	if (!SIZE_ONLY) {
4091 	    IV i;
4092 
4093 	    if (prevvalue < 256) {
4094 	        IV ceilvalue = value < 256 ? value : 255;
4095 
4096 #ifdef EBCDIC
4097 		/* In EBCDIC [\x89-\x91] should include
4098 		 * the \x8e but [i-j] should not. */
4099 		if (literal_endpoint == 2 &&
4100 		    ((isLOWER(prevvalue) && isLOWER(ceilvalue)) ||
4101 		     (isUPPER(prevvalue) && isUPPER(ceilvalue))))
4102 		{
4103 		    if (isLOWER(prevvalue)) {
4104 			for (i = prevvalue; i <= ceilvalue; i++)
4105 			    if (isLOWER(i))
4106 				ANYOF_BITMAP_SET(ret, i);
4107 		    } else {
4108 			for (i = prevvalue; i <= ceilvalue; i++)
4109 			    if (isUPPER(i))
4110 				ANYOF_BITMAP_SET(ret, i);
4111 		    }
4112 		}
4113 		else
4114 #endif
4115 		      for (i = prevvalue; i <= ceilvalue; i++)
4116 			  ANYOF_BITMAP_SET(ret, i);
4117 	  }
4118 	  if (value > 255 || UTF) {
4119 	        UV prevnatvalue  = NATIVE_TO_UNI(prevvalue);
4120 		UV natvalue      = NATIVE_TO_UNI(value);
4121 
4122 		ANYOF_FLAGS(ret) |= ANYOF_UNICODE;
4123 		if (prevnatvalue < natvalue) { /* what about > ? */
4124 		    Perl_sv_catpvf(aTHX_ listsv, "%04"UVxf"\t%04"UVxf"\n",
4125 				   prevnatvalue, natvalue);
4126 		}
4127 		else if (prevnatvalue == natvalue) {
4128 		    Perl_sv_catpvf(aTHX_ listsv, "%04"UVxf"\n", natvalue);
4129 		    if (FOLD) {
4130 			 U8 foldbuf[UTF8_MAXLEN_FOLD+1];
4131 			 STRLEN foldlen;
4132 			 UV f = to_uni_fold(natvalue, foldbuf, &foldlen);
4133 
4134 			 /* If folding and foldable and a single
4135 			  * character, insert also the folded version
4136 			  * to the charclass. */
4137 			 if (f != value) {
4138 			      if (foldlen == (STRLEN)UNISKIP(f))
4139 				  Perl_sv_catpvf(aTHX_ listsv,
4140 						 "%04"UVxf"\n", f);
4141 			      else {
4142 				  /* Any multicharacter foldings
4143 				   * require the following transform:
4144 				   * [ABCDEF] -> (?:[ABCabcDEFd]|pq|rst)
4145 				   * where E folds into "pq" and F folds
4146 				   * into "rst", all other characters
4147 				   * fold to single characters.  We save
4148 				   * away these multicharacter foldings,
4149 				   * to be later saved as part of the
4150 				   * additional "s" data. */
4151 				  SV *sv;
4152 
4153 				  if (!unicode_alternate)
4154 				      unicode_alternate = newAV();
4155 				  sv = newSVpvn((char*)foldbuf, foldlen);
4156 				  SvUTF8_on(sv);
4157 				  av_push(unicode_alternate, sv);
4158 			      }
4159 			 }
4160 
4161 			 /* If folding and the value is one of the Greek
4162 			  * sigmas insert a few more sigmas to make the
4163 			  * folding rules of the sigmas to work right.
4164 			  * Note that not all the possible combinations
4165 			  * are handled here: some of them are handled
4166 			  * by the standard folding rules, and some of
4167 			  * them (literal or EXACTF cases) are handled
4168 			  * during runtime in regexec.c:S_find_byclass(). */
4169 			 if (value == UNICODE_GREEK_SMALL_LETTER_FINAL_SIGMA) {
4170 			      Perl_sv_catpvf(aTHX_ listsv, "%04"UVxf"\n",
4171 					     (UV)UNICODE_GREEK_CAPITAL_LETTER_SIGMA);
4172 			      Perl_sv_catpvf(aTHX_ listsv, "%04"UVxf"\n",
4173 					     (UV)UNICODE_GREEK_SMALL_LETTER_SIGMA);
4174 			 }
4175 			 else if (value == UNICODE_GREEK_CAPITAL_LETTER_SIGMA)
4176 			      Perl_sv_catpvf(aTHX_ listsv, "%04"UVxf"\n",
4177 					     (UV)UNICODE_GREEK_SMALL_LETTER_SIGMA);
4178 		    }
4179 		}
4180 	    }
4181 #ifdef EBCDIC
4182 	    literal_endpoint = 0;
4183 #endif
4184         }
4185 
4186 	range = 0; /* this range (if it was one) is done now */
4187     }
4188 
4189     if (need_class) {
4190 	ANYOF_FLAGS(ret) |= ANYOF_LARGE;
4191 	if (SIZE_ONLY)
4192 	    RExC_size += ANYOF_CLASS_ADD_SKIP;
4193 	else
4194 	    RExC_emit += ANYOF_CLASS_ADD_SKIP;
4195     }
4196 
4197     /* optimize case-insensitive simple patterns (e.g. /[a-z]/i) */
4198     if (!SIZE_ONLY &&
4199 	 /* If the only flag is folding (plus possibly inversion). */
4200 	((ANYOF_FLAGS(ret) & (ANYOF_FLAGS_ALL ^ ANYOF_INVERT)) == ANYOF_FOLD)
4201        ) {
4202 	for (value = 0; value < 256; ++value) {
4203 	    if (ANYOF_BITMAP_TEST(ret, value)) {
4204 		UV fold = PL_fold[value];
4205 
4206 		if (fold != value)
4207 		    ANYOF_BITMAP_SET(ret, fold);
4208 	    }
4209 	}
4210 	ANYOF_FLAGS(ret) &= ~ANYOF_FOLD;
4211     }
4212 
4213     /* optimize inverted simple patterns (e.g. [^a-z]) */
4214     if (!SIZE_ONLY && optimize_invert &&
4215 	/* If the only flag is inversion. */
4216 	(ANYOF_FLAGS(ret) & ANYOF_FLAGS_ALL) ==	ANYOF_INVERT) {
4217 	for (value = 0; value < ANYOF_BITMAP_SIZE; ++value)
4218 	    ANYOF_BITMAP(ret)[value] ^= ANYOF_FLAGS_ALL;
4219 	ANYOF_FLAGS(ret) = ANYOF_UNICODE_ALL;
4220     }
4221 
4222     if (!SIZE_ONLY) {
4223 	AV *av = newAV();
4224 	SV *rv;
4225 
4226 	/* The 0th element stores the character class description
4227 	 * in its textual form: used later (regexec.c:Perl_regclass_swatch())
4228 	 * to initialize the appropriate swash (which gets stored in
4229 	 * the 1st element), and also useful for dumping the regnode.
4230 	 * The 2nd element stores the multicharacter foldings,
4231 	 * used later (regexec.c:s_reginclasslen()). */
4232 	av_store(av, 0, listsv);
4233 	av_store(av, 1, NULL);
4234 	av_store(av, 2, (SV*)unicode_alternate);
4235 	rv = newRV_noinc((SV*)av);
4236 	n = add_data(pRExC_state, 1, "s");
4237 	RExC_rx->data->data[n] = (void*)rv;
4238 	ARG_SET(ret, n);
4239     }
4240 
4241     return ret;
4242 }
4243 
4244 STATIC char*
4245 S_nextchar(pTHX_ RExC_state_t *pRExC_state)
4246 {
4247     char* retval = RExC_parse++;
4248 
4249     for (;;) {
4250 	if (*RExC_parse == '(' && RExC_parse[1] == '?' &&
4251 		RExC_parse[2] == '#') {
4252 	    while (*RExC_parse && *RExC_parse != ')')
4253 		RExC_parse++;
4254 	    RExC_parse++;
4255 	    continue;
4256 	}
4257 	if (RExC_flags & PMf_EXTENDED) {
4258 	    if (isSPACE(*RExC_parse)) {
4259 		RExC_parse++;
4260 		continue;
4261 	    }
4262 	    else if (*RExC_parse == '#') {
4263 		while (*RExC_parse && *RExC_parse != '\n')
4264 		    RExC_parse++;
4265 		RExC_parse++;
4266 		continue;
4267 	    }
4268 	}
4269 	return retval;
4270     }
4271 }
4272 
4273 /*
4274 - reg_node - emit a node
4275 */
4276 STATIC regnode *			/* Location. */
4277 S_reg_node(pTHX_ RExC_state_t *pRExC_state, U8 op)
4278 {
4279     register regnode *ret;
4280     register regnode *ptr;
4281 
4282     ret = RExC_emit;
4283     if (SIZE_ONLY) {
4284 	SIZE_ALIGN(RExC_size);
4285 	RExC_size += 1;
4286 	return(ret);
4287     }
4288 
4289     NODE_ALIGN_FILL(ret);
4290     ptr = ret;
4291     FILL_ADVANCE_NODE(ptr, op);
4292     if (RExC_offsets) {         /* MJD */
4293       MJD_OFFSET_DEBUG((stderr, "%s:%u: (op %s) %s %u <- %u (len %u) (max %u).\n",
4294               "reg_node", __LINE__,
4295               reg_name[op],
4296               RExC_emit - RExC_emit_start > RExC_offsets[0]
4297               ? "Overwriting end of array!\n" : "OK",
4298               RExC_emit - RExC_emit_start,
4299               RExC_parse - RExC_start,
4300               RExC_offsets[0]));
4301       Set_Node_Offset(RExC_emit, RExC_parse + (op == END));
4302     }
4303 
4304     RExC_emit = ptr;
4305 
4306     return(ret);
4307 }
4308 
4309 /*
4310 - reganode - emit a node with an argument
4311 */
4312 STATIC regnode *			/* Location. */
4313 S_reganode(pTHX_ RExC_state_t *pRExC_state, U8 op, U32 arg)
4314 {
4315     register regnode *ret;
4316     register regnode *ptr;
4317 
4318     ret = RExC_emit;
4319     if (SIZE_ONLY) {
4320 	SIZE_ALIGN(RExC_size);
4321 	RExC_size += 2;
4322 	return(ret);
4323     }
4324 
4325     NODE_ALIGN_FILL(ret);
4326     ptr = ret;
4327     FILL_ADVANCE_NODE_ARG(ptr, op, arg);
4328     if (RExC_offsets) {         /* MJD */
4329       MJD_OFFSET_DEBUG((stderr, "%s: %s %u <- %u (max %u).\n",
4330               "reganode",
4331               RExC_emit - RExC_emit_start > RExC_offsets[0] ?
4332               "Overwriting end of array!\n" : "OK",
4333               RExC_emit - RExC_emit_start,
4334               RExC_parse - RExC_start,
4335               RExC_offsets[0]));
4336       Set_Cur_Node_Offset;
4337     }
4338 
4339     RExC_emit = ptr;
4340 
4341     return(ret);
4342 }
4343 
4344 /*
4345 - reguni - emit (if appropriate) a Unicode character
4346 */
4347 STATIC void
4348 S_reguni(pTHX_ RExC_state_t *pRExC_state, UV uv, char* s, STRLEN* lenp)
4349 {
4350     *lenp = SIZE_ONLY ? UNISKIP(uv) : (uvchr_to_utf8((U8*)s, uv) - (U8*)s);
4351 }
4352 
4353 /*
4354 - reginsert - insert an operator in front of already-emitted operand
4355 *
4356 * Means relocating the operand.
4357 */
4358 STATIC void
4359 S_reginsert(pTHX_ RExC_state_t *pRExC_state, U8 op, regnode *opnd)
4360 {
4361     register regnode *src;
4362     register regnode *dst;
4363     register regnode *place;
4364     register int offset = regarglen[(U8)op];
4365 
4366 /* (PL_regkind[(U8)op] == CURLY ? EXTRA_STEP_2ARGS : 0); */
4367 
4368     if (SIZE_ONLY) {
4369 	RExC_size += NODE_STEP_REGNODE + offset;
4370 	return;
4371     }
4372 
4373     src = RExC_emit;
4374     RExC_emit += NODE_STEP_REGNODE + offset;
4375     dst = RExC_emit;
4376     while (src > opnd) {
4377 	StructCopy(--src, --dst, regnode);
4378         if (RExC_offsets) {     /* MJD 20010112 */
4379           MJD_OFFSET_DEBUG((stderr, "%s: %s copy %u -> %u (max %u).\n",
4380                   "reg_insert",
4381                   dst - RExC_emit_start > RExC_offsets[0]
4382                   ? "Overwriting end of array!\n" : "OK",
4383                   src - RExC_emit_start,
4384                   dst - RExC_emit_start,
4385                   RExC_offsets[0]));
4386           Set_Node_Offset_To_R(dst-RExC_emit_start, Node_Offset(src));
4387           Set_Node_Length_To_R(dst-RExC_emit_start, Node_Length(src));
4388         }
4389     }
4390 
4391 
4392     place = opnd;		/* Op node, where operand used to be. */
4393     if (RExC_offsets) {         /* MJD */
4394       MJD_OFFSET_DEBUG((stderr, "%s: %s %u <- %u (max %u).\n",
4395               "reginsert",
4396               place - RExC_emit_start > RExC_offsets[0]
4397               ? "Overwriting end of array!\n" : "OK",
4398               place - RExC_emit_start,
4399               RExC_parse - RExC_start,
4400               RExC_offsets[0]));
4401       Set_Node_Offset(place, RExC_parse);
4402     }
4403     src = NEXTOPER(place);
4404     FILL_ADVANCE_NODE(place, op);
4405     Zero(src, offset, regnode);
4406 }
4407 
4408 /*
4409 - regtail - set the next-pointer at the end of a node chain of p to val.
4410 */
4411 STATIC void
4412 S_regtail(pTHX_ RExC_state_t *pRExC_state, regnode *p, regnode *val)
4413 {
4414     register regnode *scan;
4415     register regnode *temp;
4416 
4417     if (SIZE_ONLY)
4418 	return;
4419 
4420     /* Find last node. */
4421     scan = p;
4422     for (;;) {
4423 	temp = regnext(scan);
4424 	if (temp == NULL)
4425 	    break;
4426 	scan = temp;
4427     }
4428 
4429     if (reg_off_by_arg[OP(scan)]) {
4430 	ARG_SET(scan, val - scan);
4431     }
4432     else {
4433 	NEXT_OFF(scan) = val - scan;
4434     }
4435 }
4436 
4437 /*
4438 - regoptail - regtail on operand of first argument; nop if operandless
4439 */
4440 STATIC void
4441 S_regoptail(pTHX_ RExC_state_t *pRExC_state, regnode *p, regnode *val)
4442 {
4443     /* "Operandless" and "op != BRANCH" are synonymous in practice. */
4444     if (p == NULL || SIZE_ONLY)
4445 	return;
4446     if (PL_regkind[(U8)OP(p)] == BRANCH) {
4447 	regtail(pRExC_state, NEXTOPER(p), val);
4448     }
4449     else if ( PL_regkind[(U8)OP(p)] == BRANCHJ) {
4450 	regtail(pRExC_state, NEXTOPER(NEXTOPER(p)), val);
4451     }
4452     else
4453 	return;
4454 }
4455 
4456 /*
4457  - regcurly - a little FSA that accepts {\d+,?\d*}
4458  */
4459 STATIC I32
4460 S_regcurly(pTHX_ register char *s)
4461 {
4462     if (*s++ != '{')
4463 	return FALSE;
4464     if (!isDIGIT(*s))
4465 	return FALSE;
4466     while (isDIGIT(*s))
4467 	s++;
4468     if (*s == ',')
4469 	s++;
4470     while (isDIGIT(*s))
4471 	s++;
4472     if (*s != '}')
4473 	return FALSE;
4474     return TRUE;
4475 }
4476 
4477 
4478 #ifdef DEBUGGING
4479 
4480 STATIC regnode *
4481 S_dumpuntil(pTHX_ regnode *start, regnode *node, regnode *last, SV* sv, I32 l)
4482 {
4483     register U8 op = EXACT;	/* Arbitrary non-END op. */
4484     register regnode *next;
4485 
4486     while (op != END && (!last || node < last)) {
4487 	/* While that wasn't END last time... */
4488 
4489 	NODE_ALIGN(node);
4490 	op = OP(node);
4491 	if (op == CLOSE)
4492 	    l--;
4493 	next = regnext(node);
4494 	/* Where, what. */
4495 	if (OP(node) == OPTIMIZED)
4496 	    goto after_print;
4497 	regprop(sv, node);
4498 	PerlIO_printf(Perl_debug_log, "%4"IVdf":%*s%s", (IV)(node - start),
4499 		      (int)(2*l + 1), "", SvPVX(sv));
4500 	if (next == NULL)		/* Next ptr. */
4501 	    PerlIO_printf(Perl_debug_log, "(0)");
4502 	else
4503 	    PerlIO_printf(Perl_debug_log, "(%"IVdf")", (IV)(next - start));
4504 	(void)PerlIO_putc(Perl_debug_log, '\n');
4505       after_print:
4506 	if (PL_regkind[(U8)op] == BRANCHJ) {
4507 	    register regnode *nnode = (OP(next) == LONGJMP
4508 				       ? regnext(next)
4509 				       : next);
4510 	    if (last && nnode > last)
4511 		nnode = last;
4512 	    node = dumpuntil(start, NEXTOPER(NEXTOPER(node)), nnode, sv, l + 1);
4513 	}
4514 	else if (PL_regkind[(U8)op] == BRANCH) {
4515 	    node = dumpuntil(start, NEXTOPER(node), next, sv, l + 1);
4516 	}
4517 	else if ( op == CURLY) {   /* `next' might be very big: optimizer */
4518 	    node = dumpuntil(start, NEXTOPER(node) + EXTRA_STEP_2ARGS,
4519 			     NEXTOPER(node) + EXTRA_STEP_2ARGS + 1, sv, l + 1);
4520 	}
4521 	else if (PL_regkind[(U8)op] == CURLY && op != CURLYX) {
4522 	    node = dumpuntil(start, NEXTOPER(node) + EXTRA_STEP_2ARGS,
4523 			     next, sv, l + 1);
4524 	}
4525 	else if ( op == PLUS || op == STAR) {
4526 	    node = dumpuntil(start, NEXTOPER(node), NEXTOPER(node) + 1, sv, l + 1);
4527 	}
4528 	else if (op == ANYOF) {
4529 	    /* arglen 1 + class block */
4530 	    node += 1 + ((ANYOF_FLAGS(node) & ANYOF_LARGE)
4531 		    ? ANYOF_CLASS_SKIP : ANYOF_SKIP);
4532 	    node = NEXTOPER(node);
4533 	}
4534 	else if (PL_regkind[(U8)op] == EXACT) {
4535             /* Literal string, where present. */
4536 	    node += NODE_SZ_STR(node) - 1;
4537 	    node = NEXTOPER(node);
4538 	}
4539 	else {
4540 	    node = NEXTOPER(node);
4541 	    node += regarglen[(U8)op];
4542 	}
4543 	if (op == CURLYX || op == OPEN)
4544 	    l++;
4545 	else if (op == WHILEM)
4546 	    l--;
4547     }
4548     return node;
4549 }
4550 
4551 #endif	/* DEBUGGING */
4552 
4553 /*
4554  - regdump - dump a regexp onto Perl_debug_log in vaguely comprehensible form
4555  */
4556 void
4557 Perl_regdump(pTHX_ regexp *r)
4558 {
4559 #ifdef DEBUGGING
4560     SV *sv = sv_newmortal();
4561 
4562     (void)dumpuntil(r->program, r->program + 1, NULL, sv, 0);
4563 
4564     /* Header fields of interest. */
4565     if (r->anchored_substr)
4566 	PerlIO_printf(Perl_debug_log,
4567 		      "anchored `%s%.*s%s'%s at %"IVdf" ",
4568 		      PL_colors[0],
4569 		      (int)(SvCUR(r->anchored_substr) - (SvTAIL(r->anchored_substr)!=0)),
4570 		      SvPVX(r->anchored_substr),
4571 		      PL_colors[1],
4572 		      SvTAIL(r->anchored_substr) ? "$" : "",
4573 		      (IV)r->anchored_offset);
4574     else if (r->anchored_utf8)
4575 	PerlIO_printf(Perl_debug_log,
4576 		      "anchored utf8 `%s%.*s%s'%s at %"IVdf" ",
4577 		      PL_colors[0],
4578 		      (int)(SvCUR(r->anchored_utf8) - (SvTAIL(r->anchored_utf8)!=0)),
4579 		      SvPVX(r->anchored_utf8),
4580 		      PL_colors[1],
4581 		      SvTAIL(r->anchored_utf8) ? "$" : "",
4582 		      (IV)r->anchored_offset);
4583     if (r->float_substr)
4584 	PerlIO_printf(Perl_debug_log,
4585 		      "floating `%s%.*s%s'%s at %"IVdf"..%"UVuf" ",
4586 		      PL_colors[0],
4587 		      (int)(SvCUR(r->float_substr) - (SvTAIL(r->float_substr)!=0)),
4588 		      SvPVX(r->float_substr),
4589 		      PL_colors[1],
4590 		      SvTAIL(r->float_substr) ? "$" : "",
4591 		      (IV)r->float_min_offset, (UV)r->float_max_offset);
4592     else if (r->float_utf8)
4593 	PerlIO_printf(Perl_debug_log,
4594 		      "floating utf8 `%s%.*s%s'%s at %"IVdf"..%"UVuf" ",
4595 		      PL_colors[0],
4596 		      (int)(SvCUR(r->float_utf8) - (SvTAIL(r->float_utf8)!=0)),
4597 		      SvPVX(r->float_utf8),
4598 		      PL_colors[1],
4599 		      SvTAIL(r->float_utf8) ? "$" : "",
4600 		      (IV)r->float_min_offset, (UV)r->float_max_offset);
4601     if (r->check_substr || r->check_utf8)
4602 	PerlIO_printf(Perl_debug_log,
4603 		      r->check_substr == r->float_substr
4604 		      && r->check_utf8 == r->float_utf8
4605 		      ? "(checking floating" : "(checking anchored");
4606     if (r->reganch & ROPT_NOSCAN)
4607 	PerlIO_printf(Perl_debug_log, " noscan");
4608     if (r->reganch & ROPT_CHECK_ALL)
4609 	PerlIO_printf(Perl_debug_log, " isall");
4610     if (r->check_substr || r->check_utf8)
4611 	PerlIO_printf(Perl_debug_log, ") ");
4612 
4613     if (r->regstclass) {
4614 	regprop(sv, r->regstclass);
4615 	PerlIO_printf(Perl_debug_log, "stclass `%s' ", SvPVX(sv));
4616     }
4617     if (r->reganch & ROPT_ANCH) {
4618 	PerlIO_printf(Perl_debug_log, "anchored");
4619 	if (r->reganch & ROPT_ANCH_BOL)
4620 	    PerlIO_printf(Perl_debug_log, "(BOL)");
4621 	if (r->reganch & ROPT_ANCH_MBOL)
4622 	    PerlIO_printf(Perl_debug_log, "(MBOL)");
4623 	if (r->reganch & ROPT_ANCH_SBOL)
4624 	    PerlIO_printf(Perl_debug_log, "(SBOL)");
4625 	if (r->reganch & ROPT_ANCH_GPOS)
4626 	    PerlIO_printf(Perl_debug_log, "(GPOS)");
4627 	PerlIO_putc(Perl_debug_log, ' ');
4628     }
4629     if (r->reganch & ROPT_GPOS_SEEN)
4630 	PerlIO_printf(Perl_debug_log, "GPOS ");
4631     if (r->reganch & ROPT_SKIP)
4632 	PerlIO_printf(Perl_debug_log, "plus ");
4633     if (r->reganch & ROPT_IMPLICIT)
4634 	PerlIO_printf(Perl_debug_log, "implicit ");
4635     PerlIO_printf(Perl_debug_log, "minlen %ld ", (long) r->minlen);
4636     if (r->reganch & ROPT_EVAL_SEEN)
4637 	PerlIO_printf(Perl_debug_log, "with eval ");
4638     PerlIO_printf(Perl_debug_log, "\n");
4639     if (r->offsets) {
4640       U32 i;
4641       U32 len = r->offsets[0];
4642       PerlIO_printf(Perl_debug_log, "Offsets: [%"UVuf"]\n\t", (UV)r->offsets[0]);
4643       for (i = 1; i <= len; i++)
4644         PerlIO_printf(Perl_debug_log, "%"UVuf"[%"UVuf"] ",
4645                       (UV)r->offsets[i*2-1],
4646                       (UV)r->offsets[i*2]);
4647       PerlIO_printf(Perl_debug_log, "\n");
4648     }
4649 #endif	/* DEBUGGING */
4650 }
4651 
4652 #ifdef DEBUGGING
4653 
4654 STATIC void
4655 S_put_byte(pTHX_ SV *sv, int c)
4656 {
4657     if (isCNTRL(c) || c == 255 || !isPRINT(c))
4658 	Perl_sv_catpvf(aTHX_ sv, "\\%o", c);
4659     else if (c == '-' || c == ']' || c == '\\' || c == '^')
4660 	Perl_sv_catpvf(aTHX_ sv, "\\%c", c);
4661     else
4662 	Perl_sv_catpvf(aTHX_ sv, "%c", c);
4663 }
4664 
4665 #endif	/* DEBUGGING */
4666 
4667 /*
4668 - regprop - printable representation of opcode
4669 */
4670 void
4671 Perl_regprop(pTHX_ SV *sv, regnode *o)
4672 {
4673 #ifdef DEBUGGING
4674     register int k;
4675 
4676     sv_setpvn(sv, "", 0);
4677     if (OP(o) >= reg_num)		/* regnode.type is unsigned */
4678 	/* It would be nice to FAIL() here, but this may be called from
4679 	   regexec.c, and it would be hard to supply pRExC_state. */
4680 	Perl_croak(aTHX_ "Corrupted regexp opcode");
4681     sv_catpv(sv, (char*)reg_name[OP(o)]); /* Take off const! */
4682 
4683     k = PL_regkind[(U8)OP(o)];
4684 
4685     if (k == EXACT) {
4686         SV *dsv = sv_2mortal(newSVpvn("", 0));
4687 	/* Using is_utf8_string() is a crude hack but it may
4688 	 * be the best for now since we have no flag "this EXACTish
4689 	 * node was UTF-8" --jhi */
4690 	bool do_utf8 = is_utf8_string((U8*)STRING(o), STR_LEN(o));
4691 	char *s    = do_utf8 ?
4692 	  pv_uni_display(dsv, (U8*)STRING(o), STR_LEN(o), 60,
4693 			 UNI_DISPLAY_REGEX) :
4694 	  STRING(o);
4695 	int len = do_utf8 ?
4696 	  strlen(s) :
4697 	  STR_LEN(o);
4698 	Perl_sv_catpvf(aTHX_ sv, " <%s%.*s%s>",
4699 		       PL_colors[0],
4700 		       len, s,
4701 		       PL_colors[1]);
4702     }
4703     else if (k == CURLY) {
4704 	if (OP(o) == CURLYM || OP(o) == CURLYN || OP(o) == CURLYX)
4705 	    Perl_sv_catpvf(aTHX_ sv, "[%d]", o->flags); /* Parenth number */
4706 	Perl_sv_catpvf(aTHX_ sv, " {%d,%d}", ARG1(o), ARG2(o));
4707     }
4708     else if (k == WHILEM && o->flags)			/* Ordinal/of */
4709 	Perl_sv_catpvf(aTHX_ sv, "[%d/%d]", o->flags & 0xf, o->flags>>4);
4710     else if (k == REF || k == OPEN || k == CLOSE || k == GROUPP )
4711 	Perl_sv_catpvf(aTHX_ sv, "%d", (int)ARG(o));	/* Parenth number */
4712     else if (k == LOGICAL)
4713 	Perl_sv_catpvf(aTHX_ sv, "[%d]", o->flags);	/* 2: embedded, otherwise 1 */
4714     else if (k == ANYOF) {
4715 	int i, rangestart = -1;
4716 	U8 flags = ANYOF_FLAGS(o);
4717 	const char * const anyofs[] = {	/* Should be synchronized with
4718 					 * ANYOF_ #xdefines in regcomp.h */
4719 	    "\\w",
4720 	    "\\W",
4721 	    "\\s",
4722 	    "\\S",
4723 	    "\\d",
4724 	    "\\D",
4725 	    "[:alnum:]",
4726 	    "[:^alnum:]",
4727 	    "[:alpha:]",
4728 	    "[:^alpha:]",
4729 	    "[:ascii:]",
4730 	    "[:^ascii:]",
4731 	    "[:ctrl:]",
4732 	    "[:^ctrl:]",
4733 	    "[:graph:]",
4734 	    "[:^graph:]",
4735 	    "[:lower:]",
4736 	    "[:^lower:]",
4737 	    "[:print:]",
4738 	    "[:^print:]",
4739 	    "[:punct:]",
4740 	    "[:^punct:]",
4741 	    "[:upper:]",
4742 	    "[:^upper:]",
4743 	    "[:xdigit:]",
4744 	    "[:^xdigit:]",
4745 	    "[:space:]",
4746 	    "[:^space:]",
4747 	    "[:blank:]",
4748 	    "[:^blank:]"
4749 	};
4750 
4751 	if (flags & ANYOF_LOCALE)
4752 	    sv_catpv(sv, "{loc}");
4753 	if (flags & ANYOF_FOLD)
4754 	    sv_catpv(sv, "{i}");
4755 	Perl_sv_catpvf(aTHX_ sv, "[%s", PL_colors[0]);
4756 	if (flags & ANYOF_INVERT)
4757 	    sv_catpv(sv, "^");
4758 	for (i = 0; i <= 256; i++) {
4759 	    if (i < 256 && ANYOF_BITMAP_TEST(o,i)) {
4760 		if (rangestart == -1)
4761 		    rangestart = i;
4762 	    } else if (rangestart != -1) {
4763 		if (i <= rangestart + 3)
4764 		    for (; rangestart < i; rangestart++)
4765 			put_byte(sv, rangestart);
4766 		else {
4767 		    put_byte(sv, rangestart);
4768 		    sv_catpv(sv, "-");
4769 		    put_byte(sv, i - 1);
4770 		}
4771 		rangestart = -1;
4772 	    }
4773 	}
4774 
4775 	if (o->flags & ANYOF_CLASS)
4776 	    for (i = 0; i < sizeof(anyofs)/sizeof(char*); i++)
4777 		if (ANYOF_CLASS_TEST(o,i))
4778 		    sv_catpv(sv, anyofs[i]);
4779 
4780 	if (flags & ANYOF_UNICODE)
4781 	    sv_catpv(sv, "{unicode}");
4782 	else if (flags & ANYOF_UNICODE_ALL)
4783 	    sv_catpv(sv, "{unicode_all}");
4784 
4785 	{
4786 	    SV *lv;
4787 	    SV *sw = regclass_swash(o, FALSE, &lv, 0);
4788 
4789 	    if (lv) {
4790 		if (sw) {
4791 		    U8 s[UTF8_MAXLEN+1];
4792 
4793 		    for (i = 0; i <= 256; i++) { /* just the first 256 */
4794 			U8 *e = uvchr_to_utf8(s, i);
4795 
4796 			if (i < 256 && swash_fetch(sw, s, TRUE)) {
4797 			    if (rangestart == -1)
4798 				rangestart = i;
4799 			} else if (rangestart != -1) {
4800 			    U8 *p;
4801 
4802 			    if (i <= rangestart + 3)
4803 				for (; rangestart < i; rangestart++) {
4804 				    for(e = uvchr_to_utf8(s, rangestart), p = s; p < e; p++)
4805 					put_byte(sv, *p);
4806 				}
4807 			    else {
4808 				for (e = uvchr_to_utf8(s, rangestart), p = s; p < e; p++)
4809 				    put_byte(sv, *p);
4810 				sv_catpv(sv, "-");
4811 				    for (e = uvchr_to_utf8(s, i - 1), p = s; p < e; p++)
4812 					put_byte(sv, *p);
4813 				}
4814 				rangestart = -1;
4815 			    }
4816 			}
4817 
4818 		    sv_catpv(sv, "..."); /* et cetera */
4819 		}
4820 
4821 		{
4822 		    char *s = savepv(SvPVX(lv));
4823 		    char *origs = s;
4824 
4825 		    while(*s && *s != '\n') s++;
4826 
4827 		    if (*s == '\n') {
4828 			char *t = ++s;
4829 
4830 			while (*s) {
4831 			    if (*s == '\n')
4832 				*s = ' ';
4833 			    s++;
4834 			}
4835 			if (s[-1] == ' ')
4836 			    s[-1] = 0;
4837 
4838 			sv_catpv(sv, t);
4839 		    }
4840 
4841 		    Safefree(origs);
4842 		}
4843 	    }
4844 	}
4845 
4846 	Perl_sv_catpvf(aTHX_ sv, "%s]", PL_colors[1]);
4847     }
4848     else if (k == BRANCHJ && (OP(o) == UNLESSM || OP(o) == IFMATCH))
4849 	Perl_sv_catpvf(aTHX_ sv, "[-%d]", o->flags);
4850 #endif	/* DEBUGGING */
4851 }
4852 
4853 SV *
4854 Perl_re_intuit_string(pTHX_ regexp *prog)
4855 {				/* Assume that RE_INTUIT is set */
4856     DEBUG_r(
4857 	{   STRLEN n_a;
4858 	    char *s = SvPV(prog->check_substr
4859 		      ? prog->check_substr : prog->check_utf8, n_a);
4860 
4861 	    if (!PL_colorset) reginitcolors();
4862 	    PerlIO_printf(Perl_debug_log,
4863 		      "%sUsing REx %ssubstr:%s `%s%.60s%s%s'\n",
4864 		      PL_colors[4],
4865 		      prog->check_substr ? "" : "utf8 ",
4866 		      PL_colors[5],PL_colors[0],
4867 		      s,
4868 		      PL_colors[1],
4869 		      (strlen(s) > 60 ? "..." : ""));
4870 	} );
4871 
4872     return prog->check_substr ? prog->check_substr : prog->check_utf8;
4873 }
4874 
4875 void
4876 Perl_pregfree(pTHX_ struct regexp *r)
4877 {
4878 #ifdef DEBUGGING
4879     SV *dsv = PERL_DEBUG_PAD_ZERO(0);
4880 #endif
4881 
4882     if (!r || (--r->refcnt > 0))
4883 	return;
4884     DEBUG_r({
4885 	 int len;
4886          char *s;
4887 
4888 	 s = (r->reganch & ROPT_UTF8) ? pv_uni_display(dsv, (U8*)r->precomp,
4889 		r->prelen, 60, UNI_DISPLAY_REGEX)
4890             : pv_display(dsv, r->precomp, r->prelen, 0, 60);
4891 	 len = SvCUR(dsv);
4892 	 if (!PL_colorset)
4893 	      reginitcolors();
4894 	 PerlIO_printf(Perl_debug_log,
4895 		       "%sFreeing REx:%s `%s%*.*s%s%s'\n",
4896 		       PL_colors[4],PL_colors[5],PL_colors[0],
4897 		       len, len, s,
4898 		       PL_colors[1],
4899 		       len > 60 ? "..." : "");
4900     });
4901 
4902     if (r->precomp)
4903 	Safefree(r->precomp);
4904     if (r->offsets)             /* 20010421 MJD */
4905 	Safefree(r->offsets);
4906     if (RX_MATCH_COPIED(r))
4907 	Safefree(r->subbeg);
4908     if (r->substrs) {
4909 	if (r->anchored_substr)
4910 	    SvREFCNT_dec(r->anchored_substr);
4911 	if (r->anchored_utf8)
4912 	    SvREFCNT_dec(r->anchored_utf8);
4913 	if (r->float_substr)
4914 	    SvREFCNT_dec(r->float_substr);
4915 	if (r->float_utf8)
4916 	    SvREFCNT_dec(r->float_utf8);
4917 	Safefree(r->substrs);
4918     }
4919     if (r->data) {
4920 	int n = r->data->count;
4921 	AV* new_comppad = NULL;
4922 	AV* old_comppad;
4923 	SV** old_curpad;
4924 
4925 	while (--n >= 0) {
4926           /* If you add a ->what type here, update the comment in regcomp.h */
4927 	    switch (r->data->what[n]) {
4928 	    case 's':
4929 		SvREFCNT_dec((SV*)r->data->data[n]);
4930 		break;
4931 	    case 'f':
4932 		Safefree(r->data->data[n]);
4933 		break;
4934 	    case 'p':
4935 		new_comppad = (AV*)r->data->data[n];
4936 		break;
4937 	    case 'o':
4938 		if (new_comppad == NULL)
4939 		    Perl_croak(aTHX_ "panic: pregfree comppad");
4940 		old_comppad = PL_comppad;
4941 		old_curpad = PL_curpad;
4942 		/* Watch out for global destruction's random ordering. */
4943 		if (SvTYPE(new_comppad) == SVt_PVAV) {
4944 		    PL_comppad = new_comppad;
4945 		    PL_curpad = AvARRAY(new_comppad);
4946 		}
4947 		else
4948 		    PL_curpad = NULL;
4949 
4950 		if (!OpREFCNT_dec((OP_4tree*)r->data->data[n])) {
4951                     op_free((OP_4tree*)r->data->data[n]);
4952 		}
4953 
4954 		PL_comppad = old_comppad;
4955 		PL_curpad = old_curpad;
4956 		SvREFCNT_dec((SV*)new_comppad);
4957 		new_comppad = NULL;
4958 		break;
4959 	    case 'n':
4960 	        break;
4961 	    default:
4962 		Perl_croak(aTHX_ "panic: regfree data code '%c'", r->data->what[n]);
4963 	    }
4964 	}
4965 	Safefree(r->data->what);
4966 	Safefree(r->data);
4967     }
4968     Safefree(r->startp);
4969     Safefree(r->endp);
4970     Safefree(r);
4971 }
4972 
4973 /*
4974  - regnext - dig the "next" pointer out of a node
4975  *
4976  * [Note, when REGALIGN is defined there are two places in regmatch()
4977  * that bypass this code for speed.]
4978  */
4979 regnode *
4980 Perl_regnext(pTHX_ register regnode *p)
4981 {
4982     register I32 offset;
4983 
4984     if (p == &PL_regdummy)
4985 	return(NULL);
4986 
4987     offset = (reg_off_by_arg[OP(p)] ? ARG(p) : NEXT_OFF(p));
4988     if (offset == 0)
4989 	return(NULL);
4990 
4991     return(p+offset);
4992 }
4993 
4994 STATIC void
4995 S_re_croak2(pTHX_ const char* pat1,const char* pat2,...)
4996 {
4997     va_list args;
4998     STRLEN l1 = strlen(pat1);
4999     STRLEN l2 = strlen(pat2);
5000     char buf[512];
5001     SV *msv;
5002     char *message;
5003 
5004     if (l1 > 510)
5005 	l1 = 510;
5006     if (l1 + l2 > 510)
5007 	l2 = 510 - l1;
5008     Copy(pat1, buf, l1 , char);
5009     Copy(pat2, buf + l1, l2 , char);
5010     buf[l1 + l2] = '\n';
5011     buf[l1 + l2 + 1] = '\0';
5012 #ifdef I_STDARG
5013     /* ANSI variant takes additional second argument */
5014     va_start(args, pat2);
5015 #else
5016     va_start(args);
5017 #endif
5018     msv = vmess(buf, &args);
5019     va_end(args);
5020     message = SvPV(msv,l1);
5021     if (l1 > 512)
5022 	l1 = 512;
5023     Copy(message, buf, l1 , char);
5024     buf[l1] = '\0';			/* Overwrite \n */
5025     Perl_croak(aTHX_ "%s", buf);
5026 }
5027 
5028 /* XXX Here's a total kludge.  But we need to re-enter for swash routines. */
5029 
5030 void
5031 Perl_save_re_context(pTHX)
5032 {
5033 #if 0
5034     SAVEPPTR(RExC_precomp);		/* uncompiled string. */
5035     SAVEI32(RExC_npar);		/* () count. */
5036     SAVEI32(RExC_size);		/* Code size. */
5037     SAVEI32(RExC_flags);		/* are we folding, multilining? */
5038     SAVEVPTR(RExC_rx);		/* from regcomp.c */
5039     SAVEI32(RExC_seen);		/* from regcomp.c */
5040     SAVEI32(RExC_sawback);		/* Did we see \1, ...? */
5041     SAVEI32(RExC_naughty);		/* How bad is this pattern? */
5042     SAVEVPTR(RExC_emit);		/* Code-emit pointer; &regdummy = don't */
5043     SAVEPPTR(RExC_end);		/* End of input for compile */
5044     SAVEPPTR(RExC_parse);		/* Input-scan pointer. */
5045 #endif
5046 
5047     SAVEI32(PL_reg_flags);		/* from regexec.c */
5048     SAVEPPTR(PL_bostr);
5049     SAVEPPTR(PL_reginput);		/* String-input pointer. */
5050     SAVEPPTR(PL_regbol);		/* Beginning of input, for ^ check. */
5051     SAVEPPTR(PL_regeol);		/* End of input, for $ check. */
5052     SAVEVPTR(PL_regstartp);		/* Pointer to startp array. */
5053     SAVEVPTR(PL_regendp);		/* Ditto for endp. */
5054     SAVEVPTR(PL_reglastparen);		/* Similarly for lastparen. */
5055     SAVEPPTR(PL_regtill);		/* How far we are required to go. */
5056     SAVEGENERICPV(PL_reg_start_tmp);		/* from regexec.c */
5057     PL_reg_start_tmp = 0;
5058     SAVEI32(PL_reg_start_tmpl);		/* from regexec.c */
5059     PL_reg_start_tmpl = 0;
5060     SAVEVPTR(PL_regdata);
5061     SAVEI32(PL_reg_eval_set);		/* from regexec.c */
5062     SAVEI32(PL_regnarrate);		/* from regexec.c */
5063     SAVEVPTR(PL_regprogram);		/* from regexec.c */
5064     SAVEINT(PL_regindent);		/* from regexec.c */
5065     SAVEVPTR(PL_regcc);			/* from regexec.c */
5066     SAVEVPTR(PL_curcop);
5067     SAVEVPTR(PL_reg_call_cc);		/* from regexec.c */
5068     SAVEVPTR(PL_reg_re);		/* from regexec.c */
5069     SAVEPPTR(PL_reg_ganch);		/* from regexec.c */
5070     SAVESPTR(PL_reg_sv);		/* from regexec.c */
5071     SAVEI8(PL_reg_match_utf8);		/* from regexec.c */
5072     SAVEVPTR(PL_reg_magic);		/* from regexec.c */
5073     SAVEI32(PL_reg_oldpos);			/* from regexec.c */
5074     SAVEVPTR(PL_reg_oldcurpm);		/* from regexec.c */
5075     SAVEVPTR(PL_reg_curpm);		/* from regexec.c */
5076     SAVEI32(PL_regnpar);		/* () count. */
5077     SAVEI32(PL_regsize);		/* from regexec.c */
5078 #ifdef DEBUGGING
5079     SAVEPPTR(PL_reg_starttry);		/* from regexec.c */
5080 #endif
5081 }
5082 
5083 static void
5084 clear_re(pTHX_ void *r)
5085 {
5086     ReREFCNT_dec((regexp *)r);
5087 }
5088 
5089