xref: /openbsd-src/gnu/usr.bin/perl/regcomp.c (revision 850e275390052b330d93020bf619a739a3c277ac)
1 /*    regcomp.c
2  */
3 
4 /*
5  * "A fair jaw-cracker dwarf-language must be."  --Samwise Gamgee
6  */
7 
8 /* This file contains functions for compiling a regular expression.  See
9  * also regexec.c which funnily enough, contains functions for executing
10  * a regular expression.
11  *
12  * This file is also copied at build time to ext/re/re_comp.c, where
13  * it's built with -DPERL_EXT_RE_BUILD -DPERL_EXT_RE_DEBUG -DPERL_EXT.
14  * This causes the main functions to be compiled under new names and with
15  * debugging support added, which makes "use re 'debug'" work.
16  */
17 
18 /* NOTE: this is derived from Henry Spencer's regexp code, and should not
19  * confused with the original package (see point 3 below).  Thanks, Henry!
20  */
21 
22 /* Additional note: this code is very heavily munged from Henry's version
23  * in places.  In some spots I've traded clarity for efficiency, so don't
24  * blame Henry for some of the lack of readability.
25  */
26 
27 /* The names of the functions have been changed from regcomp and
28  * regexec to  pregcomp and pregexec in order to avoid conflicts
29  * with the POSIX routines of the same names.
30 */
31 
32 #ifdef PERL_EXT_RE_BUILD
33 /* need to replace pregcomp et al, so enable that */
34 #  ifndef PERL_IN_XSUB_RE
35 #    define PERL_IN_XSUB_RE
36 #  endif
37 /* need access to debugger hooks */
38 #  if defined(PERL_EXT_RE_DEBUG) && !defined(DEBUGGING)
39 #    define DEBUGGING
40 #  endif
41 #endif
42 
43 #ifdef PERL_IN_XSUB_RE
44 /* We *really* need to overwrite these symbols: */
45 #  define Perl_pregcomp my_regcomp
46 #  define Perl_regdump my_regdump
47 #  define Perl_regprop my_regprop
48 #  define Perl_pregfree my_regfree
49 #  define Perl_re_intuit_string my_re_intuit_string
50 /* *These* symbols are masked to allow static link. */
51 #  define Perl_regnext my_regnext
52 #  define Perl_save_re_context my_save_re_context
53 #  define Perl_reginitcolors my_reginitcolors
54 
55 #  define PERL_NO_GET_CONTEXT
56 #endif
57 
58 /*
59  * pregcomp and pregexec -- regsub and regerror are not used in perl
60  *
61  *	Copyright (c) 1986 by University of Toronto.
62  *	Written by Henry Spencer.  Not derived from licensed software.
63  *
64  *	Permission is granted to anyone to use this software for any
65  *	purpose on any computer system, and to redistribute it freely,
66  *	subject to the following restrictions:
67  *
68  *	1. The author is not responsible for the consequences of use of
69  *		this software, no matter how awful, even if they arise
70  *		from defects in it.
71  *
72  *	2. The origin of this software must not be misrepresented, either
73  *		by explicit claim or by omission.
74  *
75  *	3. Altered versions must be plainly marked as such, and must not
76  *		be misrepresented as being the original software.
77  *
78  *
79  ****    Alterations to Henry's code are...
80  ****
81  ****    Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999,
82  ****    2000, 2001, 2002, 2003, 2004, 2005, 2006, by Larry Wall and others
83  ****
84  ****    You may distribute under the terms of either the GNU General Public
85  ****    License or the Artistic License, as specified in the README file.
86 
87  *
88  * Beware that some of this code is subtly aware of the way operator
89  * precedence is structured in regular expressions.  Serious changes in
90  * regular-expression syntax might require a total rethink.
91  */
92 #include "EXTERN.h"
93 #define PERL_IN_REGCOMP_C
94 #include "perl.h"
95 
96 #ifndef PERL_IN_XSUB_RE
97 #  include "INTERN.h"
98 #endif
99 
100 #define REG_COMP_C
101 #include "regcomp.h"
102 
103 #ifdef op
104 #undef op
105 #endif /* op */
106 
107 #ifdef MSDOS
108 #  if defined(BUGGY_MSC6)
109  /* MSC 6.00A breaks on op/regexp.t test 85 unless we turn this off */
110 #    pragma optimize("a",off)
111  /* But MSC 6.00A is happy with 'w', for aliases only across function calls*/
112 #    pragma optimize("w",on )
113 #  endif /* BUGGY_MSC6 */
114 #endif /* MSDOS */
115 
116 #ifndef STATIC
117 #define	STATIC	static
118 #endif
119 
120 typedef struct RExC_state_t {
121     U32		flags;			/* are we folding, multilining? */
122     char	*precomp;		/* uncompiled string. */
123     regexp	*rx;
124     char	*start;			/* Start of input for compile */
125     char	*end;			/* End of input for compile */
126     char	*parse;			/* Input-scan pointer. */
127     I32		whilem_seen;		/* number of WHILEM in this expr */
128     regnode	*emit_start;		/* Start of emitted-code area */
129     regnode	*emit;			/* Code-emit pointer; &regdummy = don't = compiling */
130     I32		naughty;		/* How bad is this pattern? */
131     I32		sawback;		/* Did we see \1, ...? */
132     U32		seen;
133     I32		size;			/* Code size. */
134     I32		npar;			/* () count. */
135     I32		extralen;
136     I32		seen_zerolen;
137     I32		seen_evals;
138     I32		utf8;
139     I32		orig_utf8;
140 #if ADD_TO_REGEXEC
141     char 	*starttry;		/* -Dr: where regtry was called. */
142 #define RExC_starttry	(pRExC_state->starttry)
143 #endif
144 } RExC_state_t;
145 
146 #define RExC_flags	(pRExC_state->flags)
147 #define RExC_precomp	(pRExC_state->precomp)
148 #define RExC_rx		(pRExC_state->rx)
149 #define RExC_start	(pRExC_state->start)
150 #define RExC_end	(pRExC_state->end)
151 #define RExC_parse	(pRExC_state->parse)
152 #define RExC_whilem_seen	(pRExC_state->whilem_seen)
153 #define RExC_offsets	(pRExC_state->rx->offsets) /* I am not like the others */
154 #define RExC_emit	(pRExC_state->emit)
155 #define RExC_emit_start	(pRExC_state->emit_start)
156 #define RExC_naughty	(pRExC_state->naughty)
157 #define RExC_sawback	(pRExC_state->sawback)
158 #define RExC_seen	(pRExC_state->seen)
159 #define RExC_size	(pRExC_state->size)
160 #define RExC_npar	(pRExC_state->npar)
161 #define RExC_extralen	(pRExC_state->extralen)
162 #define RExC_seen_zerolen	(pRExC_state->seen_zerolen)
163 #define RExC_seen_evals	(pRExC_state->seen_evals)
164 #define RExC_utf8	(pRExC_state->utf8)
165 #define RExC_orig_utf8 (pRExC_state->orig_utf8)
166 
167 #define	ISMULT1(c)	((c) == '*' || (c) == '+' || (c) == '?')
168 #define	ISMULT2(s)	((*s) == '*' || (*s) == '+' || (*s) == '?' || \
169 	((*s) == '{' && regcurly(s)))
170 
171 #ifdef SPSTART
172 #undef SPSTART		/* dratted cpp namespace... */
173 #endif
174 /*
175  * Flags to be passed up and down.
176  */
177 #define	WORST		0	/* Worst case. */
178 #define	HASWIDTH	0x1	/* Known to match non-null strings. */
179 #define	SIMPLE		0x2	/* Simple enough to be STAR/PLUS operand. */
180 #define	SPSTART		0x4	/* Starts with * or +. */
181 #define TRYAGAIN	0x8	/* Weeded out a declaration. */
182 
183 /* Length of a variant. */
184 
185 typedef struct scan_data_t {
186     I32 len_min;
187     I32 len_delta;
188     I32 pos_min;
189     I32 pos_delta;
190     SV *last_found;
191     I32 last_end;			/* min value, <0 unless valid. */
192     I32 last_start_min;
193     I32 last_start_max;
194     SV **longest;			/* Either &l_fixed, or &l_float. */
195     SV *longest_fixed;
196     I32 offset_fixed;
197     SV *longest_float;
198     I32 offset_float_min;
199     I32 offset_float_max;
200     I32 flags;
201     I32 whilem_c;
202     I32 *last_closep;
203     struct regnode_charclass_class *start_class;
204 } scan_data_t;
205 
206 /*
207  * Forward declarations for pregcomp()'s friends.
208  */
209 
210 static const scan_data_t zero_scan_data =
211   { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
212 
213 #define SF_BEFORE_EOL		(SF_BEFORE_SEOL|SF_BEFORE_MEOL)
214 #define SF_BEFORE_SEOL		0x1
215 #define SF_BEFORE_MEOL		0x2
216 #define SF_FIX_BEFORE_EOL	(SF_FIX_BEFORE_SEOL|SF_FIX_BEFORE_MEOL)
217 #define SF_FL_BEFORE_EOL	(SF_FL_BEFORE_SEOL|SF_FL_BEFORE_MEOL)
218 
219 #ifdef NO_UNARY_PLUS
220 #  define SF_FIX_SHIFT_EOL	(0+2)
221 #  define SF_FL_SHIFT_EOL		(0+4)
222 #else
223 #  define SF_FIX_SHIFT_EOL	(+2)
224 #  define SF_FL_SHIFT_EOL		(+4)
225 #endif
226 
227 #define SF_FIX_BEFORE_SEOL	(SF_BEFORE_SEOL << SF_FIX_SHIFT_EOL)
228 #define SF_FIX_BEFORE_MEOL	(SF_BEFORE_MEOL << SF_FIX_SHIFT_EOL)
229 
230 #define SF_FL_BEFORE_SEOL	(SF_BEFORE_SEOL << SF_FL_SHIFT_EOL)
231 #define SF_FL_BEFORE_MEOL	(SF_BEFORE_MEOL << SF_FL_SHIFT_EOL) /* 0x20 */
232 #define SF_IS_INF		0x40
233 #define SF_HAS_PAR		0x80
234 #define SF_IN_PAR		0x100
235 #define SF_HAS_EVAL		0x200
236 #define SCF_DO_SUBSTR		0x400
237 #define SCF_DO_STCLASS_AND	0x0800
238 #define SCF_DO_STCLASS_OR	0x1000
239 #define SCF_DO_STCLASS		(SCF_DO_STCLASS_AND|SCF_DO_STCLASS_OR)
240 #define SCF_WHILEM_VISITED_POS	0x2000
241 
242 #define UTF (RExC_utf8 != 0)
243 #define LOC ((RExC_flags & PMf_LOCALE) != 0)
244 #define FOLD ((RExC_flags & PMf_FOLD) != 0)
245 
246 #define OOB_UNICODE		12345678
247 #define OOB_NAMEDCLASS		-1
248 
249 #define CHR_SVLEN(sv) (UTF ? sv_len_utf8(sv) : SvCUR(sv))
250 #define CHR_DIST(a,b) (UTF ? utf8_distance(a,b) : a - b)
251 
252 
253 /* length of regex to show in messages that don't mark a position within */
254 #define RegexLengthToShowInErrorMessages 127
255 
256 /*
257  * If MARKER[12] are adjusted, be sure to adjust the constants at the top
258  * of t/op/regmesg.t, the tests in t/op/re_tests, and those in
259  * op/pragma/warn/regcomp.
260  */
261 #define MARKER1 "<-- HERE"    /* marker as it appears in the description */
262 #define MARKER2 " <-- HERE "  /* marker as it appears within the regex */
263 
264 #define REPORT_LOCATION " in regex; marked by " MARKER1 " in m/%.*s" MARKER2 "%s/"
265 
266 /*
267  * Calls SAVEDESTRUCTOR_X if needed, then calls Perl_croak with the given
268  * arg. Show regex, up to a maximum length. If it's too long, chop and add
269  * "...".
270  */
271 #define	FAIL(msg) STMT_START {						\
272     const char *ellipses = "";						\
273     IV len = RExC_end - RExC_precomp;					\
274 									\
275     if (!SIZE_ONLY)							\
276 	SAVEDESTRUCTOR_X(clear_re,(void*)RExC_rx);			\
277     if (len > RegexLengthToShowInErrorMessages) {			\
278 	/* chop 10 shorter than the max, to ensure meaning of "..." */	\
279 	len = RegexLengthToShowInErrorMessages - 10;			\
280 	ellipses = "...";						\
281     }									\
282     Perl_croak(aTHX_ "%s in regex m/%.*s%s/",				\
283 	    msg, (int)len, RExC_precomp, ellipses);			\
284 } STMT_END
285 
286 /*
287  * Calls SAVEDESTRUCTOR_X if needed, then calls Perl_croak with the given
288  * args. Show regex, up to a maximum length. If it's too long, chop and add
289  * "...".
290  */
291 #define	FAIL2(pat,msg) STMT_START {					\
292     const char *ellipses = "";						\
293     IV len = RExC_end - RExC_precomp;					\
294 									\
295     if (!SIZE_ONLY)							\
296 	SAVEDESTRUCTOR_X(clear_re,(void*)RExC_rx);			\
297     if (len > RegexLengthToShowInErrorMessages) {			\
298 	/* chop 10 shorter than the max, to ensure meaning of "..." */	\
299 	len = RegexLengthToShowInErrorMessages - 10;			\
300 	ellipses = "...";						\
301     }									\
302     S_re_croak2(aTHX_ pat, " in regex m/%.*s%s/",			\
303 	    msg, (int)len, RExC_precomp, ellipses);			\
304 } STMT_END
305 
306 
307 /*
308  * Simple_vFAIL -- like FAIL, but marks the current location in the scan
309  */
310 #define	Simple_vFAIL(m) STMT_START {					\
311     const IV offset = RExC_parse - RExC_precomp;			\
312     Perl_croak(aTHX_ "%s" REPORT_LOCATION,				\
313 	    m, (int)offset, RExC_precomp, RExC_precomp + offset);	\
314 } STMT_END
315 
316 /*
317  * Calls SAVEDESTRUCTOR_X if needed, then Simple_vFAIL()
318  */
319 #define	vFAIL(m) STMT_START {				\
320     if (!SIZE_ONLY)					\
321 	SAVEDESTRUCTOR_X(clear_re,(void*)RExC_rx);	\
322     Simple_vFAIL(m);					\
323 } STMT_END
324 
325 /*
326  * Like Simple_vFAIL(), but accepts two arguments.
327  */
328 #define	Simple_vFAIL2(m,a1) STMT_START {			\
329     const IV offset = RExC_parse - RExC_precomp;			\
330     S_re_croak2(aTHX_ m, REPORT_LOCATION, a1,			\
331 	    (int)offset, RExC_precomp, RExC_precomp + offset);	\
332 } STMT_END
333 
334 /*
335  * Calls SAVEDESTRUCTOR_X if needed, then Simple_vFAIL2().
336  */
337 #define	vFAIL2(m,a1) STMT_START {			\
338     if (!SIZE_ONLY)					\
339 	SAVEDESTRUCTOR_X(clear_re,(void*)RExC_rx);	\
340     Simple_vFAIL2(m, a1);				\
341 } STMT_END
342 
343 
344 /*
345  * Like Simple_vFAIL(), but accepts three arguments.
346  */
347 #define	Simple_vFAIL3(m, a1, a2) STMT_START {			\
348     const IV offset = RExC_parse - RExC_precomp;		\
349     S_re_croak2(aTHX_ m, REPORT_LOCATION, a1, a2,		\
350 	    (int)offset, RExC_precomp, RExC_precomp + offset);	\
351 } STMT_END
352 
353 /*
354  * Calls SAVEDESTRUCTOR_X if needed, then Simple_vFAIL3().
355  */
356 #define	vFAIL3(m,a1,a2) STMT_START {			\
357     if (!SIZE_ONLY)					\
358 	SAVEDESTRUCTOR_X(clear_re,(void*)RExC_rx);	\
359     Simple_vFAIL3(m, a1, a2);				\
360 } STMT_END
361 
362 /*
363  * Like Simple_vFAIL(), but accepts four arguments.
364  */
365 #define	Simple_vFAIL4(m, a1, a2, a3) STMT_START {		\
366     const IV offset = RExC_parse - RExC_precomp;		\
367     S_re_croak2(aTHX_ m, REPORT_LOCATION, a1, a2, a3,		\
368 	    (int)offset, RExC_precomp, RExC_precomp + offset);	\
369 } STMT_END
370 
371 #define	vWARN(loc,m) STMT_START {					\
372     const IV offset = loc - RExC_precomp;				\
373     Perl_warner(aTHX_ packWARN(WARN_REGEXP), "%s" REPORT_LOCATION,	\
374 	    m, (int)offset, RExC_precomp, RExC_precomp + offset);	\
375 } STMT_END
376 
377 #define	vWARNdep(loc,m) STMT_START {					\
378     const IV offset = loc - RExC_precomp;				\
379     Perl_warner(aTHX_ packWARN2(WARN_DEPRECATED, WARN_REGEXP),		\
380 	    "%s" REPORT_LOCATION,					\
381 	    m, (int)offset, RExC_precomp, RExC_precomp + offset);	\
382 } STMT_END
383 
384 
385 #define	vWARN2(loc, m, a1) STMT_START {					\
386     const IV offset = loc - RExC_precomp;				\
387     Perl_warner(aTHX_ packWARN(WARN_REGEXP), m REPORT_LOCATION,		\
388 	    a1, (int)offset, RExC_precomp, RExC_precomp + offset);	\
389 } STMT_END
390 
391 #define	vWARN3(loc, m, a1, a2) STMT_START {				\
392     const IV offset = loc - RExC_precomp;				\
393     Perl_warner(aTHX_ packWARN(WARN_REGEXP), m REPORT_LOCATION,		\
394 	    a1, a2, (int)offset, RExC_precomp, RExC_precomp + offset);	\
395 } STMT_END
396 
397 #define	vWARN4(loc, m, a1, a2, a3) STMT_START {				\
398     const IV offset = loc - RExC_precomp;				\
399     Perl_warner(aTHX_ packWARN(WARN_REGEXP), m REPORT_LOCATION,		\
400 	    a1, a2, a3, (int)offset, RExC_precomp, RExC_precomp + offset); \
401 } STMT_END
402 
403 #define	vWARN5(loc, m, a1, a2, a3, a4) STMT_START {			\
404     const IV offset = loc - RExC_precomp;				\
405     Perl_warner(aTHX_ packWARN(WARN_REGEXP), m REPORT_LOCATION,		\
406 	    a1, a2, a3, a4, (int)offset, RExC_precomp, RExC_precomp + offset); \
407 } STMT_END
408 
409 
410 /* Allow for side effects in s */
411 #define REGC(c,s) STMT_START {			\
412     if (!SIZE_ONLY) *(s) = (c); else (void)(s);	\
413 } STMT_END
414 
415 /* Macros for recording node offsets.   20001227 mjd@plover.com
416  * Nodes are numbered 1, 2, 3, 4.  Node #n's position is recorded in
417  * element 2*n-1 of the array.  Element #2n holds the byte length node #n.
418  * Element 0 holds the number n.
419  */
420 
421 #define MJD_OFFSET_DEBUG(x)
422 /* #define MJD_OFFSET_DEBUG(x) Perl_warn_nocontext x */
423 
424 
425 #define Set_Node_Offset_To_R(node,byte) STMT_START {			\
426     if (! SIZE_ONLY) {							\
427 	MJD_OFFSET_DEBUG(("** (%d) offset of node %d is %d.\n",		\
428 		__LINE__, (node), (byte)));				\
429 	if((node) < 0) {						\
430 	    Perl_croak(aTHX_ "value of node is %d in Offset macro", (int)(node)); \
431 	} else {							\
432 	    RExC_offsets[2*(node)-1] = (byte);				\
433 	}								\
434     }									\
435 } STMT_END
436 
437 #define Set_Node_Offset(node,byte) \
438     Set_Node_Offset_To_R((node)-RExC_emit_start, (byte)-RExC_start)
439 #define Set_Cur_Node_Offset Set_Node_Offset(RExC_emit, RExC_parse)
440 
441 #define Set_Node_Length_To_R(node,len) STMT_START {			\
442     if (! SIZE_ONLY) {							\
443 	MJD_OFFSET_DEBUG(("** (%d) size of node %d is %d.\n",		\
444 		__LINE__, (int)(node), (int)(len)));			\
445 	if((node) < 0) {						\
446 	    Perl_croak(aTHX_ "value of node is %d in Length macro", (int)(node)); \
447 	} else {							\
448 	    RExC_offsets[2*(node)] = (len);				\
449 	}								\
450     }									\
451 } STMT_END
452 
453 #define Set_Node_Length(node,len) \
454     Set_Node_Length_To_R((node)-RExC_emit_start, len)
455 #define Set_Cur_Node_Length(len) Set_Node_Length(RExC_emit, len)
456 #define Set_Node_Cur_Length(node) \
457     Set_Node_Length(node, RExC_parse - parse_start)
458 
459 /* Get offsets and lengths */
460 #define Node_Offset(n) (RExC_offsets[2*((n)-RExC_emit_start)-1])
461 #define Node_Length(n) (RExC_offsets[2*((n)-RExC_emit_start)])
462 
463 static void clear_re(pTHX_ void *r);
464 
465 /* Mark that we cannot extend a found fixed substring at this point.
466    Updata the longest found anchored substring and the longest found
467    floating substrings if needed. */
468 
469 STATIC void
470 S_scan_commit(pTHX_ RExC_state_t *pRExC_state, scan_data_t *data)
471 {
472     const STRLEN l = CHR_SVLEN(data->last_found);
473     const STRLEN old_l = CHR_SVLEN(*data->longest);
474 
475     if ((l >= old_l) && ((l > old_l) || (data->flags & SF_BEFORE_EOL))) {
476 	SvSetMagicSV(*data->longest, data->last_found);
477 	if (*data->longest == data->longest_fixed) {
478 	    data->offset_fixed = l ? data->last_start_min : data->pos_min;
479 	    if (data->flags & SF_BEFORE_EOL)
480 		data->flags
481 		    |= ((data->flags & SF_BEFORE_EOL) << SF_FIX_SHIFT_EOL);
482 	    else
483 		data->flags &= ~SF_FIX_BEFORE_EOL;
484 	}
485 	else {
486 	    data->offset_float_min = l ? data->last_start_min : data->pos_min;
487 	    data->offset_float_max = (l
488 				      ? data->last_start_max
489 				      : data->pos_min + data->pos_delta);
490 	    if ((U32)data->offset_float_max > (U32)I32_MAX)
491 		data->offset_float_max = I32_MAX;
492 	    if (data->flags & SF_BEFORE_EOL)
493 		data->flags
494 		    |= ((data->flags & SF_BEFORE_EOL) << SF_FL_SHIFT_EOL);
495 	    else
496 		data->flags &= ~SF_FL_BEFORE_EOL;
497 	}
498     }
499     SvCUR_set(data->last_found, 0);
500     {
501 	SV * const sv = data->last_found;
502 	MAGIC * const mg =
503 	    SvUTF8(sv) && SvMAGICAL(sv) ? mg_find(sv, PERL_MAGIC_utf8) : NULL;
504 	if (mg && mg->mg_len > 0)
505 	    mg->mg_len = 0;
506     }
507     data->last_end = -1;
508     data->flags &= ~SF_BEFORE_EOL;
509 }
510 
511 /* Can match anything (initialization) */
512 STATIC void
513 S_cl_anything(pTHX_ RExC_state_t *pRExC_state, struct regnode_charclass_class *cl)
514 {
515     ANYOF_CLASS_ZERO(cl);
516     ANYOF_BITMAP_SETALL(cl);
517     cl->flags = ANYOF_EOS|ANYOF_UNICODE_ALL;
518     if (LOC)
519 	cl->flags |= ANYOF_LOCALE;
520 }
521 
522 /* Can match anything (initialization) */
523 STATIC int
524 S_cl_is_anything(pTHX_ const struct regnode_charclass_class *cl)
525 {
526     int value;
527 
528     for (value = 0; value <= ANYOF_MAX; value += 2)
529 	if (ANYOF_CLASS_TEST(cl, value) && ANYOF_CLASS_TEST(cl, value + 1))
530 	    return 1;
531     if (!(cl->flags & ANYOF_UNICODE_ALL))
532 	return 0;
533     if (!ANYOF_BITMAP_TESTALLSET(cl))
534 	return 0;
535     return 1;
536 }
537 
538 /* Can match anything (initialization) */
539 STATIC void
540 S_cl_init(pTHX_ RExC_state_t *pRExC_state, struct regnode_charclass_class *cl)
541 {
542     Zero(cl, 1, struct regnode_charclass_class);
543     cl->type = ANYOF;
544     cl_anything(pRExC_state, cl);
545 }
546 
547 STATIC void
548 S_cl_init_zero(pTHX_ RExC_state_t *pRExC_state, struct regnode_charclass_class *cl)
549 {
550     Zero(cl, 1, struct regnode_charclass_class);
551     cl->type = ANYOF;
552     cl_anything(pRExC_state, cl);
553     if (LOC)
554 	cl->flags |= ANYOF_LOCALE;
555 }
556 
557 /* 'And' a given class with another one.  Can create false positives */
558 /* We assume that cl is not inverted */
559 STATIC void
560 S_cl_and(pTHX_ struct regnode_charclass_class *cl,
561 	const struct regnode_charclass_class *and_with)
562 {
563     if (!(and_with->flags & ANYOF_CLASS)
564 	&& !(cl->flags & ANYOF_CLASS)
565 	&& (and_with->flags & ANYOF_LOCALE) == (cl->flags & ANYOF_LOCALE)
566 	&& !(and_with->flags & ANYOF_FOLD)
567 	&& !(cl->flags & ANYOF_FOLD)) {
568 	int i;
569 
570 	if (and_with->flags & ANYOF_INVERT)
571 	    for (i = 0; i < ANYOF_BITMAP_SIZE; i++)
572 		cl->bitmap[i] &= ~and_with->bitmap[i];
573 	else
574 	    for (i = 0; i < ANYOF_BITMAP_SIZE; i++)
575 		cl->bitmap[i] &= and_with->bitmap[i];
576     } /* XXXX: logic is complicated otherwise, leave it along for a moment. */
577     if (!(and_with->flags & ANYOF_EOS))
578 	cl->flags &= ~ANYOF_EOS;
579 
580     if (cl->flags & ANYOF_UNICODE_ALL && and_with->flags & ANYOF_UNICODE &&
581 	!(and_with->flags & ANYOF_INVERT)) {
582 	cl->flags &= ~ANYOF_UNICODE_ALL;
583 	cl->flags |= ANYOF_UNICODE;
584 	ARG_SET(cl, ARG(and_with));
585     }
586     if (!(and_with->flags & ANYOF_UNICODE_ALL) &&
587 	!(and_with->flags & ANYOF_INVERT))
588 	cl->flags &= ~ANYOF_UNICODE_ALL;
589     if (!(and_with->flags & (ANYOF_UNICODE|ANYOF_UNICODE_ALL)) &&
590 	!(and_with->flags & ANYOF_INVERT))
591 	cl->flags &= ~ANYOF_UNICODE;
592 }
593 
594 /* 'OR' a given class with another one.  Can create false positives */
595 /* We assume that cl is not inverted */
596 STATIC void
597 S_cl_or(pTHX_ RExC_state_t *pRExC_state, struct regnode_charclass_class *cl, const struct regnode_charclass_class *or_with)
598 {
599     if (or_with->flags & ANYOF_INVERT) {
600 	/* We do not use
601 	 * (B1 | CL1) | (!B2 & !CL2) = (B1 | !B2 & !CL2) | (CL1 | (!B2 & !CL2))
602 	 *   <= (B1 | !B2) | (CL1 | !CL2)
603 	 * which is wasteful if CL2 is small, but we ignore CL2:
604 	 *   (B1 | CL1) | (!B2 & !CL2) <= (B1 | CL1) | !B2 = (B1 | !B2) | CL1
605 	 * XXXX Can we handle case-fold?  Unclear:
606 	 *   (OK1(i) | OK1(i')) | !(OK1(i) | OK1(i')) =
607 	 *   (OK1(i) | OK1(i')) | (!OK1(i) & !OK1(i'))
608 	 */
609 	if ( (or_with->flags & ANYOF_LOCALE) == (cl->flags & ANYOF_LOCALE)
610 	     && !(or_with->flags & ANYOF_FOLD)
611 	     && !(cl->flags & ANYOF_FOLD) ) {
612 	    int i;
613 
614 	    for (i = 0; i < ANYOF_BITMAP_SIZE; i++)
615 		cl->bitmap[i] |= ~or_with->bitmap[i];
616 	} /* XXXX: logic is complicated otherwise */
617 	else {
618 	    cl_anything(pRExC_state, cl);
619 	}
620     } else {
621 	/* (B1 | CL1) | (B2 | CL2) = (B1 | B2) | (CL1 | CL2)) */
622 	if ( (or_with->flags & ANYOF_LOCALE) == (cl->flags & ANYOF_LOCALE)
623 	     && (!(or_with->flags & ANYOF_FOLD)
624 		 || (cl->flags & ANYOF_FOLD)) ) {
625 	    int i;
626 
627 	    /* OR char bitmap and class bitmap separately */
628 	    for (i = 0; i < ANYOF_BITMAP_SIZE; i++)
629 		cl->bitmap[i] |= or_with->bitmap[i];
630 	    if (or_with->flags & ANYOF_CLASS) {
631 		for (i = 0; i < ANYOF_CLASSBITMAP_SIZE; i++)
632 		    cl->classflags[i] |= or_with->classflags[i];
633 		cl->flags |= ANYOF_CLASS;
634 	    }
635 	}
636 	else { /* XXXX: logic is complicated, leave it along for a moment. */
637 	    cl_anything(pRExC_state, cl);
638 	}
639     }
640     if (or_with->flags & ANYOF_EOS)
641 	cl->flags |= ANYOF_EOS;
642 
643     if (cl->flags & ANYOF_UNICODE && or_with->flags & ANYOF_UNICODE &&
644 	ARG(cl) != ARG(or_with)) {
645 	cl->flags |= ANYOF_UNICODE_ALL;
646 	cl->flags &= ~ANYOF_UNICODE;
647     }
648     if (or_with->flags & ANYOF_UNICODE_ALL) {
649 	cl->flags |= ANYOF_UNICODE_ALL;
650 	cl->flags &= ~ANYOF_UNICODE;
651     }
652 }
653 
654 /*
655  * There are strange code-generation bugs caused on sparc64 by gcc-2.95.2.
656  * These need to be revisited when a newer toolchain becomes available.
657  */
658 #if defined(__sparc64__) && defined(__GNUC__)
659 #   if __GNUC__ < 2 || (__GNUC__ == 2 && __GNUC_MINOR__ < 96)
660 #       undef  SPARC64_GCC_WORKAROUND
661 #       define SPARC64_GCC_WORKAROUND 1
662 #   endif
663 #endif
664 
665 /* REx optimizer.  Converts nodes into quickier variants "in place".
666    Finds fixed substrings.  */
667 
668 /* Stops at toplevel WHILEM as well as at "last". At end *scanp is set
669    to the position after last scanned or to NULL. */
670 
671 STATIC I32
672 S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp, I32 *deltap, regnode *last, scan_data_t *data, U32 flags)
673 			/* scanp: Start here (read-write). */
674 			/* deltap: Write maxlen-minlen here. */
675 			/* last: Stop before this one. */
676 {
677     I32 min = 0, pars = 0, code;
678     regnode *scan = *scanp, *next;
679     I32 delta = 0;
680     int is_inf = (flags & SCF_DO_SUBSTR) && (data->flags & SF_IS_INF);
681     int is_inf_internal = 0;		/* The studied chunk is infinite */
682     I32 is_par = OP(scan) == OPEN ? ARG(scan) : 0;
683     scan_data_t data_fake;
684     struct regnode_charclass_class and_with; /* Valid if flags & SCF_DO_STCLASS_OR */
685 
686     while (scan && OP(scan) != END && scan < last) {
687 	/* Peephole optimizer: */
688 
689 	if (PL_regkind[(U8)OP(scan)] == EXACT) {
690 	    /* Merge several consecutive EXACTish nodes into one. */
691 	    regnode *n = regnext(scan);
692 	    U32 stringok = 1;
693 #ifdef DEBUGGING
694 	    regnode *stop = scan;
695 #endif
696 
697 	    next = scan + NODE_SZ_STR(scan);
698 	    /* Skip NOTHING, merge EXACT*. */
699 	    while (n &&
700 		   ( PL_regkind[(U8)OP(n)] == NOTHING ||
701 		     (stringok && (OP(n) == OP(scan))))
702 		   && NEXT_OFF(n)
703 		   && NEXT_OFF(scan) + NEXT_OFF(n) < I16_MAX) {
704 		if (OP(n) == TAIL || n > next)
705 		    stringok = 0;
706 		if (PL_regkind[(U8)OP(n)] == NOTHING) {
707 		    NEXT_OFF(scan) += NEXT_OFF(n);
708 		    next = n + NODE_STEP_REGNODE;
709 #ifdef DEBUGGING
710 		    if (stringok)
711 			stop = n;
712 #endif
713 		    n = regnext(n);
714 		}
715 		else if (stringok) {
716 		    const int oldl = STR_LEN(scan);
717 		    regnode *nnext = regnext(n);
718 
719 		    if (oldl + STR_LEN(n) > U8_MAX)
720 			break;
721 		    NEXT_OFF(scan) += NEXT_OFF(n);
722 		    STR_LEN(scan) += STR_LEN(n);
723 		    next = n + NODE_SZ_STR(n);
724 		    /* Now we can overwrite *n : */
725 		    Move(STRING(n), STRING(scan) + oldl, STR_LEN(n), char);
726 #ifdef DEBUGGING
727 		    stop = next - 1;
728 #endif
729 		    n = nnext;
730 		}
731 	    }
732 
733 	    if (UTF && OP(scan) == EXACTF && STR_LEN(scan) >= 6) {
734 /*
735   Two problematic code points in Unicode casefolding of EXACT nodes:
736 
737    U+0390 - GREEK SMALL LETTER IOTA WITH DIALYTIKA AND TONOS
738    U+03B0 - GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND TONOS
739 
740    which casefold to
741 
742    Unicode			UTF-8
743 
744    U+03B9 U+0308 U+0301		0xCE 0xB9 0xCC 0x88 0xCC 0x81
745    U+03C5 U+0308 U+0301		0xCF 0x85 0xCC 0x88 0xCC 0x81
746 
747    This means that in case-insensitive matching (or "loose matching",
748    as Unicode calls it), an EXACTF of length six (the UTF-8 encoded byte
749    length of the above casefolded versions) can match a target string
750    of length two (the byte length of UTF-8 encoded U+0390 or U+03B0).
751    This would rather mess up the minimum length computation.
752 
753    What we'll do is to look for the tail four bytes, and then peek
754    at the preceding two bytes to see whether we need to decrease
755    the minimum length by four (six minus two).
756 
757    Thanks to the design of UTF-8, there cannot be false matches:
758    A sequence of valid UTF-8 bytes cannot be a subsequence of
759    another valid sequence of UTF-8 bytes.
760 
761 */
762 		 char *s0 = STRING(scan), *s, *t;
763 		 char *s1 = s0 + STR_LEN(scan) - 1, *s2 = s1 - 4;
764 		 const char * const t0 = "\xcc\x88\xcc\x81";
765 		 const char * const t1 = t0 + 3;
766 
767 		 for (s = s0 + 2;
768 		      s < s2 && (t = ninstr(s, s1, t0, t1));
769 		      s = t + 4) {
770 		      if (((U8)t[-1] == 0xB9 && (U8)t[-2] == 0xCE) ||
771 			  ((U8)t[-1] == 0x85 && (U8)t[-2] == 0xCF))
772 			   min -= 4;
773 		 }
774 	    }
775 
776 #ifdef DEBUGGING
777 	    /* Allow dumping */
778 	    n = scan + NODE_SZ_STR(scan);
779 	    while (n <= stop) {
780 		if (PL_regkind[(U8)OP(n)] != NOTHING || OP(n) == NOTHING) {
781 		    OP(n) = OPTIMIZED;
782 		    NEXT_OFF(n) = 0;
783 		}
784 		n++;
785 	    }
786 #endif
787 	}
788 	/* Follow the next-chain of the current node and optimize
789 	   away all the NOTHINGs from it.  */
790 	if (OP(scan) != CURLYX) {
791 	    const int max = (reg_off_by_arg[OP(scan)]
792 		       ? I32_MAX
793 		       /* I32 may be smaller than U16 on CRAYs! */
794 		       : (I32_MAX < U16_MAX ? I32_MAX : U16_MAX));
795 	    int off = (reg_off_by_arg[OP(scan)] ? ARG(scan) : NEXT_OFF(scan));
796 	    int noff;
797 	    regnode *n = scan;
798 
799 	    /* Skip NOTHING and LONGJMP. */
800 	    while ((n = regnext(n))
801 		   && ((PL_regkind[(U8)OP(n)] == NOTHING && (noff = NEXT_OFF(n)))
802 		       || ((OP(n) == LONGJMP) && (noff = ARG(n))))
803 		   && off + noff < max)
804 		off += noff;
805 	    if (reg_off_by_arg[OP(scan)])
806 		ARG(scan) = off;
807 	    else
808 		NEXT_OFF(scan) = off;
809 	}
810 	/* The principal pseudo-switch.  Cannot be a switch, since we
811 	   look into several different things.  */
812 	if (OP(scan) == BRANCH || OP(scan) == BRANCHJ
813 		   || OP(scan) == IFTHEN || OP(scan) == SUSPEND) {
814 	    next = regnext(scan);
815 	    code = OP(scan);
816 
817 	    if (OP(next) == code || code == IFTHEN || code == SUSPEND) {
818 		I32 max1 = 0, min1 = I32_MAX, num = 0;
819 		struct regnode_charclass_class accum;
820 
821 		if (flags & SCF_DO_SUBSTR) /* XXXX Add !SUSPEND? */
822 		    scan_commit(pRExC_state, data); /* Cannot merge strings after this. */
823 		if (flags & SCF_DO_STCLASS)
824 		    cl_init_zero(pRExC_state, &accum);
825 		while (OP(scan) == code) {
826 		    I32 deltanext, minnext, f = 0, fake;
827 		    struct regnode_charclass_class this_class;
828 
829 		    num++;
830 		    data_fake.flags = 0;
831 		    if (data) {
832 			data_fake.whilem_c = data->whilem_c;
833 			data_fake.last_closep = data->last_closep;
834 		    }
835 		    else
836 			data_fake.last_closep = &fake;
837 		    next = regnext(scan);
838 		    scan = NEXTOPER(scan);
839 		    if (code != BRANCH)
840 			scan = NEXTOPER(scan);
841 		    if (flags & SCF_DO_STCLASS) {
842 			cl_init(pRExC_state, &this_class);
843 			data_fake.start_class = &this_class;
844 			f = SCF_DO_STCLASS_AND;
845 		    }
846 		    if (flags & SCF_WHILEM_VISITED_POS)
847 			f |= SCF_WHILEM_VISITED_POS;
848 		    /* we suppose the run is continuous, last=next...*/
849 		    minnext = study_chunk(pRExC_state, &scan, &deltanext,
850 					  next, &data_fake, f);
851 		    if (min1 > minnext)
852 			min1 = minnext;
853 		    if (max1 < minnext + deltanext)
854 			max1 = minnext + deltanext;
855 		    if (deltanext == I32_MAX)
856 			is_inf = is_inf_internal = 1;
857 		    scan = next;
858 		    if (data_fake.flags & (SF_HAS_PAR|SF_IN_PAR))
859 			pars++;
860 		    if (data && (data_fake.flags & SF_HAS_EVAL))
861 			data->flags |= SF_HAS_EVAL;
862 		    if (data)
863 			data->whilem_c = data_fake.whilem_c;
864 		    if (flags & SCF_DO_STCLASS)
865 			cl_or(pRExC_state, &accum, &this_class);
866 		    if (code == SUSPEND)
867 			break;
868 		}
869 		if (code == IFTHEN && num < 2) /* Empty ELSE branch */
870 		    min1 = 0;
871 		if (flags & SCF_DO_SUBSTR) {
872 		    data->pos_min += min1;
873 		    data->pos_delta += max1 - min1;
874 		    if (max1 != min1 || is_inf)
875 			data->longest = &(data->longest_float);
876 		}
877 		min += min1;
878 		delta += max1 - min1;
879 		if (flags & SCF_DO_STCLASS_OR) {
880 		    cl_or(pRExC_state, data->start_class, &accum);
881 		    if (min1) {
882 			cl_and(data->start_class, &and_with);
883 			flags &= ~SCF_DO_STCLASS;
884 		    }
885 		}
886 		else if (flags & SCF_DO_STCLASS_AND) {
887 		    if (min1) {
888 			cl_and(data->start_class, &accum);
889 			flags &= ~SCF_DO_STCLASS;
890 		    }
891 		    else {
892 			/* Switch to OR mode: cache the old value of
893 			 * data->start_class */
894 			StructCopy(data->start_class, &and_with,
895 				   struct regnode_charclass_class);
896 			flags &= ~SCF_DO_STCLASS_AND;
897 			StructCopy(&accum, data->start_class,
898 				   struct regnode_charclass_class);
899 			flags |= SCF_DO_STCLASS_OR;
900 			data->start_class->flags |= ANYOF_EOS;
901 		    }
902 		}
903 
904 	    }
905 	    else if (code == BRANCHJ)	/* single branch is optimized. */
906 		scan = NEXTOPER(NEXTOPER(scan));
907 	    else			/* single branch is optimized. */
908 		scan = NEXTOPER(scan);
909 	    continue;
910 	}
911 	else if (OP(scan) == EXACT) {
912 	    I32 l = STR_LEN(scan);
913 	    UV uc = *((U8*)STRING(scan));
914 	    if (UTF) {
915 		const U8 * const s = (U8*)STRING(scan);
916 		l = utf8_length((U8 *)s, (U8 *)s + l);
917 		uc = utf8_to_uvchr((U8 *)s, NULL);
918 	    }
919 	    min += l;
920 	    if (flags & SCF_DO_SUBSTR) { /* Update longest substr. */
921 		/* The code below prefers earlier match for fixed
922 		   offset, later match for variable offset.  */
923 		if (data->last_end == -1) { /* Update the start info. */
924 		    data->last_start_min = data->pos_min;
925  		    data->last_start_max = is_inf
926  			? I32_MAX : data->pos_min + data->pos_delta;
927 		}
928 		sv_catpvn(data->last_found, STRING(scan), STR_LEN(scan));
929 		{
930 		    SV * const sv = data->last_found;
931 		    MAGIC * const mg = SvUTF8(sv) && SvMAGICAL(sv) ?
932 			mg_find(sv, PERL_MAGIC_utf8) : NULL;
933 		    if (mg && mg->mg_len >= 0)
934 			mg->mg_len += utf8_length((U8*)STRING(scan),
935 						  (U8*)STRING(scan)+STR_LEN(scan));
936 		}
937 		if (UTF)
938 		    SvUTF8_on(data->last_found);
939 		data->last_end = data->pos_min + l;
940 		data->pos_min += l; /* As in the first entry. */
941 		data->flags &= ~SF_BEFORE_EOL;
942 	    }
943 	    if (flags & SCF_DO_STCLASS_AND) {
944 		/* Check whether it is compatible with what we know already! */
945 		int compat = 1;
946 
947 		if (uc >= 0x100 ||
948 		    (!(data->start_class->flags & (ANYOF_CLASS | ANYOF_LOCALE))
949 		    && !ANYOF_BITMAP_TEST(data->start_class, uc)
950 		    && (!(data->start_class->flags & ANYOF_FOLD)
951 			|| !ANYOF_BITMAP_TEST(data->start_class, PL_fold[uc])))
952                     )
953 		    compat = 0;
954 		ANYOF_CLASS_ZERO(data->start_class);
955 		ANYOF_BITMAP_ZERO(data->start_class);
956 		if (compat)
957 		    ANYOF_BITMAP_SET(data->start_class, uc);
958 		data->start_class->flags &= ~ANYOF_EOS;
959 		if (uc < 0x100)
960 		  data->start_class->flags &= ~ANYOF_UNICODE_ALL;
961 	    }
962 	    else if (flags & SCF_DO_STCLASS_OR) {
963 		/* false positive possible if the class is case-folded */
964 		if (uc < 0x100)
965 		    ANYOF_BITMAP_SET(data->start_class, uc);
966 		else
967 		    data->start_class->flags |= ANYOF_UNICODE_ALL;
968 		data->start_class->flags &= ~ANYOF_EOS;
969 		cl_and(data->start_class, &and_with);
970 	    }
971 	    flags &= ~SCF_DO_STCLASS;
972 	}
973 	else if (PL_regkind[(U8)OP(scan)] == EXACT) { /* But OP != EXACT! */
974 	    I32 l = STR_LEN(scan);
975 	    UV uc = *((U8*)STRING(scan));
976 
977 	    /* Search for fixed substrings supports EXACT only. */
978 	    if (flags & SCF_DO_SUBSTR)
979 		scan_commit(pRExC_state, data);
980 	    if (UTF) {
981 		U8 *s = (U8 *)STRING(scan);
982 		l = utf8_length(s, s + l);
983 		uc = utf8_to_uvchr(s, NULL);
984 	    }
985 	    min += l;
986 	    if (data && (flags & SCF_DO_SUBSTR))
987 		data->pos_min += l;
988 	    if (flags & SCF_DO_STCLASS_AND) {
989 		/* Check whether it is compatible with what we know already! */
990 		int compat = 1;
991 
992 		if (uc >= 0x100 ||
993 		    (!(data->start_class->flags & (ANYOF_CLASS | ANYOF_LOCALE))
994 		    && !ANYOF_BITMAP_TEST(data->start_class, uc)
995 		     && !ANYOF_BITMAP_TEST(data->start_class, PL_fold[uc])))
996 		    compat = 0;
997 		ANYOF_CLASS_ZERO(data->start_class);
998 		ANYOF_BITMAP_ZERO(data->start_class);
999 		if (compat) {
1000 		    ANYOF_BITMAP_SET(data->start_class, uc);
1001 		    data->start_class->flags &= ~ANYOF_EOS;
1002 		    data->start_class->flags |= ANYOF_FOLD;
1003 		    if (OP(scan) == EXACTFL)
1004 			data->start_class->flags |= ANYOF_LOCALE;
1005 		}
1006 	    }
1007 	    else if (flags & SCF_DO_STCLASS_OR) {
1008 		if (data->start_class->flags & ANYOF_FOLD) {
1009 		    /* false positive possible if the class is case-folded.
1010 		       Assume that the locale settings are the same... */
1011 		    if (uc < 0x100)
1012 			ANYOF_BITMAP_SET(data->start_class, uc);
1013 		    data->start_class->flags &= ~ANYOF_EOS;
1014 		}
1015 		cl_and(data->start_class, &and_with);
1016 	    }
1017 	    flags &= ~SCF_DO_STCLASS;
1018 	}
1019 	else if (strchr((const char*)PL_varies,OP(scan))) {
1020 	    I32 mincount, maxcount, minnext, deltanext, fl = 0;
1021 	    I32 f = flags, pos_before = 0;
1022 	    regnode *oscan = scan;
1023 	    struct regnode_charclass_class this_class;
1024 	    struct regnode_charclass_class *oclass = NULL;
1025 	    I32 next_is_eval = 0;
1026 
1027 	    switch (PL_regkind[(U8)OP(scan)]) {
1028 	    case WHILEM:		/* End of (?:...)* . */
1029 		scan = NEXTOPER(scan);
1030 		goto finish;
1031 	    case PLUS:
1032 		if (flags & (SCF_DO_SUBSTR | SCF_DO_STCLASS)) {
1033 		    next = NEXTOPER(scan);
1034 		    if (OP(next) == EXACT || (flags & SCF_DO_STCLASS)) {
1035 			mincount = 1;
1036 			maxcount = REG_INFTY;
1037 			next = regnext(scan);
1038 			scan = NEXTOPER(scan);
1039 			goto do_curly;
1040 		    }
1041 		}
1042 		if (flags & SCF_DO_SUBSTR)
1043 		    data->pos_min++;
1044 		min++;
1045 		/* Fall through. */
1046 	    case STAR:
1047 		if (flags & SCF_DO_STCLASS) {
1048 		    mincount = 0;
1049 		    maxcount = REG_INFTY;
1050 		    next = regnext(scan);
1051 		    scan = NEXTOPER(scan);
1052 		    goto do_curly;
1053 		}
1054 		is_inf = is_inf_internal = 1;
1055 		scan = regnext(scan);
1056 		if (flags & SCF_DO_SUBSTR) {
1057 		    scan_commit(pRExC_state, data); /* Cannot extend fixed substrings */
1058 		    data->longest = &(data->longest_float);
1059 		}
1060 		goto optimize_curly_tail;
1061 	    case CURLY:
1062 		mincount = ARG1(scan);
1063 		maxcount = ARG2(scan);
1064 		next = regnext(scan);
1065 		if (OP(scan) == CURLYX) {
1066 		    I32 lp = (data ? *(data->last_closep) : 0);
1067 
1068 		    scan->flags = ((lp <= U8_MAX) ? lp : U8_MAX);
1069 		}
1070 		scan = NEXTOPER(scan) + EXTRA_STEP_2ARGS;
1071 		next_is_eval = (OP(scan) == EVAL);
1072 	      do_curly:
1073 		if (flags & SCF_DO_SUBSTR) {
1074 		    if (mincount == 0) scan_commit(pRExC_state,data); /* Cannot extend fixed substrings */
1075 		    pos_before = data->pos_min;
1076 		}
1077 		if (data) {
1078 		    fl = data->flags;
1079 		    data->flags &= ~(SF_HAS_PAR|SF_IN_PAR|SF_HAS_EVAL);
1080 		    if (is_inf)
1081 			data->flags |= SF_IS_INF;
1082 		}
1083 		if (flags & SCF_DO_STCLASS) {
1084 		    cl_init(pRExC_state, &this_class);
1085 		    oclass = data->start_class;
1086 		    data->start_class = &this_class;
1087 		    f |= SCF_DO_STCLASS_AND;
1088 		    f &= ~SCF_DO_STCLASS_OR;
1089 		}
1090 		/* These are the cases when once a subexpression
1091 		   fails at a particular position, it cannot succeed
1092 		   even after backtracking at the enclosing scope.
1093 
1094 		   XXXX what if minimal match and we are at the
1095 		        initial run of {n,m}? */
1096 		if ((mincount != maxcount - 1) && (maxcount != REG_INFTY))
1097 		    f &= ~SCF_WHILEM_VISITED_POS;
1098 
1099 		/* This will finish on WHILEM, setting scan, or on NULL: */
1100 		minnext = study_chunk(pRExC_state, &scan, &deltanext, last, data,
1101 				      mincount == 0
1102 					? (f & ~SCF_DO_SUBSTR) : f);
1103 
1104 		if (flags & SCF_DO_STCLASS)
1105 		    data->start_class = oclass;
1106 		if (mincount == 0 || minnext == 0) {
1107 		    if (flags & SCF_DO_STCLASS_OR) {
1108 			cl_or(pRExC_state, data->start_class, &this_class);
1109 		    }
1110 		    else if (flags & SCF_DO_STCLASS_AND) {
1111 			/* Switch to OR mode: cache the old value of
1112 			 * data->start_class */
1113 			StructCopy(data->start_class, &and_with,
1114 				   struct regnode_charclass_class);
1115 			flags &= ~SCF_DO_STCLASS_AND;
1116 			StructCopy(&this_class, data->start_class,
1117 				   struct regnode_charclass_class);
1118 			flags |= SCF_DO_STCLASS_OR;
1119 			data->start_class->flags |= ANYOF_EOS;
1120 		    }
1121 		} else {		/* Non-zero len */
1122 		    if (flags & SCF_DO_STCLASS_OR) {
1123 			cl_or(pRExC_state, data->start_class, &this_class);
1124 			cl_and(data->start_class, &and_with);
1125 		    }
1126 		    else if (flags & SCF_DO_STCLASS_AND)
1127 			cl_and(data->start_class, &this_class);
1128 		    flags &= ~SCF_DO_STCLASS;
1129 		}
1130 		if (!scan) 		/* It was not CURLYX, but CURLY. */
1131 		    scan = next;
1132 		if ( /* ? quantifier ok, except for (?{ ... }) */
1133 		    (next_is_eval || !(mincount == 0 && maxcount == 1))
1134 		    && (minnext == 0) && (deltanext == 0)
1135 		    && data && !(data->flags & (SF_HAS_PAR|SF_IN_PAR))
1136 		    && maxcount <= REG_INFTY/3 /* Complement check for big count */
1137 		    && ckWARN(WARN_REGEXP))
1138 		{
1139 		    vWARN(RExC_parse,
1140 			  "Quantifier unexpected on zero-length expression");
1141 		}
1142 
1143 		min += minnext * mincount;
1144 		is_inf_internal |= ((maxcount == REG_INFTY
1145 				     && (minnext + deltanext) > 0)
1146 				    || deltanext == I32_MAX);
1147 		is_inf |= is_inf_internal;
1148 		delta += (minnext + deltanext) * maxcount - minnext * mincount;
1149 
1150 		/* Try powerful optimization CURLYX => CURLYN. */
1151 		if (  OP(oscan) == CURLYX && data
1152 		      && data->flags & SF_IN_PAR
1153 		      && !(data->flags & SF_HAS_EVAL)
1154 		      && !deltanext && minnext == 1 ) {
1155 		    /* Try to optimize to CURLYN.  */
1156 		    regnode *nxt = NEXTOPER(oscan) + EXTRA_STEP_2ARGS;
1157 		    regnode *nxt1 = nxt;
1158 #ifdef DEBUGGING
1159 		    regnode *nxt2;
1160 #endif
1161 
1162 		    /* Skip open. */
1163 		    nxt = regnext(nxt);
1164 		    if (!strchr((const char*)PL_simple,OP(nxt))
1165 			&& !(PL_regkind[(U8)OP(nxt)] == EXACT
1166 			     && STR_LEN(nxt) == 1))
1167 			goto nogo;
1168 #ifdef DEBUGGING
1169 		    nxt2 = nxt;
1170 #endif
1171 		    nxt = regnext(nxt);
1172 		    if (OP(nxt) != CLOSE)
1173 			goto nogo;
1174 		    /* Now we know that nxt2 is the only contents: */
1175 		    oscan->flags = (U8)ARG(nxt);
1176 		    OP(oscan) = CURLYN;
1177 		    OP(nxt1) = NOTHING;	/* was OPEN. */
1178 #ifdef DEBUGGING
1179 		    OP(nxt1 + 1) = OPTIMIZED; /* was count. */
1180 		    NEXT_OFF(nxt1+ 1) = 0; /* just for consistancy. */
1181 		    NEXT_OFF(nxt2) = 0;	/* just for consistancy with CURLY. */
1182 		    OP(nxt) = OPTIMIZED;	/* was CLOSE. */
1183 		    OP(nxt + 1) = OPTIMIZED; /* was count. */
1184 		    NEXT_OFF(nxt+ 1) = 0; /* just for consistancy. */
1185 #endif
1186 		}
1187 	      nogo:
1188 
1189 		/* Try optimization CURLYX => CURLYM. */
1190 		if (  OP(oscan) == CURLYX && data
1191 		      && !(data->flags & SF_HAS_PAR)
1192 		      && !(data->flags & SF_HAS_EVAL)
1193 		      && !deltanext	/* atom is fixed width */
1194 		      && minnext != 0	/* CURLYM can't handle zero width */
1195 		) {
1196 		    /* XXXX How to optimize if data == 0? */
1197 		    /* Optimize to a simpler form.  */
1198 		    regnode *nxt = NEXTOPER(oscan) + EXTRA_STEP_2ARGS; /* OPEN */
1199 		    regnode *nxt2;
1200 
1201 		    OP(oscan) = CURLYM;
1202 		    while ( (nxt2 = regnext(nxt)) /* skip over embedded stuff*/
1203 			    && (OP(nxt2) != WHILEM))
1204 			nxt = nxt2;
1205 		    OP(nxt2)  = SUCCEED; /* Whas WHILEM */
1206 		    /* Need to optimize away parenths. */
1207 		    if (data->flags & SF_IN_PAR) {
1208 			/* Set the parenth number.  */
1209 			regnode *nxt1 = NEXTOPER(oscan) + EXTRA_STEP_2ARGS; /* OPEN*/
1210 
1211 			if (OP(nxt) != CLOSE)
1212 			    FAIL("Panic opt close");
1213 			oscan->flags = (U8)ARG(nxt);
1214 			OP(nxt1) = OPTIMIZED;	/* was OPEN. */
1215 			OP(nxt) = OPTIMIZED;	/* was CLOSE. */
1216 #ifdef DEBUGGING
1217 			OP(nxt1 + 1) = OPTIMIZED; /* was count. */
1218 			OP(nxt + 1) = OPTIMIZED; /* was count. */
1219 			NEXT_OFF(nxt1 + 1) = 0; /* just for consistancy. */
1220 			NEXT_OFF(nxt + 1) = 0; /* just for consistancy. */
1221 #endif
1222 #if 0
1223 			while ( nxt1 && (OP(nxt1) != WHILEM)) {
1224 			    regnode *nnxt = regnext(nxt1);
1225 
1226 			    if (nnxt == nxt) {
1227 				if (reg_off_by_arg[OP(nxt1)])
1228 				    ARG_SET(nxt1, nxt2 - nxt1);
1229 				else if (nxt2 - nxt1 < U16_MAX)
1230 				    NEXT_OFF(nxt1) = nxt2 - nxt1;
1231 				else
1232 				    OP(nxt) = NOTHING;	/* Cannot beautify */
1233 			    }
1234 			    nxt1 = nnxt;
1235 			}
1236 #endif
1237 			/* Optimize again: */
1238 			study_chunk(pRExC_state, &nxt1, &deltanext, nxt,
1239 				    NULL, 0);
1240 		    }
1241 		    else
1242 			oscan->flags = 0;
1243 		}
1244 		else if ((OP(oscan) == CURLYX)
1245 			 && (flags & SCF_WHILEM_VISITED_POS)
1246 			 /* See the comment on a similar expression above.
1247 			    However, this time it not a subexpression
1248 			    we care about, but the expression itself. */
1249 			 && (maxcount == REG_INFTY)
1250 			 && data && ++data->whilem_c < 16) {
1251 		    /* This stays as CURLYX, we can put the count/of pair. */
1252 		    /* Find WHILEM (as in regexec.c) */
1253 		    regnode *nxt = oscan + NEXT_OFF(oscan);
1254 
1255 		    if (OP(PREVOPER(nxt)) == NOTHING) /* LONGJMP */
1256 			nxt += ARG(nxt);
1257 		    PREVOPER(nxt)->flags = (U8)(data->whilem_c
1258 			| (RExC_whilem_seen << 4)); /* On WHILEM */
1259 		}
1260 		if (data && fl & (SF_HAS_PAR|SF_IN_PAR))
1261 		    pars++;
1262 		if (flags & SCF_DO_SUBSTR) {
1263 		    SV *last_str = Nullsv;
1264 		    int counted = mincount != 0;
1265 
1266 		    if (data->last_end > 0 && mincount != 0) { /* Ends with a string. */
1267 #if defined(SPARC64_GCC_WORKAROUND)
1268 			I32 b = 0;
1269 			STRLEN l = 0;
1270 			const char *s = NULL;
1271 			I32 old = 0;
1272 
1273 			if (pos_before >= data->last_start_min)
1274 			    b = pos_before;
1275 			else
1276 			    b = data->last_start_min;
1277 
1278 			l = 0;
1279 			s = SvPV_const(data->last_found, l);
1280 			old = b - data->last_start_min;
1281 
1282 #else
1283 			I32 b = pos_before >= data->last_start_min
1284 			    ? pos_before : data->last_start_min;
1285 			STRLEN l;
1286 			const char *s = SvPV_const(data->last_found, l);
1287 			I32 old = b - data->last_start_min;
1288 #endif
1289 
1290 			if (UTF)
1291 			    old = utf8_hop((U8*)s, old) - (U8*)s;
1292 
1293 			l -= old;
1294 			/* Get the added string: */
1295 			last_str = newSVpvn(s  + old, l);
1296 			if (UTF)
1297 			    SvUTF8_on(last_str);
1298 			if (deltanext == 0 && pos_before == b) {
1299 			    /* What was added is a constant string */
1300 			    if (mincount > 1) {
1301 				SvGROW(last_str, (mincount * l) + 1);
1302 				repeatcpy(SvPVX(last_str) + l,
1303 					  SvPVX_const(last_str), l, mincount - 1);
1304 				SvCUR_set(last_str, SvCUR(last_str) * mincount);
1305 				/* Add additional parts. */
1306 				SvCUR_set(data->last_found,
1307 					  SvCUR(data->last_found) - l);
1308 				sv_catsv(data->last_found, last_str);
1309 				{
1310 				    SV * sv = data->last_found;
1311 				    MAGIC *mg =
1312 					SvUTF8(sv) && SvMAGICAL(sv) ?
1313 					mg_find(sv, PERL_MAGIC_utf8) : NULL;
1314 				    if (mg && mg->mg_len >= 0)
1315 					mg->mg_len += CHR_SVLEN(last_str);
1316 				}
1317 				data->last_end += l * (mincount - 1);
1318 			    }
1319 			} else {
1320 			    /* start offset must point into the last copy */
1321 			    data->last_start_min += minnext * (mincount - 1);
1322 			    data->last_start_max += is_inf ? I32_MAX
1323 				: (maxcount - 1) * (minnext + data->pos_delta);
1324 			}
1325 		    }
1326 		    /* It is counted once already... */
1327 		    data->pos_min += minnext * (mincount - counted);
1328 		    data->pos_delta += - counted * deltanext +
1329 			(minnext + deltanext) * maxcount - minnext * mincount;
1330 		    if (mincount != maxcount) {
1331 			 /* Cannot extend fixed substrings found inside
1332 			    the group.  */
1333 			scan_commit(pRExC_state,data);
1334 			if (mincount && last_str) {
1335 			    sv_setsv(data->last_found, last_str);
1336 			    data->last_end = data->pos_min;
1337 			    data->last_start_min =
1338 				data->pos_min - CHR_SVLEN(last_str);
1339 			    data->last_start_max = is_inf
1340 				? I32_MAX
1341 				: data->pos_min + data->pos_delta
1342 				- CHR_SVLEN(last_str);
1343 			}
1344 			data->longest = &(data->longest_float);
1345 		    }
1346 		    SvREFCNT_dec(last_str);
1347 		}
1348 		if (data && (fl & SF_HAS_EVAL))
1349 		    data->flags |= SF_HAS_EVAL;
1350 	      optimize_curly_tail:
1351 		if (OP(oscan) != CURLYX) {
1352 		    while (PL_regkind[(U8)OP(next = regnext(oscan))] == NOTHING
1353 			   && NEXT_OFF(next))
1354 			NEXT_OFF(oscan) += NEXT_OFF(next);
1355 		}
1356 		continue;
1357 	    default:			/* REF and CLUMP only? */
1358 		if (flags & SCF_DO_SUBSTR) {
1359 		    scan_commit(pRExC_state,data);	/* Cannot expect anything... */
1360 		    data->longest = &(data->longest_float);
1361 		}
1362 		is_inf = is_inf_internal = 1;
1363 		if (flags & SCF_DO_STCLASS_OR)
1364 		    cl_anything(pRExC_state, data->start_class);
1365 		flags &= ~SCF_DO_STCLASS;
1366 		break;
1367 	    }
1368 	}
1369 	else if (strchr((const char*)PL_simple,OP(scan))) {
1370 	    int value = 0;
1371 
1372 	    if (flags & SCF_DO_SUBSTR) {
1373 		scan_commit(pRExC_state,data);
1374 		data->pos_min++;
1375 	    }
1376 	    min++;
1377 	    if (flags & SCF_DO_STCLASS) {
1378 		data->start_class->flags &= ~ANYOF_EOS;	/* No match on empty */
1379 
1380 		/* Some of the logic below assumes that switching
1381 		   locale on will only add false positives. */
1382 		switch (PL_regkind[(U8)OP(scan)]) {
1383 		case SANY:
1384 		default:
1385 		  do_default:
1386 		    /* Perl_croak(aTHX_ "panic: unexpected simple REx opcode %d", OP(scan)); */
1387 		    if (flags & SCF_DO_STCLASS_OR) /* Allow everything */
1388 			cl_anything(pRExC_state, data->start_class);
1389 		    break;
1390 		case REG_ANY:
1391 		    if (OP(scan) == SANY)
1392 			goto do_default;
1393 		    if (flags & SCF_DO_STCLASS_OR) { /* Everything but \n */
1394 			value = (ANYOF_BITMAP_TEST(data->start_class,'\n')
1395 				 || (data->start_class->flags & ANYOF_CLASS));
1396 			cl_anything(pRExC_state, data->start_class);
1397 		    }
1398 		    if (flags & SCF_DO_STCLASS_AND || !value)
1399 			ANYOF_BITMAP_CLEAR(data->start_class,'\n');
1400 		    break;
1401 		case ANYOF:
1402 		    if (flags & SCF_DO_STCLASS_AND)
1403 			cl_and(data->start_class,
1404 			       (struct regnode_charclass_class*)scan);
1405 		    else
1406 			cl_or(pRExC_state, data->start_class,
1407 			      (struct regnode_charclass_class*)scan);
1408 		    break;
1409 		case ALNUM:
1410 		    if (flags & SCF_DO_STCLASS_AND) {
1411 			if (!(data->start_class->flags & ANYOF_LOCALE)) {
1412 			    ANYOF_CLASS_CLEAR(data->start_class,ANYOF_NALNUM);
1413 			    for (value = 0; value < 256; value++)
1414 				if (!isALNUM(value))
1415 				    ANYOF_BITMAP_CLEAR(data->start_class, value);
1416 			}
1417 		    }
1418 		    else {
1419 			if (data->start_class->flags & ANYOF_LOCALE)
1420 			    ANYOF_CLASS_SET(data->start_class,ANYOF_ALNUM);
1421 			else {
1422 			    for (value = 0; value < 256; value++)
1423 				if (isALNUM(value))
1424 				    ANYOF_BITMAP_SET(data->start_class, value);
1425 			}
1426 		    }
1427 		    break;
1428 		case ALNUML:
1429 		    if (flags & SCF_DO_STCLASS_AND) {
1430 			if (data->start_class->flags & ANYOF_LOCALE)
1431 			    ANYOF_CLASS_CLEAR(data->start_class,ANYOF_NALNUM);
1432 		    }
1433 		    else {
1434 			ANYOF_CLASS_SET(data->start_class,ANYOF_ALNUM);
1435 			data->start_class->flags |= ANYOF_LOCALE;
1436 		    }
1437 		    break;
1438 		case NALNUM:
1439 		    if (flags & SCF_DO_STCLASS_AND) {
1440 			if (!(data->start_class->flags & ANYOF_LOCALE)) {
1441 			    ANYOF_CLASS_CLEAR(data->start_class,ANYOF_ALNUM);
1442 			    for (value = 0; value < 256; value++)
1443 				if (isALNUM(value))
1444 				    ANYOF_BITMAP_CLEAR(data->start_class, value);
1445 			}
1446 		    }
1447 		    else {
1448 			if (data->start_class->flags & ANYOF_LOCALE)
1449 			    ANYOF_CLASS_SET(data->start_class,ANYOF_NALNUM);
1450 			else {
1451 			    for (value = 0; value < 256; value++)
1452 				if (!isALNUM(value))
1453 				    ANYOF_BITMAP_SET(data->start_class, value);
1454 			}
1455 		    }
1456 		    break;
1457 		case NALNUML:
1458 		    if (flags & SCF_DO_STCLASS_AND) {
1459 			if (data->start_class->flags & ANYOF_LOCALE)
1460 			    ANYOF_CLASS_CLEAR(data->start_class,ANYOF_ALNUM);
1461 		    }
1462 		    else {
1463 			data->start_class->flags |= ANYOF_LOCALE;
1464 			ANYOF_CLASS_SET(data->start_class,ANYOF_NALNUM);
1465 		    }
1466 		    break;
1467 		case SPACE:
1468 		    if (flags & SCF_DO_STCLASS_AND) {
1469 			if (!(data->start_class->flags & ANYOF_LOCALE)) {
1470 			    ANYOF_CLASS_CLEAR(data->start_class,ANYOF_NSPACE);
1471 			    for (value = 0; value < 256; value++)
1472 				if (!isSPACE(value))
1473 				    ANYOF_BITMAP_CLEAR(data->start_class, value);
1474 			}
1475 		    }
1476 		    else {
1477 			if (data->start_class->flags & ANYOF_LOCALE)
1478 			    ANYOF_CLASS_SET(data->start_class,ANYOF_SPACE);
1479 			else {
1480 			    for (value = 0; value < 256; value++)
1481 				if (isSPACE(value))
1482 				    ANYOF_BITMAP_SET(data->start_class, value);
1483 			}
1484 		    }
1485 		    break;
1486 		case SPACEL:
1487 		    if (flags & SCF_DO_STCLASS_AND) {
1488 			if (data->start_class->flags & ANYOF_LOCALE)
1489 			    ANYOF_CLASS_CLEAR(data->start_class,ANYOF_NSPACE);
1490 		    }
1491 		    else {
1492 			data->start_class->flags |= ANYOF_LOCALE;
1493 			ANYOF_CLASS_SET(data->start_class,ANYOF_SPACE);
1494 		    }
1495 		    break;
1496 		case NSPACE:
1497 		    if (flags & SCF_DO_STCLASS_AND) {
1498 			if (!(data->start_class->flags & ANYOF_LOCALE)) {
1499 			    ANYOF_CLASS_CLEAR(data->start_class,ANYOF_SPACE);
1500 			    for (value = 0; value < 256; value++)
1501 				if (isSPACE(value))
1502 				    ANYOF_BITMAP_CLEAR(data->start_class, value);
1503 			}
1504 		    }
1505 		    else {
1506 			if (data->start_class->flags & ANYOF_LOCALE)
1507 			    ANYOF_CLASS_SET(data->start_class,ANYOF_NSPACE);
1508 			else {
1509 			    for (value = 0; value < 256; value++)
1510 				if (!isSPACE(value))
1511 				    ANYOF_BITMAP_SET(data->start_class, value);
1512 			}
1513 		    }
1514 		    break;
1515 		case NSPACEL:
1516 		    if (flags & SCF_DO_STCLASS_AND) {
1517 			if (data->start_class->flags & ANYOF_LOCALE) {
1518 			    ANYOF_CLASS_CLEAR(data->start_class,ANYOF_SPACE);
1519 			    for (value = 0; value < 256; value++)
1520 				if (!isSPACE(value))
1521 				    ANYOF_BITMAP_CLEAR(data->start_class, value);
1522 			}
1523 		    }
1524 		    else {
1525 			data->start_class->flags |= ANYOF_LOCALE;
1526 			ANYOF_CLASS_SET(data->start_class,ANYOF_NSPACE);
1527 		    }
1528 		    break;
1529 		case DIGIT:
1530 		    if (flags & SCF_DO_STCLASS_AND) {
1531 			ANYOF_CLASS_CLEAR(data->start_class,ANYOF_NDIGIT);
1532 			for (value = 0; value < 256; value++)
1533 			    if (!isDIGIT(value))
1534 				ANYOF_BITMAP_CLEAR(data->start_class, value);
1535 		    }
1536 		    else {
1537 			if (data->start_class->flags & ANYOF_LOCALE)
1538 			    ANYOF_CLASS_SET(data->start_class,ANYOF_DIGIT);
1539 			else {
1540 			    for (value = 0; value < 256; value++)
1541 				if (isDIGIT(value))
1542 				    ANYOF_BITMAP_SET(data->start_class, value);
1543 			}
1544 		    }
1545 		    break;
1546 		case NDIGIT:
1547 		    if (flags & SCF_DO_STCLASS_AND) {
1548 			ANYOF_CLASS_CLEAR(data->start_class,ANYOF_DIGIT);
1549 			for (value = 0; value < 256; value++)
1550 			    if (isDIGIT(value))
1551 				ANYOF_BITMAP_CLEAR(data->start_class, value);
1552 		    }
1553 		    else {
1554 			if (data->start_class->flags & ANYOF_LOCALE)
1555 			    ANYOF_CLASS_SET(data->start_class,ANYOF_NDIGIT);
1556 			else {
1557 			    for (value = 0; value < 256; value++)
1558 				if (!isDIGIT(value))
1559 				    ANYOF_BITMAP_SET(data->start_class, value);
1560 			}
1561 		    }
1562 		    break;
1563 		}
1564 		if (flags & SCF_DO_STCLASS_OR)
1565 		    cl_and(data->start_class, &and_with);
1566 		flags &= ~SCF_DO_STCLASS;
1567 	    }
1568 	}
1569 	else if (PL_regkind[(U8)OP(scan)] == EOL && flags & SCF_DO_SUBSTR) {
1570 	    data->flags |= (OP(scan) == MEOL
1571 			    ? SF_BEFORE_MEOL
1572 			    : SF_BEFORE_SEOL);
1573 	}
1574 	else if (  PL_regkind[(U8)OP(scan)] == BRANCHJ
1575 		 /* Lookbehind, or need to calculate parens/evals/stclass: */
1576 		   && (scan->flags || data || (flags & SCF_DO_STCLASS))
1577 		   && (OP(scan) == IFMATCH || OP(scan) == UNLESSM)) {
1578 	    /* Lookahead/lookbehind */
1579 	    I32 deltanext, minnext, fake = 0;
1580 	    regnode *nscan;
1581 	    struct regnode_charclass_class intrnl;
1582 	    int f = 0;
1583 
1584 	    data_fake.flags = 0;
1585 	    if (data) {
1586 		data_fake.whilem_c = data->whilem_c;
1587 		data_fake.last_closep = data->last_closep;
1588 	    }
1589 	    else
1590 		data_fake.last_closep = &fake;
1591 	    if ( flags & SCF_DO_STCLASS && !scan->flags
1592 		 && OP(scan) == IFMATCH ) { /* Lookahead */
1593 		cl_init(pRExC_state, &intrnl);
1594 		data_fake.start_class = &intrnl;
1595 		f |= SCF_DO_STCLASS_AND;
1596 	    }
1597 	    if (flags & SCF_WHILEM_VISITED_POS)
1598 		f |= SCF_WHILEM_VISITED_POS;
1599 	    next = regnext(scan);
1600 	    nscan = NEXTOPER(NEXTOPER(scan));
1601 	    minnext = study_chunk(pRExC_state, &nscan, &deltanext, last, &data_fake, f);
1602 	    if (scan->flags) {
1603 		if (deltanext) {
1604 		    vFAIL("Variable length lookbehind not implemented");
1605 		}
1606 		else if (minnext > U8_MAX) {
1607 		    vFAIL2("Lookbehind longer than %"UVuf" not implemented", (UV)U8_MAX);
1608 		}
1609 		scan->flags = (U8)minnext;
1610 	    }
1611 	    if (data && data_fake.flags & (SF_HAS_PAR|SF_IN_PAR))
1612 		pars++;
1613 	    if (data && (data_fake.flags & SF_HAS_EVAL))
1614 		data->flags |= SF_HAS_EVAL;
1615 	    if (data)
1616 		data->whilem_c = data_fake.whilem_c;
1617 	    if (f & SCF_DO_STCLASS_AND) {
1618 		const int was = (data->start_class->flags & ANYOF_EOS);
1619 
1620 		cl_and(data->start_class, &intrnl);
1621 		if (was)
1622 		    data->start_class->flags |= ANYOF_EOS;
1623 	    }
1624 	}
1625 	else if (OP(scan) == OPEN) {
1626 	    pars++;
1627 	}
1628 	else if (OP(scan) == CLOSE) {
1629 	    if ((I32)ARG(scan) == is_par) {
1630 		next = regnext(scan);
1631 
1632 		if ( next && (OP(next) != WHILEM) && next < last)
1633 		    is_par = 0;		/* Disable optimization */
1634 	    }
1635 	    if (data)
1636 		*(data->last_closep) = ARG(scan);
1637 	}
1638 	else if (OP(scan) == EVAL) {
1639 		if (data)
1640 		    data->flags |= SF_HAS_EVAL;
1641 	}
1642 	else if (OP(scan) == LOGICAL && scan->flags == 2) { /* Embedded follows */
1643 		if (flags & SCF_DO_SUBSTR) {
1644 		    scan_commit(pRExC_state,data);
1645 		    data->longest = &(data->longest_float);
1646 		}
1647 		is_inf = is_inf_internal = 1;
1648 		if (flags & SCF_DO_STCLASS_OR) /* Allow everything */
1649 		    cl_anything(pRExC_state, data->start_class);
1650 		flags &= ~SCF_DO_STCLASS;
1651 	}
1652 	/* Else: zero-length, ignore. */
1653 	scan = regnext(scan);
1654     }
1655 
1656   finish:
1657     *scanp = scan;
1658     *deltap = is_inf_internal ? I32_MAX : delta;
1659     if (flags & SCF_DO_SUBSTR && is_inf)
1660 	data->pos_delta = I32_MAX - data->pos_min;
1661     if (is_par > U8_MAX)
1662 	is_par = 0;
1663     if (is_par && pars==1 && data) {
1664 	data->flags |= SF_IN_PAR;
1665 	data->flags &= ~SF_HAS_PAR;
1666     }
1667     else if (pars && data) {
1668 	data->flags |= SF_HAS_PAR;
1669 	data->flags &= ~SF_IN_PAR;
1670     }
1671     if (flags & SCF_DO_STCLASS_OR)
1672 	cl_and(data->start_class, &and_with);
1673     return min;
1674 }
1675 
1676 STATIC I32
1677 S_add_data(pTHX_ RExC_state_t *pRExC_state, I32 n, const char *s)
1678 {
1679     if (RExC_rx->data) {
1680 	Renewc(RExC_rx->data,
1681 	       sizeof(*RExC_rx->data) + sizeof(void*) * (RExC_rx->data->count + n - 1),
1682 	       char, struct reg_data);
1683 	Renew(RExC_rx->data->what, RExC_rx->data->count + n, U8);
1684 	RExC_rx->data->count += n;
1685     }
1686     else {
1687 	Newxc(RExC_rx->data, sizeof(*RExC_rx->data) + sizeof(void*) * (n - 1),
1688 	     char, struct reg_data);
1689 	Newx(RExC_rx->data->what, n, U8);
1690 	RExC_rx->data->count = n;
1691     }
1692     Copy(s, RExC_rx->data->what + RExC_rx->data->count - n, n, U8);
1693     return RExC_rx->data->count - n;
1694 }
1695 
1696 void
1697 Perl_reginitcolors(pTHX)
1698 {
1699     const char * const s = PerlEnv_getenv("PERL_RE_COLORS");
1700     if (s) {
1701 	char *t = savepv(s);
1702 	int i = 0;
1703 	PL_colors[0] = t;
1704 	while (++i < 6) {
1705 	    t = strchr(t, '\t');
1706 	    if (t) {
1707 		*t = '\0';
1708 		PL_colors[i] = ++t;
1709 	    }
1710 	    else
1711 		PL_colors[i] = t = (char *)"";
1712 	}
1713     } else {
1714 	int i = 0;
1715 	while (i < 6)
1716 	    PL_colors[i++] = (char *)"";
1717     }
1718     PL_colorset = 1;
1719 }
1720 
1721 
1722 /*
1723  - pregcomp - compile a regular expression into internal code
1724  *
1725  * We can't allocate space until we know how big the compiled form will be,
1726  * but we can't compile it (and thus know how big it is) until we've got a
1727  * place to put the code.  So we cheat:  we compile it twice, once with code
1728  * generation turned off and size counting turned on, and once "for real".
1729  * This also means that we don't allocate space until we are sure that the
1730  * thing really will compile successfully, and we never have to move the
1731  * code and thus invalidate pointers into it.  (Note that it has to be in
1732  * one piece because free() must be able to free it all.) [NB: not true in perl]
1733  *
1734  * Beware that the optimization-preparation code in here knows about some
1735  * of the structure of the compiled regexp.  [I'll say.]
1736  */
1737 regexp *
1738 Perl_pregcomp(pTHX_ char *exp, char *xend, PMOP *pm)
1739 {
1740     register regexp *r;
1741     regnode *scan;
1742     regnode *first;
1743     I32 flags;
1744     I32 minlen = 0;
1745     I32 sawplus = 0;
1746     I32 sawopen = 0;
1747     scan_data_t data;
1748     RExC_state_t RExC_state;
1749     RExC_state_t *pRExC_state = &RExC_state;
1750 
1751     if (exp == NULL)
1752 	FAIL("NULL regexp argument");
1753 
1754     RExC_orig_utf8 = RExC_utf8 = pm->op_pmdynflags & PMdf_CMP_UTF8;
1755 
1756     DEBUG_r({
1757 	 if (!PL_colorset) reginitcolors();
1758 	 PerlIO_printf(Perl_debug_log, "%sCompiling REx%s `%s%*s%s'\n",
1759 		       PL_colors[4],PL_colors[5],PL_colors[0],
1760 		       (int)(xend - exp), exp, PL_colors[1]);
1761     });
1762 
1763 redo_first_pass:
1764     RExC_precomp = exp;
1765     RExC_flags = pm->op_pmflags;
1766     RExC_sawback = 0;
1767 
1768     RExC_seen = 0;
1769     RExC_seen_zerolen = *exp == '^' ? -1 : 0;
1770     RExC_seen_evals = 0;
1771     RExC_extralen = 0;
1772 
1773     /* First pass: determine size, legality. */
1774     RExC_parse = exp;
1775     RExC_start = exp;
1776     RExC_end = xend;
1777     RExC_naughty = 0;
1778     RExC_npar = 1;
1779     RExC_size = 0L;
1780     RExC_emit = &PL_regdummy;
1781     RExC_whilem_seen = 0;
1782 #if 0 /* REGC() is (currently) a NOP at the first pass.
1783        * Clever compilers notice this and complain. --jhi */
1784     REGC((U8)REG_MAGIC, (char*)RExC_emit);
1785 #endif
1786     if (reg(pRExC_state, 0, &flags) == NULL) {
1787 	RExC_precomp = Nullch;
1788 	return(NULL);
1789     }
1790     if (RExC_utf8 && !RExC_orig_utf8) {
1791     	STRLEN len = xend-exp;
1792     	DEBUG_r(PerlIO_printf(Perl_debug_log,
1793 	   "UTF8 mismatch! Converting to utf8 for resizing and compile\n"));
1794 	exp = (char*)Perl_bytes_to_utf8(aTHX_ (U8*)exp, &len);
1795 	xend = exp + len;
1796 	RExC_orig_utf8 = RExC_utf8;
1797 	SAVEFREEPV(exp);
1798 	goto redo_first_pass;
1799     }
1800 
1801     DEBUG_r(PerlIO_printf(Perl_debug_log, "size %"IVdf" ", (IV)RExC_size));
1802 
1803     /* Small enough for pointer-storage convention?
1804        If extralen==0, this means that we will not need long jumps. */
1805     if (RExC_size >= 0x10000L && RExC_extralen)
1806         RExC_size += RExC_extralen;
1807     else
1808 	RExC_extralen = 0;
1809     if (RExC_whilem_seen > 15)
1810 	RExC_whilem_seen = 15;
1811 
1812     /* Allocate space and initialize. */
1813     Newxc(r, sizeof(regexp) + (unsigned)RExC_size * sizeof(regnode),
1814 	 char, regexp);
1815     if (r == NULL)
1816 	FAIL("Regexp out of space");
1817 
1818 #ifdef DEBUGGING
1819     /* avoid reading uninitialized memory in DEBUGGING code in study_chunk() */
1820     Zero(r, sizeof(regexp) + (unsigned)RExC_size * sizeof(regnode), char);
1821 #endif
1822     r->refcnt = 1;
1823     r->prelen = xend - exp;
1824     r->precomp = savepvn(RExC_precomp, r->prelen);
1825     r->subbeg = NULL;
1826     r->reganch = pm->op_pmflags & PMf_COMPILETIME;
1827     r->nparens = RExC_npar - 1;	/* set early to validate backrefs */
1828 
1829     r->substrs = 0;			/* Useful during FAIL. */
1830     r->startp = 0;			/* Useful during FAIL. */
1831     r->endp = 0;			/* Useful during FAIL. */
1832 
1833     Newxz(r->offsets, 2*RExC_size+1, U32); /* MJD 20001228 */
1834     if (r->offsets) {
1835 	r->offsets[0] = RExC_size;
1836     }
1837     DEBUG_r(PerlIO_printf(Perl_debug_log,
1838                           "%s %"UVuf" bytes for offset annotations.\n",
1839                           r->offsets ? "Got" : "Couldn't get",
1840                           (UV)((2*RExC_size+1) * sizeof(U32))));
1841 
1842     RExC_rx = r;
1843 
1844     /* Second pass: emit code. */
1845     RExC_flags = pm->op_pmflags;	/* don't let top level (?i) bleed */
1846     RExC_parse = exp;
1847     RExC_end = xend;
1848     RExC_naughty = 0;
1849     RExC_npar = 1;
1850     RExC_emit_start = r->program;
1851     RExC_emit = r->program;
1852     /* Store the count of eval-groups for security checks: */
1853     RExC_emit->next_off = (U16)((RExC_seen_evals > U16_MAX) ? U16_MAX : RExC_seen_evals);
1854     REGC((U8)REG_MAGIC, (char*) RExC_emit++);
1855     r->data = 0;
1856     if (reg(pRExC_state, 0, &flags) == NULL)
1857 	return(NULL);
1858 
1859     /* Dig out information for optimizations. */
1860     r->reganch = pm->op_pmflags & PMf_COMPILETIME; /* Again? */
1861     pm->op_pmflags = RExC_flags;
1862     if (UTF)
1863         r->reganch |= ROPT_UTF8;	/* Unicode in it? */
1864     r->regstclass = NULL;
1865     if (RExC_naughty >= 10)	/* Probably an expensive pattern. */
1866 	r->reganch |= ROPT_NAUGHTY;
1867     scan = r->program + 1;		/* First BRANCH. */
1868 
1869     /* XXXX To minimize changes to RE engine we always allocate
1870        3-units-long substrs field. */
1871     Newxz(r->substrs, 1, struct reg_substr_data);
1872 
1873     StructCopy(&zero_scan_data, &data, scan_data_t);
1874     /* XXXX Should not we check for something else?  Usually it is OPEN1... */
1875     if (OP(scan) != BRANCH) {	/* Only one top-level choice. */
1876 	I32 fake;
1877 	STRLEN longest_float_length, longest_fixed_length;
1878 	struct regnode_charclass_class ch_class;
1879 	int stclass_flag;
1880 	I32 last_close = 0;
1881 
1882 	first = scan;
1883 	/* Skip introductions and multiplicators >= 1. */
1884 	while ((OP(first) == OPEN && (sawopen = 1)) ||
1885 	       /* An OR of *one* alternative - should not happen now. */
1886 	    (OP(first) == BRANCH && OP(regnext(first)) != BRANCH) ||
1887 	    (OP(first) == PLUS) ||
1888 	    (OP(first) == MINMOD) ||
1889 	       /* An {n,m} with n>0 */
1890 	    (PL_regkind[(U8)OP(first)] == CURLY && ARG1(first) > 0) ) {
1891 		if (OP(first) == PLUS)
1892 		    sawplus = 1;
1893 		else
1894 		    first += regarglen[(U8)OP(first)];
1895 		first = NEXTOPER(first);
1896 	}
1897 
1898 	/* Starting-point info. */
1899       again:
1900 	if (PL_regkind[(U8)OP(first)] == EXACT) {
1901 	    if (OP(first) == EXACT)
1902 	        ;	/* Empty, get anchored substr later. */
1903 	    else if ((OP(first) == EXACTF || OP(first) == EXACTFL))
1904 		r->regstclass = first;
1905 	}
1906 	else if (strchr((const char*)PL_simple,OP(first)))
1907 	    r->regstclass = first;
1908 	else if (PL_regkind[(U8)OP(first)] == BOUND ||
1909 		 PL_regkind[(U8)OP(first)] == NBOUND)
1910 	    r->regstclass = first;
1911 	else if (PL_regkind[(U8)OP(first)] == BOL) {
1912 	    r->reganch |= (OP(first) == MBOL
1913 			   ? ROPT_ANCH_MBOL
1914 			   : (OP(first) == SBOL
1915 			      ? ROPT_ANCH_SBOL
1916 			      : ROPT_ANCH_BOL));
1917 	    first = NEXTOPER(first);
1918 	    goto again;
1919 	}
1920 	else if (OP(first) == GPOS) {
1921 	    r->reganch |= ROPT_ANCH_GPOS;
1922 	    first = NEXTOPER(first);
1923 	    goto again;
1924 	}
1925 	else if (!sawopen && (OP(first) == STAR &&
1926 	    PL_regkind[(U8)OP(NEXTOPER(first))] == REG_ANY) &&
1927 	    !(r->reganch & ROPT_ANCH) )
1928 	{
1929 	    /* turn .* into ^.* with an implied $*=1 */
1930 	    const int type =
1931 		(OP(NEXTOPER(first)) == REG_ANY)
1932 		    ? ROPT_ANCH_MBOL
1933 		    : ROPT_ANCH_SBOL;
1934 	    r->reganch |= type | ROPT_IMPLICIT;
1935 	    first = NEXTOPER(first);
1936 	    goto again;
1937 	}
1938 	if (sawplus && (!sawopen || !RExC_sawback)
1939 	    && !(RExC_seen & REG_SEEN_EVAL)) /* May examine pos and $& */
1940 	    /* x+ must match at the 1st pos of run of x's */
1941 	    r->reganch |= ROPT_SKIP;
1942 
1943 	/* Scan is after the zeroth branch, first is atomic matcher. */
1944 	DEBUG_r(PerlIO_printf(Perl_debug_log, "first at %"IVdf"\n",
1945 			      (IV)(first - scan + 1)));
1946 	/*
1947 	* If there's something expensive in the r.e., find the
1948 	* longest literal string that must appear and make it the
1949 	* regmust.  Resolve ties in favor of later strings, since
1950 	* the regstart check works with the beginning of the r.e.
1951 	* and avoiding duplication strengthens checking.  Not a
1952 	* strong reason, but sufficient in the absence of others.
1953 	* [Now we resolve ties in favor of the earlier string if
1954 	* it happens that c_offset_min has been invalidated, since the
1955 	* earlier string may buy us something the later one won't.]
1956 	*/
1957 	minlen = 0;
1958 
1959 	data.longest_fixed = newSVpvn("",0);
1960 	data.longest_float = newSVpvn("",0);
1961 	data.last_found = newSVpvn("",0);
1962 	data.longest = &(data.longest_fixed);
1963 	first = scan;
1964 	if (!r->regstclass) {
1965 	    cl_init(pRExC_state, &ch_class);
1966 	    data.start_class = &ch_class;
1967 	    stclass_flag = SCF_DO_STCLASS_AND;
1968 	} else				/* XXXX Check for BOUND? */
1969 	    stclass_flag = 0;
1970 	data.last_closep = &last_close;
1971 
1972 	minlen = study_chunk(pRExC_state, &first, &fake, scan + RExC_size, /* Up to end */
1973 			     &data, SCF_DO_SUBSTR | SCF_WHILEM_VISITED_POS | stclass_flag);
1974 	if ( RExC_npar == 1 && data.longest == &(data.longest_fixed)
1975 	     && data.last_start_min == 0 && data.last_end > 0
1976 	     && !RExC_seen_zerolen
1977 	     && (!(RExC_seen & REG_SEEN_GPOS) || (r->reganch & ROPT_ANCH_GPOS)))
1978 	    r->reganch |= ROPT_CHECK_ALL;
1979 	scan_commit(pRExC_state, &data);
1980 	SvREFCNT_dec(data.last_found);
1981 
1982 	longest_float_length = CHR_SVLEN(data.longest_float);
1983 	if (longest_float_length
1984 	    || (data.flags & SF_FL_BEFORE_EOL
1985 		&& (!(data.flags & SF_FL_BEFORE_MEOL)
1986 		    || (RExC_flags & PMf_MULTILINE)))) {
1987 	    int t;
1988 
1989 	    if (SvCUR(data.longest_fixed) 			/* ok to leave SvCUR */
1990 		&& data.offset_fixed == data.offset_float_min
1991 		&& SvCUR(data.longest_fixed) == SvCUR(data.longest_float))
1992 		    goto remove_float;		/* As in (a)+. */
1993 
1994 	    if (SvUTF8(data.longest_float)) {
1995 		r->float_utf8 = data.longest_float;
1996 		r->float_substr = Nullsv;
1997 	    } else {
1998 		r->float_substr = data.longest_float;
1999 		r->float_utf8 = Nullsv;
2000 	    }
2001 	    r->float_min_offset = data.offset_float_min;
2002 	    r->float_max_offset = data.offset_float_max;
2003 	    t = (data.flags & SF_FL_BEFORE_EOL /* Can't have SEOL and MULTI */
2004 		       && (!(data.flags & SF_FL_BEFORE_MEOL)
2005 			   || (RExC_flags & PMf_MULTILINE)));
2006 	    fbm_compile(data.longest_float, t ? FBMcf_TAIL : 0);
2007 	}
2008 	else {
2009 	  remove_float:
2010 	    r->float_substr = r->float_utf8 = Nullsv;
2011 	    SvREFCNT_dec(data.longest_float);
2012 	    longest_float_length = 0;
2013 	}
2014 
2015 	longest_fixed_length = CHR_SVLEN(data.longest_fixed);
2016 	if (longest_fixed_length
2017 	    || (data.flags & SF_FIX_BEFORE_EOL /* Cannot have SEOL and MULTI */
2018 		&& (!(data.flags & SF_FIX_BEFORE_MEOL)
2019 		    || (RExC_flags & PMf_MULTILINE)))) {
2020 	    int t;
2021 
2022 	    if (SvUTF8(data.longest_fixed)) {
2023 		r->anchored_utf8 = data.longest_fixed;
2024 		r->anchored_substr = Nullsv;
2025 	    } else {
2026 		r->anchored_substr = data.longest_fixed;
2027 		r->anchored_utf8 = Nullsv;
2028 	    }
2029 	    r->anchored_offset = data.offset_fixed;
2030 	    t = (data.flags & SF_FIX_BEFORE_EOL /* Can't have SEOL and MULTI */
2031 		 && (!(data.flags & SF_FIX_BEFORE_MEOL)
2032 		     || (RExC_flags & PMf_MULTILINE)));
2033 	    fbm_compile(data.longest_fixed, t ? FBMcf_TAIL : 0);
2034 	}
2035 	else {
2036 	    r->anchored_substr = r->anchored_utf8 = Nullsv;
2037 	    SvREFCNT_dec(data.longest_fixed);
2038 	    longest_fixed_length = 0;
2039 	}
2040 	if (r->regstclass
2041 	    && (OP(r->regstclass) == REG_ANY || OP(r->regstclass) == SANY))
2042 	    r->regstclass = NULL;
2043 	if ((!(r->anchored_substr || r->anchored_utf8) || r->anchored_offset)
2044 	    && stclass_flag
2045 	    && !(data.start_class->flags & ANYOF_EOS)
2046 	    && !cl_is_anything(data.start_class))
2047 	{
2048 	    const I32 n = add_data(pRExC_state, 1, "f");
2049 
2050 	    Newx(RExC_rx->data->data[n], 1,
2051 		struct regnode_charclass_class);
2052 	    StructCopy(data.start_class,
2053 		       (struct regnode_charclass_class*)RExC_rx->data->data[n],
2054 		       struct regnode_charclass_class);
2055 	    r->regstclass = (regnode*)RExC_rx->data->data[n];
2056 	    r->reganch &= ~ROPT_SKIP;	/* Used in find_byclass(). */
2057 	    PL_regdata = r->data; /* for regprop() */
2058 	    DEBUG_r({ SV *sv = sv_newmortal();
2059 	              regprop(sv, (regnode*)data.start_class);
2060 		      PerlIO_printf(Perl_debug_log,
2061 				    "synthetic stclass \"%s\".\n",
2062 				    SvPVX_const(sv));});
2063 	}
2064 
2065 	/* A temporary algorithm prefers floated substr to fixed one to dig more info. */
2066 	if (longest_fixed_length > longest_float_length) {
2067 	    r->check_substr = r->anchored_substr;
2068 	    r->check_utf8 = r->anchored_utf8;
2069 	    r->check_offset_min = r->check_offset_max = r->anchored_offset;
2070 	    if (r->reganch & ROPT_ANCH_SINGLE)
2071 		r->reganch |= ROPT_NOSCAN;
2072 	}
2073 	else {
2074 	    r->check_substr = r->float_substr;
2075 	    r->check_utf8 = r->float_utf8;
2076 	    r->check_offset_min = data.offset_float_min;
2077 	    r->check_offset_max = data.offset_float_max;
2078 	}
2079 	/* XXXX Currently intuiting is not compatible with ANCH_GPOS.
2080 	   This should be changed ASAP!  */
2081 	if ((r->check_substr || r->check_utf8) && !(r->reganch & ROPT_ANCH_GPOS)) {
2082 	    r->reganch |= RE_USE_INTUIT;
2083 	    if (SvTAIL(r->check_substr ? r->check_substr : r->check_utf8))
2084 		r->reganch |= RE_INTUIT_TAIL;
2085 	}
2086     }
2087     else {
2088 	/* Several toplevels. Best we can is to set minlen. */
2089 	I32 fake;
2090 	struct regnode_charclass_class ch_class;
2091 	I32 last_close = 0;
2092 
2093 	DEBUG_r(PerlIO_printf(Perl_debug_log, "\n"));
2094 	scan = r->program + 1;
2095 	cl_init(pRExC_state, &ch_class);
2096 	data.start_class = &ch_class;
2097 	data.last_closep = &last_close;
2098 	minlen = study_chunk(pRExC_state, &scan, &fake, scan + RExC_size, &data, SCF_DO_STCLASS_AND|SCF_WHILEM_VISITED_POS);
2099 	r->check_substr = r->check_utf8 = r->anchored_substr = r->anchored_utf8
2100 		= r->float_substr = r->float_utf8 = Nullsv;
2101 	if (!(data.start_class->flags & ANYOF_EOS)
2102 	    && !cl_is_anything(data.start_class))
2103 	{
2104 	    const I32 n = add_data(pRExC_state, 1, "f");
2105 
2106 	    Newx(RExC_rx->data->data[n], 1,
2107 		struct regnode_charclass_class);
2108 	    StructCopy(data.start_class,
2109 		       (struct regnode_charclass_class*)RExC_rx->data->data[n],
2110 		       struct regnode_charclass_class);
2111 	    r->regstclass = (regnode*)RExC_rx->data->data[n];
2112 	    r->reganch &= ~ROPT_SKIP;	/* Used in find_byclass(). */
2113 	    DEBUG_r({ SV* sv = sv_newmortal();
2114 	              regprop(sv, (regnode*)data.start_class);
2115 		      PerlIO_printf(Perl_debug_log,
2116 				    "synthetic stclass \"%s\".\n",
2117 				    SvPVX_const(sv));});
2118 	}
2119     }
2120 
2121     r->minlen = minlen;
2122     if (RExC_seen & REG_SEEN_GPOS)
2123 	r->reganch |= ROPT_GPOS_SEEN;
2124     if (RExC_seen & REG_SEEN_LOOKBEHIND)
2125 	r->reganch |= ROPT_LOOKBEHIND_SEEN;
2126     if (RExC_seen & REG_SEEN_EVAL)
2127 	r->reganch |= ROPT_EVAL_SEEN;
2128     if (RExC_seen & REG_SEEN_CANY)
2129 	r->reganch |= ROPT_CANY_SEEN;
2130     Newxz(r->startp, RExC_npar, I32);
2131     Newxz(r->endp, RExC_npar, I32);
2132     PL_regdata = r->data; /* for regprop() */
2133     DEBUG_r(regdump(r));
2134     return(r);
2135 }
2136 
2137 /*
2138  - reg - regular expression, i.e. main body or parenthesized thing
2139  *
2140  * Caller must absorb opening parenthesis.
2141  *
2142  * Combining parenthesis handling with the base level of regular expression
2143  * is a trifle forced, but the need to tie the tails of the branches to what
2144  * follows makes it hard to avoid.
2145  */
2146 STATIC regnode *
2147 S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp)
2148     /* paren: Parenthesized? 0=top, 1=(, inside: changed to letter. */
2149 {
2150     register regnode *ret;		/* Will be the head of the group. */
2151     register regnode *br;
2152     register regnode *lastbr;
2153     register regnode *ender = 0;
2154     register I32 parno = 0;
2155     I32 flags, oregflags = RExC_flags, have_branch = 0, open = 0;
2156 
2157     /* for (?g), (?gc), and (?o) warnings; warning
2158        about (?c) will warn about (?g) -- japhy    */
2159 
2160     I32 wastedflags = 0x00,
2161         wasted_o    = 0x01,
2162         wasted_g    = 0x02,
2163         wasted_gc   = 0x02 | 0x04,
2164         wasted_c    = 0x04;
2165 
2166     char * parse_start = RExC_parse; /* MJD */
2167     char * const oregcomp_parse = RExC_parse;
2168     char c;
2169 
2170     *flagp = 0;				/* Tentatively. */
2171 
2172 
2173     /* Make an OPEN node, if parenthesized. */
2174     if (paren) {
2175 	if (*RExC_parse == '?') { /* (?...) */
2176 	    U32 posflags = 0, negflags = 0;
2177 	    U32 *flagsp = &posflags;
2178 	    int logical = 0;
2179 	    const char * const seqstart = RExC_parse;
2180 
2181 	    RExC_parse++;
2182 	    paren = *RExC_parse++;
2183 	    ret = NULL;			/* For look-ahead/behind. */
2184 	    switch (paren) {
2185 	    case '<':           /* (?<...) */
2186 		RExC_seen |= REG_SEEN_LOOKBEHIND;
2187 		if (*RExC_parse == '!')
2188 		    paren = ',';
2189 		if (*RExC_parse != '=' && *RExC_parse != '!')
2190 		    goto unknown;
2191 		RExC_parse++;
2192 	    case '=':           /* (?=...) */
2193 	    case '!':           /* (?!...) */
2194 		RExC_seen_zerolen++;
2195 	    case ':':           /* (?:...) */
2196 	    case '>':           /* (?>...) */
2197 		break;
2198 	    case '$':           /* (?$...) */
2199 	    case '@':           /* (?@...) */
2200 		vFAIL2("Sequence (?%c...) not implemented", (int)paren);
2201 		break;
2202 	    case '#':           /* (?#...) */
2203 		while (*RExC_parse && *RExC_parse != ')')
2204 		    RExC_parse++;
2205 		if (*RExC_parse != ')')
2206 		    FAIL("Sequence (?#... not terminated");
2207 		nextchar(pRExC_state);
2208 		*flagp = TRYAGAIN;
2209 		return NULL;
2210 	    case 'p':           /* (?p...) */
2211 		if (SIZE_ONLY && ckWARN2(WARN_DEPRECATED, WARN_REGEXP))
2212 		    vWARNdep(RExC_parse, "(?p{}) is deprecated - use (??{})");
2213 		/* FALL THROUGH*/
2214 	    case '?':           /* (??...) */
2215 		logical = 1;
2216 		if (*RExC_parse != '{')
2217 		    goto unknown;
2218 		paren = *RExC_parse++;
2219 		/* FALL THROUGH */
2220 	    case '{':           /* (?{...}) */
2221 	    {
2222 		I32 count = 1, n = 0;
2223 		char c;
2224 		char *s = RExC_parse;
2225 		SV *sv;
2226 		OP_4tree *sop, *rop;
2227 
2228 		RExC_seen_zerolen++;
2229 		RExC_seen |= REG_SEEN_EVAL;
2230 		while (count && (c = *RExC_parse)) {
2231 		    if (c == '\\' && RExC_parse[1])
2232 			RExC_parse++;
2233 		    else if (c == '{')
2234 			count++;
2235 		    else if (c == '}')
2236 			count--;
2237 		    RExC_parse++;
2238 		}
2239 		if (*RExC_parse != ')')
2240 		{
2241 		    RExC_parse = s;
2242 		    vFAIL("Sequence (?{...}) not terminated or not {}-balanced");
2243 		}
2244 		if (!SIZE_ONLY) {
2245 		    PAD *pad;
2246 
2247 		    if (RExC_parse - 1 - s)
2248 			sv = newSVpvn(s, RExC_parse - 1 - s);
2249 		    else
2250 			sv = newSVpvn("", 0);
2251 
2252 		    ENTER;
2253 		    Perl_save_re_context(aTHX);
2254 		    rop = sv_compile_2op(sv, &sop, "re", &pad);
2255 		    sop->op_private |= OPpREFCOUNTED;
2256 		    /* re_dup will OpREFCNT_inc */
2257 		    OpREFCNT_set(sop, 1);
2258 		    LEAVE;
2259 
2260 		    n = add_data(pRExC_state, 3, "nop");
2261 		    RExC_rx->data->data[n] = (void*)rop;
2262 		    RExC_rx->data->data[n+1] = (void*)sop;
2263 		    RExC_rx->data->data[n+2] = (void*)pad;
2264 		    SvREFCNT_dec(sv);
2265 		}
2266 		else {						/* First pass */
2267 		    if (PL_reginterp_cnt < ++RExC_seen_evals
2268 			&& IN_PERL_RUNTIME)
2269 			/* No compiled RE interpolated, has runtime
2270 			   components ===> unsafe.  */
2271 			FAIL("Eval-group not allowed at runtime, use re 'eval'");
2272 		    if (PL_tainting && PL_tainted)
2273 			FAIL("Eval-group in insecure regular expression");
2274 		}
2275 
2276 		nextchar(pRExC_state);
2277 		if (logical) {
2278 		    ret = reg_node(pRExC_state, LOGICAL);
2279 		    if (!SIZE_ONLY)
2280 			ret->flags = 2;
2281 		    regtail(pRExC_state, ret, reganode(pRExC_state, EVAL, n));
2282                     /* deal with the length of this later - MJD */
2283 		    return ret;
2284 		}
2285 		ret = reganode(pRExC_state, EVAL, n);
2286 		Set_Node_Length(ret, RExC_parse - parse_start + 1);
2287 		Set_Node_Offset(ret, parse_start);
2288 		return ret;
2289 	    }
2290 	    case '(':           /* (?(?{...})...) and (?(?=...)...) */
2291 	    {
2292 		if (RExC_parse[0] == '?') {        /* (?(?...)) */
2293 		    if (RExC_parse[1] == '=' || RExC_parse[1] == '!'
2294 			|| RExC_parse[1] == '<'
2295 			|| RExC_parse[1] == '{') { /* Lookahead or eval. */
2296 			I32 flag;
2297 
2298 			ret = reg_node(pRExC_state, LOGICAL);
2299 			if (!SIZE_ONLY)
2300 			    ret->flags = 1;
2301 			regtail(pRExC_state, ret, reg(pRExC_state, 1, &flag));
2302 			goto insert_if;
2303 		    }
2304 		}
2305 		else if (RExC_parse[0] >= '1' && RExC_parse[0] <= '9' ) {
2306                     /* (?(1)...) */
2307 		    parno = atoi(RExC_parse++);
2308 
2309 		    while (isDIGIT(*RExC_parse))
2310 			RExC_parse++;
2311                     ret = reganode(pRExC_state, GROUPP, parno);
2312 
2313 		    if ((c = *nextchar(pRExC_state)) != ')')
2314 			vFAIL("Switch condition not recognized");
2315 		  insert_if:
2316 		    regtail(pRExC_state, ret, reganode(pRExC_state, IFTHEN, 0));
2317 		    br = regbranch(pRExC_state, &flags, 1);
2318 		    if (br == NULL)
2319 			br = reganode(pRExC_state, LONGJMP, 0);
2320 		    else
2321 			regtail(pRExC_state, br, reganode(pRExC_state, LONGJMP, 0));
2322 		    c = *nextchar(pRExC_state);
2323 		    if (flags&HASWIDTH)
2324 			*flagp |= HASWIDTH;
2325 		    if (c == '|') {
2326 			lastbr = reganode(pRExC_state, IFTHEN, 0); /* Fake one for optimizer. */
2327 			regbranch(pRExC_state, &flags, 1);
2328 			regtail(pRExC_state, ret, lastbr);
2329 		 	if (flags&HASWIDTH)
2330 			    *flagp |= HASWIDTH;
2331 			c = *nextchar(pRExC_state);
2332 		    }
2333 		    else
2334 			lastbr = NULL;
2335 		    if (c != ')')
2336 			vFAIL("Switch (?(condition)... contains too many branches");
2337 		    ender = reg_node(pRExC_state, TAIL);
2338 		    regtail(pRExC_state, br, ender);
2339 		    if (lastbr) {
2340 			regtail(pRExC_state, lastbr, ender);
2341 			regtail(pRExC_state, NEXTOPER(NEXTOPER(lastbr)), ender);
2342 		    }
2343 		    else
2344 			regtail(pRExC_state, ret, ender);
2345 		    return ret;
2346 		}
2347 		else {
2348 		    vFAIL2("Unknown switch condition (?(%.2s", RExC_parse);
2349 		}
2350 	    }
2351             case 0:
2352 		RExC_parse--; /* for vFAIL to print correctly */
2353                 vFAIL("Sequence (? incomplete");
2354                 break;
2355 	    default:
2356 		--RExC_parse;
2357 	      parse_flags:      /* (?i) */
2358 		while (*RExC_parse && strchr("iogcmsx", *RExC_parse)) {
2359 		    /* (?g), (?gc) and (?o) are useless here
2360 		       and must be globally applied -- japhy */
2361 
2362 		    if (*RExC_parse == 'o' || *RExC_parse == 'g') {
2363 			if (SIZE_ONLY && ckWARN(WARN_REGEXP)) {
2364 			    I32 wflagbit = *RExC_parse == 'o' ? wasted_o : wasted_g;
2365 			    if (! (wastedflags & wflagbit) ) {
2366 				wastedflags |= wflagbit;
2367 				vWARN5(
2368 				    RExC_parse + 1,
2369 				    "Useless (%s%c) - %suse /%c modifier",
2370 				    flagsp == &negflags ? "?-" : "?",
2371 				    *RExC_parse,
2372 				    flagsp == &negflags ? "don't " : "",
2373 				    *RExC_parse
2374 				);
2375 			    }
2376 			}
2377 		    }
2378 		    else if (*RExC_parse == 'c') {
2379 			if (SIZE_ONLY && ckWARN(WARN_REGEXP)) {
2380 			    if (! (wastedflags & wasted_c) ) {
2381 				wastedflags |= wasted_gc;
2382 				vWARN3(
2383 				    RExC_parse + 1,
2384 				    "Useless (%sc) - %suse /gc modifier",
2385 				    flagsp == &negflags ? "?-" : "?",
2386 				    flagsp == &negflags ? "don't " : ""
2387 				);
2388 			    }
2389 			}
2390 		    }
2391 		    else { pmflag(flagsp, *RExC_parse); }
2392 
2393 		    ++RExC_parse;
2394 		}
2395 		if (*RExC_parse == '-') {
2396 		    flagsp = &negflags;
2397 		    wastedflags = 0;  /* reset so (?g-c) warns twice */
2398 		    ++RExC_parse;
2399 		    goto parse_flags;
2400 		}
2401 		RExC_flags |= posflags;
2402 		RExC_flags &= ~negflags;
2403 		if (*RExC_parse == ':') {
2404 		    RExC_parse++;
2405 		    paren = ':';
2406 		    break;
2407 		}
2408 	      unknown:
2409 		if (*RExC_parse != ')') {
2410 		    RExC_parse++;
2411 		    vFAIL3("Sequence (%.*s...) not recognized", RExC_parse-seqstart, seqstart);
2412 		}
2413 		nextchar(pRExC_state);
2414 		*flagp = TRYAGAIN;
2415 		return NULL;
2416 	    }
2417 	}
2418 	else {                  /* (...) */
2419 	    parno = RExC_npar;
2420 	    RExC_npar++;
2421 	    ret = reganode(pRExC_state, OPEN, parno);
2422             Set_Node_Length(ret, 1); /* MJD */
2423             Set_Node_Offset(ret, RExC_parse); /* MJD */
2424 	    open = 1;
2425 	}
2426     }
2427     else                        /* ! paren */
2428 	ret = NULL;
2429 
2430     /* Pick up the branches, linking them together. */
2431     parse_start = RExC_parse;   /* MJD */
2432     br = regbranch(pRExC_state, &flags, 1);
2433     /*     branch_len = (paren != 0); */
2434 
2435     if (br == NULL)
2436 	return(NULL);
2437     if (*RExC_parse == '|') {
2438 	if (!SIZE_ONLY && RExC_extralen) {
2439 	    reginsert(pRExC_state, BRANCHJ, br);
2440 	}
2441 	else {                  /* MJD */
2442 	    reginsert(pRExC_state, BRANCH, br);
2443             Set_Node_Length(br, paren != 0);
2444             Set_Node_Offset_To_R(br-RExC_emit_start, parse_start-RExC_start);
2445         }
2446 	have_branch = 1;
2447 	if (SIZE_ONLY)
2448 	    RExC_extralen += 1;		/* For BRANCHJ-BRANCH. */
2449     }
2450     else if (paren == ':') {
2451 	*flagp |= flags&SIMPLE;
2452     }
2453     if (open) {				/* Starts with OPEN. */
2454 	regtail(pRExC_state, ret, br);		/* OPEN -> first. */
2455     }
2456     else if (paren != '?')		/* Not Conditional */
2457 	ret = br;
2458     *flagp |= flags & (SPSTART | HASWIDTH);
2459     lastbr = br;
2460     while (*RExC_parse == '|') {
2461 	if (!SIZE_ONLY && RExC_extralen) {
2462 	    ender = reganode(pRExC_state, LONGJMP,0);
2463 	    regtail(pRExC_state, NEXTOPER(NEXTOPER(lastbr)), ender); /* Append to the previous. */
2464 	}
2465 	if (SIZE_ONLY)
2466 	    RExC_extralen += 2;		/* Account for LONGJMP. */
2467 	nextchar(pRExC_state);
2468 	br = regbranch(pRExC_state, &flags, 0);
2469 
2470 	if (br == NULL)
2471 	    return(NULL);
2472 	regtail(pRExC_state, lastbr, br);		/* BRANCH -> BRANCH. */
2473 	lastbr = br;
2474 	if (flags&HASWIDTH)
2475 	    *flagp |= HASWIDTH;
2476 	*flagp |= flags&SPSTART;
2477     }
2478 
2479     if (have_branch || paren != ':') {
2480 	/* Make a closing node, and hook it on the end. */
2481 	switch (paren) {
2482 	case ':':
2483 	    ender = reg_node(pRExC_state, TAIL);
2484 	    break;
2485 	case 1:
2486 	    ender = reganode(pRExC_state, CLOSE, parno);
2487             Set_Node_Offset(ender,RExC_parse+1); /* MJD */
2488             Set_Node_Length(ender,1); /* MJD */
2489 	    break;
2490 	case '<':
2491 	case ',':
2492 	case '=':
2493 	case '!':
2494 	    *flagp &= ~HASWIDTH;
2495 	    /* FALL THROUGH */
2496 	case '>':
2497 	    ender = reg_node(pRExC_state, SUCCEED);
2498 	    break;
2499 	case 0:
2500 	    ender = reg_node(pRExC_state, END);
2501 	    break;
2502 	}
2503 	regtail(pRExC_state, lastbr, ender);
2504 
2505 	if (have_branch) {
2506 	    /* Hook the tails of the branches to the closing node. */
2507 	    for (br = ret; br != NULL; br = regnext(br)) {
2508 		regoptail(pRExC_state, br, ender);
2509 	    }
2510 	}
2511     }
2512 
2513     {
2514         const char *p;
2515         static const char parens[] = "=!<,>";
2516 
2517 	if (paren && (p = strchr(parens, paren))) {
2518 	    U8 node = ((p - parens) % 2) ? UNLESSM : IFMATCH;
2519 	    int flag = (p - parens) > 1;
2520 
2521 	    if (paren == '>')
2522 		node = SUSPEND, flag = 0;
2523 	    reginsert(pRExC_state, node,ret);
2524 	    Set_Node_Cur_Length(ret);
2525 	    Set_Node_Offset(ret, parse_start + 1);
2526 	    ret->flags = flag;
2527 	    regtail(pRExC_state, ret, reg_node(pRExC_state, TAIL));
2528 	}
2529     }
2530 
2531     /* Check for proper termination. */
2532     if (paren) {
2533 	RExC_flags = oregflags;
2534 	if (RExC_parse >= RExC_end || *nextchar(pRExC_state) != ')') {
2535 	    RExC_parse = oregcomp_parse;
2536 	    vFAIL("Unmatched (");
2537 	}
2538     }
2539     else if (!paren && RExC_parse < RExC_end) {
2540 	if (*RExC_parse == ')') {
2541 	    RExC_parse++;
2542 	    vFAIL("Unmatched )");
2543 	}
2544 	else
2545 	    FAIL("Junk on end of regexp");	/* "Can't happen". */
2546 	/* NOTREACHED */
2547     }
2548 
2549     return(ret);
2550 }
2551 
2552 /*
2553  - regbranch - one alternative of an | operator
2554  *
2555  * Implements the concatenation operator.
2556  */
2557 STATIC regnode *
2558 S_regbranch(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, I32 first)
2559 {
2560     register regnode *ret;
2561     register regnode *chain = NULL;
2562     register regnode *latest;
2563     I32 flags = 0, c = 0;
2564 
2565     if (first)
2566 	ret = NULL;
2567     else {
2568 	if (!SIZE_ONLY && RExC_extralen)
2569 	    ret = reganode(pRExC_state, BRANCHJ,0);
2570 	else {
2571 	    ret = reg_node(pRExC_state, BRANCH);
2572             Set_Node_Length(ret, 1);
2573         }
2574     }
2575 
2576     if (!first && SIZE_ONLY)
2577 	RExC_extralen += 1;			/* BRANCHJ */
2578 
2579     *flagp = WORST;			/* Tentatively. */
2580 
2581     RExC_parse--;
2582     nextchar(pRExC_state);
2583     while (RExC_parse < RExC_end && *RExC_parse != '|' && *RExC_parse != ')') {
2584 	flags &= ~TRYAGAIN;
2585 	latest = regpiece(pRExC_state, &flags);
2586 	if (latest == NULL) {
2587 	    if (flags & TRYAGAIN)
2588 		continue;
2589 	    return(NULL);
2590 	}
2591 	else if (ret == NULL)
2592 	    ret = latest;
2593 	*flagp |= flags&HASWIDTH;
2594 	if (chain == NULL) 	/* First piece. */
2595 	    *flagp |= flags&SPSTART;
2596 	else {
2597 	    RExC_naughty++;
2598 	    regtail(pRExC_state, chain, latest);
2599 	}
2600 	chain = latest;
2601 	c++;
2602     }
2603     if (chain == NULL) {	/* Loop ran zero times. */
2604 	chain = reg_node(pRExC_state, NOTHING);
2605 	if (ret == NULL)
2606 	    ret = chain;
2607     }
2608     if (c == 1) {
2609 	*flagp |= flags&SIMPLE;
2610     }
2611 
2612     return(ret);
2613 }
2614 
2615 /*
2616  - regpiece - something followed by possible [*+?]
2617  *
2618  * Note that the branching code sequences used for ? and the general cases
2619  * of * and + are somewhat optimized:  they use the same NOTHING node as
2620  * both the endmarker for their branch list and the body of the last branch.
2621  * It might seem that this node could be dispensed with entirely, but the
2622  * endmarker role is not redundant.
2623  */
2624 STATIC regnode *
2625 S_regpiece(pTHX_ RExC_state_t *pRExC_state, I32 *flagp)
2626 {
2627     register regnode *ret;
2628     register char op;
2629     register char *next;
2630     I32 flags;
2631     const char * const origparse = RExC_parse;
2632     char *maxpos;
2633     I32 min;
2634     I32 max = REG_INFTY;
2635     char *parse_start;
2636 
2637     ret = regatom(pRExC_state, &flags);
2638     if (ret == NULL) {
2639 	if (flags & TRYAGAIN)
2640 	    *flagp |= TRYAGAIN;
2641 	return(NULL);
2642     }
2643 
2644     op = *RExC_parse;
2645 
2646     if (op == '{' && regcurly(RExC_parse)) {
2647         parse_start = RExC_parse; /* MJD */
2648 	next = RExC_parse + 1;
2649 	maxpos = Nullch;
2650 	while (isDIGIT(*next) || *next == ',') {
2651 	    if (*next == ',') {
2652 		if (maxpos)
2653 		    break;
2654 		else
2655 		    maxpos = next;
2656 	    }
2657 	    next++;
2658 	}
2659 	if (*next == '}') {		/* got one */
2660 	    if (!maxpos)
2661 		maxpos = next;
2662 	    RExC_parse++;
2663 	    min = atoi(RExC_parse);
2664 	    if (*maxpos == ',')
2665 		maxpos++;
2666 	    else
2667 		maxpos = RExC_parse;
2668 	    max = atoi(maxpos);
2669 	    if (!max && *maxpos != '0')
2670 		max = REG_INFTY;		/* meaning "infinity" */
2671 	    else if (max >= REG_INFTY)
2672 		vFAIL2("Quantifier in {,} bigger than %d", REG_INFTY - 1);
2673 	    RExC_parse = next;
2674 	    nextchar(pRExC_state);
2675 
2676 	do_curly:
2677 	    if ((flags&SIMPLE)) {
2678 		RExC_naughty += 2 + RExC_naughty / 2;
2679 		reginsert(pRExC_state, CURLY, ret);
2680                 Set_Node_Offset(ret, parse_start+1); /* MJD */
2681                 Set_Node_Cur_Length(ret);
2682 	    }
2683 	    else {
2684 		regnode *w = reg_node(pRExC_state, WHILEM);
2685 
2686 		w->flags = 0;
2687 		regtail(pRExC_state, ret, w);
2688 		if (!SIZE_ONLY && RExC_extralen) {
2689 		    reginsert(pRExC_state, LONGJMP,ret);
2690 		    reginsert(pRExC_state, NOTHING,ret);
2691 		    NEXT_OFF(ret) = 3;	/* Go over LONGJMP. */
2692 		}
2693 		reginsert(pRExC_state, CURLYX,ret);
2694                                 /* MJD hk */
2695                 Set_Node_Offset(ret, parse_start+1);
2696                 Set_Node_Length(ret,
2697                                 op == '{' ? (RExC_parse - parse_start) : 1);
2698 
2699 		if (!SIZE_ONLY && RExC_extralen)
2700 		    NEXT_OFF(ret) = 3;	/* Go over NOTHING to LONGJMP. */
2701 		regtail(pRExC_state, ret, reg_node(pRExC_state, NOTHING));
2702 		if (SIZE_ONLY)
2703 		    RExC_whilem_seen++, RExC_extralen += 3;
2704 		RExC_naughty += 4 + RExC_naughty;	/* compound interest */
2705 	    }
2706 	    ret->flags = 0;
2707 
2708 	    if (min > 0)
2709 		*flagp = WORST;
2710 	    if (max > 0)
2711 		*flagp |= HASWIDTH;
2712 	    if (max && max < min)
2713 		vFAIL("Can't do {n,m} with n > m");
2714 	    if (!SIZE_ONLY) {
2715 		ARG1_SET(ret, (U16)min);
2716 		ARG2_SET(ret, (U16)max);
2717 	    }
2718 
2719 	    goto nest_check;
2720 	}
2721     }
2722 
2723     if (!ISMULT1(op)) {
2724 	*flagp = flags;
2725 	return(ret);
2726     }
2727 
2728 #if 0				/* Now runtime fix should be reliable. */
2729 
2730     /* if this is reinstated, don't forget to put this back into perldiag:
2731 
2732 	    =item Regexp *+ operand could be empty at {#} in regex m/%s/
2733 
2734 	   (F) The part of the regexp subject to either the * or + quantifier
2735            could match an empty string. The {#} shows in the regular
2736            expression about where the problem was discovered.
2737 
2738     */
2739 
2740     if (!(flags&HASWIDTH) && op != '?')
2741       vFAIL("Regexp *+ operand could be empty");
2742 #endif
2743 
2744     parse_start = RExC_parse;
2745     nextchar(pRExC_state);
2746 
2747     *flagp = (op != '+') ? (WORST|SPSTART|HASWIDTH) : (WORST|HASWIDTH);
2748 
2749     if (op == '*' && (flags&SIMPLE)) {
2750 	reginsert(pRExC_state, STAR, ret);
2751 	ret->flags = 0;
2752 	RExC_naughty += 4;
2753     }
2754     else if (op == '*') {
2755 	min = 0;
2756 	goto do_curly;
2757     }
2758     else if (op == '+' && (flags&SIMPLE)) {
2759 	reginsert(pRExC_state, PLUS, ret);
2760 	ret->flags = 0;
2761 	RExC_naughty += 3;
2762     }
2763     else if (op == '+') {
2764 	min = 1;
2765 	goto do_curly;
2766     }
2767     else if (op == '?') {
2768 	min = 0; max = 1;
2769 	goto do_curly;
2770     }
2771   nest_check:
2772     if (!SIZE_ONLY && !(flags&HASWIDTH) && max > REG_INFTY/3 && ckWARN(WARN_REGEXP)) {
2773 	vWARN3(RExC_parse,
2774 	       "%.*s matches null string many times",
2775 	       RExC_parse - origparse,
2776 	       origparse);
2777     }
2778 
2779     if (*RExC_parse == '?') {
2780 	nextchar(pRExC_state);
2781 	reginsert(pRExC_state, MINMOD, ret);
2782 	regtail(pRExC_state, ret, ret + NODE_STEP_REGNODE);
2783     }
2784     if (ISMULT2(RExC_parse)) {
2785 	RExC_parse++;
2786 	vFAIL("Nested quantifiers");
2787     }
2788 
2789     return(ret);
2790 }
2791 
2792 /*
2793  - regatom - the lowest level
2794  *
2795  * Optimization:  gobbles an entire sequence of ordinary characters so that
2796  * it can turn them into a single node, which is smaller to store and
2797  * faster to run.  Backslashed characters are exceptions, each becoming a
2798  * separate node; the code is simpler that way and it's not worth fixing.
2799  *
2800  * [Yes, it is worth fixing, some scripts can run twice the speed.] */
2801 STATIC regnode *
2802 S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp)
2803 {
2804     register regnode *ret = 0;
2805     I32 flags;
2806     char *parse_start = RExC_parse;
2807 
2808     *flagp = WORST;		/* Tentatively. */
2809 
2810 tryagain:
2811     switch (*RExC_parse) {
2812     case '^':
2813 	RExC_seen_zerolen++;
2814 	nextchar(pRExC_state);
2815 	if (RExC_flags & PMf_MULTILINE)
2816 	    ret = reg_node(pRExC_state, MBOL);
2817 	else if (RExC_flags & PMf_SINGLELINE)
2818 	    ret = reg_node(pRExC_state, SBOL);
2819 	else
2820 	    ret = reg_node(pRExC_state, BOL);
2821         Set_Node_Length(ret, 1); /* MJD */
2822 	break;
2823     case '$':
2824 	nextchar(pRExC_state);
2825 	if (*RExC_parse)
2826 	    RExC_seen_zerolen++;
2827 	if (RExC_flags & PMf_MULTILINE)
2828 	    ret = reg_node(pRExC_state, MEOL);
2829 	else if (RExC_flags & PMf_SINGLELINE)
2830 	    ret = reg_node(pRExC_state, SEOL);
2831 	else
2832 	    ret = reg_node(pRExC_state, EOL);
2833         Set_Node_Length(ret, 1); /* MJD */
2834 	break;
2835     case '.':
2836 	nextchar(pRExC_state);
2837 	if (RExC_flags & PMf_SINGLELINE)
2838 	    ret = reg_node(pRExC_state, SANY);
2839 	else
2840 	    ret = reg_node(pRExC_state, REG_ANY);
2841 	*flagp |= HASWIDTH|SIMPLE;
2842 	RExC_naughty++;
2843         Set_Node_Length(ret, 1); /* MJD */
2844 	break;
2845     case '[':
2846     {
2847 	char *oregcomp_parse = ++RExC_parse;
2848 	ret = regclass(pRExC_state);
2849 	if (*RExC_parse != ']') {
2850 	    RExC_parse = oregcomp_parse;
2851 	    vFAIL("Unmatched [");
2852 	}
2853 	nextchar(pRExC_state);
2854 	*flagp |= HASWIDTH|SIMPLE;
2855         Set_Node_Length(ret, RExC_parse - oregcomp_parse + 1); /* MJD */
2856 	break;
2857     }
2858     case '(':
2859 	nextchar(pRExC_state);
2860 	ret = reg(pRExC_state, 1, &flags);
2861 	if (ret == NULL) {
2862 		if (flags & TRYAGAIN) {
2863 		    if (RExC_parse == RExC_end) {
2864 			 /* Make parent create an empty node if needed. */
2865 			*flagp |= TRYAGAIN;
2866 			return(NULL);
2867 		    }
2868 		    goto tryagain;
2869 		}
2870 		return(NULL);
2871 	}
2872 	*flagp |= flags&(HASWIDTH|SPSTART|SIMPLE);
2873 	break;
2874     case '|':
2875     case ')':
2876 	if (flags & TRYAGAIN) {
2877 	    *flagp |= TRYAGAIN;
2878 	    return NULL;
2879 	}
2880 	vFAIL("Internal urp");
2881 				/* Supposed to be caught earlier. */
2882 	break;
2883     case '{':
2884 	if (!regcurly(RExC_parse)) {
2885 	    RExC_parse++;
2886 	    goto defchar;
2887 	}
2888 	/* FALL THROUGH */
2889     case '?':
2890     case '+':
2891     case '*':
2892 	RExC_parse++;
2893 	vFAIL("Quantifier follows nothing");
2894 	break;
2895     case '\\':
2896 	switch (*++RExC_parse) {
2897 	case 'A':
2898 	    RExC_seen_zerolen++;
2899 	    ret = reg_node(pRExC_state, SBOL);
2900 	    *flagp |= SIMPLE;
2901 	    nextchar(pRExC_state);
2902             Set_Node_Length(ret, 2); /* MJD */
2903 	    break;
2904 	case 'G':
2905 	    ret = reg_node(pRExC_state, GPOS);
2906 	    RExC_seen |= REG_SEEN_GPOS;
2907 	    *flagp |= SIMPLE;
2908 	    nextchar(pRExC_state);
2909             Set_Node_Length(ret, 2); /* MJD */
2910 	    break;
2911 	case 'Z':
2912 	    ret = reg_node(pRExC_state, SEOL);
2913 	    *flagp |= SIMPLE;
2914 	    RExC_seen_zerolen++;		/* Do not optimize RE away */
2915 	    nextchar(pRExC_state);
2916 	    break;
2917 	case 'z':
2918 	    ret = reg_node(pRExC_state, EOS);
2919 	    *flagp |= SIMPLE;
2920 	    RExC_seen_zerolen++;		/* Do not optimize RE away */
2921 	    nextchar(pRExC_state);
2922             Set_Node_Length(ret, 2); /* MJD */
2923 	    break;
2924 	case 'C':
2925 	    ret = reg_node(pRExC_state, CANY);
2926 	    RExC_seen |= REG_SEEN_CANY;
2927 	    *flagp |= HASWIDTH|SIMPLE;
2928 	    nextchar(pRExC_state);
2929             Set_Node_Length(ret, 2); /* MJD */
2930 	    break;
2931 	case 'X':
2932 	    ret = reg_node(pRExC_state, CLUMP);
2933 	    *flagp |= HASWIDTH;
2934 	    nextchar(pRExC_state);
2935             Set_Node_Length(ret, 2); /* MJD */
2936 	    break;
2937 	case 'w':
2938 	    ret = reg_node(pRExC_state, (U8)(LOC ? ALNUML     : ALNUM));
2939 	    *flagp |= HASWIDTH|SIMPLE;
2940 	    nextchar(pRExC_state);
2941             Set_Node_Length(ret, 2); /* MJD */
2942 	    break;
2943 	case 'W':
2944 	    ret = reg_node(pRExC_state, (U8)(LOC ? NALNUML    : NALNUM));
2945 	    *flagp |= HASWIDTH|SIMPLE;
2946 	    nextchar(pRExC_state);
2947             Set_Node_Length(ret, 2); /* MJD */
2948 	    break;
2949 	case 'b':
2950 	    RExC_seen_zerolen++;
2951 	    RExC_seen |= REG_SEEN_LOOKBEHIND;
2952 	    ret = reg_node(pRExC_state, (U8)(LOC ? BOUNDL     : BOUND));
2953 	    *flagp |= SIMPLE;
2954 	    nextchar(pRExC_state);
2955             Set_Node_Length(ret, 2); /* MJD */
2956 	    break;
2957 	case 'B':
2958 	    RExC_seen_zerolen++;
2959 	    RExC_seen |= REG_SEEN_LOOKBEHIND;
2960 	    ret = reg_node(pRExC_state, (U8)(LOC ? NBOUNDL    : NBOUND));
2961 	    *flagp |= SIMPLE;
2962 	    nextchar(pRExC_state);
2963             Set_Node_Length(ret, 2); /* MJD */
2964 	    break;
2965 	case 's':
2966 	    ret = reg_node(pRExC_state, (U8)(LOC ? SPACEL     : SPACE));
2967 	    *flagp |= HASWIDTH|SIMPLE;
2968 	    nextchar(pRExC_state);
2969             Set_Node_Length(ret, 2); /* MJD */
2970 	    break;
2971 	case 'S':
2972 	    ret = reg_node(pRExC_state, (U8)(LOC ? NSPACEL    : NSPACE));
2973 	    *flagp |= HASWIDTH|SIMPLE;
2974 	    nextchar(pRExC_state);
2975             Set_Node_Length(ret, 2); /* MJD */
2976 	    break;
2977 	case 'd':
2978 	    ret = reg_node(pRExC_state, DIGIT);
2979 	    *flagp |= HASWIDTH|SIMPLE;
2980 	    nextchar(pRExC_state);
2981             Set_Node_Length(ret, 2); /* MJD */
2982 	    break;
2983 	case 'D':
2984 	    ret = reg_node(pRExC_state, NDIGIT);
2985 	    *flagp |= HASWIDTH|SIMPLE;
2986 	    nextchar(pRExC_state);
2987             Set_Node_Length(ret, 2); /* MJD */
2988 	    break;
2989 	case 'p':
2990 	case 'P':
2991 	    {
2992 		char* oldregxend = RExC_end;
2993 		char* parse_start = RExC_parse - 2;
2994 
2995 		if (RExC_parse[1] == '{') {
2996 		  /* a lovely hack--pretend we saw [\pX] instead */
2997 		    RExC_end = strchr(RExC_parse, '}');
2998 		    if (!RExC_end) {
2999 		        U8 c = (U8)*RExC_parse;
3000 			RExC_parse += 2;
3001 			RExC_end = oldregxend;
3002 			vFAIL2("Missing right brace on \\%c{}", c);
3003 		    }
3004 		    RExC_end++;
3005 		}
3006 		else {
3007 		    RExC_end = RExC_parse + 2;
3008 		    if (RExC_end > oldregxend)
3009 			RExC_end = oldregxend;
3010 		}
3011 		RExC_parse--;
3012 
3013 		ret = regclass(pRExC_state);
3014 
3015 		RExC_end = oldregxend;
3016 		RExC_parse--;
3017 
3018 		Set_Node_Offset(ret, parse_start + 2);
3019 		Set_Node_Cur_Length(ret);
3020 		nextchar(pRExC_state);
3021 		*flagp |= HASWIDTH|SIMPLE;
3022 	    }
3023 	    break;
3024 	case 'n':
3025 	case 'r':
3026 	case 't':
3027 	case 'f':
3028 	case 'e':
3029 	case 'a':
3030 	case 'x':
3031 	case 'c':
3032 	case '0':
3033 	    goto defchar;
3034 	case '1': case '2': case '3': case '4':
3035 	case '5': case '6': case '7': case '8': case '9':
3036 	    {
3037 		const I32 num = atoi(RExC_parse);
3038 
3039 		if (num > 9 && num >= RExC_npar)
3040 		    goto defchar;
3041 		else {
3042                     char * parse_start = RExC_parse - 1; /* MJD */
3043 		    while (isDIGIT(*RExC_parse))
3044 			RExC_parse++;
3045 
3046 		    if (!SIZE_ONLY && num > (I32)RExC_rx->nparens)
3047 			vFAIL("Reference to nonexistent group");
3048 		    RExC_sawback = 1;
3049 		    ret = reganode(pRExC_state,
3050 				   (U8)(FOLD ? (LOC ? REFFL : REFF) : REF),
3051 				   num);
3052 		    *flagp |= HASWIDTH;
3053 
3054                     /* override incorrect value set in reganode MJD */
3055                     Set_Node_Offset(ret, parse_start+1);
3056                     Set_Node_Cur_Length(ret); /* MJD */
3057 		    RExC_parse--;
3058 		    nextchar(pRExC_state);
3059 		}
3060 	    }
3061 	    break;
3062 	case '\0':
3063 	    if (RExC_parse >= RExC_end)
3064 		FAIL("Trailing \\");
3065 	    /* FALL THROUGH */
3066 	default:
3067 	    /* Do not generate "unrecognized" warnings here, we fall
3068 	       back into the quick-grab loop below */
3069 	    parse_start--;
3070 	    goto defchar;
3071 	}
3072 	break;
3073 
3074     case '#':
3075 	if (RExC_flags & PMf_EXTENDED) {
3076 	    while (RExC_parse < RExC_end && *RExC_parse != '\n') RExC_parse++;
3077 	    if (RExC_parse < RExC_end)
3078 		goto tryagain;
3079 	}
3080 	/* FALL THROUGH */
3081 
3082     default: {
3083 	    register STRLEN len;
3084 	    register UV ender;
3085 	    register char *p;
3086 	    char *oldp, *s;
3087 	    STRLEN foldlen;
3088 	    U8 tmpbuf[UTF8_MAXBYTES_CASE+1], *foldbuf;
3089 
3090             parse_start = RExC_parse - 1;
3091 
3092 	    RExC_parse++;
3093 
3094 	defchar:
3095 	    ender = 0;
3096 	    ret = reg_node(pRExC_state,
3097 			   (U8)(FOLD ? (LOC ? EXACTFL : EXACTF) : EXACT));
3098 	    s = STRING(ret);
3099 	    for (len = 0, p = RExC_parse - 1;
3100 	      len < 127 && p < RExC_end;
3101 	      len++)
3102 	    {
3103 		oldp = p;
3104 
3105 		if (RExC_flags & PMf_EXTENDED)
3106 		    p = regwhite(p, RExC_end);
3107 		switch (*p) {
3108 		case '^':
3109 		case '$':
3110 		case '.':
3111 		case '[':
3112 		case '(':
3113 		case ')':
3114 		case '|':
3115 		    goto loopdone;
3116 		case '\\':
3117 		    switch (*++p) {
3118 		    case 'A':
3119 		    case 'C':
3120 		    case 'X':
3121 		    case 'G':
3122 		    case 'Z':
3123 		    case 'z':
3124 		    case 'w':
3125 		    case 'W':
3126 		    case 'b':
3127 		    case 'B':
3128 		    case 's':
3129 		    case 'S':
3130 		    case 'd':
3131 		    case 'D':
3132 		    case 'p':
3133 		    case 'P':
3134 			--p;
3135 			goto loopdone;
3136 		    case 'n':
3137 			ender = '\n';
3138 			p++;
3139 			break;
3140 		    case 'r':
3141 			ender = '\r';
3142 			p++;
3143 			break;
3144 		    case 't':
3145 			ender = '\t';
3146 			p++;
3147 			break;
3148 		    case 'f':
3149 			ender = '\f';
3150 			p++;
3151 			break;
3152 		    case 'e':
3153 			  ender = ASCII_TO_NATIVE('\033');
3154 			p++;
3155 			break;
3156 		    case 'a':
3157 			  ender = ASCII_TO_NATIVE('\007');
3158 			p++;
3159 			break;
3160 		    case 'x':
3161 			if (*++p == '{') {
3162 			    char* const e = strchr(p, '}');
3163 
3164 			    if (!e) {
3165 				RExC_parse = p + 1;
3166 				vFAIL("Missing right brace on \\x{}");
3167 			    }
3168 			    else {
3169                                 I32 flags = PERL_SCAN_ALLOW_UNDERSCORES
3170                                     | PERL_SCAN_DISALLOW_PREFIX;
3171                                 STRLEN numlen = e - p - 1;
3172 				ender = grok_hex(p + 1, &numlen, &flags, NULL);
3173 				if (ender > 0xff)
3174 				    RExC_utf8 = 1;
3175 				p = e + 1;
3176 			    }
3177 			}
3178 			else {
3179                             I32 flags = PERL_SCAN_DISALLOW_PREFIX;
3180 			    STRLEN numlen = 2;
3181 			    ender = grok_hex(p, &numlen, &flags, NULL);
3182 			    p += numlen;
3183 			}
3184 			break;
3185 		    case 'c':
3186 			p++;
3187 			ender = UCHARAT(p++);
3188 			ender = toCTRL(ender);
3189 			break;
3190 		    case '0': case '1': case '2': case '3':case '4':
3191 		    case '5': case '6': case '7': case '8':case '9':
3192 			if (*p == '0' ||
3193 			  (isDIGIT(p[1]) && atoi(p) >= RExC_npar) ) {
3194                             I32 flags = 0;
3195 			    STRLEN numlen = 3;
3196 			    ender = grok_oct(p, &numlen, &flags, NULL);
3197 			    p += numlen;
3198 			}
3199 			else {
3200 			    --p;
3201 			    goto loopdone;
3202 			}
3203 			break;
3204 		    case '\0':
3205 			if (p >= RExC_end)
3206 			    FAIL("Trailing \\");
3207 			/* FALL THROUGH */
3208 		    default:
3209 			if (!SIZE_ONLY&& isALPHA(*p) && ckWARN(WARN_REGEXP))
3210 			    vWARN2(p + 1, "Unrecognized escape \\%c passed through", UCHARAT(p));
3211 			goto normal_default;
3212 		    }
3213 		    break;
3214 		default:
3215 		  normal_default:
3216 		    if (UTF8_IS_START(*p) && UTF) {
3217 			STRLEN numlen;
3218 			ender = utf8n_to_uvchr((U8*)p, RExC_end - p,
3219 					       &numlen, 0);
3220 			p += numlen;
3221 		    }
3222 		    else
3223 			ender = *p++;
3224 		    break;
3225 		}
3226 		if (RExC_flags & PMf_EXTENDED)
3227 		    p = regwhite(p, RExC_end);
3228 		if (UTF && FOLD) {
3229 		    /* Prime the casefolded buffer. */
3230 		    ender = toFOLD_uni(ender, tmpbuf, &foldlen);
3231 		}
3232 		if (ISMULT2(p)) { /* Back off on ?+*. */
3233 		    if (len)
3234 			p = oldp;
3235 		    else if (UTF) {
3236 		         STRLEN unilen;
3237 
3238 			 if (FOLD) {
3239 			      /* Emit all the Unicode characters. */
3240 			      STRLEN numlen;
3241 			      for (foldbuf = tmpbuf;
3242 				   foldlen;
3243 				   foldlen -= numlen) {
3244 				   ender = utf8_to_uvchr(foldbuf, &numlen);
3245 				   if (numlen > 0) {
3246 					reguni(pRExC_state, ender, s, &unilen);
3247 					s       += unilen;
3248 					len     += unilen;
3249 					/* In EBCDIC the numlen
3250 					 * and unilen can differ. */
3251 					foldbuf += numlen;
3252 					if (numlen >= foldlen)
3253 					     break;
3254 				   }
3255 				   else
3256 					break; /* "Can't happen." */
3257 			      }
3258 			 }
3259 			 else {
3260 			      reguni(pRExC_state, ender, s, &unilen);
3261 			      if (unilen > 0) {
3262 				   s   += unilen;
3263 				   len += unilen;
3264 			      }
3265 			 }
3266 		    }
3267 		    else {
3268 			len++;
3269 			REGC((char)ender, s++);
3270 		    }
3271 		    break;
3272 		}
3273 		if (UTF) {
3274 		     STRLEN unilen;
3275 
3276 		     if (FOLD) {
3277 		          /* Emit all the Unicode characters. */
3278 			  STRLEN numlen;
3279 			  for (foldbuf = tmpbuf;
3280 			       foldlen;
3281 			       foldlen -= numlen) {
3282 			       ender = utf8_to_uvchr(foldbuf, &numlen);
3283 			       if (numlen > 0) {
3284 				    reguni(pRExC_state, ender, s, &unilen);
3285 				    len     += unilen;
3286 				    s       += unilen;
3287 				    /* In EBCDIC the numlen
3288 				     * and unilen can differ. */
3289 				    foldbuf += numlen;
3290 				    if (numlen >= foldlen)
3291 					 break;
3292 			       }
3293 			       else
3294 				    break;
3295 			  }
3296 		     }
3297 		     else {
3298 			  reguni(pRExC_state, ender, s, &unilen);
3299 			  if (unilen > 0) {
3300 			       s   += unilen;
3301 			       len += unilen;
3302 			  }
3303 		     }
3304 		     len--;
3305 		}
3306 		else
3307 		    REGC((char)ender, s++);
3308 	    }
3309 	loopdone:
3310 	    RExC_parse = p - 1;
3311             Set_Node_Cur_Length(ret); /* MJD */
3312 	    nextchar(pRExC_state);
3313 	    {
3314 		/* len is STRLEN which is unsigned, need to copy to signed */
3315 		IV iv = len;
3316 		if (iv < 0)
3317 		    vFAIL("Internal disaster");
3318 	    }
3319 	    if (len > 0)
3320 		*flagp |= HASWIDTH;
3321 	    if (len == 1 && UNI_IS_INVARIANT(ender))
3322 		*flagp |= SIMPLE;
3323 	    if (!SIZE_ONLY)
3324 		STR_LEN(ret) = len;
3325 	    if (SIZE_ONLY)
3326 		RExC_size += STR_SZ(len);
3327 	    else
3328 		RExC_emit += STR_SZ(len);
3329 	}
3330 	break;
3331     }
3332 
3333     /* If the encoding pragma is in effect recode the text of
3334      * any EXACT-kind nodes. */
3335     if (PL_encoding && PL_regkind[(U8)OP(ret)] == EXACT) {
3336 	STRLEN oldlen = STR_LEN(ret);
3337 	SV *sv        = sv_2mortal(newSVpvn(STRING(ret), oldlen));
3338 
3339 	if (RExC_utf8)
3340 	    SvUTF8_on(sv);
3341 	if (sv_utf8_downgrade(sv, TRUE)) {
3342 	    const char * const s = sv_recode_to_utf8(sv, PL_encoding);
3343 	    const STRLEN newlen = SvCUR(sv);
3344 
3345 	    if (SvUTF8(sv))
3346 		RExC_utf8 = 1;
3347 	    if (!SIZE_ONLY) {
3348 		DEBUG_r(PerlIO_printf(Perl_debug_log, "recode %*s to %*s\n",
3349 				      (int)oldlen, STRING(ret),
3350 				      (int)newlen, s));
3351 		Copy(s, STRING(ret), newlen, char);
3352 		STR_LEN(ret) += newlen - oldlen;
3353 		RExC_emit += STR_SZ(newlen) - STR_SZ(oldlen);
3354 	    } else
3355 		RExC_size += STR_SZ(newlen) - STR_SZ(oldlen);
3356 	}
3357     }
3358 
3359     return(ret);
3360 }
3361 
3362 STATIC char *
3363 S_regwhite(pTHX_ char *p, const char *e)
3364 {
3365     while (p < e) {
3366 	if (isSPACE(*p))
3367 	    ++p;
3368 	else if (*p == '#') {
3369 	    do {
3370 		p++;
3371 	    } while (p < e && *p != '\n');
3372 	}
3373 	else
3374 	    break;
3375     }
3376     return p;
3377 }
3378 
3379 /* Parse POSIX character classes: [[:foo:]], [[=foo=]], [[.foo.]].
3380    Character classes ([:foo:]) can also be negated ([:^foo:]).
3381    Returns a named class id (ANYOF_XXX) if successful, -1 otherwise.
3382    Equivalence classes ([=foo=]) and composites ([.foo.]) are parsed,
3383    but trigger failures because they are currently unimplemented. */
3384 
3385 #define POSIXCC_DONE(c)   ((c) == ':')
3386 #define POSIXCC_NOTYET(c) ((c) == '=' || (c) == '.')
3387 #define POSIXCC(c) (POSIXCC_DONE(c) || POSIXCC_NOTYET(c))
3388 
3389 STATIC I32
3390 S_regpposixcc(pTHX_ RExC_state_t *pRExC_state, I32 value)
3391 {
3392     I32 namedclass = OOB_NAMEDCLASS;
3393 
3394     if (value == '[' && RExC_parse + 1 < RExC_end &&
3395 	/* I smell either [: or [= or [. -- POSIX has been here, right? */
3396 	POSIXCC(UCHARAT(RExC_parse))) {
3397 	const char c = UCHARAT(RExC_parse);
3398 	char* s = RExC_parse++;
3399 
3400 	while (RExC_parse < RExC_end && UCHARAT(RExC_parse) != c)
3401 	    RExC_parse++;
3402 	if (RExC_parse == RExC_end)
3403 	    /* Grandfather lone [:, [=, [. */
3404 	    RExC_parse = s;
3405 	else {
3406 	    const char* t = RExC_parse++; /* skip over the c */
3407 	    const char *posixcc;
3408 
3409 	    assert(*t == c);
3410 
3411   	    if (UCHARAT(RExC_parse) == ']') {
3412   		RExC_parse++; /* skip over the ending ] */
3413   		posixcc = s + 1;
3414 		if (*s == ':') {
3415 		    const I32 complement = *posixcc == '^' ? *posixcc++ : 0;
3416 		    const I32 skip = t - posixcc;
3417 
3418 		    /* Initially switch on the length of the name.  */
3419 		    switch (skip) {
3420 		    case 4:
3421 			if (memEQ(posixcc, "word", 4)) {
3422 			    /* this is not POSIX, this is the Perl \w */;
3423 			    namedclass
3424 				= complement ? ANYOF_NALNUM : ANYOF_ALNUM;
3425 			}
3426 			break;
3427 		    case 5:
3428 			/* Names all of length 5.  */
3429 			/* alnum alpha ascii blank cntrl digit graph lower
3430 			   print punct space upper  */
3431 			/* Offset 4 gives the best switch position.  */
3432 			switch (posixcc[4]) {
3433 			case 'a':
3434 			    if (memEQ(posixcc, "alph", 4)) {
3435 				/*                  a     */
3436 				namedclass
3437 				    = complement ? ANYOF_NALPHA : ANYOF_ALPHA;
3438 			    }
3439 			    break;
3440 			case 'e':
3441 			    if (memEQ(posixcc, "spac", 4)) {
3442 				/*                  e     */
3443 				namedclass
3444 				    = complement ? ANYOF_NPSXSPC : ANYOF_PSXSPC;
3445 			    }
3446 			    break;
3447 			case 'h':
3448 			    if (memEQ(posixcc, "grap", 4)) {
3449 				/*                  h     */
3450 				namedclass
3451 				    = complement ? ANYOF_NGRAPH : ANYOF_GRAPH;
3452 			    }
3453 			    break;
3454 			case 'i':
3455 			    if (memEQ(posixcc, "asci", 4)) {
3456 				/*                  i     */
3457 				namedclass
3458 				    = complement ? ANYOF_NASCII : ANYOF_ASCII;
3459 			    }
3460 			    break;
3461 			case 'k':
3462 			    if (memEQ(posixcc, "blan", 4)) {
3463 				/*                  k     */
3464 				namedclass
3465 				    = complement ? ANYOF_NBLANK : ANYOF_BLANK;
3466 			    }
3467 			    break;
3468 			case 'l':
3469 			    if (memEQ(posixcc, "cntr", 4)) {
3470 				/*                  l     */
3471 				namedclass
3472 				    = complement ? ANYOF_NCNTRL : ANYOF_CNTRL;
3473 			    }
3474 			    break;
3475 			case 'm':
3476 			    if (memEQ(posixcc, "alnu", 4)) {
3477 				/*                  m     */
3478 				namedclass
3479 				    = complement ? ANYOF_NALNUMC : ANYOF_ALNUMC;
3480 			    }
3481 			    break;
3482 			case 'r':
3483 			    if (memEQ(posixcc, "lowe", 4)) {
3484 				/*                  r     */
3485 				namedclass
3486 				    = complement ? ANYOF_NLOWER : ANYOF_LOWER;
3487 			    }
3488 			    if (memEQ(posixcc, "uppe", 4)) {
3489 				/*                  r     */
3490 				namedclass
3491 				    = complement ? ANYOF_NUPPER : ANYOF_UPPER;
3492 			    }
3493 			    break;
3494 			case 't':
3495 			    if (memEQ(posixcc, "digi", 4)) {
3496 				/*                  t     */
3497 				namedclass
3498 				    = complement ? ANYOF_NDIGIT : ANYOF_DIGIT;
3499 			    }
3500 			    if (memEQ(posixcc, "prin", 4)) {
3501 				/*                  t     */
3502 				namedclass
3503 				    = complement ? ANYOF_NPRINT : ANYOF_PRINT;
3504 			    }
3505 			    if (memEQ(posixcc, "punc", 4)) {
3506 				/*                  t     */
3507 				namedclass
3508 				    = complement ? ANYOF_NPUNCT : ANYOF_PUNCT;
3509 			    }
3510 			    break;
3511 			}
3512 			break;
3513 		    case 6:
3514 			if (memEQ(posixcc, "xdigit", 6)) {
3515 			    namedclass
3516 				= complement ? ANYOF_NXDIGIT : ANYOF_XDIGIT;
3517 			}
3518 			break;
3519 		    }
3520 
3521 		    if (namedclass == OOB_NAMEDCLASS)
3522 		    {
3523 			Simple_vFAIL3("POSIX class [:%.*s:] unknown",
3524 				      t - s - 1, s + 1);
3525 		    }
3526 		    assert (posixcc[skip] == ':');
3527 		    assert (posixcc[skip+1] == ']');
3528 		} else if (!SIZE_ONLY) {
3529 		    /* [[=foo=]] and [[.foo.]] are still future. */
3530 
3531 		    /* adjust RExC_parse so the warning shows after
3532 		       the class closes */
3533 		    while (UCHARAT(RExC_parse) && UCHARAT(RExC_parse) != ']')
3534 			RExC_parse++;
3535 		    Simple_vFAIL3("POSIX syntax [%c %c] is reserved for future extensions", c, c);
3536 		}
3537 	    } else {
3538 		/* Maternal grandfather:
3539 		 * "[:" ending in ":" but not in ":]" */
3540 		RExC_parse = s;
3541 	    }
3542 	}
3543     }
3544 
3545     return namedclass;
3546 }
3547 
3548 STATIC void
3549 S_checkposixcc(pTHX_ RExC_state_t *pRExC_state)
3550 {
3551     if (!SIZE_ONLY && POSIXCC(UCHARAT(RExC_parse))) {
3552 	const char *s = RExC_parse;
3553 	const char  c = *s++;
3554 
3555 	while(*s && isALNUM(*s))
3556 	    s++;
3557 	if (*s && c == *s && s[1] == ']') {
3558 	    if (ckWARN(WARN_REGEXP))
3559 		vWARN3(s+2,
3560 			"POSIX syntax [%c %c] belongs inside character classes",
3561 			c, c);
3562 
3563 	    /* [[=foo=]] and [[.foo.]] are still future. */
3564 	    if (POSIXCC_NOTYET(c)) {
3565 		/* adjust RExC_parse so the error shows after
3566 		   the class closes */
3567 		while (UCHARAT(RExC_parse) && UCHARAT(RExC_parse++) != ']')
3568 		    ;
3569 		Simple_vFAIL3("POSIX syntax [%c %c] is reserved for future extensions", c, c);
3570 	    }
3571 	}
3572     }
3573 }
3574 
3575 STATIC regnode *
3576 S_regclass(pTHX_ RExC_state_t *pRExC_state)
3577 {
3578     register UV value;
3579     register UV nextvalue;
3580     register IV prevvalue = OOB_UNICODE;
3581     register IV range = 0;
3582     register regnode *ret;
3583     STRLEN numlen;
3584     IV namedclass;
3585     char *rangebegin = 0;
3586     bool need_class = 0;
3587     SV *listsv = Nullsv;
3588     register char *e;
3589     UV n;
3590     bool optimize_invert   = TRUE;
3591     AV* unicode_alternate  = 0;
3592 #ifdef EBCDIC
3593     UV literal_endpoint = 0;
3594 #endif
3595 
3596     ret = reganode(pRExC_state, ANYOF, 0);
3597 
3598     if (!SIZE_ONLY)
3599 	ANYOF_FLAGS(ret) = 0;
3600 
3601     if (UCHARAT(RExC_parse) == '^') {	/* Complement of range. */
3602 	RExC_naughty++;
3603 	RExC_parse++;
3604 	if (!SIZE_ONLY)
3605 	    ANYOF_FLAGS(ret) |= ANYOF_INVERT;
3606     }
3607 
3608     if (SIZE_ONLY)
3609 	RExC_size += ANYOF_SKIP;
3610     else {
3611  	RExC_emit += ANYOF_SKIP;
3612 	if (FOLD)
3613 	    ANYOF_FLAGS(ret) |= ANYOF_FOLD;
3614 	if (LOC)
3615 	    ANYOF_FLAGS(ret) |= ANYOF_LOCALE;
3616 	ANYOF_BITMAP_ZERO(ret);
3617 	listsv = newSVpvn("# comment\n", 10);
3618     }
3619 
3620     nextvalue = RExC_parse < RExC_end ? UCHARAT(RExC_parse) : 0;
3621 
3622     if (!SIZE_ONLY && POSIXCC(nextvalue))
3623 	checkposixcc(pRExC_state);
3624 
3625     /* allow 1st char to be ] (allowing it to be - is dealt with later) */
3626     if (UCHARAT(RExC_parse) == ']')
3627 	goto charclassloop;
3628 
3629     while (RExC_parse < RExC_end && UCHARAT(RExC_parse) != ']') {
3630 
3631     charclassloop:
3632 
3633 	namedclass = OOB_NAMEDCLASS; /* initialize as illegal */
3634 
3635 	if (!range)
3636 	    rangebegin = RExC_parse;
3637 	if (UTF) {
3638 	    value = utf8n_to_uvchr((U8*)RExC_parse,
3639 				   RExC_end - RExC_parse,
3640 				   &numlen, 0);
3641 	    RExC_parse += numlen;
3642 	}
3643 	else
3644 	    value = UCHARAT(RExC_parse++);
3645 	nextvalue = RExC_parse < RExC_end ? UCHARAT(RExC_parse) : 0;
3646 	if (value == '[' && POSIXCC(nextvalue))
3647 	    namedclass = regpposixcc(pRExC_state, value);
3648 	else if (value == '\\') {
3649 	    if (UTF) {
3650 		value = utf8n_to_uvchr((U8*)RExC_parse,
3651 				   RExC_end - RExC_parse,
3652 				   &numlen, 0);
3653 		RExC_parse += numlen;
3654 	    }
3655 	    else
3656 		value = UCHARAT(RExC_parse++);
3657 	    /* Some compilers cannot handle switching on 64-bit integer
3658 	     * values, therefore value cannot be an UV.  Yes, this will
3659 	     * be a problem later if we want switch on Unicode.
3660 	     * A similar issue a little bit later when switching on
3661 	     * namedclass. --jhi */
3662 	    switch ((I32)value) {
3663 	    case 'w':	namedclass = ANYOF_ALNUM;	break;
3664 	    case 'W':	namedclass = ANYOF_NALNUM;	break;
3665 	    case 's':	namedclass = ANYOF_SPACE;	break;
3666 	    case 'S':	namedclass = ANYOF_NSPACE;	break;
3667 	    case 'd':	namedclass = ANYOF_DIGIT;	break;
3668 	    case 'D':	namedclass = ANYOF_NDIGIT;	break;
3669 	    case 'p':
3670 	    case 'P':
3671 		if (RExC_parse >= RExC_end)
3672 		    vFAIL2("Empty \\%c{}", (U8)value);
3673 		if (*RExC_parse == '{') {
3674 		    const U8 c = (U8)value;
3675 		    e = strchr(RExC_parse++, '}');
3676                     if (!e)
3677                         vFAIL2("Missing right brace on \\%c{}", c);
3678 		    while (isSPACE(UCHARAT(RExC_parse)))
3679 		        RExC_parse++;
3680                     if (e == RExC_parse)
3681                         vFAIL2("Empty \\%c{}", c);
3682 		    n = e - RExC_parse;
3683 		    while (isSPACE(UCHARAT(RExC_parse + n - 1)))
3684 		        n--;
3685 		}
3686 		else {
3687 		    e = RExC_parse;
3688 		    n = 1;
3689 		}
3690 		if (!SIZE_ONLY) {
3691 		    if (UCHARAT(RExC_parse) == '^') {
3692 			 RExC_parse++;
3693 			 n--;
3694 			 value = value == 'p' ? 'P' : 'p'; /* toggle */
3695 			 while (isSPACE(UCHARAT(RExC_parse))) {
3696 			      RExC_parse++;
3697 			      n--;
3698 			 }
3699 		    }
3700 		    if (value == 'p')
3701 			 Perl_sv_catpvf(aTHX_ listsv,
3702 					"+utf8::%.*s\n", (int)n, RExC_parse);
3703 		    else
3704 			 Perl_sv_catpvf(aTHX_ listsv,
3705 					"!utf8::%.*s\n", (int)n, RExC_parse);
3706 		}
3707 		RExC_parse = e + 1;
3708 		ANYOF_FLAGS(ret) |= ANYOF_UNICODE;
3709 		namedclass = ANYOF_MAX;  /* no official name, but it's named */
3710 		break;
3711 	    case 'n':	value = '\n';			break;
3712 	    case 'r':	value = '\r';			break;
3713 	    case 't':	value = '\t';			break;
3714 	    case 'f':	value = '\f';			break;
3715 	    case 'b':	value = '\b';			break;
3716 	    case 'e':	value = ASCII_TO_NATIVE('\033');break;
3717 	    case 'a':	value = ASCII_TO_NATIVE('\007');break;
3718 	    case 'x':
3719 		if (*RExC_parse == '{') {
3720                     I32 flags = PERL_SCAN_ALLOW_UNDERSCORES
3721                         | PERL_SCAN_DISALLOW_PREFIX;
3722 		    e = strchr(RExC_parse++, '}');
3723                     if (!e)
3724                         vFAIL("Missing right brace on \\x{}");
3725 
3726 		    numlen = e - RExC_parse;
3727 		    value = grok_hex(RExC_parse, &numlen, &flags, NULL);
3728 		    RExC_parse = e + 1;
3729 		}
3730 		else {
3731                     I32 flags = PERL_SCAN_DISALLOW_PREFIX;
3732 		    numlen = 2;
3733 		    value = grok_hex(RExC_parse, &numlen, &flags, NULL);
3734 		    RExC_parse += numlen;
3735 		}
3736 		break;
3737 	    case 'c':
3738 		value = UCHARAT(RExC_parse++);
3739 		value = toCTRL(value);
3740 		break;
3741 	    case '0': case '1': case '2': case '3': case '4':
3742 	    case '5': case '6': case '7': case '8': case '9':
3743             {
3744                 I32 flags = 0;
3745 		numlen = 3;
3746 		value = grok_oct(--RExC_parse, &numlen, &flags, NULL);
3747 		RExC_parse += numlen;
3748 		break;
3749             }
3750 	    default:
3751 		if (!SIZE_ONLY && isALPHA(value) && ckWARN(WARN_REGEXP))
3752 		    vWARN2(RExC_parse,
3753 			   "Unrecognized escape \\%c in character class passed through",
3754 			   (int)value);
3755 		break;
3756 	    }
3757 	} /* end of \blah */
3758 #ifdef EBCDIC
3759 	else
3760 	    literal_endpoint++;
3761 #endif
3762 
3763 	if (namedclass > OOB_NAMEDCLASS) { /* this is a named class \blah */
3764 
3765 	    if (!SIZE_ONLY && !need_class)
3766 		ANYOF_CLASS_ZERO(ret);
3767 
3768 	    need_class = 1;
3769 
3770 	    /* a bad range like a-\d, a-[:digit:] ? */
3771 	    if (range) {
3772 		if (!SIZE_ONLY) {
3773 		    if (ckWARN(WARN_REGEXP))
3774 			vWARN4(RExC_parse,
3775 			       "False [] range \"%*.*s\"",
3776 			       RExC_parse - rangebegin,
3777 			       RExC_parse - rangebegin,
3778 			       rangebegin);
3779 		    if (prevvalue < 256) {
3780 			ANYOF_BITMAP_SET(ret, prevvalue);
3781 			ANYOF_BITMAP_SET(ret, '-');
3782 		    }
3783 		    else {
3784 			ANYOF_FLAGS(ret) |= ANYOF_UNICODE;
3785 			Perl_sv_catpvf(aTHX_ listsv,
3786 				       "%04"UVxf"\n%04"UVxf"\n", (UV)prevvalue, (UV) '-');
3787 		    }
3788 		}
3789 
3790 		range = 0; /* this was not a true range */
3791 	    }
3792 
3793 	    if (!SIZE_ONLY) {
3794 		const char *what = NULL;
3795 		char yesno = 0;
3796 
3797 	        if (namedclass > OOB_NAMEDCLASS)
3798 		    optimize_invert = FALSE;
3799 		/* Possible truncation here but in some 64-bit environments
3800 		 * the compiler gets heartburn about switch on 64-bit values.
3801 		 * A similar issue a little earlier when switching on value.
3802 		 * --jhi */
3803 		switch ((I32)namedclass) {
3804 		case ANYOF_ALNUM:
3805 		    if (LOC)
3806 			ANYOF_CLASS_SET(ret, ANYOF_ALNUM);
3807 		    else {
3808 			for (value = 0; value < 256; value++)
3809 			    if (isALNUM(value))
3810 				ANYOF_BITMAP_SET(ret, value);
3811 		    }
3812 		    yesno = '+';
3813 		    what = "Word";
3814 		    break;
3815 		case ANYOF_NALNUM:
3816 		    if (LOC)
3817 			ANYOF_CLASS_SET(ret, ANYOF_NALNUM);
3818 		    else {
3819 			for (value = 0; value < 256; value++)
3820 			    if (!isALNUM(value))
3821 				ANYOF_BITMAP_SET(ret, value);
3822 		    }
3823 		    yesno = '!';
3824 		    what = "Word";
3825 		    break;
3826 		case ANYOF_ALNUMC:
3827 		    if (LOC)
3828 			ANYOF_CLASS_SET(ret, ANYOF_ALNUMC);
3829 		    else {
3830 			for (value = 0; value < 256; value++)
3831 			    if (isALNUMC(value))
3832 				ANYOF_BITMAP_SET(ret, value);
3833 		    }
3834 		    yesno = '+';
3835 		    what = "Alnum";
3836 		    break;
3837 		case ANYOF_NALNUMC:
3838 		    if (LOC)
3839 			ANYOF_CLASS_SET(ret, ANYOF_NALNUMC);
3840 		    else {
3841 			for (value = 0; value < 256; value++)
3842 			    if (!isALNUMC(value))
3843 				ANYOF_BITMAP_SET(ret, value);
3844 		    }
3845 		    yesno = '!';
3846 		    what = "Alnum";
3847 		    break;
3848 		case ANYOF_ALPHA:
3849 		    if (LOC)
3850 			ANYOF_CLASS_SET(ret, ANYOF_ALPHA);
3851 		    else {
3852 			for (value = 0; value < 256; value++)
3853 			    if (isALPHA(value))
3854 				ANYOF_BITMAP_SET(ret, value);
3855 		    }
3856 		    yesno = '+';
3857 		    what = "Alpha";
3858 		    break;
3859 		case ANYOF_NALPHA:
3860 		    if (LOC)
3861 			ANYOF_CLASS_SET(ret, ANYOF_NALPHA);
3862 		    else {
3863 			for (value = 0; value < 256; value++)
3864 			    if (!isALPHA(value))
3865 				ANYOF_BITMAP_SET(ret, value);
3866 		    }
3867 		    yesno = '!';
3868 		    what = "Alpha";
3869 		    break;
3870 		case ANYOF_ASCII:
3871 		    if (LOC)
3872 			ANYOF_CLASS_SET(ret, ANYOF_ASCII);
3873 		    else {
3874 #ifndef EBCDIC
3875 			for (value = 0; value < 128; value++)
3876 			    ANYOF_BITMAP_SET(ret, value);
3877 #else  /* EBCDIC */
3878 			for (value = 0; value < 256; value++) {
3879 			    if (isASCII(value))
3880 			        ANYOF_BITMAP_SET(ret, value);
3881 			}
3882 #endif /* EBCDIC */
3883 		    }
3884 		    yesno = '+';
3885 		    what = "ASCII";
3886 		    break;
3887 		case ANYOF_NASCII:
3888 		    if (LOC)
3889 			ANYOF_CLASS_SET(ret, ANYOF_NASCII);
3890 		    else {
3891 #ifndef EBCDIC
3892 			for (value = 128; value < 256; value++)
3893 			    ANYOF_BITMAP_SET(ret, value);
3894 #else  /* EBCDIC */
3895 			for (value = 0; value < 256; value++) {
3896 			    if (!isASCII(value))
3897 			        ANYOF_BITMAP_SET(ret, value);
3898 			}
3899 #endif /* EBCDIC */
3900 		    }
3901 		    yesno = '!';
3902 		    what = "ASCII";
3903 		    break;
3904 		case ANYOF_BLANK:
3905 		    if (LOC)
3906 			ANYOF_CLASS_SET(ret, ANYOF_BLANK);
3907 		    else {
3908 			for (value = 0; value < 256; value++)
3909 			    if (isBLANK(value))
3910 				ANYOF_BITMAP_SET(ret, value);
3911 		    }
3912 		    yesno = '+';
3913 		    what = "Blank";
3914 		    break;
3915 		case ANYOF_NBLANK:
3916 		    if (LOC)
3917 			ANYOF_CLASS_SET(ret, ANYOF_NBLANK);
3918 		    else {
3919 			for (value = 0; value < 256; value++)
3920 			    if (!isBLANK(value))
3921 				ANYOF_BITMAP_SET(ret, value);
3922 		    }
3923 		    yesno = '!';
3924 		    what = "Blank";
3925 		    break;
3926 		case ANYOF_CNTRL:
3927 		    if (LOC)
3928 			ANYOF_CLASS_SET(ret, ANYOF_CNTRL);
3929 		    else {
3930 			for (value = 0; value < 256; value++)
3931 			    if (isCNTRL(value))
3932 				ANYOF_BITMAP_SET(ret, value);
3933 		    }
3934 		    yesno = '+';
3935 		    what = "Cntrl";
3936 		    break;
3937 		case ANYOF_NCNTRL:
3938 		    if (LOC)
3939 			ANYOF_CLASS_SET(ret, ANYOF_NCNTRL);
3940 		    else {
3941 			for (value = 0; value < 256; value++)
3942 			    if (!isCNTRL(value))
3943 				ANYOF_BITMAP_SET(ret, value);
3944 		    }
3945 		    yesno = '!';
3946 		    what = "Cntrl";
3947 		    break;
3948 		case ANYOF_DIGIT:
3949 		    if (LOC)
3950 			ANYOF_CLASS_SET(ret, ANYOF_DIGIT);
3951 		    else {
3952 			/* consecutive digits assumed */
3953 			for (value = '0'; value <= '9'; value++)
3954 			    ANYOF_BITMAP_SET(ret, value);
3955 		    }
3956 		    yesno = '+';
3957 		    what = "Digit";
3958 		    break;
3959 		case ANYOF_NDIGIT:
3960 		    if (LOC)
3961 			ANYOF_CLASS_SET(ret, ANYOF_NDIGIT);
3962 		    else {
3963 			/* consecutive digits assumed */
3964 			for (value = 0; value < '0'; value++)
3965 			    ANYOF_BITMAP_SET(ret, value);
3966 			for (value = '9' + 1; value < 256; value++)
3967 			    ANYOF_BITMAP_SET(ret, value);
3968 		    }
3969 		    yesno = '!';
3970 		    what = "Digit";
3971 		    break;
3972 		case ANYOF_GRAPH:
3973 		    if (LOC)
3974 			ANYOF_CLASS_SET(ret, ANYOF_GRAPH);
3975 		    else {
3976 			for (value = 0; value < 256; value++)
3977 			    if (isGRAPH(value))
3978 				ANYOF_BITMAP_SET(ret, value);
3979 		    }
3980 		    yesno = '+';
3981 		    what = "Graph";
3982 		    break;
3983 		case ANYOF_NGRAPH:
3984 		    if (LOC)
3985 			ANYOF_CLASS_SET(ret, ANYOF_NGRAPH);
3986 		    else {
3987 			for (value = 0; value < 256; value++)
3988 			    if (!isGRAPH(value))
3989 				ANYOF_BITMAP_SET(ret, value);
3990 		    }
3991 		    yesno = '!';
3992 		    what = "Graph";
3993 		    break;
3994 		case ANYOF_LOWER:
3995 		    if (LOC)
3996 			ANYOF_CLASS_SET(ret, ANYOF_LOWER);
3997 		    else {
3998 			for (value = 0; value < 256; value++)
3999 			    if (isLOWER(value))
4000 				ANYOF_BITMAP_SET(ret, value);
4001 		    }
4002 		    yesno = '+';
4003 		    what = "Lower";
4004 		    break;
4005 		case ANYOF_NLOWER:
4006 		    if (LOC)
4007 			ANYOF_CLASS_SET(ret, ANYOF_NLOWER);
4008 		    else {
4009 			for (value = 0; value < 256; value++)
4010 			    if (!isLOWER(value))
4011 				ANYOF_BITMAP_SET(ret, value);
4012 		    }
4013 		    yesno = '!';
4014 		    what = "Lower";
4015 		    break;
4016 		case ANYOF_PRINT:
4017 		    if (LOC)
4018 			ANYOF_CLASS_SET(ret, ANYOF_PRINT);
4019 		    else {
4020 			for (value = 0; value < 256; value++)
4021 			    if (isPRINT(value))
4022 				ANYOF_BITMAP_SET(ret, value);
4023 		    }
4024 		    yesno = '+';
4025 		    what = "Print";
4026 		    break;
4027 		case ANYOF_NPRINT:
4028 		    if (LOC)
4029 			ANYOF_CLASS_SET(ret, ANYOF_NPRINT);
4030 		    else {
4031 			for (value = 0; value < 256; value++)
4032 			    if (!isPRINT(value))
4033 				ANYOF_BITMAP_SET(ret, value);
4034 		    }
4035 		    yesno = '!';
4036 		    what = "Print";
4037 		    break;
4038 		case ANYOF_PSXSPC:
4039 		    if (LOC)
4040 			ANYOF_CLASS_SET(ret, ANYOF_PSXSPC);
4041 		    else {
4042 			for (value = 0; value < 256; value++)
4043 			    if (isPSXSPC(value))
4044 				ANYOF_BITMAP_SET(ret, value);
4045 		    }
4046 		    yesno = '+';
4047 		    what = "Space";
4048 		    break;
4049 		case ANYOF_NPSXSPC:
4050 		    if (LOC)
4051 			ANYOF_CLASS_SET(ret, ANYOF_NPSXSPC);
4052 		    else {
4053 			for (value = 0; value < 256; value++)
4054 			    if (!isPSXSPC(value))
4055 				ANYOF_BITMAP_SET(ret, value);
4056 		    }
4057 		    yesno = '!';
4058 		    what = "Space";
4059 		    break;
4060 		case ANYOF_PUNCT:
4061 		    if (LOC)
4062 			ANYOF_CLASS_SET(ret, ANYOF_PUNCT);
4063 		    else {
4064 			for (value = 0; value < 256; value++)
4065 			    if (isPUNCT(value))
4066 				ANYOF_BITMAP_SET(ret, value);
4067 		    }
4068 		    yesno = '+';
4069 		    what = "Punct";
4070 		    break;
4071 		case ANYOF_NPUNCT:
4072 		    if (LOC)
4073 			ANYOF_CLASS_SET(ret, ANYOF_NPUNCT);
4074 		    else {
4075 			for (value = 0; value < 256; value++)
4076 			    if (!isPUNCT(value))
4077 				ANYOF_BITMAP_SET(ret, value);
4078 		    }
4079 		    yesno = '!';
4080 		    what = "Punct";
4081 		    break;
4082 		case ANYOF_SPACE:
4083 		    if (LOC)
4084 			ANYOF_CLASS_SET(ret, ANYOF_SPACE);
4085 		    else {
4086 			for (value = 0; value < 256; value++)
4087 			    if (isSPACE(value))
4088 				ANYOF_BITMAP_SET(ret, value);
4089 		    }
4090 		    yesno = '+';
4091 		    what = "SpacePerl";
4092 		    break;
4093 		case ANYOF_NSPACE:
4094 		    if (LOC)
4095 			ANYOF_CLASS_SET(ret, ANYOF_NSPACE);
4096 		    else {
4097 			for (value = 0; value < 256; value++)
4098 			    if (!isSPACE(value))
4099 				ANYOF_BITMAP_SET(ret, value);
4100 		    }
4101 		    yesno = '!';
4102 		    what = "SpacePerl";
4103 		    break;
4104 		case ANYOF_UPPER:
4105 		    if (LOC)
4106 			ANYOF_CLASS_SET(ret, ANYOF_UPPER);
4107 		    else {
4108 			for (value = 0; value < 256; value++)
4109 			    if (isUPPER(value))
4110 				ANYOF_BITMAP_SET(ret, value);
4111 		    }
4112 		    yesno = '+';
4113 		    what = "Upper";
4114 		    break;
4115 		case ANYOF_NUPPER:
4116 		    if (LOC)
4117 			ANYOF_CLASS_SET(ret, ANYOF_NUPPER);
4118 		    else {
4119 			for (value = 0; value < 256; value++)
4120 			    if (!isUPPER(value))
4121 				ANYOF_BITMAP_SET(ret, value);
4122 		    }
4123 		    yesno = '!';
4124 		    what = "Upper";
4125 		    break;
4126 		case ANYOF_XDIGIT:
4127 		    if (LOC)
4128 			ANYOF_CLASS_SET(ret, ANYOF_XDIGIT);
4129 		    else {
4130 			for (value = 0; value < 256; value++)
4131 			    if (isXDIGIT(value))
4132 				ANYOF_BITMAP_SET(ret, value);
4133 		    }
4134 		    yesno = '+';
4135 		    what = "XDigit";
4136 		    break;
4137 		case ANYOF_NXDIGIT:
4138 		    if (LOC)
4139 			ANYOF_CLASS_SET(ret, ANYOF_NXDIGIT);
4140 		    else {
4141 			for (value = 0; value < 256; value++)
4142 			    if (!isXDIGIT(value))
4143 				ANYOF_BITMAP_SET(ret, value);
4144 		    }
4145 		    yesno = '!';
4146 		    what = "XDigit";
4147 		    break;
4148 		case ANYOF_MAX:
4149 		    /* this is to handle \p and \P */
4150 		    break;
4151 		default:
4152 		    vFAIL("Invalid [::] class");
4153 		    break;
4154 		}
4155 		if (what) {
4156 		    /* Strings such as "+utf8::isWord\n" */
4157 		    Perl_sv_catpvf(aTHX_ listsv, "%cutf8::Is%s\n", yesno, what);
4158 		}
4159 		if (LOC)
4160 		    ANYOF_FLAGS(ret) |= ANYOF_CLASS;
4161 		continue;
4162 	    }
4163 	} /* end of namedclass \blah */
4164 
4165 	if (range) {
4166 	    if (prevvalue > (IV)value) /* b-a */ {
4167 		Simple_vFAIL4("Invalid [] range \"%*.*s\"",
4168 			      RExC_parse - rangebegin,
4169 			      RExC_parse - rangebegin,
4170 			      rangebegin);
4171 		range = 0; /* not a valid range */
4172 	    }
4173 	}
4174 	else {
4175 	    prevvalue = value; /* save the beginning of the range */
4176 	    if (*RExC_parse == '-' && RExC_parse+1 < RExC_end &&
4177 		RExC_parse[1] != ']') {
4178 		RExC_parse++;
4179 
4180 		/* a bad range like \w-, [:word:]- ? */
4181 		if (namedclass > OOB_NAMEDCLASS) {
4182 		    if (ckWARN(WARN_REGEXP))
4183 			vWARN4(RExC_parse,
4184 			       "False [] range \"%*.*s\"",
4185 			       RExC_parse - rangebegin,
4186 			       RExC_parse - rangebegin,
4187 			       rangebegin);
4188 		    if (!SIZE_ONLY)
4189 			ANYOF_BITMAP_SET(ret, '-');
4190 		} else
4191 		    range = 1;	/* yeah, it's a range! */
4192 		continue;	/* but do it the next time */
4193 	    }
4194 	}
4195 
4196 	/* now is the next time */
4197 	if (!SIZE_ONLY) {
4198 	    IV i;
4199 
4200 	    if (prevvalue < 256) {
4201 	        const IV ceilvalue = value < 256 ? value : 255;
4202 
4203 #ifdef EBCDIC
4204 		/* In EBCDIC [\x89-\x91] should include
4205 		 * the \x8e but [i-j] should not. */
4206 		if (literal_endpoint == 2 &&
4207 		    ((isLOWER(prevvalue) && isLOWER(ceilvalue)) ||
4208 		     (isUPPER(prevvalue) && isUPPER(ceilvalue))))
4209 		{
4210 		    if (isLOWER(prevvalue)) {
4211 			for (i = prevvalue; i <= ceilvalue; i++)
4212 			    if (isLOWER(i))
4213 				ANYOF_BITMAP_SET(ret, i);
4214 		    } else {
4215 			for (i = prevvalue; i <= ceilvalue; i++)
4216 			    if (isUPPER(i))
4217 				ANYOF_BITMAP_SET(ret, i);
4218 		    }
4219 		}
4220 		else
4221 #endif
4222 		      for (i = prevvalue; i <= ceilvalue; i++)
4223 			  ANYOF_BITMAP_SET(ret, i);
4224 	  }
4225 	  if (value > 255 || UTF) {
4226 	        const UV prevnatvalue  = NATIVE_TO_UNI(prevvalue);
4227 		const UV natvalue      = NATIVE_TO_UNI(value);
4228 
4229 		ANYOF_FLAGS(ret) |= ANYOF_UNICODE;
4230 		if (prevnatvalue < natvalue) { /* what about > ? */
4231 		    Perl_sv_catpvf(aTHX_ listsv, "%04"UVxf"\t%04"UVxf"\n",
4232 				   prevnatvalue, natvalue);
4233 		}
4234 		else if (prevnatvalue == natvalue) {
4235 		    Perl_sv_catpvf(aTHX_ listsv, "%04"UVxf"\n", natvalue);
4236 		    if (FOLD) {
4237 			 U8 foldbuf[UTF8_MAXBYTES_CASE+1];
4238 			 STRLEN foldlen;
4239 			 const UV f = to_uni_fold(natvalue, foldbuf, &foldlen);
4240 
4241 			 /* If folding and foldable and a single
4242 			  * character, insert also the folded version
4243 			  * to the charclass. */
4244 			 if (f != value) {
4245 			      if (foldlen == (STRLEN)UNISKIP(f))
4246 				  Perl_sv_catpvf(aTHX_ listsv,
4247 						 "%04"UVxf"\n", f);
4248 			      else {
4249 				  /* Any multicharacter foldings
4250 				   * require the following transform:
4251 				   * [ABCDEF] -> (?:[ABCabcDEFd]|pq|rst)
4252 				   * where E folds into "pq" and F folds
4253 				   * into "rst", all other characters
4254 				   * fold to single characters.  We save
4255 				   * away these multicharacter foldings,
4256 				   * to be later saved as part of the
4257 				   * additional "s" data. */
4258 				  SV *sv;
4259 
4260 				  if (!unicode_alternate)
4261 				      unicode_alternate = newAV();
4262 				  sv = newSVpvn((char*)foldbuf, foldlen);
4263 				  SvUTF8_on(sv);
4264 				  av_push(unicode_alternate, sv);
4265 			      }
4266 			 }
4267 
4268 			 /* If folding and the value is one of the Greek
4269 			  * sigmas insert a few more sigmas to make the
4270 			  * folding rules of the sigmas to work right.
4271 			  * Note that not all the possible combinations
4272 			  * are handled here: some of them are handled
4273 			  * by the standard folding rules, and some of
4274 			  * them (literal or EXACTF cases) are handled
4275 			  * during runtime in regexec.c:S_find_byclass(). */
4276 			 if (value == UNICODE_GREEK_SMALL_LETTER_FINAL_SIGMA) {
4277 			      Perl_sv_catpvf(aTHX_ listsv, "%04"UVxf"\n",
4278 					     (UV)UNICODE_GREEK_CAPITAL_LETTER_SIGMA);
4279 			      Perl_sv_catpvf(aTHX_ listsv, "%04"UVxf"\n",
4280 					     (UV)UNICODE_GREEK_SMALL_LETTER_SIGMA);
4281 			 }
4282 			 else if (value == UNICODE_GREEK_CAPITAL_LETTER_SIGMA)
4283 			      Perl_sv_catpvf(aTHX_ listsv, "%04"UVxf"\n",
4284 					     (UV)UNICODE_GREEK_SMALL_LETTER_SIGMA);
4285 		    }
4286 		}
4287 	    }
4288 #ifdef EBCDIC
4289 	    literal_endpoint = 0;
4290 #endif
4291         }
4292 
4293 	range = 0; /* this range (if it was one) is done now */
4294     }
4295 
4296     if (need_class) {
4297 	ANYOF_FLAGS(ret) |= ANYOF_LARGE;
4298 	if (SIZE_ONLY)
4299 	    RExC_size += ANYOF_CLASS_ADD_SKIP;
4300 	else
4301 	    RExC_emit += ANYOF_CLASS_ADD_SKIP;
4302     }
4303 
4304     /* optimize case-insensitive simple patterns (e.g. /[a-z]/i) */
4305     if (!SIZE_ONLY &&
4306 	 /* If the only flag is folding (plus possibly inversion). */
4307 	((ANYOF_FLAGS(ret) & (ANYOF_FLAGS_ALL ^ ANYOF_INVERT)) == ANYOF_FOLD)
4308        ) {
4309 	for (value = 0; value < 256; ++value) {
4310 	    if (ANYOF_BITMAP_TEST(ret, value)) {
4311 		UV fold = PL_fold[value];
4312 
4313 		if (fold != value)
4314 		    ANYOF_BITMAP_SET(ret, fold);
4315 	    }
4316 	}
4317 	ANYOF_FLAGS(ret) &= ~ANYOF_FOLD;
4318     }
4319 
4320     /* optimize inverted simple patterns (e.g. [^a-z]) */
4321     if (!SIZE_ONLY && optimize_invert &&
4322 	/* If the only flag is inversion. */
4323 	(ANYOF_FLAGS(ret) & ANYOF_FLAGS_ALL) ==	ANYOF_INVERT) {
4324 	for (value = 0; value < ANYOF_BITMAP_SIZE; ++value)
4325 	    ANYOF_BITMAP(ret)[value] ^= ANYOF_FLAGS_ALL;
4326 	ANYOF_FLAGS(ret) = ANYOF_UNICODE_ALL;
4327     }
4328 
4329     if (!SIZE_ONLY) {
4330 	AV *av = newAV();
4331 	SV *rv;
4332 
4333 	/* The 0th element stores the character class description
4334 	 * in its textual form: used later (regexec.c:Perl_regclass_swash())
4335 	 * to initialize the appropriate swash (which gets stored in
4336 	 * the 1st element), and also useful for dumping the regnode.
4337 	 * The 2nd element stores the multicharacter foldings,
4338 	 * used later (regexec.c:S_reginclass()). */
4339 	av_store(av, 0, listsv);
4340 	av_store(av, 1, NULL);
4341 	av_store(av, 2, (SV*)unicode_alternate);
4342 	rv = newRV_noinc((SV*)av);
4343 	n = add_data(pRExC_state, 1, "s");
4344 	RExC_rx->data->data[n] = (void*)rv;
4345 	ARG_SET(ret, n);
4346     }
4347 
4348     return ret;
4349 }
4350 
4351 STATIC char*
4352 S_nextchar(pTHX_ RExC_state_t *pRExC_state)
4353 {
4354     char* retval = RExC_parse++;
4355 
4356     for (;;) {
4357 	if (*RExC_parse == '(' && RExC_parse[1] == '?' &&
4358 		RExC_parse[2] == '#') {
4359 	    while (*RExC_parse != ')') {
4360 		if (RExC_parse == RExC_end)
4361 		    FAIL("Sequence (?#... not terminated");
4362 		RExC_parse++;
4363 	    }
4364 	    RExC_parse++;
4365 	    continue;
4366 	}
4367 	if (RExC_flags & PMf_EXTENDED) {
4368 	    if (isSPACE(*RExC_parse)) {
4369 		RExC_parse++;
4370 		continue;
4371 	    }
4372 	    else if (*RExC_parse == '#') {
4373 		while (RExC_parse < RExC_end)
4374 		    if (*RExC_parse++ == '\n') break;
4375 		continue;
4376 	    }
4377 	}
4378 	return retval;
4379     }
4380 }
4381 
4382 /*
4383 - reg_node - emit a node
4384 */
4385 STATIC regnode *			/* Location. */
4386 S_reg_node(pTHX_ RExC_state_t *pRExC_state, U8 op)
4387 {
4388     register regnode *ptr;
4389     regnode * const ret = RExC_emit;
4390 
4391     if (SIZE_ONLY) {
4392 	SIZE_ALIGN(RExC_size);
4393 	RExC_size += 1;
4394 	return(ret);
4395     }
4396 
4397     NODE_ALIGN_FILL(ret);
4398     ptr = ret;
4399     FILL_ADVANCE_NODE(ptr, op);
4400     if (RExC_offsets) {         /* MJD */
4401 	MJD_OFFSET_DEBUG(("%s:%u: (op %s) %s %u <- %u (len %u) (max %u).\n",
4402               "reg_node", __LINE__,
4403               reg_name[op],
4404               RExC_emit - RExC_emit_start > RExC_offsets[0]
4405               ? "Overwriting end of array!\n" : "OK",
4406               RExC_emit - RExC_emit_start,
4407               RExC_parse - RExC_start,
4408               RExC_offsets[0]));
4409 	Set_Node_Offset(RExC_emit, RExC_parse + (op == END));
4410     }
4411 
4412     RExC_emit = ptr;
4413 
4414     return(ret);
4415 }
4416 
4417 /*
4418 - reganode - emit a node with an argument
4419 */
4420 STATIC regnode *			/* Location. */
4421 S_reganode(pTHX_ RExC_state_t *pRExC_state, U8 op, U32 arg)
4422 {
4423     register regnode *ptr;
4424     regnode * const ret = RExC_emit;
4425 
4426     if (SIZE_ONLY) {
4427 	SIZE_ALIGN(RExC_size);
4428 	RExC_size += 2;
4429 	return(ret);
4430     }
4431 
4432     NODE_ALIGN_FILL(ret);
4433     ptr = ret;
4434     FILL_ADVANCE_NODE_ARG(ptr, op, arg);
4435     if (RExC_offsets) {         /* MJD */
4436 	MJD_OFFSET_DEBUG(("%s(%d): (op %s) %s %u <- %u (max %u).\n",
4437               "reganode",
4438 	      __LINE__,
4439 	      reg_name[op],
4440               RExC_emit - RExC_emit_start > RExC_offsets[0] ?
4441               "Overwriting end of array!\n" : "OK",
4442               RExC_emit - RExC_emit_start,
4443               RExC_parse - RExC_start,
4444               RExC_offsets[0]));
4445 	Set_Cur_Node_Offset;
4446     }
4447 
4448     RExC_emit = ptr;
4449 
4450     return(ret);
4451 }
4452 
4453 /*
4454 - reguni - emit (if appropriate) a Unicode character
4455 */
4456 STATIC void
4457 S_reguni(pTHX_ const RExC_state_t *pRExC_state, UV uv, char* s, STRLEN* lenp)
4458 {
4459     *lenp = SIZE_ONLY ? UNISKIP(uv) : (uvchr_to_utf8((U8*)s, uv) - (U8*)s);
4460 }
4461 
4462 /*
4463 - reginsert - insert an operator in front of already-emitted operand
4464 *
4465 * Means relocating the operand.
4466 */
4467 STATIC void
4468 S_reginsert(pTHX_ RExC_state_t *pRExC_state, U8 op, regnode *opnd)
4469 {
4470     register regnode *src;
4471     register regnode *dst;
4472     register regnode *place;
4473     const int offset = regarglen[(U8)op];
4474 
4475 /* (PL_regkind[(U8)op] == CURLY ? EXTRA_STEP_2ARGS : 0); */
4476 
4477     if (SIZE_ONLY) {
4478 	RExC_size += NODE_STEP_REGNODE + offset;
4479 	return;
4480     }
4481 
4482     src = RExC_emit;
4483     RExC_emit += NODE_STEP_REGNODE + offset;
4484     dst = RExC_emit;
4485     while (src > opnd) {
4486 	StructCopy(--src, --dst, regnode);
4487         if (RExC_offsets) {     /* MJD 20010112 */
4488 	    MJD_OFFSET_DEBUG(("%s(%d): (op %s) %s copy %u -> %u (max %u).\n",
4489                   "reg_insert",
4490 		  __LINE__,
4491 		  reg_name[op],
4492                   dst - RExC_emit_start > RExC_offsets[0]
4493                   ? "Overwriting end of array!\n" : "OK",
4494                   src - RExC_emit_start,
4495                   dst - RExC_emit_start,
4496                   RExC_offsets[0]));
4497 	    Set_Node_Offset_To_R(dst-RExC_emit_start, Node_Offset(src));
4498 	    Set_Node_Length_To_R(dst-RExC_emit_start, Node_Length(src));
4499         }
4500     }
4501 
4502 
4503     place = opnd;		/* Op node, where operand used to be. */
4504     if (RExC_offsets) {         /* MJD */
4505 	MJD_OFFSET_DEBUG(("%s(%d): (op %s) %s %u <- %u (max %u).\n",
4506               "reginsert",
4507 	      __LINE__,
4508 	      reg_name[op],
4509               place - RExC_emit_start > RExC_offsets[0]
4510               ? "Overwriting end of array!\n" : "OK",
4511               place - RExC_emit_start,
4512               RExC_parse - RExC_start,
4513               RExC_offsets[0]));
4514 	Set_Node_Offset(place, RExC_parse);
4515 	Set_Node_Length(place, 1);
4516     }
4517     src = NEXTOPER(place);
4518     FILL_ADVANCE_NODE(place, op);
4519     Zero(src, offset, regnode);
4520 }
4521 
4522 /*
4523 - regtail - set the next-pointer at the end of a node chain of p to val.
4524 */
4525 STATIC void
4526 S_regtail(pTHX_ RExC_state_t *pRExC_state, regnode *p, regnode *val)
4527 {
4528     register regnode *scan;
4529 
4530     if (SIZE_ONLY)
4531 	return;
4532 
4533     /* Find last node. */
4534     scan = p;
4535     for (;;) {
4536 	regnode * const temp = regnext(scan);
4537 	if (temp == NULL)
4538 	    break;
4539 	scan = temp;
4540     }
4541 
4542     if (reg_off_by_arg[OP(scan)]) {
4543 	ARG_SET(scan, val - scan);
4544     }
4545     else {
4546 	NEXT_OFF(scan) = val - scan;
4547     }
4548 }
4549 
4550 /*
4551 - regoptail - regtail on operand of first argument; nop if operandless
4552 */
4553 STATIC void
4554 S_regoptail(pTHX_ RExC_state_t *pRExC_state, regnode *p, regnode *val)
4555 {
4556     /* "Operandless" and "op != BRANCH" are synonymous in practice. */
4557     if (p == NULL || SIZE_ONLY)
4558 	return;
4559     if (PL_regkind[(U8)OP(p)] == BRANCH) {
4560 	regtail(pRExC_state, NEXTOPER(p), val);
4561     }
4562     else if ( PL_regkind[(U8)OP(p)] == BRANCHJ) {
4563 	regtail(pRExC_state, NEXTOPER(NEXTOPER(p)), val);
4564     }
4565     else
4566 	return;
4567 }
4568 
4569 /*
4570  - regcurly - a little FSA that accepts {\d+,?\d*}
4571  */
4572 STATIC I32
4573 S_regcurly(pTHX_ register const char *s)
4574 {
4575     if (*s++ != '{')
4576 	return FALSE;
4577     if (!isDIGIT(*s))
4578 	return FALSE;
4579     while (isDIGIT(*s))
4580 	s++;
4581     if (*s == ',')
4582 	s++;
4583     while (isDIGIT(*s))
4584 	s++;
4585     if (*s != '}')
4586 	return FALSE;
4587     return TRUE;
4588 }
4589 
4590 /*
4591  - regdump - dump a regexp onto Perl_debug_log in vaguely comprehensible form
4592  */
4593 void
4594 Perl_regdump(pTHX_ regexp *r)
4595 {
4596 #ifdef DEBUGGING
4597     SV *sv = sv_newmortal();
4598 
4599     (void)dumpuntil(r->program, r->program + 1, NULL, sv, 0);
4600 
4601     /* Header fields of interest. */
4602     if (r->anchored_substr)
4603 	PerlIO_printf(Perl_debug_log,
4604 		      "anchored \"%s%.*s%s\"%s at %"IVdf" ",
4605 		      PL_colors[0],
4606 		      (int)(SvCUR(r->anchored_substr) - (SvTAIL(r->anchored_substr)!=0)),
4607 		      SvPVX_const(r->anchored_substr),
4608 		      PL_colors[1],
4609 		      SvTAIL(r->anchored_substr) ? "$" : "",
4610 		      (IV)r->anchored_offset);
4611     else if (r->anchored_utf8)
4612 	PerlIO_printf(Perl_debug_log,
4613 		      "anchored utf8 \"%s%.*s%s\"%s at %"IVdf" ",
4614 		      PL_colors[0],
4615 		      (int)(SvCUR(r->anchored_utf8) - (SvTAIL(r->anchored_utf8)!=0)),
4616 		      SvPVX_const(r->anchored_utf8),
4617 		      PL_colors[1],
4618 		      SvTAIL(r->anchored_utf8) ? "$" : "",
4619 		      (IV)r->anchored_offset);
4620     if (r->float_substr)
4621 	PerlIO_printf(Perl_debug_log,
4622 		      "floating \"%s%.*s%s\"%s at %"IVdf"..%"UVuf" ",
4623 		      PL_colors[0],
4624 		      (int)(SvCUR(r->float_substr) - (SvTAIL(r->float_substr)!=0)),
4625 		      SvPVX_const(r->float_substr),
4626 		      PL_colors[1],
4627 		      SvTAIL(r->float_substr) ? "$" : "",
4628 		      (IV)r->float_min_offset, (UV)r->float_max_offset);
4629     else if (r->float_utf8)
4630 	PerlIO_printf(Perl_debug_log,
4631 		      "floating utf8 \"%s%.*s%s\"%s at %"IVdf"..%"UVuf" ",
4632 		      PL_colors[0],
4633 		      (int)(SvCUR(r->float_utf8) - (SvTAIL(r->float_utf8)!=0)),
4634 		      SvPVX_const(r->float_utf8),
4635 		      PL_colors[1],
4636 		      SvTAIL(r->float_utf8) ? "$" : "",
4637 		      (IV)r->float_min_offset, (UV)r->float_max_offset);
4638     if (r->check_substr || r->check_utf8)
4639 	PerlIO_printf(Perl_debug_log,
4640 		      r->check_substr == r->float_substr
4641 		      && r->check_utf8 == r->float_utf8
4642 		      ? "(checking floating" : "(checking anchored");
4643     if (r->reganch & ROPT_NOSCAN)
4644 	PerlIO_printf(Perl_debug_log, " noscan");
4645     if (r->reganch & ROPT_CHECK_ALL)
4646 	PerlIO_printf(Perl_debug_log, " isall");
4647     if (r->check_substr || r->check_utf8)
4648 	PerlIO_printf(Perl_debug_log, ") ");
4649 
4650     if (r->regstclass) {
4651 	regprop(sv, r->regstclass);
4652 	PerlIO_printf(Perl_debug_log, "stclass \"%s\" ", SvPVX_const(sv));
4653     }
4654     if (r->reganch & ROPT_ANCH) {
4655 	PerlIO_printf(Perl_debug_log, "anchored");
4656 	if (r->reganch & ROPT_ANCH_BOL)
4657 	    PerlIO_printf(Perl_debug_log, "(BOL)");
4658 	if (r->reganch & ROPT_ANCH_MBOL)
4659 	    PerlIO_printf(Perl_debug_log, "(MBOL)");
4660 	if (r->reganch & ROPT_ANCH_SBOL)
4661 	    PerlIO_printf(Perl_debug_log, "(SBOL)");
4662 	if (r->reganch & ROPT_ANCH_GPOS)
4663 	    PerlIO_printf(Perl_debug_log, "(GPOS)");
4664 	PerlIO_putc(Perl_debug_log, ' ');
4665     }
4666     if (r->reganch & ROPT_GPOS_SEEN)
4667 	PerlIO_printf(Perl_debug_log, "GPOS ");
4668     if (r->reganch & ROPT_SKIP)
4669 	PerlIO_printf(Perl_debug_log, "plus ");
4670     if (r->reganch & ROPT_IMPLICIT)
4671 	PerlIO_printf(Perl_debug_log, "implicit ");
4672     PerlIO_printf(Perl_debug_log, "minlen %ld ", (long) r->minlen);
4673     if (r->reganch & ROPT_EVAL_SEEN)
4674 	PerlIO_printf(Perl_debug_log, "with eval ");
4675     PerlIO_printf(Perl_debug_log, "\n");
4676     if (r->offsets) {
4677       U32 i;
4678       const U32 len = r->offsets[0];
4679       PerlIO_printf(Perl_debug_log, "Offsets: [%"UVuf"]\n\t", (UV)r->offsets[0]);
4680       for (i = 1; i <= len; i++)
4681         PerlIO_printf(Perl_debug_log, "%"UVuf"[%"UVuf"] ",
4682                       (UV)r->offsets[i*2-1],
4683                       (UV)r->offsets[i*2]);
4684       PerlIO_printf(Perl_debug_log, "\n");
4685     }
4686 #endif	/* DEBUGGING */
4687 }
4688 
4689 /*
4690 - regprop - printable representation of opcode
4691 */
4692 void
4693 Perl_regprop(pTHX_ SV *sv, regnode *o)
4694 {
4695 #ifdef DEBUGGING
4696     register int k;
4697 
4698     sv_setpvn(sv, "", 0);
4699     if (OP(o) >= reg_num)		/* regnode.type is unsigned */
4700 	/* It would be nice to FAIL() here, but this may be called from
4701 	   regexec.c, and it would be hard to supply pRExC_state. */
4702 	Perl_croak(aTHX_ "Corrupted regexp opcode");
4703     sv_catpv(sv, reg_name[OP(o)]); /* Take off const! */
4704 
4705     k = PL_regkind[(U8)OP(o)];
4706 
4707     if (k == EXACT) {
4708 	SV * const dsv = sv_2mortal(newSVpvn("", 0));
4709 	/* Using is_utf8_string() is a crude hack but it may
4710 	 * be the best for now since we have no flag "this EXACTish
4711 	 * node was UTF-8" --jhi */
4712 	const bool do_utf8 = is_utf8_string((U8*)STRING(o), STR_LEN(o));
4713 	const char * const s = do_utf8 ?
4714 	  pv_uni_display(dsv, (U8*)STRING(o), STR_LEN(o), 60,
4715 			 UNI_DISPLAY_REGEX) :
4716 	  STRING(o);
4717 	const int len = do_utf8 ?
4718 	  strlen(s) :
4719 	  STR_LEN(o);
4720 	Perl_sv_catpvf(aTHX_ sv, " <%s%.*s%s>",
4721 		       PL_colors[0],
4722 		       len, s,
4723 		       PL_colors[1]);
4724     }
4725     else if (k == CURLY) {
4726 	if (OP(o) == CURLYM || OP(o) == CURLYN || OP(o) == CURLYX)
4727 	    Perl_sv_catpvf(aTHX_ sv, "[%d]", o->flags); /* Parenth number */
4728 	Perl_sv_catpvf(aTHX_ sv, " {%d,%d}", ARG1(o), ARG2(o));
4729     }
4730     else if (k == WHILEM && o->flags)			/* Ordinal/of */
4731 	Perl_sv_catpvf(aTHX_ sv, "[%d/%d]", o->flags & 0xf, o->flags>>4);
4732     else if (k == REF || k == OPEN || k == CLOSE || k == GROUPP )
4733 	Perl_sv_catpvf(aTHX_ sv, "%d", (int)ARG(o));	/* Parenth number */
4734     else if (k == LOGICAL)
4735 	Perl_sv_catpvf(aTHX_ sv, "[%d]", o->flags);	/* 2: embedded, otherwise 1 */
4736     else if (k == ANYOF) {
4737 	int i, rangestart = -1;
4738 	const U8 flags = ANYOF_FLAGS(o);
4739 	const char * const anyofs[] = {	/* Should be synchronized with
4740 					 * ANYOF_ #xdefines in regcomp.h */
4741 	    "\\w",
4742 	    "\\W",
4743 	    "\\s",
4744 	    "\\S",
4745 	    "\\d",
4746 	    "\\D",
4747 	    "[:alnum:]",
4748 	    "[:^alnum:]",
4749 	    "[:alpha:]",
4750 	    "[:^alpha:]",
4751 	    "[:ascii:]",
4752 	    "[:^ascii:]",
4753 	    "[:ctrl:]",
4754 	    "[:^ctrl:]",
4755 	    "[:graph:]",
4756 	    "[:^graph:]",
4757 	    "[:lower:]",
4758 	    "[:^lower:]",
4759 	    "[:print:]",
4760 	    "[:^print:]",
4761 	    "[:punct:]",
4762 	    "[:^punct:]",
4763 	    "[:upper:]",
4764 	    "[:^upper:]",
4765 	    "[:xdigit:]",
4766 	    "[:^xdigit:]",
4767 	    "[:space:]",
4768 	    "[:^space:]",
4769 	    "[:blank:]",
4770 	    "[:^blank:]"
4771 	};
4772 
4773 	if (flags & ANYOF_LOCALE)
4774 	    sv_catpv(sv, "{loc}");
4775 	if (flags & ANYOF_FOLD)
4776 	    sv_catpv(sv, "{i}");
4777 	Perl_sv_catpvf(aTHX_ sv, "[%s", PL_colors[0]);
4778 	if (flags & ANYOF_INVERT)
4779 	    sv_catpv(sv, "^");
4780 	for (i = 0; i <= 256; i++) {
4781 	    if (i < 256 && ANYOF_BITMAP_TEST(o,i)) {
4782 		if (rangestart == -1)
4783 		    rangestart = i;
4784 	    } else if (rangestart != -1) {
4785 		if (i <= rangestart + 3)
4786 		    for (; rangestart < i; rangestart++)
4787 			put_byte(sv, rangestart);
4788 		else {
4789 		    put_byte(sv, rangestart);
4790 		    sv_catpv(sv, "-");
4791 		    put_byte(sv, i - 1);
4792 		}
4793 		rangestart = -1;
4794 	    }
4795 	}
4796 
4797 	if (o->flags & ANYOF_CLASS)
4798 	    for (i = 0; i < sizeof(anyofs)/sizeof(char*); i++)
4799 		if (ANYOF_CLASS_TEST(o,i))
4800 		    sv_catpv(sv, anyofs[i]);
4801 
4802 	if (flags & ANYOF_UNICODE)
4803 	    sv_catpv(sv, "{unicode}");
4804 	else if (flags & ANYOF_UNICODE_ALL)
4805 	    sv_catpv(sv, "{unicode_all}");
4806 
4807 	{
4808 	    SV *lv;
4809 	    SV * const sw = regclass_swash(o, FALSE, &lv, 0);
4810 
4811 	    if (lv) {
4812 		if (sw) {
4813 		    U8 s[UTF8_MAXBYTES_CASE+1];
4814 
4815 		    for (i = 0; i <= 256; i++) { /* just the first 256 */
4816 			uvchr_to_utf8(s, i);
4817 
4818 			if (i < 256 && swash_fetch(sw, s, TRUE)) {
4819 			    if (rangestart == -1)
4820 				rangestart = i;
4821 			} else if (rangestart != -1) {
4822 			    if (i <= rangestart + 3)
4823 				for (; rangestart < i; rangestart++) {
4824 				    const U8 * const e = uvchr_to_utf8(s,rangestart);
4825 				    U8 *p;
4826 				    for(p = s; p < e; p++)
4827 					put_byte(sv, *p);
4828 				}
4829 			    else {
4830 				const U8 *e = uvchr_to_utf8(s,rangestart);
4831 				U8 *p;
4832 				for (p = s; p < e; p++)
4833 				    put_byte(sv, *p);
4834 				sv_catpvn(sv, "-", 1);
4835 				e = uvchr_to_utf8(s, i-1);
4836 				for (p = s; p < e; p++)
4837 				    put_byte(sv, *p);
4838 				}
4839 				rangestart = -1;
4840 			    }
4841 			}
4842 
4843 		    sv_catpv(sv, "..."); /* et cetera */
4844 		}
4845 
4846 		{
4847 		    char *s = savesvpv(lv);
4848 		    char *origs = s;
4849 
4850 		    while(*s && *s != '\n') s++;
4851 
4852 		    if (*s == '\n') {
4853 			const char * const t = ++s;
4854 
4855 			while (*s) {
4856 			    if (*s == '\n')
4857 				*s = ' ';
4858 			    s++;
4859 			}
4860 			if (s[-1] == ' ')
4861 			    s[-1] = 0;
4862 
4863 			sv_catpv(sv, t);
4864 		    }
4865 
4866 		    Safefree(origs);
4867 		}
4868 	    }
4869 	}
4870 
4871 	Perl_sv_catpvf(aTHX_ sv, "%s]", PL_colors[1]);
4872     }
4873     else if (k == BRANCHJ && (OP(o) == UNLESSM || OP(o) == IFMATCH))
4874 	Perl_sv_catpvf(aTHX_ sv, "[-%d]", o->flags);
4875 #endif	/* DEBUGGING */
4876 }
4877 
4878 SV *
4879 Perl_re_intuit_string(pTHX_ regexp *prog)
4880 {				/* Assume that RE_INTUIT is set */
4881     DEBUG_r(
4882 	{
4883 	    const char * const s = SvPV_nolen_const(prog->check_substr
4884 		      ? prog->check_substr : prog->check_utf8);
4885 
4886 	    if (!PL_colorset) reginitcolors();
4887 	    PerlIO_printf(Perl_debug_log,
4888 		      "%sUsing REx %ssubstr:%s \"%s%.60s%s%s\"\n",
4889 		      PL_colors[4],
4890 		      prog->check_substr ? "" : "utf8 ",
4891 		      PL_colors[5],PL_colors[0],
4892 		      s,
4893 		      PL_colors[1],
4894 		      (strlen(s) > 60 ? "..." : ""));
4895 	} );
4896 
4897     return prog->check_substr ? prog->check_substr : prog->check_utf8;
4898 }
4899 
4900 void
4901 Perl_pregfree(pTHX_ struct regexp *r)
4902 {
4903 #ifdef DEBUGGING
4904     SV *dsv = PERL_DEBUG_PAD_ZERO(0);
4905 #endif
4906 
4907     if (!r || (--r->refcnt > 0))
4908 	return;
4909     DEBUG_r({
4910         const char *s = (r->reganch & ROPT_UTF8)
4911             ? pv_uni_display(dsv, (U8*)r->precomp, r->prelen, 60, UNI_DISPLAY_REGEX)
4912             : pv_display(dsv, r->precomp, r->prelen, 0, 60);
4913         const int len = SvCUR(dsv);
4914 	 if (!PL_colorset)
4915 	      reginitcolors();
4916 	 PerlIO_printf(Perl_debug_log,
4917 		       "%sFreeing REx:%s `%s%*.*s%s%s'\n",
4918 		       PL_colors[4],PL_colors[5],PL_colors[0],
4919 		       len, len, s,
4920 		       PL_colors[1],
4921 		       len > 60 ? "..." : "");
4922     });
4923 
4924     /* gcov results gave these as non-null 100% of the time, so there's no
4925        optimisation in checking them before calling Safefree  */
4926     Safefree(r->precomp);
4927     Safefree(r->offsets);             /* 20010421 MJD */
4928     if (RX_MATCH_COPIED(r))
4929 	Safefree(r->subbeg);
4930     if (r->substrs) {
4931 	if (r->anchored_substr)
4932 	    SvREFCNT_dec(r->anchored_substr);
4933 	if (r->anchored_utf8)
4934 	    SvREFCNT_dec(r->anchored_utf8);
4935 	if (r->float_substr)
4936 	    SvREFCNT_dec(r->float_substr);
4937 	if (r->float_utf8)
4938 	    SvREFCNT_dec(r->float_utf8);
4939 	Safefree(r->substrs);
4940     }
4941     if (r->data) {
4942 	int n = r->data->count;
4943 	PAD* new_comppad = NULL;
4944 	PAD* old_comppad;
4945 	PADOFFSET refcnt;
4946 
4947 	while (--n >= 0) {
4948           /* If you add a ->what type here, update the comment in regcomp.h */
4949 	    switch (r->data->what[n]) {
4950 	    case 's':
4951 		SvREFCNT_dec((SV*)r->data->data[n]);
4952 		break;
4953 	    case 'f':
4954 		Safefree(r->data->data[n]);
4955 		break;
4956 	    case 'p':
4957 		new_comppad = (AV*)r->data->data[n];
4958 		break;
4959 	    case 'o':
4960 		if (new_comppad == NULL)
4961 		    Perl_croak(aTHX_ "panic: pregfree comppad");
4962 		PAD_SAVE_LOCAL(old_comppad,
4963 		    /* Watch out for global destruction's random ordering. */
4964 		    (SvTYPE(new_comppad) == SVt_PVAV) ?
4965 		    		new_comppad : Null(PAD *)
4966 		);
4967 		OP_REFCNT_LOCK;
4968 		refcnt = OpREFCNT_dec((OP_4tree*)r->data->data[n]);
4969 		OP_REFCNT_UNLOCK;
4970 		if (!refcnt)
4971                     op_free((OP_4tree*)r->data->data[n]);
4972 
4973 		PAD_RESTORE_LOCAL(old_comppad);
4974 		SvREFCNT_dec((SV*)new_comppad);
4975 		new_comppad = NULL;
4976 		break;
4977 	    case 'n':
4978 	        break;
4979 	    default:
4980 		Perl_croak(aTHX_ "panic: regfree data code '%c'", r->data->what[n]);
4981 	    }
4982 	}
4983 	Safefree(r->data->what);
4984 	Safefree(r->data);
4985     }
4986     Safefree(r->startp);
4987     Safefree(r->endp);
4988     Safefree(r);
4989 }
4990 
4991 /*
4992  - regnext - dig the "next" pointer out of a node
4993  *
4994  * [Note, when REGALIGN is defined there are two places in regmatch()
4995  * that bypass this code for speed.]
4996  */
4997 regnode *
4998 Perl_regnext(pTHX_ register regnode *p)
4999 {
5000     register I32 offset;
5001 
5002     if (p == &PL_regdummy)
5003 	return(NULL);
5004 
5005     offset = (reg_off_by_arg[OP(p)] ? ARG(p) : NEXT_OFF(p));
5006     if (offset == 0)
5007 	return(NULL);
5008 
5009     return(p+offset);
5010 }
5011 
5012 STATIC void
5013 S_re_croak2(pTHX_ const char* pat1,const char* pat2,...)
5014 {
5015     va_list args;
5016     STRLEN l1 = strlen(pat1);
5017     STRLEN l2 = strlen(pat2);
5018     char buf[512];
5019     SV *msv;
5020     const char *message;
5021 
5022     if (l1 > 510)
5023 	l1 = 510;
5024     if (l1 + l2 > 510)
5025 	l2 = 510 - l1;
5026     Copy(pat1, buf, l1 , char);
5027     Copy(pat2, buf + l1, l2 , char);
5028     buf[l1 + l2] = '\n';
5029     buf[l1 + l2 + 1] = '\0';
5030 #ifdef I_STDARG
5031     /* ANSI variant takes additional second argument */
5032     va_start(args, pat2);
5033 #else
5034     va_start(args);
5035 #endif
5036     msv = vmess(buf, &args);
5037     va_end(args);
5038     message = SvPV_const(msv,l1);
5039     if (l1 > 512)
5040 	l1 = 512;
5041     Copy(message, buf, l1 , char);
5042     buf[l1-1] = '\0';			/* Overwrite \n */
5043     Perl_croak(aTHX_ "%s", buf);
5044 }
5045 
5046 /* XXX Here's a total kludge.  But we need to re-enter for swash routines. */
5047 
5048 void
5049 Perl_save_re_context(pTHX)
5050 {
5051     SAVEI32(PL_reg_flags);		/* from regexec.c */
5052     SAVEPPTR(PL_bostr);
5053     SAVEPPTR(PL_reginput);		/* String-input pointer. */
5054     SAVEPPTR(PL_regbol);		/* Beginning of input, for ^ check. */
5055     SAVEPPTR(PL_regeol);		/* End of input, for $ check. */
5056     SAVEVPTR(PL_regstartp);		/* Pointer to startp array. */
5057     SAVEVPTR(PL_regendp);		/* Ditto for endp. */
5058     SAVEVPTR(PL_reglastparen);		/* Similarly for lastparen. */
5059     SAVEVPTR(PL_reglastcloseparen);	/* Similarly for lastcloseparen. */
5060     SAVEPPTR(PL_regtill);		/* How far we are required to go. */
5061     SAVEGENERICPV(PL_reg_start_tmp);		/* from regexec.c */
5062     PL_reg_start_tmp = 0;
5063     SAVEI32(PL_reg_start_tmpl);		/* from regexec.c */
5064     PL_reg_start_tmpl = 0;
5065     SAVEVPTR(PL_regdata);
5066     SAVEI32(PL_reg_eval_set);		/* from regexec.c */
5067     SAVEI32(PL_regnarrate);		/* from regexec.c */
5068     SAVEVPTR(PL_regprogram);		/* from regexec.c */
5069     SAVEINT(PL_regindent);		/* from regexec.c */
5070     SAVEVPTR(PL_regcc);			/* from regexec.c */
5071     SAVEVPTR(PL_curcop);
5072     SAVEVPTR(PL_reg_call_cc);		/* from regexec.c */
5073     SAVEVPTR(PL_reg_re);		/* from regexec.c */
5074     SAVEPPTR(PL_reg_ganch);		/* from regexec.c */
5075     SAVESPTR(PL_reg_sv);		/* from regexec.c */
5076     SAVEBOOL(PL_reg_match_utf8);	/* from regexec.c */
5077     SAVEVPTR(PL_reg_magic);		/* from regexec.c */
5078     SAVEI32(PL_reg_oldpos);			/* from regexec.c */
5079     SAVEVPTR(PL_reg_oldcurpm);		/* from regexec.c */
5080     SAVEVPTR(PL_reg_curpm);		/* from regexec.c */
5081     SAVEPPTR(PL_reg_oldsaved);		/* old saved substr during match */
5082     PL_reg_oldsaved = Nullch;
5083     SAVEI32(PL_reg_oldsavedlen);	/* old length of saved substr during match */
5084     PL_reg_oldsavedlen = 0;
5085     SAVEI32(PL_reg_maxiter);		/* max wait until caching pos */
5086     PL_reg_maxiter = 0;
5087     SAVEI32(PL_reg_leftiter);		/* wait until caching pos */
5088     PL_reg_leftiter = 0;
5089     SAVEGENERICPV(PL_reg_poscache);	/* cache of pos of WHILEM */
5090     PL_reg_poscache = Nullch;
5091     SAVEI32(PL_reg_poscache_size);	/* size of pos cache of WHILEM */
5092     PL_reg_poscache_size = 0;
5093     SAVEPPTR(PL_regprecomp);		/* uncompiled string. */
5094     SAVEI32(PL_regnpar);		/* () count. */
5095     SAVEI32(PL_regsize);		/* from regexec.c */
5096 
5097     {
5098 	/* Save $1..$n (#18107: UTF-8 s/(\w+)/uc($1)/e); AMS 20021106. */
5099 	REGEXP *rx;
5100 
5101 	if (PL_curpm && (rx = PM_GETRE(PL_curpm))) {
5102 	    U32 i;
5103 	    for (i = 1; i <= rx->nparens; i++) {
5104 		GV *mgv;
5105 		char digits[TYPE_CHARS(long)];
5106 		sprintf(digits, "%lu", (long)i);
5107 		if ((mgv = gv_fetchpv(digits, FALSE, SVt_PV)))
5108 		    save_scalar(mgv);
5109 	    }
5110 	}
5111     }
5112 
5113 #ifdef DEBUGGING
5114     SAVEPPTR(PL_reg_starttry);		/* from regexec.c */
5115 #endif
5116 }
5117 
5118 static void
5119 clear_re(pTHX_ void *r)
5120 {
5121     ReREFCNT_dec((regexp *)r);
5122 }
5123 
5124 #ifdef DEBUGGING
5125 
5126 STATIC void
5127 S_put_byte(pTHX_ SV *sv, int c)
5128 {
5129     if (isCNTRL(c) || c == 255 || !isPRINT(c))
5130 	Perl_sv_catpvf(aTHX_ sv, "\\%o", c);
5131     else if (c == '-' || c == ']' || c == '\\' || c == '^')
5132 	Perl_sv_catpvf(aTHX_ sv, "\\%c", c);
5133     else
5134 	Perl_sv_catpvf(aTHX_ sv, "%c", c);
5135 }
5136 
5137 
5138 STATIC regnode *
5139 S_dumpuntil(pTHX_ regnode *start, regnode *node, regnode *last, SV* sv, I32 l)
5140 {
5141     register U8 op = EXACT;	/* Arbitrary non-END op. */
5142     register regnode *next;
5143 
5144     while (op != END && (!last || node < last)) {
5145 	/* While that wasn't END last time... */
5146 
5147 	NODE_ALIGN(node);
5148 	op = OP(node);
5149 	if (op == CLOSE)
5150 	    l--;
5151 	next = regnext(node);
5152 	/* Where, what. */
5153 	if (OP(node) == OPTIMIZED)
5154 	    goto after_print;
5155 	regprop(sv, node);
5156 	PerlIO_printf(Perl_debug_log, "%4"IVdf":%*s%s", (IV)(node - start),
5157 		      (int)(2*l + 1), "", SvPVX_const(sv));
5158 	if (next == NULL)		/* Next ptr. */
5159 	    PerlIO_printf(Perl_debug_log, "(0)");
5160 	else
5161 	    PerlIO_printf(Perl_debug_log, "(%"IVdf")", (IV)(next - start));
5162 	(void)PerlIO_putc(Perl_debug_log, '\n');
5163       after_print:
5164 	if (PL_regkind[(U8)op] == BRANCHJ) {
5165 	    register regnode *nnode = (OP(next) == LONGJMP
5166 				       ? regnext(next)
5167 				       : next);
5168 	    if (last && nnode > last)
5169 		nnode = last;
5170 	    node = dumpuntil(start, NEXTOPER(NEXTOPER(node)), nnode, sv, l + 1);
5171 	}
5172 	else if (PL_regkind[(U8)op] == BRANCH) {
5173 	    node = dumpuntil(start, NEXTOPER(node), next, sv, l + 1);
5174 	}
5175 	else if ( op == CURLY) {   /* "next" might be very big: optimizer */
5176 	    node = dumpuntil(start, NEXTOPER(node) + EXTRA_STEP_2ARGS,
5177 			     NEXTOPER(node) + EXTRA_STEP_2ARGS + 1, sv, l + 1);
5178 	}
5179 	else if (PL_regkind[(U8)op] == CURLY && op != CURLYX) {
5180 	    node = dumpuntil(start, NEXTOPER(node) + EXTRA_STEP_2ARGS,
5181 			     next, sv, l + 1);
5182 	}
5183 	else if ( op == PLUS || op == STAR) {
5184 	    node = dumpuntil(start, NEXTOPER(node), NEXTOPER(node) + 1, sv, l + 1);
5185 	}
5186 	else if (op == ANYOF) {
5187 	    /* arglen 1 + class block */
5188 	    node += 1 + ((ANYOF_FLAGS(node) & ANYOF_LARGE)
5189 		    ? ANYOF_CLASS_SKIP : ANYOF_SKIP);
5190 	    node = NEXTOPER(node);
5191 	}
5192 	else if (PL_regkind[(U8)op] == EXACT) {
5193             /* Literal string, where present. */
5194 	    node += NODE_SZ_STR(node) - 1;
5195 	    node = NEXTOPER(node);
5196 	}
5197 	else {
5198 	    node = NEXTOPER(node);
5199 	    node += regarglen[(U8)op];
5200 	}
5201 	if (op == CURLYX || op == OPEN)
5202 	    l++;
5203 	else if (op == WHILEM)
5204 	    l--;
5205     }
5206     return node;
5207 }
5208 
5209 #endif	/* DEBUGGING */
5210 
5211 /*
5212  * Local variables:
5213  * c-indentation-style: bsd
5214  * c-basic-offset: 4
5215  * indent-tabs-mode: t
5216  * End:
5217  *
5218  * ex: set ts=8 sts=4 sw=4 noet:
5219  */
5220