xref: /openbsd-src/gnu/usr.bin/perl/regexec.c (revision 48950c12d106c85f315112191a0228d7b83b9510)
1 /*    regexec.c
2  */
3 
4 /*
5  * 	One Ring to rule them all, One Ring to find them
6  &
7  *     [p.v of _The Lord of the Rings_, opening poem]
8  *     [p.50 of _The Lord of the Rings_, I/iii: "The Shadow of the Past"]
9  *     [p.254 of _The Lord of the Rings_, II/ii: "The Council of Elrond"]
10  */
11 
12 /* This file contains functions for executing a regular expression.  See
13  * also regcomp.c which funnily enough, contains functions for compiling
14  * a regular expression.
15  *
16  * This file is also copied at build time to ext/re/re_exec.c, where
17  * it's built with -DPERL_EXT_RE_BUILD -DPERL_EXT_RE_DEBUG -DPERL_EXT.
18  * This causes the main functions to be compiled under new names and with
19  * debugging support added, which makes "use re 'debug'" work.
20  */
21 
22 /* NOTE: this is derived from Henry Spencer's regexp code, and should not
23  * confused with the original package (see point 3 below).  Thanks, Henry!
24  */
25 
26 /* Additional note: this code is very heavily munged from Henry's version
27  * in places.  In some spots I've traded clarity for efficiency, so don't
28  * blame Henry for some of the lack of readability.
29  */
30 
31 /* The names of the functions have been changed from regcomp and
32  * regexec to  pregcomp and pregexec in order to avoid conflicts
33  * with the POSIX routines of the same names.
34 */
35 
36 #ifdef PERL_EXT_RE_BUILD
37 #include "re_top.h"
38 #endif
39 
40 /*
41  * pregcomp and pregexec -- regsub and regerror are not used in perl
42  *
43  *	Copyright (c) 1986 by University of Toronto.
44  *	Written by Henry Spencer.  Not derived from licensed software.
45  *
46  *	Permission is granted to anyone to use this software for any
47  *	purpose on any computer system, and to redistribute it freely,
48  *	subject to the following restrictions:
49  *
50  *	1. The author is not responsible for the consequences of use of
51  *		this software, no matter how awful, even if they arise
52  *		from defects in it.
53  *
54  *	2. The origin of this software must not be misrepresented, either
55  *		by explicit claim or by omission.
56  *
57  *	3. Altered versions must be plainly marked as such, and must not
58  *		be misrepresented as being the original software.
59  *
60  ****    Alterations to Henry's code are...
61  ****
62  ****    Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999,
63  ****    2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008
64  ****    by Larry Wall and others
65  ****
66  ****    You may distribute under the terms of either the GNU General Public
67  ****    License or the Artistic License, as specified in the README file.
68  *
69  * Beware that some of this code is subtly aware of the way operator
70  * precedence is structured in regular expressions.  Serious changes in
71  * regular-expression syntax might require a total rethink.
72  */
73 #include "EXTERN.h"
74 #define PERL_IN_REGEXEC_C
75 #include "perl.h"
76 
77 #ifdef PERL_IN_XSUB_RE
78 #  include "re_comp.h"
79 #else
80 #  include "regcomp.h"
81 #endif
82 
83 #define RF_tainted	1	/* tainted information used? e.g. locale */
84 #define RF_warned	2		/* warned about big count? */
85 
86 #define RF_utf8		8		/* Pattern contains multibyte chars? */
87 
88 #define UTF_PATTERN ((PL_reg_flags & RF_utf8) != 0)
89 
90 #define RS_init		1		/* eval environment created */
91 #define RS_set		2		/* replsv value is set */
92 
93 #ifndef STATIC
94 #define	STATIC	static
95 #endif
96 
97 /* Valid for non-utf8 strings, non-ANYOFV nodes only: avoids the reginclass
98  * call if there are no complications: i.e., if everything matchable is
99  * straight forward in the bitmap */
100 #define REGINCLASS(prog,p,c)  (ANYOF_FLAGS(p) ? reginclass(prog,p,c,0,0)   \
101 					      : ANYOF_BITMAP_TEST(p,*(c)))
102 
103 /*
104  * Forwards.
105  */
106 
107 #define CHR_SVLEN(sv) (utf8_target ? sv_len_utf8(sv) : SvCUR(sv))
108 #define CHR_DIST(a,b) (PL_reg_match_utf8 ? utf8_distance(a,b) : a - b)
109 
110 #define HOPc(pos,off) \
111 	(char *)(PL_reg_match_utf8 \
112 	    ? reghop3((U8*)pos, off, (U8*)(off >= 0 ? PL_regeol : PL_bostr)) \
113 	    : (U8*)(pos + off))
114 #define HOPBACKc(pos, off) \
115 	(char*)(PL_reg_match_utf8\
116 	    ? reghopmaybe3((U8*)pos, -off, (U8*)PL_bostr) \
117 	    : (pos - off >= PL_bostr)		\
118 		? (U8*)pos - off		\
119 		: NULL)
120 
121 #define HOP3(pos,off,lim) (PL_reg_match_utf8 ? reghop3((U8*)(pos), off, (U8*)(lim)) : (U8*)(pos + off))
122 #define HOP3c(pos,off,lim) ((char*)HOP3(pos,off,lim))
123 
124 /* these are unrolled below in the CCC_TRY_XXX defined */
125 #ifdef EBCDIC
126     /* Often 'str' is a hard-coded utf8 string instead of utfebcdic. so just
127      * skip the check on EBCDIC platforms */
128 #   define LOAD_UTF8_CHARCLASS(class,str) LOAD_UTF8_CHARCLASS_NO_CHECK(class)
129 #else
130 #   define LOAD_UTF8_CHARCLASS(class,str) STMT_START { \
131     if (!CAT2(PL_utf8_,class)) { \
132 	bool ok; \
133 	ENTER; save_re_context(); \
134 	ok=CAT2(is_utf8_,class)((const U8*)str); \
135 	assert(ok); assert(CAT2(PL_utf8_,class)); LEAVE; } } STMT_END
136 #endif
137 
138 /* Doesn't do an assert to verify that is correct */
139 #define LOAD_UTF8_CHARCLASS_NO_CHECK(class) STMT_START { \
140     if (!CAT2(PL_utf8_,class)) { \
141 	bool throw_away PERL_UNUSED_DECL; \
142 	ENTER; save_re_context(); \
143 	throw_away = CAT2(is_utf8_,class)((const U8*)" "); \
144 	LEAVE; } } STMT_END
145 
146 #define LOAD_UTF8_CHARCLASS_ALNUM() LOAD_UTF8_CHARCLASS(alnum,"a")
147 #define LOAD_UTF8_CHARCLASS_DIGIT() LOAD_UTF8_CHARCLASS(digit,"0")
148 #define LOAD_UTF8_CHARCLASS_SPACE() LOAD_UTF8_CHARCLASS(space," ")
149 
150 #define LOAD_UTF8_CHARCLASS_GCB()  /* Grapheme cluster boundaries */        \
151 	LOAD_UTF8_CHARCLASS(X_begin, " ");                                  \
152 	LOAD_UTF8_CHARCLASS(X_non_hangul, "A");                             \
153 	/* These are utf8 constants, and not utf-ebcdic constants, so the   \
154 	    * assert should likely and hopefully fail on an EBCDIC machine */ \
155 	LOAD_UTF8_CHARCLASS(X_extend, "\xcc\x80"); /* U+0300 */             \
156 									    \
157 	/* No asserts are done for these, in case called on an early        \
158 	    * Unicode version in which they map to nothing */               \
159 	LOAD_UTF8_CHARCLASS_NO_CHECK(X_prepend);/* U+0E40 "\xe0\xb9\x80" */ \
160 	LOAD_UTF8_CHARCLASS_NO_CHECK(X_L);	    /* U+1100 "\xe1\x84\x80" */ \
161 	LOAD_UTF8_CHARCLASS_NO_CHECK(X_LV);     /* U+AC00 "\xea\xb0\x80" */ \
162 	LOAD_UTF8_CHARCLASS_NO_CHECK(X_LVT);    /* U+AC01 "\xea\xb0\x81" */ \
163 	LOAD_UTF8_CHARCLASS_NO_CHECK(X_LV_LVT_V);/* U+AC01 "\xea\xb0\x81" */\
164 	LOAD_UTF8_CHARCLASS_NO_CHECK(X_T);      /* U+11A8 "\xe1\x86\xa8" */ \
165 	LOAD_UTF8_CHARCLASS_NO_CHECK(X_V)       /* U+1160 "\xe1\x85\xa0" */
166 
167 #define PLACEHOLDER	/* Something for the preprocessor to grab onto */
168 
169 /* The actual code for CCC_TRY, which uses several variables from the routine
170  * it's callable from.  It is designed to be the bulk of a case statement.
171  * FUNC is the macro or function to call on non-utf8 targets that indicate if
172  *      nextchr matches the class.
173  * UTF8_TEST is the whole test string to use for utf8 targets
174  * LOAD is what to use to test, and if not present to load in the swash for the
175  *	class
176  * POS_OR_NEG is either empty or ! to complement the results of FUNC or
177  *	UTF8_TEST test.
178  * The logic is: Fail if we're at the end-of-string; otherwise if the target is
179  * utf8 and a variant, load the swash if necessary and test using the utf8
180  * test.  Advance to the next character if test is ok, otherwise fail; If not
181  * utf8 or an invariant under utf8, use the non-utf8 test, and fail if it
182  * fails, or advance to the next character */
183 
184 #define _CCC_TRY_CODE(POS_OR_NEG, FUNC, UTF8_TEST, CLASS, STR)                \
185     if (locinput >= PL_regeol) {                                              \
186 	sayNO;                                                                \
187     }                                                                         \
188     if (utf8_target && UTF8_IS_CONTINUED(nextchr)) {                          \
189 	LOAD_UTF8_CHARCLASS(CLASS, STR);                                      \
190 	if (POS_OR_NEG (UTF8_TEST)) {                                         \
191 	    sayNO;                                                            \
192 	}                                                                     \
193 	locinput += PL_utf8skip[nextchr];                                     \
194 	nextchr = UCHARAT(locinput);                                          \
195 	break;                                                                \
196     }                                                                         \
197     if (POS_OR_NEG (FUNC(nextchr))) {                                         \
198 	sayNO;                                                                \
199     }                                                                         \
200     nextchr = UCHARAT(++locinput);                                            \
201     break;
202 
203 /* Handle the non-locale cases for a character class and its complement.  It
204  * calls _CCC_TRY_CODE with a ! to complement the test for the character class.
205  * This is because that code fails when the test succeeds, so we want to have
206  * the test fail so that the code succeeds.  The swash is stored in a
207  * predictable PL_ place */
208 #define _CCC_TRY_NONLOCALE(NAME,  NNAME,  FUNC,                               \
209 	                   CLASS, STR)                                        \
210     case NAME:                                                                \
211 	_CCC_TRY_CODE( !, FUNC,                                               \
212 		          cBOOL(swash_fetch(CAT2(PL_utf8_,CLASS),             \
213 			                    (U8*)locinput, TRUE)),            \
214 		          CLASS, STR)                                         \
215     case NNAME:                                                               \
216 	_CCC_TRY_CODE(  PLACEHOLDER , FUNC,                                   \
217 		          cBOOL(swash_fetch(CAT2(PL_utf8_,CLASS),             \
218 			                    (U8*)locinput, TRUE)),            \
219 		          CLASS, STR)                                         \
220 
221 /* Generate the case statements for both locale and non-locale character
222  * classes in regmatch for classes that don't have special unicode semantics.
223  * Locales don't use an immediate swash, but an intermediary special locale
224  * function that is called on the pointer to the current place in the input
225  * string.  That function will resolve to needing the same swash.  One might
226  * think that because we don't know what the locale will match, we shouldn't
227  * check with the swash loading function that it loaded properly; ie, that we
228  * should use LOAD_UTF8_CHARCLASS_NO_CHECK for those, but what is passed to the
229  * regular LOAD_UTF8_CHARCLASS is in non-locale terms, and so locale is
230  * irrelevant here */
231 #define CCC_TRY(NAME,  NNAME,  FUNC,                                          \
232 	        NAMEL, NNAMEL, LCFUNC, LCFUNC_utf8,                           \
233 	        NAMEA, NNAMEA, FUNCA,                                         \
234 		CLASS, STR)                                                   \
235     case NAMEL:                                                               \
236 	PL_reg_flags |= RF_tainted;                                           \
237 	_CCC_TRY_CODE( !, LCFUNC, LCFUNC_utf8((U8*)locinput), CLASS, STR)     \
238     case NNAMEL:                                                              \
239 	PL_reg_flags |= RF_tainted;                                           \
240 	_CCC_TRY_CODE( PLACEHOLDER, LCFUNC, LCFUNC_utf8((U8*)locinput),       \
241 		       CLASS, STR)                                            \
242     case NAMEA:                                                               \
243 	if (locinput >= PL_regeol || ! FUNCA(nextchr)) {                      \
244 	    sayNO;                                                            \
245 	}                                                                     \
246 	/* Matched a utf8-invariant, so don't have to worry about utf8 */     \
247 	nextchr = UCHARAT(++locinput);                                        \
248 	break;                                                                \
249     case NNAMEA:                                                              \
250 	if (locinput >= PL_regeol || FUNCA(nextchr)) {                        \
251 	    sayNO;                                                            \
252 	}                                                                     \
253 	if (utf8_target) {                                                    \
254 	    locinput += PL_utf8skip[nextchr];                                 \
255 	    nextchr = UCHARAT(locinput);                                      \
256 	}                                                                     \
257 	else {                                                                \
258 	    nextchr = UCHARAT(++locinput);                                    \
259 	}                                                                     \
260 	break;                                                                \
261     /* Generate the non-locale cases */                                       \
262     _CCC_TRY_NONLOCALE(NAME, NNAME, FUNC, CLASS, STR)
263 
264 /* This is like CCC_TRY, but has an extra set of parameters for generating case
265  * statements to handle separate Unicode semantics nodes */
266 #define CCC_TRY_U(NAME,  NNAME,  FUNC,                                         \
267 		  NAMEL, NNAMEL, LCFUNC, LCFUNC_utf8,                          \
268 	          NAMEU, NNAMEU, FUNCU,                                        \
269 	          NAMEA, NNAMEA, FUNCA,                                        \
270 	          CLASS, STR)                                                  \
271     CCC_TRY(NAME, NNAME, FUNC,                                                 \
272 	    NAMEL, NNAMEL, LCFUNC, LCFUNC_utf8,                                \
273 	    NAMEA, NNAMEA, FUNCA,                                              \
274 	    CLASS, STR)                                                        \
275     _CCC_TRY_NONLOCALE(NAMEU, NNAMEU, FUNCU, CLASS, STR)
276 
277 /* TODO: Combine JUMPABLE and HAS_TEXT to cache OP(rn) */
278 
279 /* for use after a quantifier and before an EXACT-like node -- japhy */
280 /* it would be nice to rework regcomp.sym to generate this stuff. sigh
281  *
282  * NOTE that *nothing* that affects backtracking should be in here, specifically
283  * VERBS must NOT be included. JUMPABLE is used to determine  if we can ignore a
284  * node that is in between two EXACT like nodes when ascertaining what the required
285  * "follow" character is. This should probably be moved to regex compile time
286  * although it may be done at run time beause of the REF possibility - more
287  * investigation required. -- demerphq
288 */
289 #define JUMPABLE(rn) (      \
290     OP(rn) == OPEN ||       \
291     (OP(rn) == CLOSE && (!cur_eval || cur_eval->u.eval.close_paren != ARG(rn))) || \
292     OP(rn) == EVAL ||   \
293     OP(rn) == SUSPEND || OP(rn) == IFMATCH || \
294     OP(rn) == PLUS || OP(rn) == MINMOD || \
295     OP(rn) == KEEPS || \
296     (PL_regkind[OP(rn)] == CURLY && ARG1(rn) > 0) \
297 )
298 #define IS_EXACT(rn) (PL_regkind[OP(rn)] == EXACT)
299 
300 #define HAS_TEXT(rn) ( IS_EXACT(rn) || PL_regkind[OP(rn)] == REF )
301 
302 #if 0
303 /* Currently these are only used when PL_regkind[OP(rn)] == EXACT so
304    we don't need this definition. */
305 #define IS_TEXT(rn)   ( OP(rn)==EXACT   || OP(rn)==REF   || OP(rn)==NREF   )
306 #define IS_TEXTF(rn)  ( OP(rn)==EXACTFU || OP(rn)==EXACTFU_SS || OP(rn)==EXACTFU_TRICKYFOLD || OP(rn)==EXACTFA || OP(rn)==EXACTF || OP(rn)==REFF  || OP(rn)==NREFF )
307 #define IS_TEXTFL(rn) ( OP(rn)==EXACTFL || OP(rn)==REFFL || OP(rn)==NREFFL )
308 
309 #else
310 /* ... so we use this as its faster. */
311 #define IS_TEXT(rn)   ( OP(rn)==EXACT   )
312 #define IS_TEXTFU(rn)  ( OP(rn)==EXACTFU || OP(rn)==EXACTFU_SS || OP(rn)==EXACTFU_TRICKYFOLD || OP(rn) == EXACTFA)
313 #define IS_TEXTF(rn)  ( OP(rn)==EXACTF  )
314 #define IS_TEXTFL(rn) ( OP(rn)==EXACTFL )
315 
316 #endif
317 
318 /*
319   Search for mandatory following text node; for lookahead, the text must
320   follow but for lookbehind (rn->flags != 0) we skip to the next step.
321 */
322 #define FIND_NEXT_IMPT(rn) STMT_START { \
323     while (JUMPABLE(rn)) { \
324 	const OPCODE type = OP(rn); \
325 	if (type == SUSPEND || PL_regkind[type] == CURLY) \
326 	    rn = NEXTOPER(NEXTOPER(rn)); \
327 	else if (type == PLUS) \
328 	    rn = NEXTOPER(rn); \
329 	else if (type == IFMATCH) \
330 	    rn = (rn->flags == 0) ? NEXTOPER(NEXTOPER(rn)) : rn + ARG(rn); \
331 	else rn += NEXT_OFF(rn); \
332     } \
333 } STMT_END
334 
335 
336 static void restore_pos(pTHX_ void *arg);
337 
338 #define REGCP_PAREN_ELEMS 4
339 #define REGCP_OTHER_ELEMS 5
340 #define REGCP_FRAME_ELEMS 1
341 /* REGCP_FRAME_ELEMS are not part of the REGCP_OTHER_ELEMS and
342  * are needed for the regexp context stack bookkeeping. */
343 
344 STATIC CHECKPOINT
345 S_regcppush(pTHX_ I32 parenfloor)
346 {
347     dVAR;
348     const int retval = PL_savestack_ix;
349     const int paren_elems_to_push = (PL_regsize - parenfloor) * REGCP_PAREN_ELEMS;
350     const UV total_elems = paren_elems_to_push + REGCP_OTHER_ELEMS;
351     const UV elems_shifted = total_elems << SAVE_TIGHT_SHIFT;
352     int p;
353     GET_RE_DEBUG_FLAGS_DECL;
354 
355     if (paren_elems_to_push < 0)
356 	Perl_croak(aTHX_ "panic: paren_elems_to_push, %i < 0",
357 		   paren_elems_to_push);
358 
359     if ((elems_shifted >> SAVE_TIGHT_SHIFT) != total_elems)
360 	Perl_croak(aTHX_ "panic: paren_elems_to_push offset %"UVuf
361 		   " out of range (%lu-%ld)",
362 		   total_elems, (unsigned long)PL_regsize, (long)parenfloor);
363 
364     SSGROW(total_elems + REGCP_FRAME_ELEMS);
365 
366     for (p = PL_regsize; p > parenfloor; p--) {
367 /* REGCP_PARENS_ELEMS are pushed per pairs of parentheses. */
368 	SSPUSHINT(PL_regoffs[p].end);
369 	SSPUSHINT(PL_regoffs[p].start);
370 	SSPUSHPTR(PL_reg_start_tmp[p]);
371 	SSPUSHINT(p);
372 	DEBUG_BUFFERS_r(PerlIO_printf(Perl_debug_log,
373 	  "     saving \\%"UVuf" %"IVdf"(%"IVdf")..%"IVdf"\n",
374 		      (UV)p, (IV)PL_regoffs[p].start,
375 		      (IV)(PL_reg_start_tmp[p] - PL_bostr),
376 		      (IV)PL_regoffs[p].end
377 	));
378     }
379 /* REGCP_OTHER_ELEMS are pushed in any case, parentheses or no. */
380     SSPUSHPTR(PL_regoffs);
381     SSPUSHINT(PL_regsize);
382     SSPUSHINT(*PL_reglastparen);
383     SSPUSHINT(*PL_reglastcloseparen);
384     SSPUSHPTR(PL_reginput);
385     SSPUSHUV(SAVEt_REGCONTEXT | elems_shifted); /* Magic cookie. */
386 
387     return retval;
388 }
389 
390 /* These are needed since we do not localize EVAL nodes: */
391 #define REGCP_SET(cp)                                           \
392     DEBUG_STATE_r(                                              \
393             PerlIO_printf(Perl_debug_log,		        \
394 	        "  Setting an EVAL scope, savestack=%"IVdf"\n",	\
395 	        (IV)PL_savestack_ix));                          \
396     cp = PL_savestack_ix
397 
398 #define REGCP_UNWIND(cp)                                        \
399     DEBUG_STATE_r(                                              \
400         if (cp != PL_savestack_ix) 		                \
401     	    PerlIO_printf(Perl_debug_log,		        \
402 		"  Clearing an EVAL scope, savestack=%"IVdf"..%"IVdf"\n", \
403 	        (IV)(cp), (IV)PL_savestack_ix));                \
404     regcpblow(cp)
405 
406 STATIC char *
407 S_regcppop(pTHX_ const regexp *rex)
408 {
409     dVAR;
410     UV i;
411     char *input;
412     GET_RE_DEBUG_FLAGS_DECL;
413 
414     PERL_ARGS_ASSERT_REGCPPOP;
415 
416     /* Pop REGCP_OTHER_ELEMS before the parentheses loop starts. */
417     i = SSPOPUV;
418     assert((i & SAVE_MASK) == SAVEt_REGCONTEXT); /* Check that the magic cookie is there. */
419     i >>= SAVE_TIGHT_SHIFT; /* Parentheses elements to pop. */
420     input = (char *) SSPOPPTR;
421     *PL_reglastcloseparen = SSPOPINT;
422     *PL_reglastparen = SSPOPINT;
423     PL_regsize = SSPOPINT;
424     PL_regoffs=(regexp_paren_pair *) SSPOPPTR;
425 
426     i -= REGCP_OTHER_ELEMS;
427     /* Now restore the parentheses context. */
428     for ( ; i > 0; i -= REGCP_PAREN_ELEMS) {
429 	I32 tmps;
430 	U32 paren = (U32)SSPOPINT;
431 	PL_reg_start_tmp[paren] = (char *) SSPOPPTR;
432 	PL_regoffs[paren].start = SSPOPINT;
433 	tmps = SSPOPINT;
434 	if (paren <= *PL_reglastparen)
435 	    PL_regoffs[paren].end = tmps;
436 	DEBUG_BUFFERS_r(
437 	    PerlIO_printf(Perl_debug_log,
438 			  "     restoring \\%"UVuf" to %"IVdf"(%"IVdf")..%"IVdf"%s\n",
439 			  (UV)paren, (IV)PL_regoffs[paren].start,
440 			  (IV)(PL_reg_start_tmp[paren] - PL_bostr),
441 			  (IV)PL_regoffs[paren].end,
442 			  (paren > *PL_reglastparen ? "(no)" : ""));
443 	);
444     }
445     DEBUG_BUFFERS_r(
446 	if (*PL_reglastparen + 1 <= rex->nparens) {
447 	    PerlIO_printf(Perl_debug_log,
448 			  "     restoring \\%"IVdf"..\\%"IVdf" to undef\n",
449 			  (IV)(*PL_reglastparen + 1), (IV)rex->nparens);
450 	}
451     );
452 #if 1
453     /* It would seem that the similar code in regtry()
454      * already takes care of this, and in fact it is in
455      * a better location to since this code can #if 0-ed out
456      * but the code in regtry() is needed or otherwise tests
457      * requiring null fields (pat.t#187 and split.t#{13,14}
458      * (as of patchlevel 7877)  will fail.  Then again,
459      * this code seems to be necessary or otherwise
460      * this erroneously leaves $1 defined: "1" =~ /^(?:(\d)x)?\d$/
461      * --jhi updated by dapm */
462     for (i = *PL_reglastparen + 1; i <= rex->nparens; i++) {
463 	if (i > PL_regsize)
464 	    PL_regoffs[i].start = -1;
465 	PL_regoffs[i].end = -1;
466     }
467 #endif
468     return input;
469 }
470 
471 #define regcpblow(cp) LEAVE_SCOPE(cp)	/* Ignores regcppush()ed data. */
472 
473 /*
474  * pregexec and friends
475  */
476 
477 #ifndef PERL_IN_XSUB_RE
478 /*
479  - pregexec - match a regexp against a string
480  */
481 I32
482 Perl_pregexec(pTHX_ REGEXP * const prog, char* stringarg, register char *strend,
483 	 char *strbeg, I32 minend, SV *screamer, U32 nosave)
484 /* strend: pointer to null at end of string */
485 /* strbeg: real beginning of string */
486 /* minend: end of match must be >=minend after stringarg. */
487 /* nosave: For optimizations. */
488 {
489     PERL_ARGS_ASSERT_PREGEXEC;
490 
491     return
492 	regexec_flags(prog, stringarg, strend, strbeg, minend, screamer, NULL,
493 		      nosave ? 0 : REXEC_COPY_STR);
494 }
495 #endif
496 
497 /*
498  * Need to implement the following flags for reg_anch:
499  *
500  * USE_INTUIT_NOML		- Useful to call re_intuit_start() first
501  * USE_INTUIT_ML
502  * INTUIT_AUTORITATIVE_NOML	- Can trust a positive answer
503  * INTUIT_AUTORITATIVE_ML
504  * INTUIT_ONCE_NOML		- Intuit can match in one location only.
505  * INTUIT_ONCE_ML
506  *
507  * Another flag for this function: SECOND_TIME (so that float substrs
508  * with giant delta may be not rechecked).
509  */
510 
511 /* Assumptions: if ANCH_GPOS, then strpos is anchored. XXXX Check GPOS logic */
512 
513 /* If SCREAM, then SvPVX_const(sv) should be compatible with strpos and strend.
514    Otherwise, only SvCUR(sv) is used to get strbeg. */
515 
516 /* XXXX We assume that strpos is strbeg unless sv. */
517 
518 /* XXXX Some places assume that there is a fixed substring.
519 	An update may be needed if optimizer marks as "INTUITable"
520 	RExen without fixed substrings.  Similarly, it is assumed that
521 	lengths of all the strings are no more than minlen, thus they
522 	cannot come from lookahead.
523 	(Or minlen should take into account lookahead.)
524   NOTE: Some of this comment is not correct. minlen does now take account
525   of lookahead/behind. Further research is required. -- demerphq
526 
527 */
528 
529 /* A failure to find a constant substring means that there is no need to make
530    an expensive call to REx engine, thus we celebrate a failure.  Similarly,
531    finding a substring too deep into the string means that less calls to
532    regtry() should be needed.
533 
534    REx compiler's optimizer found 4 possible hints:
535 	a) Anchored substring;
536 	b) Fixed substring;
537 	c) Whether we are anchored (beginning-of-line or \G);
538 	d) First node (of those at offset 0) which may distinguish positions;
539    We use a)b)d) and multiline-part of c), and try to find a position in the
540    string which does not contradict any of them.
541  */
542 
543 /* Most of decisions we do here should have been done at compile time.
544    The nodes of the REx which we used for the search should have been
545    deleted from the finite automaton. */
546 
547 char *
548 Perl_re_intuit_start(pTHX_ REGEXP * const rx, SV *sv, char *strpos,
549 		     char *strend, const U32 flags, re_scream_pos_data *data)
550 {
551     dVAR;
552     struct regexp *const prog = (struct regexp *)SvANY(rx);
553     register I32 start_shift = 0;
554     /* Should be nonnegative! */
555     register I32 end_shift   = 0;
556     register char *s;
557     register SV *check;
558     char *strbeg;
559     char *t;
560     const bool utf8_target = (sv && SvUTF8(sv)) ? 1 : 0; /* if no sv we have to assume bytes */
561     I32 ml_anch;
562     register char *other_last = NULL;	/* other substr checked before this */
563     char *check_at = NULL;		/* check substr found at this pos */
564     const I32 multiline = prog->extflags & RXf_PMf_MULTILINE;
565     RXi_GET_DECL(prog,progi);
566 #ifdef DEBUGGING
567     const char * const i_strpos = strpos;
568 #endif
569     GET_RE_DEBUG_FLAGS_DECL;
570 
571     PERL_ARGS_ASSERT_RE_INTUIT_START;
572 
573     RX_MATCH_UTF8_set(rx,utf8_target);
574 
575     if (RX_UTF8(rx)) {
576 	PL_reg_flags |= RF_utf8;
577     }
578     DEBUG_EXECUTE_r(
579         debug_start_match(rx, utf8_target, strpos, strend,
580             sv ? "Guessing start of match in sv for"
581                : "Guessing start of match in string for");
582 	      );
583 
584     /* CHR_DIST() would be more correct here but it makes things slow. */
585     if (prog->minlen > strend - strpos) {
586 	DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
587 			      "String too short... [re_intuit_start]\n"));
588 	goto fail;
589     }
590 
591     strbeg = (sv && SvPOK(sv)) ? strend - SvCUR(sv) : strpos;
592     PL_regeol = strend;
593     if (utf8_target) {
594 	if (!prog->check_utf8 && prog->check_substr)
595 	    to_utf8_substr(prog);
596 	check = prog->check_utf8;
597     } else {
598 	if (!prog->check_substr && prog->check_utf8)
599 	    to_byte_substr(prog);
600 	check = prog->check_substr;
601     }
602     if (check == &PL_sv_undef) {
603 	DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
604 		"Non-utf8 string cannot match utf8 check string\n"));
605 	goto fail;
606     }
607     if (prog->extflags & RXf_ANCH) {	/* Match at beg-of-str or after \n */
608 	ml_anch = !( (prog->extflags & RXf_ANCH_SINGLE)
609 		     || ( (prog->extflags & RXf_ANCH_BOL)
610 			  && !multiline ) );	/* Check after \n? */
611 
612 	if (!ml_anch) {
613 	  if ( !(prog->extflags & RXf_ANCH_GPOS) /* Checked by the caller */
614 		&& !(prog->intflags & PREGf_IMPLICIT) /* not a real BOL */
615 	       /* SvCUR is not set on references: SvRV and SvPVX_const overlap */
616 	       && sv && !SvROK(sv)
617 	       && (strpos != strbeg)) {
618 	      DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "Not at start...\n"));
619 	      goto fail;
620 	  }
621 	  if (prog->check_offset_min == prog->check_offset_max &&
622 	      !(prog->extflags & RXf_CANY_SEEN)) {
623 	    /* Substring at constant offset from beg-of-str... */
624 	    I32 slen;
625 
626 	    s = HOP3c(strpos, prog->check_offset_min, strend);
627 
628 	    if (SvTAIL(check)) {
629 		slen = SvCUR(check);	/* >= 1 */
630 
631 		if ( strend - s > slen || strend - s < slen - 1
632 		     || (strend - s == slen && strend[-1] != '\n')) {
633 		    DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "String too long...\n"));
634 		    goto fail_finish;
635 		}
636 		/* Now should match s[0..slen-2] */
637 		slen--;
638 		if (slen && (*SvPVX_const(check) != *s
639 			     || (slen > 1
640 				 && memNE(SvPVX_const(check), s, slen)))) {
641 		  report_neq:
642 		    DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "String not equal...\n"));
643 		    goto fail_finish;
644 		}
645 	    }
646 	    else if (*SvPVX_const(check) != *s
647 		     || ((slen = SvCUR(check)) > 1
648 			 && memNE(SvPVX_const(check), s, slen)))
649 		goto report_neq;
650 	    check_at = s;
651 	    goto success_at_start;
652 	  }
653 	}
654 	/* Match is anchored, but substr is not anchored wrt beg-of-str. */
655 	s = strpos;
656 	start_shift = prog->check_offset_min; /* okay to underestimate on CC */
657 	end_shift = prog->check_end_shift;
658 
659 	if (!ml_anch) {
660 	    const I32 end = prog->check_offset_max + CHR_SVLEN(check)
661 					 - (SvTAIL(check) != 0);
662 	    const I32 eshift = CHR_DIST((U8*)strend, (U8*)s) - end;
663 
664 	    if (end_shift < eshift)
665 		end_shift = eshift;
666 	}
667     }
668     else {				/* Can match at random position */
669 	ml_anch = 0;
670 	s = strpos;
671 	start_shift = prog->check_offset_min;  /* okay to underestimate on CC */
672 	end_shift = prog->check_end_shift;
673 
674 	/* end shift should be non negative here */
675     }
676 
677 #ifdef QDEBUGGING	/* 7/99: reports of failure (with the older version) */
678     if (end_shift < 0)
679 	Perl_croak(aTHX_ "panic: end_shift: %"IVdf" pattern:\n%s\n ",
680 		   (IV)end_shift, RX_PRECOMP(prog));
681 #endif
682 
683   restart:
684     /* Find a possible match in the region s..strend by looking for
685        the "check" substring in the region corrected by start/end_shift. */
686 
687     {
688         I32 srch_start_shift = start_shift;
689         I32 srch_end_shift = end_shift;
690         if (srch_start_shift < 0 && strbeg - s > srch_start_shift) {
691 	    srch_end_shift -= ((strbeg - s) - srch_start_shift);
692 	    srch_start_shift = strbeg - s;
693 	}
694     DEBUG_OPTIMISE_MORE_r({
695         PerlIO_printf(Perl_debug_log, "Check offset min: %"IVdf" Start shift: %"IVdf" End shift %"IVdf" Real End Shift: %"IVdf"\n",
696             (IV)prog->check_offset_min,
697             (IV)srch_start_shift,
698             (IV)srch_end_shift,
699             (IV)prog->check_end_shift);
700     });
701 
702     if ((flags & REXEC_SCREAM) && SvSCREAM(sv)) {
703 	I32 p = -1;			/* Internal iterator of scream. */
704 	I32 * const pp = data ? data->scream_pos : &p;
705 	const MAGIC *mg;
706 	bool found = FALSE;
707 
708 	assert(SvMAGICAL(sv));
709 	mg = mg_find(sv, PERL_MAGIC_study);
710 	assert(mg);
711 
712 	if (mg->mg_private == 1) {
713 	    found = ((U8 *)mg->mg_ptr)[BmRARE(check)] != (U8)~0;
714 	} else if (mg->mg_private == 2) {
715 	    found = ((U16 *)mg->mg_ptr)[BmRARE(check)] != (U16)~0;
716 	} else {
717 	    assert (mg->mg_private == 4);
718 	    found = ((U32 *)mg->mg_ptr)[BmRARE(check)] != (U32)~0;
719 	}
720 
721 	if (found
722 	    || ( BmRARE(check) == '\n'
723 		 && (BmPREVIOUS(check) == SvCUR(check) - 1)
724 		 && SvTAIL(check) ))
725 	    s = screaminstr(sv, check,
726 			    srch_start_shift + (s - strbeg), srch_end_shift, pp, 0);
727 	else
728 	    goto fail_finish;
729 	/* we may be pointing at the wrong string */
730 	if (s && RXp_MATCH_COPIED(prog))
731 	    s = strbeg + (s - SvPVX_const(sv));
732 	if (data)
733 	    *data->scream_olds = s;
734     }
735     else {
736         U8* start_point;
737         U8* end_point;
738         if (prog->extflags & RXf_CANY_SEEN) {
739             start_point= (U8*)(s + srch_start_shift);
740             end_point= (U8*)(strend - srch_end_shift);
741         } else {
742 	    start_point= HOP3(s, srch_start_shift, srch_start_shift < 0 ? strbeg : strend);
743             end_point= HOP3(strend, -srch_end_shift, strbeg);
744 	}
745 	DEBUG_OPTIMISE_MORE_r({
746             PerlIO_printf(Perl_debug_log, "fbm_instr len=%d str=<%.*s>\n",
747                 (int)(end_point - start_point),
748                 (int)(end_point - start_point) > 20 ? 20 : (int)(end_point - start_point),
749                 start_point);
750         });
751 
752 	s = fbm_instr( start_point, end_point,
753 		      check, multiline ? FBMrf_MULTILINE : 0);
754     }
755     }
756     /* Update the count-of-usability, remove useless subpatterns,
757 	unshift s.  */
758 
759     DEBUG_EXECUTE_r({
760         RE_PV_QUOTED_DECL(quoted, utf8_target, PERL_DEBUG_PAD_ZERO(0),
761             SvPVX_const(check), RE_SV_DUMPLEN(check), 30);
762         PerlIO_printf(Perl_debug_log, "%s %s substr %s%s%s",
763 			  (s ? "Found" : "Did not find"),
764 	    (check == (utf8_target ? prog->anchored_utf8 : prog->anchored_substr)
765 	        ? "anchored" : "floating"),
766 	    quoted,
767 	    RE_SV_TAIL(check),
768 	    (s ? " at offset " : "...\n") );
769     });
770 
771     if (!s)
772 	goto fail_finish;
773     /* Finish the diagnostic message */
774     DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "%ld...\n", (long)(s - i_strpos)) );
775 
776     /* XXX dmq: first branch is for positive lookbehind...
777        Our check string is offset from the beginning of the pattern.
778        So we need to do any stclass tests offset forward from that
779        point. I think. :-(
780      */
781 
782 
783 
784     check_at=s;
785 
786 
787     /* Got a candidate.  Check MBOL anchoring, and the *other* substr.
788        Start with the other substr.
789        XXXX no SCREAM optimization yet - and a very coarse implementation
790        XXXX /ttx+/ results in anchored="ttx", floating="x".  floating will
791 		*always* match.  Probably should be marked during compile...
792        Probably it is right to do no SCREAM here...
793      */
794 
795     if (utf8_target ? (prog->float_utf8 && prog->anchored_utf8)
796                 : (prog->float_substr && prog->anchored_substr))
797     {
798 	/* Take into account the "other" substring. */
799 	/* XXXX May be hopelessly wrong for UTF... */
800 	if (!other_last)
801 	    other_last = strpos;
802 	if (check == (utf8_target ? prog->float_utf8 : prog->float_substr)) {
803 	  do_other_anchored:
804 	    {
805 		char * const last = HOP3c(s, -start_shift, strbeg);
806 		char *last1, *last2;
807 		char * const saved_s = s;
808 		SV* must;
809 
810 		t = s - prog->check_offset_max;
811 		if (s - strpos > prog->check_offset_max  /* signed-corrected t > strpos */
812 		    && (!utf8_target
813 			|| ((t = (char*)reghopmaybe3((U8*)s, -(prog->check_offset_max), (U8*)strpos))
814 			    && t > strpos)))
815 		    NOOP;
816 		else
817 		    t = strpos;
818 		t = HOP3c(t, prog->anchored_offset, strend);
819 		if (t < other_last)	/* These positions already checked */
820 		    t = other_last;
821 		last2 = last1 = HOP3c(strend, -prog->minlen, strbeg);
822 		if (last < last1)
823 		    last1 = last;
824                 /* XXXX It is not documented what units *_offsets are in.
825                    We assume bytes, but this is clearly wrong.
826                    Meaning this code needs to be carefully reviewed for errors.
827                    dmq.
828                   */
829 
830 		/* On end-of-str: see comment below. */
831 		must = utf8_target ? prog->anchored_utf8 : prog->anchored_substr;
832 		if (must == &PL_sv_undef) {
833 		    s = (char*)NULL;
834 		    DEBUG_r(must = prog->anchored_utf8);	/* for debug */
835 		}
836 		else
837 		    s = fbm_instr(
838 			(unsigned char*)t,
839 			HOP3(HOP3(last1, prog->anchored_offset, strend)
840 				+ SvCUR(must), -(SvTAIL(must)!=0), strbeg),
841 			must,
842 			multiline ? FBMrf_MULTILINE : 0
843 		    );
844                 DEBUG_EXECUTE_r({
845                     RE_PV_QUOTED_DECL(quoted, utf8_target, PERL_DEBUG_PAD_ZERO(0),
846                         SvPVX_const(must), RE_SV_DUMPLEN(must), 30);
847                     PerlIO_printf(Perl_debug_log, "%s anchored substr %s%s",
848 			(s ? "Found" : "Contradicts"),
849                         quoted, RE_SV_TAIL(must));
850                 });
851 
852 
853 		if (!s) {
854 		    if (last1 >= last2) {
855 			DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
856 						", giving up...\n"));
857 			goto fail_finish;
858 		    }
859 		    DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
860 			", trying floating at offset %ld...\n",
861 			(long)(HOP3c(saved_s, 1, strend) - i_strpos)));
862 		    other_last = HOP3c(last1, prog->anchored_offset+1, strend);
863 		    s = HOP3c(last, 1, strend);
864 		    goto restart;
865 		}
866 		else {
867 		    DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, " at offset %ld...\n",
868 			  (long)(s - i_strpos)));
869 		    t = HOP3c(s, -prog->anchored_offset, strbeg);
870 		    other_last = HOP3c(s, 1, strend);
871 		    s = saved_s;
872 		    if (t == strpos)
873 			goto try_at_start;
874 		    goto try_at_offset;
875 		}
876 	    }
877 	}
878 	else {		/* Take into account the floating substring. */
879 	    char *last, *last1;
880 	    char * const saved_s = s;
881 	    SV* must;
882 
883 	    t = HOP3c(s, -start_shift, strbeg);
884 	    last1 = last =
885 		HOP3c(strend, -prog->minlen + prog->float_min_offset, strbeg);
886 	    if (CHR_DIST((U8*)last, (U8*)t) > prog->float_max_offset)
887 		last = HOP3c(t, prog->float_max_offset, strend);
888 	    s = HOP3c(t, prog->float_min_offset, strend);
889 	    if (s < other_last)
890 		s = other_last;
891  /* XXXX It is not documented what units *_offsets are in.  Assume bytes.  */
892 	    must = utf8_target ? prog->float_utf8 : prog->float_substr;
893 	    /* fbm_instr() takes into account exact value of end-of-str
894 	       if the check is SvTAIL(ed).  Since false positives are OK,
895 	       and end-of-str is not later than strend we are OK. */
896 	    if (must == &PL_sv_undef) {
897 		s = (char*)NULL;
898 		DEBUG_r(must = prog->float_utf8);	/* for debug message */
899 	    }
900 	    else
901 		s = fbm_instr((unsigned char*)s,
902 			      (unsigned char*)last + SvCUR(must)
903 				  - (SvTAIL(must)!=0),
904 			      must, multiline ? FBMrf_MULTILINE : 0);
905 	    DEBUG_EXECUTE_r({
906 	        RE_PV_QUOTED_DECL(quoted, utf8_target, PERL_DEBUG_PAD_ZERO(0),
907 	            SvPVX_const(must), RE_SV_DUMPLEN(must), 30);
908 	        PerlIO_printf(Perl_debug_log, "%s floating substr %s%s",
909 		    (s ? "Found" : "Contradicts"),
910 		    quoted, RE_SV_TAIL(must));
911             });
912 	    if (!s) {
913 		if (last1 == last) {
914 		    DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
915 					    ", giving up...\n"));
916 		    goto fail_finish;
917 		}
918 		DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
919 		    ", trying anchored starting at offset %ld...\n",
920 		    (long)(saved_s + 1 - i_strpos)));
921 		other_last = last;
922 		s = HOP3c(t, 1, strend);
923 		goto restart;
924 	    }
925 	    else {
926 		DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, " at offset %ld...\n",
927 		      (long)(s - i_strpos)));
928 		other_last = s; /* Fix this later. --Hugo */
929 		s = saved_s;
930 		if (t == strpos)
931 		    goto try_at_start;
932 		goto try_at_offset;
933 	    }
934 	}
935     }
936 
937 
938     t= (char*)HOP3( s, -prog->check_offset_max, (prog->check_offset_max<0) ? strend : strpos);
939 
940     DEBUG_OPTIMISE_MORE_r(
941         PerlIO_printf(Perl_debug_log,
942             "Check offset min:%"IVdf" max:%"IVdf" S:%"IVdf" t:%"IVdf" D:%"IVdf" end:%"IVdf"\n",
943             (IV)prog->check_offset_min,
944             (IV)prog->check_offset_max,
945             (IV)(s-strpos),
946             (IV)(t-strpos),
947             (IV)(t-s),
948             (IV)(strend-strpos)
949         )
950     );
951 
952     if (s - strpos > prog->check_offset_max  /* signed-corrected t > strpos */
953         && (!utf8_target
954 	    || ((t = (char*)reghopmaybe3((U8*)s, -prog->check_offset_max, (U8*) ((prog->check_offset_max<0) ? strend : strpos)))
955 		 && t > strpos)))
956     {
957 	/* Fixed substring is found far enough so that the match
958 	   cannot start at strpos. */
959       try_at_offset:
960 	if (ml_anch && t[-1] != '\n') {
961 	    /* Eventually fbm_*() should handle this, but often
962 	       anchored_offset is not 0, so this check will not be wasted. */
963 	    /* XXXX In the code below we prefer to look for "^" even in
964 	       presence of anchored substrings.  And we search even
965 	       beyond the found float position.  These pessimizations
966 	       are historical artefacts only.  */
967 	  find_anchor:
968 	    while (t < strend - prog->minlen) {
969 		if (*t == '\n') {
970 		    if (t < check_at - prog->check_offset_min) {
971 			if (utf8_target ? prog->anchored_utf8 : prog->anchored_substr) {
972 			    /* Since we moved from the found position,
973 			       we definitely contradict the found anchored
974 			       substr.  Due to the above check we do not
975 			       contradict "check" substr.
976 			       Thus we can arrive here only if check substr
977 			       is float.  Redo checking for "other"=="fixed".
978 			     */
979 			    strpos = t + 1;
980 			    DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "Found /%s^%s/m at offset %ld, rescanning for anchored from offset %ld...\n",
981 				PL_colors[0], PL_colors[1], (long)(strpos - i_strpos), (long)(strpos - i_strpos + prog->anchored_offset)));
982 			    goto do_other_anchored;
983 			}
984 			/* We don't contradict the found floating substring. */
985 			/* XXXX Why not check for STCLASS? */
986 			s = t + 1;
987 			DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "Found /%s^%s/m at offset %ld...\n",
988 			    PL_colors[0], PL_colors[1], (long)(s - i_strpos)));
989 			goto set_useful;
990 		    }
991 		    /* Position contradicts check-string */
992 		    /* XXXX probably better to look for check-string
993 		       than for "\n", so one should lower the limit for t? */
994 		    DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "Found /%s^%s/m, restarting lookup for check-string at offset %ld...\n",
995 			PL_colors[0], PL_colors[1], (long)(t + 1 - i_strpos)));
996 		    other_last = strpos = s = t + 1;
997 		    goto restart;
998 		}
999 		t++;
1000 	    }
1001 	    DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "Did not find /%s^%s/m...\n",
1002 			PL_colors[0], PL_colors[1]));
1003 	    goto fail_finish;
1004 	}
1005 	else {
1006 	    DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "Starting position does not contradict /%s^%s/m...\n",
1007 			PL_colors[0], PL_colors[1]));
1008 	}
1009 	s = t;
1010       set_useful:
1011 	++BmUSEFUL(utf8_target ? prog->check_utf8 : prog->check_substr);	/* hooray/5 */
1012     }
1013     else {
1014 	/* The found string does not prohibit matching at strpos,
1015 	   - no optimization of calling REx engine can be performed,
1016 	   unless it was an MBOL and we are not after MBOL,
1017 	   or a future STCLASS check will fail this. */
1018       try_at_start:
1019 	/* Even in this situation we may use MBOL flag if strpos is offset
1020 	   wrt the start of the string. */
1021 	if (ml_anch && sv && !SvROK(sv)	/* See prev comment on SvROK */
1022 	    && (strpos != strbeg) && strpos[-1] != '\n'
1023 	    /* May be due to an implicit anchor of m{.*foo}  */
1024 	    && !(prog->intflags & PREGf_IMPLICIT))
1025 	{
1026 	    t = strpos;
1027 	    goto find_anchor;
1028 	}
1029 	DEBUG_EXECUTE_r( if (ml_anch)
1030 	    PerlIO_printf(Perl_debug_log, "Position at offset %ld does not contradict /%s^%s/m...\n",
1031 			  (long)(strpos - i_strpos), PL_colors[0], PL_colors[1]);
1032 	);
1033       success_at_start:
1034 	if (!(prog->intflags & PREGf_NAUGHTY)	/* XXXX If strpos moved? */
1035 	    && (utf8_target ? (
1036 		prog->check_utf8		/* Could be deleted already */
1037 		&& --BmUSEFUL(prog->check_utf8) < 0
1038 		&& (prog->check_utf8 == prog->float_utf8)
1039 	    ) : (
1040 		prog->check_substr		/* Could be deleted already */
1041 		&& --BmUSEFUL(prog->check_substr) < 0
1042 		&& (prog->check_substr == prog->float_substr)
1043 	    )))
1044 	{
1045 	    /* If flags & SOMETHING - do not do it many times on the same match */
1046 	    DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "... Disabling check substring...\n"));
1047 	    /* XXX Does the destruction order has to change with utf8_target? */
1048 	    SvREFCNT_dec(utf8_target ? prog->check_utf8 : prog->check_substr);
1049 	    SvREFCNT_dec(utf8_target ? prog->check_substr : prog->check_utf8);
1050 	    prog->check_substr = prog->check_utf8 = NULL;	/* disable */
1051 	    prog->float_substr = prog->float_utf8 = NULL;	/* clear */
1052 	    check = NULL;			/* abort */
1053 	    s = strpos;
1054 	    /* XXXX If the check string was an implicit check MBOL, then we need to unset the relevant flag
1055 		    see http://bugs.activestate.com/show_bug.cgi?id=87173 */
1056 	    if (prog->intflags & PREGf_IMPLICIT)
1057 		prog->extflags &= ~RXf_ANCH_MBOL;
1058 	    /* XXXX This is a remnant of the old implementation.  It
1059 	            looks wasteful, since now INTUIT can use many
1060 	            other heuristics. */
1061 	    prog->extflags &= ~RXf_USE_INTUIT;
1062 	    /* XXXX What other flags might need to be cleared in this branch? */
1063 	}
1064 	else
1065 	    s = strpos;
1066     }
1067 
1068     /* Last resort... */
1069     /* XXXX BmUSEFUL already changed, maybe multiple change is meaningful... */
1070     /* trie stclasses are too expensive to use here, we are better off to
1071        leave it to regmatch itself */
1072     if (progi->regstclass && PL_regkind[OP(progi->regstclass)]!=TRIE) {
1073 	/* minlen == 0 is possible if regstclass is \b or \B,
1074 	   and the fixed substr is ''$.
1075 	   Since minlen is already taken into account, s+1 is before strend;
1076 	   accidentally, minlen >= 1 guaranties no false positives at s + 1
1077 	   even for \b or \B.  But (minlen? 1 : 0) below assumes that
1078 	   regstclass does not come from lookahead...  */
1079 	/* If regstclass takes bytelength more than 1: If charlength==1, OK.
1080 	   This leaves EXACTF-ish only, which are dealt with in find_byclass().  */
1081         const U8* const str = (U8*)STRING(progi->regstclass);
1082         const int cl_l = (PL_regkind[OP(progi->regstclass)] == EXACT
1083 		    ? CHR_DIST(str+STR_LEN(progi->regstclass), str)
1084 		    : 1);
1085 	char * endpos;
1086 	if (prog->anchored_substr || prog->anchored_utf8 || ml_anch)
1087             endpos= HOP3c(s, (prog->minlen ? cl_l : 0), strend);
1088         else if (prog->float_substr || prog->float_utf8)
1089 	    endpos= HOP3c(HOP3c(check_at, -start_shift, strbeg), cl_l, strend);
1090         else
1091             endpos= strend;
1092 
1093         DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "start_shift: %"IVdf" check_at: %"IVdf" s: %"IVdf" endpos: %"IVdf"\n",
1094 				      (IV)start_shift, (IV)(check_at - strbeg), (IV)(s - strbeg), (IV)(endpos - strbeg)));
1095 
1096 	t = s;
1097         s = find_byclass(prog, progi->regstclass, s, endpos, NULL);
1098 	if (!s) {
1099 #ifdef DEBUGGING
1100 	    const char *what = NULL;
1101 #endif
1102 	    if (endpos == strend) {
1103 		DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
1104 				"Could not match STCLASS...\n") );
1105 		goto fail;
1106 	    }
1107 	    DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
1108 				   "This position contradicts STCLASS...\n") );
1109 	    if ((prog->extflags & RXf_ANCH) && !ml_anch)
1110 		goto fail;
1111 	    /* Contradict one of substrings */
1112 	    if (prog->anchored_substr || prog->anchored_utf8) {
1113 		if ((utf8_target ? prog->anchored_utf8 : prog->anchored_substr) == check) {
1114 		    DEBUG_EXECUTE_r( what = "anchored" );
1115 		  hop_and_restart:
1116 		    s = HOP3c(t, 1, strend);
1117 		    if (s + start_shift + end_shift > strend) {
1118 			/* XXXX Should be taken into account earlier? */
1119 			DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
1120 					       "Could not match STCLASS...\n") );
1121 			goto fail;
1122 		    }
1123 		    if (!check)
1124 			goto giveup;
1125 		    DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
1126 				"Looking for %s substr starting at offset %ld...\n",
1127 				 what, (long)(s + start_shift - i_strpos)) );
1128 		    goto restart;
1129 		}
1130 		/* Have both, check_string is floating */
1131 		if (t + start_shift >= check_at) /* Contradicts floating=check */
1132 		    goto retry_floating_check;
1133 		/* Recheck anchored substring, but not floating... */
1134 		s = check_at;
1135 		if (!check)
1136 		    goto giveup;
1137 		DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
1138 			  "Looking for anchored substr starting at offset %ld...\n",
1139 			  (long)(other_last - i_strpos)) );
1140 		goto do_other_anchored;
1141 	    }
1142 	    /* Another way we could have checked stclass at the
1143                current position only: */
1144 	    if (ml_anch) {
1145 		s = t = t + 1;
1146 		if (!check)
1147 		    goto giveup;
1148 		DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
1149 			  "Looking for /%s^%s/m starting at offset %ld...\n",
1150 			  PL_colors[0], PL_colors[1], (long)(t - i_strpos)) );
1151 		goto try_at_offset;
1152 	    }
1153 	    if (!(utf8_target ? prog->float_utf8 : prog->float_substr))	/* Could have been deleted */
1154 		goto fail;
1155 	    /* Check is floating substring. */
1156 	  retry_floating_check:
1157 	    t = check_at - start_shift;
1158 	    DEBUG_EXECUTE_r( what = "floating" );
1159 	    goto hop_and_restart;
1160 	}
1161 	if (t != s) {
1162             DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
1163 			"By STCLASS: moving %ld --> %ld\n",
1164                                   (long)(t - i_strpos), (long)(s - i_strpos))
1165                    );
1166         }
1167         else {
1168             DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
1169                                   "Does not contradict STCLASS...\n");
1170                    );
1171         }
1172     }
1173   giveup:
1174     DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "%s%s:%s match at offset %ld\n",
1175 			  PL_colors[4], (check ? "Guessed" : "Giving up"),
1176 			  PL_colors[5], (long)(s - i_strpos)) );
1177     return s;
1178 
1179   fail_finish:				/* Substring not found */
1180     if (prog->check_substr || prog->check_utf8)		/* could be removed already */
1181 	BmUSEFUL(utf8_target ? prog->check_utf8 : prog->check_substr) += 5; /* hooray */
1182   fail:
1183     DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "%sMatch rejected by optimizer%s\n",
1184 			  PL_colors[4], PL_colors[5]));
1185     return NULL;
1186 }
1187 
1188 #define DECL_TRIE_TYPE(scan) \
1189     const enum { trie_plain, trie_utf8, trie_utf8_fold, trie_latin_utf8_fold } \
1190                     trie_type = ((scan->flags == EXACT) \
1191                               ? (utf8_target ? trie_utf8 : trie_plain) \
1192                               : (utf8_target ? trie_utf8_fold : trie_latin_utf8_fold))
1193 
1194 #define REXEC_TRIE_READ_CHAR(trie_type, trie, widecharmap, uc, uscan, len,          \
1195 uvc, charid, foldlen, foldbuf, uniflags) STMT_START {                               \
1196     STRLEN skiplen;                                                                 \
1197     switch (trie_type) {                                                            \
1198     case trie_utf8_fold:                                                            \
1199         if ( foldlen>0 ) {                                                          \
1200             uvc = utf8n_to_uvuni( (const U8*) uscan, UTF8_MAXLEN, &len, uniflags ); \
1201             foldlen -= len;                                                         \
1202             uscan += len;                                                           \
1203             len=0;                                                                  \
1204         } else {                                                                    \
1205             uvc = to_utf8_fold( (const U8*) uc, foldbuf, &foldlen );                \
1206             len = UTF8SKIP(uc);                                                     \
1207             skiplen = UNISKIP( uvc );                                               \
1208             foldlen -= skiplen;                                                     \
1209             uscan = foldbuf + skiplen;                                              \
1210         }                                                                           \
1211         break;                                                                      \
1212     case trie_latin_utf8_fold:                                                      \
1213         if ( foldlen>0 ) {                                                          \
1214             uvc = utf8n_to_uvuni( (const U8*) uscan, UTF8_MAXLEN, &len, uniflags ); \
1215             foldlen -= len;                                                         \
1216             uscan += len;                                                           \
1217             len=0;                                                                  \
1218         } else {                                                                    \
1219             len = 1;                                                                \
1220             uvc = _to_fold_latin1( (U8) *uc, foldbuf, &foldlen, 1);                 \
1221             skiplen = UNISKIP( uvc );                                               \
1222             foldlen -= skiplen;                                                     \
1223             uscan = foldbuf + skiplen;                                              \
1224         }                                                                           \
1225         break;                                                                      \
1226     case trie_utf8:                                                                 \
1227         uvc = utf8n_to_uvuni( (const U8*) uc, UTF8_MAXLEN, &len, uniflags );        \
1228         break;                                                                      \
1229     case trie_plain:                                                                \
1230         uvc = (UV)*uc;                                                              \
1231         len = 1;                                                                    \
1232     }                                                                               \
1233     if (uvc < 256) {                                                                \
1234         charid = trie->charmap[ uvc ];                                              \
1235     }                                                                               \
1236     else {                                                                          \
1237         charid = 0;                                                                 \
1238         if (widecharmap) {                                                          \
1239             SV** const svpp = hv_fetch(widecharmap,                                 \
1240                         (char*)&uvc, sizeof(UV), 0);                                \
1241             if (svpp)                                                               \
1242                 charid = (U16)SvIV(*svpp);                                          \
1243         }                                                                           \
1244     }                                                                               \
1245 } STMT_END
1246 
1247 #define REXEC_FBC_EXACTISH_SCAN(CoNd)                     \
1248 STMT_START {                                              \
1249     while (s <= e) {                                      \
1250 	if ( (CoNd)                                       \
1251 	     && (ln == 1 || folder(s, pat_string, ln))    \
1252 	     && (!reginfo || regtry(reginfo, &s)) )       \
1253 	    goto got_it;                                  \
1254 	s++;                                              \
1255     }                                                     \
1256 } STMT_END
1257 
1258 #define REXEC_FBC_UTF8_SCAN(CoDe)                     \
1259 STMT_START {                                          \
1260     while (s + (uskip = UTF8SKIP(s)) <= strend) {     \
1261 	CoDe                                          \
1262 	s += uskip;                                   \
1263     }                                                 \
1264 } STMT_END
1265 
1266 #define REXEC_FBC_SCAN(CoDe)                          \
1267 STMT_START {                                          \
1268     while (s < strend) {                              \
1269 	CoDe                                          \
1270 	s++;                                          \
1271     }                                                 \
1272 } STMT_END
1273 
1274 #define REXEC_FBC_UTF8_CLASS_SCAN(CoNd)               \
1275 REXEC_FBC_UTF8_SCAN(                                  \
1276     if (CoNd) {                                       \
1277 	if (tmp && (!reginfo || regtry(reginfo, &s)))  \
1278 	    goto got_it;                              \
1279 	else                                          \
1280 	    tmp = doevery;                            \
1281     }                                                 \
1282     else                                              \
1283 	tmp = 1;                                      \
1284 )
1285 
1286 #define REXEC_FBC_CLASS_SCAN(CoNd)                    \
1287 REXEC_FBC_SCAN(                                       \
1288     if (CoNd) {                                       \
1289 	if (tmp && (!reginfo || regtry(reginfo, &s)))  \
1290 	    goto got_it;                              \
1291 	else                                          \
1292 	    tmp = doevery;                            \
1293     }                                                 \
1294     else                                              \
1295 	tmp = 1;                                      \
1296 )
1297 
1298 #define REXEC_FBC_TRYIT               \
1299 if ((!reginfo || regtry(reginfo, &s))) \
1300     goto got_it
1301 
1302 #define REXEC_FBC_CSCAN(CoNdUtF8,CoNd)                         \
1303     if (utf8_target) {                                             \
1304 	REXEC_FBC_UTF8_CLASS_SCAN(CoNdUtF8);                   \
1305     }                                                          \
1306     else {                                                     \
1307 	REXEC_FBC_CLASS_SCAN(CoNd);                            \
1308     }
1309 
1310 #define REXEC_FBC_CSCAN_PRELOAD(UtFpReLoAd,CoNdUtF8,CoNd)      \
1311     if (utf8_target) {                                             \
1312 	UtFpReLoAd;                                            \
1313 	REXEC_FBC_UTF8_CLASS_SCAN(CoNdUtF8);                   \
1314     }                                                          \
1315     else {                                                     \
1316 	REXEC_FBC_CLASS_SCAN(CoNd);                            \
1317     }
1318 
1319 #define REXEC_FBC_CSCAN_TAINT(CoNdUtF8,CoNd)                   \
1320     PL_reg_flags |= RF_tainted;                                \
1321     if (utf8_target) {                                             \
1322 	REXEC_FBC_UTF8_CLASS_SCAN(CoNdUtF8);                   \
1323     }                                                          \
1324     else {                                                     \
1325 	REXEC_FBC_CLASS_SCAN(CoNd);                            \
1326     }
1327 
1328 #define DUMP_EXEC_POS(li,s,doutf8) \
1329     dump_exec_pos(li,s,(PL_regeol),(PL_bostr),(PL_reg_starttry),doutf8)
1330 
1331 
1332 #define UTF8_NOLOAD(TEST_NON_UTF8, IF_SUCCESS, IF_FAIL) \
1333 	tmp = (s != PL_bostr) ? UCHARAT(s - 1) : '\n';                         \
1334 	tmp = TEST_NON_UTF8(tmp);                                              \
1335 	REXEC_FBC_UTF8_SCAN(                                                   \
1336 	    if (tmp == ! TEST_NON_UTF8((U8) *s)) { \
1337 		tmp = !tmp;                                                    \
1338 		IF_SUCCESS;                                                    \
1339 	    }                                                                  \
1340 	    else {                                                             \
1341 		IF_FAIL;                                                       \
1342 	    }                                                                  \
1343 	);                                                                     \
1344 
1345 #define UTF8_LOAD(TeSt1_UtF8, TeSt2_UtF8, IF_SUCCESS, IF_FAIL) \
1346 	if (s == PL_bostr) {                                                   \
1347 	    tmp = '\n';                                                        \
1348 	}                                                                      \
1349 	else {                                                                 \
1350 	    U8 * const r = reghop3((U8*)s, -1, (U8*)PL_bostr);                 \
1351 	    tmp = utf8n_to_uvchr(r, UTF8SKIP(r), 0, UTF8_ALLOW_DEFAULT);       \
1352 	}                                                                      \
1353 	tmp = TeSt1_UtF8;                                                      \
1354 	LOAD_UTF8_CHARCLASS_ALNUM();                                                                \
1355 	REXEC_FBC_UTF8_SCAN(                                                   \
1356 	    if (tmp == ! (TeSt2_UtF8)) { \
1357 		tmp = !tmp;                                                    \
1358 		IF_SUCCESS;                                                    \
1359 	    }                                                                  \
1360 	    else {                                                             \
1361 		IF_FAIL;                                                       \
1362 	    }                                                                  \
1363 	);                                                                     \
1364 
1365 /* The only difference between the BOUND and NBOUND cases is that
1366  * REXEC_FBC_TRYIT is called when matched in BOUND, and when non-matched in
1367  * NBOUND.  This is accomplished by passing it in either the if or else clause,
1368  * with the other one being empty */
1369 #define FBC_BOUND(TEST_NON_UTF8, TEST1_UTF8, TEST2_UTF8) \
1370     FBC_BOUND_COMMON(UTF8_LOAD(TEST1_UTF8, TEST2_UTF8, REXEC_FBC_TRYIT, PLACEHOLDER), TEST_NON_UTF8, REXEC_FBC_TRYIT, PLACEHOLDER)
1371 
1372 #define FBC_BOUND_NOLOAD(TEST_NON_UTF8, TEST1_UTF8, TEST2_UTF8) \
1373     FBC_BOUND_COMMON(UTF8_NOLOAD(TEST_NON_UTF8, REXEC_FBC_TRYIT, PLACEHOLDER), TEST_NON_UTF8, REXEC_FBC_TRYIT, PLACEHOLDER)
1374 
1375 #define FBC_NBOUND(TEST_NON_UTF8, TEST1_UTF8, TEST2_UTF8) \
1376     FBC_BOUND_COMMON(UTF8_LOAD(TEST1_UTF8, TEST2_UTF8, PLACEHOLDER, REXEC_FBC_TRYIT), TEST_NON_UTF8, PLACEHOLDER, REXEC_FBC_TRYIT)
1377 
1378 #define FBC_NBOUND_NOLOAD(TEST_NON_UTF8, TEST1_UTF8, TEST2_UTF8) \
1379     FBC_BOUND_COMMON(UTF8_NOLOAD(TEST_NON_UTF8, PLACEHOLDER, REXEC_FBC_TRYIT), TEST_NON_UTF8, PLACEHOLDER, REXEC_FBC_TRYIT)
1380 
1381 
1382 /* Common to the BOUND and NBOUND cases.  Unfortunately the UTF8 tests need to
1383  * be passed in completely with the variable name being tested, which isn't
1384  * such a clean interface, but this is easier to read than it was before.  We
1385  * are looking for the boundary (or non-boundary between a word and non-word
1386  * character.  The utf8 and non-utf8 cases have the same logic, but the details
1387  * must be different.  Find the "wordness" of the character just prior to this
1388  * one, and compare it with the wordness of this one.  If they differ, we have
1389  * a boundary.  At the beginning of the string, pretend that the previous
1390  * character was a new-line */
1391 #define FBC_BOUND_COMMON(UTF8_CODE, TEST_NON_UTF8, IF_SUCCESS, IF_FAIL) \
1392     if (utf8_target) {                                                         \
1393 		UTF8_CODE \
1394     }                                                                          \
1395     else {  /* Not utf8 */                                                     \
1396 	tmp = (s != PL_bostr) ? UCHARAT(s - 1) : '\n';                         \
1397 	tmp = TEST_NON_UTF8(tmp);                                              \
1398 	REXEC_FBC_SCAN(                                                        \
1399 	    if (tmp == ! TEST_NON_UTF8((U8) *s)) {                             \
1400 		tmp = !tmp;                                                    \
1401 		IF_SUCCESS;                                                    \
1402 	    }                                                                  \
1403 	    else {                                                             \
1404 		IF_FAIL;                                                       \
1405 	    }                                                                  \
1406 	);                                                                     \
1407     }                                                                          \
1408     if ((!prog->minlen && tmp) && (!reginfo || regtry(reginfo, &s)))           \
1409 	goto got_it;
1410 
1411 /* We know what class REx starts with.  Try to find this position... */
1412 /* if reginfo is NULL, its a dryrun */
1413 /* annoyingly all the vars in this routine have different names from their counterparts
1414    in regmatch. /grrr */
1415 
1416 STATIC char *
1417 S_find_byclass(pTHX_ regexp * prog, const regnode *c, char *s,
1418     const char *strend, regmatch_info *reginfo)
1419 {
1420 	dVAR;
1421 	const I32 doevery = (prog->intflags & PREGf_SKIP) == 0;
1422 	char *pat_string;   /* The pattern's exactish string */
1423 	char *pat_end;	    /* ptr to end char of pat_string */
1424 	re_fold_t folder;	/* Function for computing non-utf8 folds */
1425 	const U8 *fold_array;   /* array for folding ords < 256 */
1426 	STRLEN ln;
1427 	STRLEN lnc;
1428 	register STRLEN uskip;
1429 	U8 c1;
1430 	U8 c2;
1431 	char *e;
1432 	register I32 tmp = 1;	/* Scratch variable? */
1433 	register const bool utf8_target = PL_reg_match_utf8;
1434 	UV utf8_fold_flags = 0;
1435         RXi_GET_DECL(prog,progi);
1436 
1437 	PERL_ARGS_ASSERT_FIND_BYCLASS;
1438 
1439 	/* We know what class it must start with. */
1440 	switch (OP(c)) {
1441 	case ANYOFV:
1442 	case ANYOF:
1443 	    if (utf8_target || OP(c) == ANYOFV) {
1444 		STRLEN inclasslen = strend - s;
1445 		REXEC_FBC_UTF8_CLASS_SCAN(
1446                           reginclass(prog, c, (U8*)s, &inclasslen, utf8_target));
1447 	    }
1448 	    else {
1449 		REXEC_FBC_CLASS_SCAN(REGINCLASS(prog, c, (U8*)s));
1450 	    }
1451 	    break;
1452 	case CANY:
1453 	    REXEC_FBC_SCAN(
1454 	        if (tmp && (!reginfo || regtry(reginfo, &s)))
1455 		    goto got_it;
1456 		else
1457 		    tmp = doevery;
1458 	    );
1459 	    break;
1460 
1461 	case EXACTFA:
1462 	    if (UTF_PATTERN || utf8_target) {
1463 		utf8_fold_flags = FOLDEQ_UTF8_NOMIX_ASCII;
1464 		goto do_exactf_utf8;
1465 	    }
1466 	    fold_array = PL_fold_latin1;    /* Latin1 folds are not affected by */
1467 	    folder = foldEQ_latin1;	    /* /a, except the sharp s one which */
1468 	    goto do_exactf_non_utf8;	    /* isn't dealt with by these */
1469 
1470 	case EXACTF:
1471 	    if (utf8_target) {
1472 
1473 		/* regcomp.c already folded this if pattern is in UTF-8 */
1474 		utf8_fold_flags = 0;
1475 		goto do_exactf_utf8;
1476 	    }
1477 	    fold_array = PL_fold;
1478 	    folder = foldEQ;
1479 	    goto do_exactf_non_utf8;
1480 
1481 	case EXACTFL:
1482 	    if (UTF_PATTERN || utf8_target) {
1483 		utf8_fold_flags = FOLDEQ_UTF8_LOCALE;
1484 		goto do_exactf_utf8;
1485 	    }
1486 	    fold_array = PL_fold_locale;
1487 	    folder = foldEQ_locale;
1488 	    goto do_exactf_non_utf8;
1489 
1490 	case EXACTFU_SS:
1491 	    if (UTF_PATTERN) {
1492 		utf8_fold_flags = FOLDEQ_S2_ALREADY_FOLDED;
1493 	    }
1494 	    goto do_exactf_utf8;
1495 
1496 	case EXACTFU_TRICKYFOLD:
1497 	case EXACTFU:
1498 	    if (UTF_PATTERN || utf8_target) {
1499 		utf8_fold_flags = (UTF_PATTERN) ? FOLDEQ_S2_ALREADY_FOLDED : 0;
1500 		goto do_exactf_utf8;
1501 	    }
1502 
1503 	    /* Any 'ss' in the pattern should have been replaced by regcomp,
1504 	     * so we don't have to worry here about this single special case
1505 	     * in the Latin1 range */
1506 	    fold_array = PL_fold_latin1;
1507 	    folder = foldEQ_latin1;
1508 
1509 	    /* FALL THROUGH */
1510 
1511 	do_exactf_non_utf8: /* Neither pattern nor string are UTF8, and there
1512 			       are no glitches with fold-length differences
1513 			       between the target string and pattern */
1514 
1515 	    /* The idea in the non-utf8 EXACTF* cases is to first find the
1516 	     * first character of the EXACTF* node and then, if necessary,
1517 	     * case-insensitively compare the full text of the node.  c1 is the
1518 	     * first character.  c2 is its fold.  This logic will not work for
1519 	     * Unicode semantics and the german sharp ss, which hence should
1520 	     * not be compiled into a node that gets here. */
1521 	    pat_string = STRING(c);
1522 	    ln  = STR_LEN(c);	/* length to match in octets/bytes */
1523 
1524 	    /* We know that we have to match at least 'ln' bytes (which is the
1525 	     * same as characters, since not utf8).  If we have to match 3
1526 	     * characters, and there are only 2 availabe, we know without
1527 	     * trying that it will fail; so don't start a match past the
1528 	     * required minimum number from the far end */
1529 	    e = HOP3c(strend, -((I32)ln), s);
1530 
1531 	    if (!reginfo && e < s) {
1532 		e = s;			/* Due to minlen logic of intuit() */
1533 	    }
1534 
1535 	    c1 = *pat_string;
1536 	    c2 = fold_array[c1];
1537 	    if (c1 == c2) { /* If char and fold are the same */
1538 		REXEC_FBC_EXACTISH_SCAN(*(U8*)s == c1);
1539 	    }
1540 	    else {
1541 		REXEC_FBC_EXACTISH_SCAN(*(U8*)s == c1 || *(U8*)s == c2);
1542 	    }
1543 	    break;
1544 
1545 	do_exactf_utf8:
1546 	{
1547 	    unsigned expansion;
1548 
1549 
1550 	    /* If one of the operands is in utf8, we can't use the simpler
1551 	     * folding above, due to the fact that many different characters
1552 	     * can have the same fold, or portion of a fold, or different-
1553 	     * length fold */
1554 	    pat_string = STRING(c);
1555 	    ln  = STR_LEN(c);	/* length to match in octets/bytes */
1556 	    pat_end = pat_string + ln;
1557 	    lnc = (UTF_PATTERN) /* length to match in characters */
1558 		    ? utf8_length((U8 *) pat_string, (U8 *) pat_end)
1559 		    : ln;
1560 
1561 	    /* We have 'lnc' characters to match in the pattern, but because of
1562 	     * multi-character folding, each character in the target can match
1563 	     * up to 3 characters (Unicode guarantees it will never exceed
1564 	     * this) if it is utf8-encoded; and up to 2 if not (based on the
1565 	     * fact that the Latin 1 folds are already determined, and the
1566 	     * only multi-char fold in that range is the sharp-s folding to
1567 	     * 'ss'.  Thus, a pattern character can match as little as 1/3 of a
1568 	     * string character.  Adjust lnc accordingly, rounding up, so that
1569 	     * if we need to match at least 4+1/3 chars, that really is 5. */
1570 	    expansion = (utf8_target) ? UTF8_MAX_FOLD_CHAR_EXPAND : 2;
1571 	    lnc = (lnc + expansion - 1) / expansion;
1572 
1573 	    /* As in the non-UTF8 case, if we have to match 3 characters, and
1574 	     * only 2 are left, it's guaranteed to fail, so don't start a
1575 	     * match that would require us to go beyond the end of the string
1576 	     */
1577 	    e = HOP3c(strend, -((I32)lnc), s);
1578 
1579 	    if (!reginfo && e < s) {
1580 		e = s;			/* Due to minlen logic of intuit() */
1581 	    }
1582 
1583 	    /* XXX Note that we could recalculate e to stop the loop earlier,
1584 	     * as the worst case expansion above will rarely be met, and as we
1585 	     * go along we would usually find that e moves further to the left.
1586 	     * This would happen only after we reached the point in the loop
1587 	     * where if there were no expansion we should fail.  Unclear if
1588 	     * worth the expense */
1589 
1590 	    while (s <= e) {
1591 		char *my_strend= (char *)strend;
1592 		if (foldEQ_utf8_flags(s, &my_strend, 0,  utf8_target,
1593 		      pat_string, NULL, ln, cBOOL(UTF_PATTERN), utf8_fold_flags)
1594 		    && (!reginfo || regtry(reginfo, &s)) )
1595 		{
1596 		    goto got_it;
1597 		}
1598 		s += (utf8_target) ? UTF8SKIP(s) : 1;
1599 	    }
1600 	    break;
1601 	}
1602 	case BOUNDL:
1603 	    PL_reg_flags |= RF_tainted;
1604 	    FBC_BOUND(isALNUM_LC,
1605 		      isALNUM_LC_uvchr(UNI_TO_NATIVE(tmp)),
1606 		      isALNUM_LC_utf8((U8*)s));
1607 	    break;
1608 	case NBOUNDL:
1609 	    PL_reg_flags |= RF_tainted;
1610 	    FBC_NBOUND(isALNUM_LC,
1611 		       isALNUM_LC_uvchr(UNI_TO_NATIVE(tmp)),
1612 		       isALNUM_LC_utf8((U8*)s));
1613 	    break;
1614 	case BOUND:
1615 	    FBC_BOUND(isWORDCHAR,
1616 		      isALNUM_uni(tmp),
1617 		      cBOOL(swash_fetch(PL_utf8_alnum, (U8*)s, utf8_target)));
1618 	    break;
1619 	case BOUNDA:
1620 	    FBC_BOUND_NOLOAD(isWORDCHAR_A,
1621 			     isWORDCHAR_A(tmp),
1622 			     isWORDCHAR_A((U8*)s));
1623 	    break;
1624 	case NBOUND:
1625 	    FBC_NBOUND(isWORDCHAR,
1626 		       isALNUM_uni(tmp),
1627 		       cBOOL(swash_fetch(PL_utf8_alnum, (U8*)s, utf8_target)));
1628 	    break;
1629 	case NBOUNDA:
1630 	    FBC_NBOUND_NOLOAD(isWORDCHAR_A,
1631 			      isWORDCHAR_A(tmp),
1632 			      isWORDCHAR_A((U8*)s));
1633 	    break;
1634 	case BOUNDU:
1635 	    FBC_BOUND(isWORDCHAR_L1,
1636 		      isALNUM_uni(tmp),
1637 		      cBOOL(swash_fetch(PL_utf8_alnum, (U8*)s, utf8_target)));
1638 	    break;
1639 	case NBOUNDU:
1640 	    FBC_NBOUND(isWORDCHAR_L1,
1641 		       isALNUM_uni(tmp),
1642 		       cBOOL(swash_fetch(PL_utf8_alnum, (U8*)s, utf8_target)));
1643 	    break;
1644 	case ALNUML:
1645 	    REXEC_FBC_CSCAN_TAINT(
1646 		isALNUM_LC_utf8((U8*)s),
1647 		isALNUM_LC(*s)
1648 	    );
1649 	    break;
1650 	case ALNUMU:
1651 	    REXEC_FBC_CSCAN_PRELOAD(
1652 		LOAD_UTF8_CHARCLASS_ALNUM(),
1653 		swash_fetch(PL_utf8_alnum,(U8*)s, utf8_target),
1654                 isWORDCHAR_L1((U8) *s)
1655 	    );
1656 	    break;
1657 	case ALNUM:
1658 	    REXEC_FBC_CSCAN_PRELOAD(
1659 		LOAD_UTF8_CHARCLASS_ALNUM(),
1660 		swash_fetch(PL_utf8_alnum,(U8*)s, utf8_target),
1661                 isWORDCHAR((U8) *s)
1662 	    );
1663 	    break;
1664 	case ALNUMA:
1665 	    /* Don't need to worry about utf8, as it can match only a single
1666 	     * byte invariant character */
1667 	    REXEC_FBC_CLASS_SCAN( isWORDCHAR_A(*s));
1668 	    break;
1669 	case NALNUMU:
1670 	    REXEC_FBC_CSCAN_PRELOAD(
1671 		LOAD_UTF8_CHARCLASS_ALNUM(),
1672 		!swash_fetch(PL_utf8_alnum,(U8*)s, utf8_target),
1673                 ! isWORDCHAR_L1((U8) *s)
1674 	    );
1675 	    break;
1676 	case NALNUM:
1677 	    REXEC_FBC_CSCAN_PRELOAD(
1678 		LOAD_UTF8_CHARCLASS_ALNUM(),
1679 		!swash_fetch(PL_utf8_alnum, (U8*)s, utf8_target),
1680                 ! isALNUM(*s)
1681 	    );
1682 	    break;
1683 	case NALNUMA:
1684 	    REXEC_FBC_CSCAN(
1685 		!isWORDCHAR_A(*s),
1686 		!isWORDCHAR_A(*s)
1687 	    );
1688 	    break;
1689 	case NALNUML:
1690 	    REXEC_FBC_CSCAN_TAINT(
1691 		!isALNUM_LC_utf8((U8*)s),
1692 		!isALNUM_LC(*s)
1693 	    );
1694 	    break;
1695 	case SPACEU:
1696 	    REXEC_FBC_CSCAN_PRELOAD(
1697 		LOAD_UTF8_CHARCLASS_SPACE(),
1698 		*s == ' ' || swash_fetch(PL_utf8_space,(U8*)s, utf8_target),
1699                 isSPACE_L1((U8) *s)
1700 	    );
1701 	    break;
1702 	case SPACE:
1703 	    REXEC_FBC_CSCAN_PRELOAD(
1704 		LOAD_UTF8_CHARCLASS_SPACE(),
1705 		*s == ' ' || swash_fetch(PL_utf8_space,(U8*)s, utf8_target),
1706                 isSPACE((U8) *s)
1707 	    );
1708 	    break;
1709 	case SPACEA:
1710 	    /* Don't need to worry about utf8, as it can match only a single
1711 	     * byte invariant character */
1712 	    REXEC_FBC_CLASS_SCAN( isSPACE_A(*s));
1713 	    break;
1714 	case SPACEL:
1715 	    REXEC_FBC_CSCAN_TAINT(
1716 		isSPACE_LC_utf8((U8*)s),
1717 		isSPACE_LC(*s)
1718 	    );
1719 	    break;
1720 	case NSPACEU:
1721 	    REXEC_FBC_CSCAN_PRELOAD(
1722 		LOAD_UTF8_CHARCLASS_SPACE(),
1723 		!( *s == ' ' || swash_fetch(PL_utf8_space,(U8*)s, utf8_target)),
1724                 ! isSPACE_L1((U8) *s)
1725 	    );
1726 	    break;
1727 	case NSPACE:
1728 	    REXEC_FBC_CSCAN_PRELOAD(
1729 		LOAD_UTF8_CHARCLASS_SPACE(),
1730 		!(*s == ' ' || swash_fetch(PL_utf8_space,(U8*)s, utf8_target)),
1731                 ! isSPACE((U8) *s)
1732 	    );
1733 	    break;
1734 	case NSPACEA:
1735 	    REXEC_FBC_CSCAN(
1736 		!isSPACE_A(*s),
1737 		!isSPACE_A(*s)
1738 	    );
1739 	    break;
1740 	case NSPACEL:
1741 	    REXEC_FBC_CSCAN_TAINT(
1742 		!isSPACE_LC_utf8((U8*)s),
1743 		!isSPACE_LC(*s)
1744 	    );
1745 	    break;
1746 	case DIGIT:
1747 	    REXEC_FBC_CSCAN_PRELOAD(
1748 		LOAD_UTF8_CHARCLASS_DIGIT(),
1749 		swash_fetch(PL_utf8_digit,(U8*)s, utf8_target),
1750 		isDIGIT(*s)
1751 	    );
1752 	    break;
1753 	case DIGITA:
1754 	    /* Don't need to worry about utf8, as it can match only a single
1755 	     * byte invariant character */
1756 	    REXEC_FBC_CLASS_SCAN( isDIGIT_A(*s));
1757 	    break;
1758 	case DIGITL:
1759 	    REXEC_FBC_CSCAN_TAINT(
1760 		isDIGIT_LC_utf8((U8*)s),
1761 		isDIGIT_LC(*s)
1762 	    );
1763 	    break;
1764 	case NDIGIT:
1765 	    REXEC_FBC_CSCAN_PRELOAD(
1766 		LOAD_UTF8_CHARCLASS_DIGIT(),
1767 		!swash_fetch(PL_utf8_digit,(U8*)s, utf8_target),
1768 		!isDIGIT(*s)
1769 	    );
1770 	    break;
1771 	case NDIGITA:
1772 	    REXEC_FBC_CSCAN(
1773 		!isDIGIT_A(*s),
1774 		!isDIGIT_A(*s)
1775 	    );
1776 	    break;
1777 	case NDIGITL:
1778 	    REXEC_FBC_CSCAN_TAINT(
1779 		!isDIGIT_LC_utf8((U8*)s),
1780 		!isDIGIT_LC(*s)
1781 	    );
1782 	    break;
1783 	case LNBREAK:
1784 	    REXEC_FBC_CSCAN(
1785 		is_LNBREAK_utf8(s),
1786 		is_LNBREAK_latin1(s)
1787 	    );
1788 	    break;
1789 	case VERTWS:
1790 	    REXEC_FBC_CSCAN(
1791 		is_VERTWS_utf8(s),
1792 		is_VERTWS_latin1(s)
1793 	    );
1794 	    break;
1795 	case NVERTWS:
1796 	    REXEC_FBC_CSCAN(
1797 		!is_VERTWS_utf8(s),
1798 		!is_VERTWS_latin1(s)
1799 	    );
1800 	    break;
1801 	case HORIZWS:
1802 	    REXEC_FBC_CSCAN(
1803 		is_HORIZWS_utf8(s),
1804 		is_HORIZWS_latin1(s)
1805 	    );
1806 	    break;
1807 	case NHORIZWS:
1808 	    REXEC_FBC_CSCAN(
1809 		!is_HORIZWS_utf8(s),
1810 		!is_HORIZWS_latin1(s)
1811 	    );
1812 	    break;
1813 	case AHOCORASICKC:
1814 	case AHOCORASICK:
1815 	    {
1816 	        DECL_TRIE_TYPE(c);
1817                 /* what trie are we using right now */
1818         	reg_ac_data *aho
1819         	    = (reg_ac_data*)progi->data->data[ ARG( c ) ];
1820         	reg_trie_data *trie
1821 		    = (reg_trie_data*)progi->data->data[ aho->trie ];
1822 		HV *widecharmap = MUTABLE_HV(progi->data->data[ aho->trie + 1 ]);
1823 
1824 		const char *last_start = strend - trie->minlen;
1825 #ifdef DEBUGGING
1826 		const char *real_start = s;
1827 #endif
1828 		STRLEN maxlen = trie->maxlen;
1829 		SV *sv_points;
1830 		U8 **points; /* map of where we were in the input string
1831 		                when reading a given char. For ASCII this
1832 		                is unnecessary overhead as the relationship
1833 		                is always 1:1, but for Unicode, especially
1834 		                case folded Unicode this is not true. */
1835 		U8 foldbuf[ UTF8_MAXBYTES_CASE + 1 ];
1836 		U8 *bitmap=NULL;
1837 
1838 
1839                 GET_RE_DEBUG_FLAGS_DECL;
1840 
1841                 /* We can't just allocate points here. We need to wrap it in
1842                  * an SV so it gets freed properly if there is a croak while
1843                  * running the match */
1844                 ENTER;
1845 	        SAVETMPS;
1846                 sv_points=newSV(maxlen * sizeof(U8 *));
1847                 SvCUR_set(sv_points,
1848                     maxlen * sizeof(U8 *));
1849                 SvPOK_on(sv_points);
1850                 sv_2mortal(sv_points);
1851                 points=(U8**)SvPV_nolen(sv_points );
1852                 if ( trie_type != trie_utf8_fold
1853                      && (trie->bitmap || OP(c)==AHOCORASICKC) )
1854                 {
1855                     if (trie->bitmap)
1856                         bitmap=(U8*)trie->bitmap;
1857                     else
1858                         bitmap=(U8*)ANYOF_BITMAP(c);
1859                 }
1860                 /* this is the Aho-Corasick algorithm modified a touch
1861                    to include special handling for long "unknown char"
1862                    sequences. The basic idea being that we use AC as long
1863                    as we are dealing with a possible matching char, when
1864                    we encounter an unknown char (and we have not encountered
1865                    an accepting state) we scan forward until we find a legal
1866                    starting char.
1867                    AC matching is basically that of trie matching, except
1868                    that when we encounter a failing transition, we fall back
1869                    to the current states "fail state", and try the current char
1870                    again, a process we repeat until we reach the root state,
1871                    state 1, or a legal transition. If we fail on the root state
1872                    then we can either terminate if we have reached an accepting
1873                    state previously, or restart the entire process from the beginning
1874                    if we have not.
1875 
1876                  */
1877                 while (s <= last_start) {
1878                     const U32 uniflags = UTF8_ALLOW_DEFAULT;
1879                     U8 *uc = (U8*)s;
1880                     U16 charid = 0;
1881                     U32 base = 1;
1882                     U32 state = 1;
1883                     UV uvc = 0;
1884                     STRLEN len = 0;
1885                     STRLEN foldlen = 0;
1886                     U8 *uscan = (U8*)NULL;
1887                     U8 *leftmost = NULL;
1888 #ifdef DEBUGGING
1889                     U32 accepted_word= 0;
1890 #endif
1891                     U32 pointpos = 0;
1892 
1893                     while ( state && uc <= (U8*)strend ) {
1894                         int failed=0;
1895                         U32 word = aho->states[ state ].wordnum;
1896 
1897                         if( state==1 ) {
1898                             if ( bitmap ) {
1899                                 DEBUG_TRIE_EXECUTE_r(
1900                                     if ( uc <= (U8*)last_start && !BITMAP_TEST(bitmap,*uc) ) {
1901                                         dump_exec_pos( (char *)uc, c, strend, real_start,
1902                                             (char *)uc, utf8_target );
1903                                         PerlIO_printf( Perl_debug_log,
1904                                             " Scanning for legal start char...\n");
1905                                     }
1906                                 );
1907 				if (utf8_target) {
1908 				    while ( uc <= (U8*)last_start && !BITMAP_TEST(bitmap,*uc) ) {
1909 					uc += UTF8SKIP(uc);
1910 				    }
1911 				} else {
1912 				    while ( uc <= (U8*)last_start  && !BITMAP_TEST(bitmap,*uc) ) {
1913 					uc++;
1914 				    }
1915 				}
1916                                 s= (char *)uc;
1917                             }
1918                             if (uc >(U8*)last_start) break;
1919                         }
1920 
1921                         if ( word ) {
1922                             U8 *lpos= points[ (pointpos - trie->wordinfo[word].len) % maxlen ];
1923                             if (!leftmost || lpos < leftmost) {
1924                                 DEBUG_r(accepted_word=word);
1925                                 leftmost= lpos;
1926                             }
1927                             if (base==0) break;
1928 
1929                         }
1930                         points[pointpos++ % maxlen]= uc;
1931 			REXEC_TRIE_READ_CHAR(trie_type, trie, widecharmap, uc,
1932 					     uscan, len, uvc, charid, foldlen,
1933 					     foldbuf, uniflags);
1934                         DEBUG_TRIE_EXECUTE_r({
1935                             dump_exec_pos( (char *)uc, c, strend, real_start,
1936                                 s,   utf8_target );
1937                             PerlIO_printf(Perl_debug_log,
1938                                 " Charid:%3u CP:%4"UVxf" ",
1939                                  charid, uvc);
1940                         });
1941 
1942                         do {
1943 #ifdef DEBUGGING
1944                             word = aho->states[ state ].wordnum;
1945 #endif
1946                             base = aho->states[ state ].trans.base;
1947 
1948                             DEBUG_TRIE_EXECUTE_r({
1949                                 if (failed)
1950                                     dump_exec_pos( (char *)uc, c, strend, real_start,
1951                                         s,   utf8_target );
1952                                 PerlIO_printf( Perl_debug_log,
1953                                     "%sState: %4"UVxf", word=%"UVxf,
1954                                     failed ? " Fail transition to " : "",
1955                                     (UV)state, (UV)word);
1956                             });
1957                             if ( base ) {
1958                                 U32 tmp;
1959 				I32 offset;
1960                                 if (charid &&
1961 				     ( ((offset = base + charid
1962 					- 1 - trie->uniquecharcount)) >= 0)
1963                                      && ((U32)offset < trie->lasttrans)
1964                                      && trie->trans[offset].check == state
1965                                      && (tmp=trie->trans[offset].next))
1966                                 {
1967                                     DEBUG_TRIE_EXECUTE_r(
1968                                         PerlIO_printf( Perl_debug_log," - legal\n"));
1969                                     state = tmp;
1970                                     break;
1971                                 }
1972                                 else {
1973                                     DEBUG_TRIE_EXECUTE_r(
1974                                         PerlIO_printf( Perl_debug_log," - fail\n"));
1975                                     failed = 1;
1976                                     state = aho->fail[state];
1977                                 }
1978                             }
1979                             else {
1980                                 /* we must be accepting here */
1981                                 DEBUG_TRIE_EXECUTE_r(
1982                                         PerlIO_printf( Perl_debug_log," - accepting\n"));
1983                                 failed = 1;
1984                                 break;
1985                             }
1986                         } while(state);
1987                         uc += len;
1988                         if (failed) {
1989                             if (leftmost)
1990                                 break;
1991                             if (!state) state = 1;
1992                         }
1993                     }
1994                     if ( aho->states[ state ].wordnum ) {
1995                         U8 *lpos = points[ (pointpos - trie->wordinfo[aho->states[ state ].wordnum].len) % maxlen ];
1996                         if (!leftmost || lpos < leftmost) {
1997                             DEBUG_r(accepted_word=aho->states[ state ].wordnum);
1998                             leftmost = lpos;
1999                         }
2000                     }
2001                     if (leftmost) {
2002                         s = (char*)leftmost;
2003                         DEBUG_TRIE_EXECUTE_r({
2004                             PerlIO_printf(
2005                                 Perl_debug_log,"Matches word #%"UVxf" at position %"IVdf". Trying full pattern...\n",
2006                                 (UV)accepted_word, (IV)(s - real_start)
2007                             );
2008                         });
2009                         if (!reginfo || regtry(reginfo, &s)) {
2010                             FREETMPS;
2011 		            LEAVE;
2012                             goto got_it;
2013                         }
2014                         s = HOPc(s,1);
2015                         DEBUG_TRIE_EXECUTE_r({
2016                             PerlIO_printf( Perl_debug_log,"Pattern failed. Looking for new start point...\n");
2017                         });
2018                     } else {
2019                         DEBUG_TRIE_EXECUTE_r(
2020                             PerlIO_printf( Perl_debug_log,"No match.\n"));
2021                         break;
2022                     }
2023                 }
2024                 FREETMPS;
2025                 LEAVE;
2026 	    }
2027 	    break;
2028 	default:
2029 	    Perl_croak(aTHX_ "panic: unknown regstclass %d", (int)OP(c));
2030 	    break;
2031 	}
2032 	return 0;
2033       got_it:
2034 	return s;
2035 }
2036 
2037 
2038 /*
2039  - regexec_flags - match a regexp against a string
2040  */
2041 I32
2042 Perl_regexec_flags(pTHX_ REGEXP * const rx, char *stringarg, register char *strend,
2043 	      char *strbeg, I32 minend, SV *sv, void *data, U32 flags)
2044 /* strend: pointer to null at end of string */
2045 /* strbeg: real beginning of string */
2046 /* minend: end of match must be >=minend after stringarg. */
2047 /* data: May be used for some additional optimizations.
2048          Currently its only used, with a U32 cast, for transmitting
2049          the ganch offset when doing a /g match. This will change */
2050 /* nosave: For optimizations. */
2051 {
2052     dVAR;
2053     struct regexp *const prog = (struct regexp *)SvANY(rx);
2054     /*register*/ char *s;
2055     register regnode *c;
2056     /*register*/ char *startpos = stringarg;
2057     I32 minlen;		/* must match at least this many chars */
2058     I32 dontbother = 0;	/* how many characters not to try at end */
2059     I32 end_shift = 0;			/* Same for the end. */		/* CC */
2060     I32 scream_pos = -1;		/* Internal iterator of scream. */
2061     char *scream_olds = NULL;
2062     const bool utf8_target = cBOOL(DO_UTF8(sv));
2063     I32 multiline;
2064     RXi_GET_DECL(prog,progi);
2065     regmatch_info reginfo;  /* create some info to pass to regtry etc */
2066     regexp_paren_pair *swap = NULL;
2067     GET_RE_DEBUG_FLAGS_DECL;
2068 
2069     PERL_ARGS_ASSERT_REGEXEC_FLAGS;
2070     PERL_UNUSED_ARG(data);
2071 
2072     /* Be paranoid... */
2073     if (prog == NULL || startpos == NULL) {
2074 	Perl_croak(aTHX_ "NULL regexp parameter");
2075 	return 0;
2076     }
2077 
2078     multiline = prog->extflags & RXf_PMf_MULTILINE;
2079     reginfo.prog = rx;	 /* Yes, sorry that this is confusing.  */
2080 
2081     RX_MATCH_UTF8_set(rx, utf8_target);
2082     DEBUG_EXECUTE_r(
2083         debug_start_match(rx, utf8_target, startpos, strend,
2084         "Matching");
2085     );
2086 
2087     minlen = prog->minlen;
2088 
2089     if (strend - startpos < (minlen+(prog->check_offset_min<0?prog->check_offset_min:0))) {
2090         DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
2091 			      "String too short [regexec_flags]...\n"));
2092 	goto phooey;
2093     }
2094 
2095 
2096     /* Check validity of program. */
2097     if (UCHARAT(progi->program) != REG_MAGIC) {
2098 	Perl_croak(aTHX_ "corrupted regexp program");
2099     }
2100 
2101     PL_reg_flags = 0;
2102     PL_reg_eval_set = 0;
2103     PL_reg_maxiter = 0;
2104 
2105     if (RX_UTF8(rx))
2106 	PL_reg_flags |= RF_utf8;
2107 
2108     /* Mark beginning of line for ^ and lookbehind. */
2109     reginfo.bol = startpos; /* XXX not used ??? */
2110     PL_bostr  = strbeg;
2111     reginfo.sv = sv;
2112 
2113     /* Mark end of line for $ (and such) */
2114     PL_regeol = strend;
2115 
2116     /* see how far we have to get to not match where we matched before */
2117     reginfo.till = startpos+minend;
2118 
2119     /* If there is a "must appear" string, look for it. */
2120     s = startpos;
2121 
2122     if (prog->extflags & RXf_GPOS_SEEN) { /* Need to set reginfo->ganch */
2123 	MAGIC *mg;
2124 	if (flags & REXEC_IGNOREPOS){	/* Means: check only at start */
2125 	    reginfo.ganch = startpos + prog->gofs;
2126 	    DEBUG_GPOS_r(PerlIO_printf(Perl_debug_log,
2127 	      "GPOS IGNOREPOS: reginfo.ganch = startpos + %"UVxf"\n",(UV)prog->gofs));
2128 	} else if (sv && SvTYPE(sv) >= SVt_PVMG
2129 		  && SvMAGIC(sv)
2130 		  && (mg = mg_find(sv, PERL_MAGIC_regex_global))
2131 		  && mg->mg_len >= 0) {
2132 	    reginfo.ganch = strbeg + mg->mg_len;	/* Defined pos() */
2133 	    DEBUG_GPOS_r(PerlIO_printf(Perl_debug_log,
2134 		"GPOS MAGIC: reginfo.ganch = strbeg + %"IVdf"\n",(IV)mg->mg_len));
2135 
2136 	    if (prog->extflags & RXf_ANCH_GPOS) {
2137 	        if (s > reginfo.ganch)
2138 		    goto phooey;
2139 		s = reginfo.ganch - prog->gofs;
2140 	        DEBUG_GPOS_r(PerlIO_printf(Perl_debug_log,
2141 		     "GPOS ANCH_GPOS: s = ganch - %"UVxf"\n",(UV)prog->gofs));
2142 		if (s < strbeg)
2143 		    goto phooey;
2144 	    }
2145 	}
2146 	else if (data) {
2147 	    reginfo.ganch = strbeg + PTR2UV(data);
2148             DEBUG_GPOS_r(PerlIO_printf(Perl_debug_log,
2149 		 "GPOS DATA: reginfo.ganch= strbeg + %"UVxf"\n",PTR2UV(data)));
2150 
2151 	} else {				/* pos() not defined */
2152 	    reginfo.ganch = strbeg;
2153             DEBUG_GPOS_r(PerlIO_printf(Perl_debug_log,
2154 		 "GPOS: reginfo.ganch = strbeg\n"));
2155 	}
2156     }
2157     if (PL_curpm && (PM_GETRE(PL_curpm) == rx)) {
2158         /* We have to be careful. If the previous successful match
2159            was from this regex we don't want a subsequent partially
2160            successful match to clobber the old results.
2161            So when we detect this possibility we add a swap buffer
2162            to the re, and switch the buffer each match. If we fail
2163            we switch it back, otherwise we leave it swapped.
2164         */
2165         swap = prog->offs;
2166         /* do we need a save destructor here for eval dies? */
2167         Newxz(prog->offs, (prog->nparens + 1), regexp_paren_pair);
2168     }
2169     if (!(flags & REXEC_CHECKED) && (prog->check_substr != NULL || prog->check_utf8 != NULL)) {
2170 	re_scream_pos_data d;
2171 
2172 	d.scream_olds = &scream_olds;
2173 	d.scream_pos = &scream_pos;
2174 	s = re_intuit_start(rx, sv, s, strend, flags, &d);
2175 	if (!s) {
2176 	    DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "Not present...\n"));
2177 	    goto phooey;	/* not present */
2178 	}
2179     }
2180 
2181 
2182 
2183     /* Simplest case:  anchored match need be tried only once. */
2184     /*  [unless only anchor is BOL and multiline is set] */
2185     if (prog->extflags & (RXf_ANCH & ~RXf_ANCH_GPOS)) {
2186 	if (s == startpos && regtry(&reginfo, &startpos))
2187 	    goto got_it;
2188 	else if (multiline || (prog->intflags & PREGf_IMPLICIT)
2189 		 || (prog->extflags & RXf_ANCH_MBOL)) /* XXXX SBOL? */
2190 	{
2191 	    char *end;
2192 
2193 	    if (minlen)
2194 		dontbother = minlen - 1;
2195 	    end = HOP3c(strend, -dontbother, strbeg) - 1;
2196 	    /* for multiline we only have to try after newlines */
2197 	    if (prog->check_substr || prog->check_utf8) {
2198                 /* because of the goto we can not easily reuse the macros for bifurcating the
2199                    unicode/non-unicode match modes here like we do elsewhere - demerphq */
2200                 if (utf8_target) {
2201                     if (s == startpos)
2202                         goto after_try_utf8;
2203                     while (1) {
2204                         if (regtry(&reginfo, &s)) {
2205                             goto got_it;
2206                         }
2207                       after_try_utf8:
2208                         if (s > end) {
2209                             goto phooey;
2210                         }
2211                         if (prog->extflags & RXf_USE_INTUIT) {
2212                             s = re_intuit_start(rx, sv, s + UTF8SKIP(s), strend, flags, NULL);
2213                             if (!s) {
2214                                 goto phooey;
2215                             }
2216                         }
2217                         else {
2218                             s += UTF8SKIP(s);
2219                         }
2220                     }
2221                 } /* end search for check string in unicode */
2222                 else {
2223                     if (s == startpos) {
2224                         goto after_try_latin;
2225                     }
2226                     while (1) {
2227                         if (regtry(&reginfo, &s)) {
2228                             goto got_it;
2229                         }
2230                       after_try_latin:
2231                         if (s > end) {
2232                             goto phooey;
2233                         }
2234                         if (prog->extflags & RXf_USE_INTUIT) {
2235                             s = re_intuit_start(rx, sv, s + 1, strend, flags, NULL);
2236                             if (!s) {
2237                                 goto phooey;
2238                             }
2239                         }
2240                         else {
2241                             s++;
2242                         }
2243                     }
2244                 } /* end search for check string in latin*/
2245 	    } /* end search for check string */
2246 	    else { /* search for newline */
2247 		if (s > startpos) {
2248                     /*XXX: The s-- is almost definitely wrong here under unicode - demeprhq*/
2249 		    s--;
2250 		}
2251 		/* We can use a more efficient search as newlines are the same in unicode as they are in latin */
2252 		while (s <= end) { /* note it could be possible to match at the end of the string */
2253 		    if (*s++ == '\n') {	/* don't need PL_utf8skip here */
2254 			if (regtry(&reginfo, &s))
2255 			    goto got_it;
2256 		    }
2257 		}
2258 	    } /* end search for newline */
2259 	} /* end anchored/multiline check string search */
2260 	goto phooey;
2261     } else if (RXf_GPOS_CHECK == (prog->extflags & RXf_GPOS_CHECK))
2262     {
2263         /* the warning about reginfo.ganch being used without initialization
2264            is bogus -- we set it above, when prog->extflags & RXf_GPOS_SEEN
2265            and we only enter this block when the same bit is set. */
2266         char *tmp_s = reginfo.ganch - prog->gofs;
2267 
2268 	if (tmp_s >= strbeg && regtry(&reginfo, &tmp_s))
2269 	    goto got_it;
2270 	goto phooey;
2271     }
2272 
2273     /* Messy cases:  unanchored match. */
2274     if ((prog->anchored_substr || prog->anchored_utf8) && prog->intflags & PREGf_SKIP) {
2275 	/* we have /x+whatever/ */
2276 	/* it must be a one character string (XXXX Except UTF_PATTERN?) */
2277 	char ch;
2278 #ifdef DEBUGGING
2279 	int did_match = 0;
2280 #endif
2281 	if (!(utf8_target ? prog->anchored_utf8 : prog->anchored_substr))
2282 	    utf8_target ? to_utf8_substr(prog) : to_byte_substr(prog);
2283 	ch = SvPVX_const(utf8_target ? prog->anchored_utf8 : prog->anchored_substr)[0];
2284 
2285 	if (utf8_target) {
2286 	    REXEC_FBC_SCAN(
2287 		if (*s == ch) {
2288 		    DEBUG_EXECUTE_r( did_match = 1 );
2289 		    if (regtry(&reginfo, &s)) goto got_it;
2290 		    s += UTF8SKIP(s);
2291 		    while (s < strend && *s == ch)
2292 			s += UTF8SKIP(s);
2293 		}
2294 	    );
2295 	}
2296 	else {
2297 	    REXEC_FBC_SCAN(
2298 		if (*s == ch) {
2299 		    DEBUG_EXECUTE_r( did_match = 1 );
2300 		    if (regtry(&reginfo, &s)) goto got_it;
2301 		    s++;
2302 		    while (s < strend && *s == ch)
2303 			s++;
2304 		}
2305 	    );
2306 	}
2307 	DEBUG_EXECUTE_r(if (!did_match)
2308 		PerlIO_printf(Perl_debug_log,
2309                                   "Did not find anchored character...\n")
2310                );
2311     }
2312     else if (prog->anchored_substr != NULL
2313 	      || prog->anchored_utf8 != NULL
2314 	      || ((prog->float_substr != NULL || prog->float_utf8 != NULL)
2315 		  && prog->float_max_offset < strend - s)) {
2316 	SV *must;
2317 	I32 back_max;
2318 	I32 back_min;
2319 	char *last;
2320 	char *last1;		/* Last position checked before */
2321 #ifdef DEBUGGING
2322 	int did_match = 0;
2323 #endif
2324 	if (prog->anchored_substr || prog->anchored_utf8) {
2325 	    if (!(utf8_target ? prog->anchored_utf8 : prog->anchored_substr))
2326 		utf8_target ? to_utf8_substr(prog) : to_byte_substr(prog);
2327 	    must = utf8_target ? prog->anchored_utf8 : prog->anchored_substr;
2328 	    back_max = back_min = prog->anchored_offset;
2329 	} else {
2330 	    if (!(utf8_target ? prog->float_utf8 : prog->float_substr))
2331 		utf8_target ? to_utf8_substr(prog) : to_byte_substr(prog);
2332 	    must = utf8_target ? prog->float_utf8 : prog->float_substr;
2333 	    back_max = prog->float_max_offset;
2334 	    back_min = prog->float_min_offset;
2335 	}
2336 
2337 
2338 	if (must == &PL_sv_undef)
2339 	    /* could not downgrade utf8 check substring, so must fail */
2340 	    goto phooey;
2341 
2342         if (back_min<0) {
2343 	    last = strend;
2344 	} else {
2345             last = HOP3c(strend,	/* Cannot start after this */
2346         	  -(I32)(CHR_SVLEN(must)
2347         		 - (SvTAIL(must) != 0) + back_min), strbeg);
2348         }
2349 	if (s > PL_bostr)
2350 	    last1 = HOPc(s, -1);
2351 	else
2352 	    last1 = s - 1;	/* bogus */
2353 
2354 	/* XXXX check_substr already used to find "s", can optimize if
2355 	   check_substr==must. */
2356 	scream_pos = -1;
2357 	dontbother = end_shift;
2358 	strend = HOPc(strend, -dontbother);
2359 	while ( (s <= last) &&
2360 		((flags & REXEC_SCREAM) && SvSCREAM(sv)
2361 		 ? (s = screaminstr(sv, must, HOP3c(s, back_min, (back_min<0 ? strbeg : strend)) - strbeg,
2362 				    end_shift, &scream_pos, 0))
2363 		 : (s = fbm_instr((unsigned char*)HOP3(s, back_min, (back_min<0 ? strbeg : strend)),
2364 				  (unsigned char*)strend, must,
2365 				  multiline ? FBMrf_MULTILINE : 0))) ) {
2366 	    /* we may be pointing at the wrong string */
2367 	    if ((flags & REXEC_SCREAM) && RXp_MATCH_COPIED(prog))
2368 		s = strbeg + (s - SvPVX_const(sv));
2369 	    DEBUG_EXECUTE_r( did_match = 1 );
2370 	    if (HOPc(s, -back_max) > last1) {
2371 		last1 = HOPc(s, -back_min);
2372 		s = HOPc(s, -back_max);
2373 	    }
2374 	    else {
2375 		char * const t = (last1 >= PL_bostr) ? HOPc(last1, 1) : last1 + 1;
2376 
2377 		last1 = HOPc(s, -back_min);
2378 		s = t;
2379 	    }
2380 	    if (utf8_target) {
2381 		while (s <= last1) {
2382 		    if (regtry(&reginfo, &s))
2383 			goto got_it;
2384 		    s += UTF8SKIP(s);
2385 		}
2386 	    }
2387 	    else {
2388 		while (s <= last1) {
2389 		    if (regtry(&reginfo, &s))
2390 			goto got_it;
2391 		    s++;
2392 		}
2393 	    }
2394 	}
2395 	DEBUG_EXECUTE_r(if (!did_match) {
2396             RE_PV_QUOTED_DECL(quoted, utf8_target, PERL_DEBUG_PAD_ZERO(0),
2397                 SvPVX_const(must), RE_SV_DUMPLEN(must), 30);
2398             PerlIO_printf(Perl_debug_log, "Did not find %s substr %s%s...\n",
2399 			      ((must == prog->anchored_substr || must == prog->anchored_utf8)
2400 			       ? "anchored" : "floating"),
2401                 quoted, RE_SV_TAIL(must));
2402         });
2403 	goto phooey;
2404     }
2405     else if ( (c = progi->regstclass) ) {
2406 	if (minlen) {
2407 	    const OPCODE op = OP(progi->regstclass);
2408 	    /* don't bother with what can't match */
2409 	    if (PL_regkind[op] != EXACT && op != CANY && PL_regkind[op] != TRIE)
2410 	        strend = HOPc(strend, -(minlen - 1));
2411 	}
2412 	DEBUG_EXECUTE_r({
2413 	    SV * const prop = sv_newmortal();
2414 	    regprop(prog, prop, c);
2415 	    {
2416 		RE_PV_QUOTED_DECL(quoted,utf8_target,PERL_DEBUG_PAD_ZERO(1),
2417 		    s,strend-s,60);
2418 		PerlIO_printf(Perl_debug_log,
2419 		    "Matching stclass %.*s against %s (%d bytes)\n",
2420 		    (int)SvCUR(prop), SvPVX_const(prop),
2421 		     quoted, (int)(strend - s));
2422 	    }
2423 	});
2424         if (find_byclass(prog, c, s, strend, &reginfo))
2425 	    goto got_it;
2426 	DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "Contradicts stclass... [regexec_flags]\n"));
2427     }
2428     else {
2429 	dontbother = 0;
2430 	if (prog->float_substr != NULL || prog->float_utf8 != NULL) {
2431 	    /* Trim the end. */
2432 	    char *last= NULL;
2433 	    SV* float_real;
2434 
2435 	    if (!(utf8_target ? prog->float_utf8 : prog->float_substr))
2436 		utf8_target ? to_utf8_substr(prog) : to_byte_substr(prog);
2437 	    float_real = utf8_target ? prog->float_utf8 : prog->float_substr;
2438 
2439 	    if ((flags & REXEC_SCREAM) && SvSCREAM(sv)) {
2440 		last = screaminstr(sv, float_real, s - strbeg,
2441 				   end_shift, &scream_pos, 1); /* last one */
2442 		if (!last)
2443 		    last = scream_olds; /* Only one occurrence. */
2444 		/* we may be pointing at the wrong string */
2445 		else if (RXp_MATCH_COPIED(prog))
2446 		    s = strbeg + (s - SvPVX_const(sv));
2447 	    }
2448 	    else {
2449 		STRLEN len;
2450                 const char * const little = SvPV_const(float_real, len);
2451 		if (SvTAIL(float_real)) {
2452 		    /* This means that float_real contains an artificial \n on the end
2453 		     * due to the presence of something like this: /foo$/
2454 		     * where we can match both "foo" and "foo\n" at the end of the string.
2455 		     * So we have to compare the end of the string first against the float_real
2456 		     * without the \n and then against the full float_real with the string.
2457 		     * We have to watch out for cases where the string might be smaller
2458 		     * than the float_real or the float_real without the \n.
2459 		     */
2460 		    char *checkpos= strend - len;
2461 		    DEBUG_OPTIMISE_r(
2462 			PerlIO_printf(Perl_debug_log,
2463 			    "%sChecking for float_real.%s\n",
2464 			    PL_colors[4], PL_colors[5]));
2465 		    if (checkpos + 1 < strbeg) {
2466 			/* can't match, even if we remove the trailing \n string is too short to match */
2467 			DEBUG_EXECUTE_r(
2468 			    PerlIO_printf(Perl_debug_log,
2469 				"%sString shorter than required trailing substring, cannot match.%s\n",
2470 				PL_colors[4], PL_colors[5]));
2471 			goto phooey;
2472 		    } else if (memEQ(checkpos + 1, little, len - 1)) {
2473 			/* can match, the end of the string matches without the "\n" */
2474 			last = checkpos + 1;
2475 		    } else if (checkpos < strbeg) {
2476 			/* cant match, string is too short when the "\n" is included */
2477 			DEBUG_EXECUTE_r(
2478 			    PerlIO_printf(Perl_debug_log,
2479 				"%sString does not contain required trailing substring, cannot match.%s\n",
2480 				PL_colors[4], PL_colors[5]));
2481 			goto phooey;
2482 		    } else if (!multiline) {
2483 			/* non multiline match, so compare with the "\n" at the end of the string */
2484 			if (memEQ(checkpos, little, len)) {
2485 			    last= checkpos;
2486 			} else {
2487 			    DEBUG_EXECUTE_r(
2488 				PerlIO_printf(Perl_debug_log,
2489 				    "%sString does not contain required trailing substring, cannot match.%s\n",
2490 				    PL_colors[4], PL_colors[5]));
2491 			    goto phooey;
2492 			}
2493 		    } else {
2494 			/* multiline match, so we have to search for a place where the full string is located */
2495 			goto find_last;
2496 		    }
2497 		} else {
2498 		  find_last:
2499 		    if (len)
2500 			last = rninstr(s, strend, little, little + len);
2501 		    else
2502 			last = strend;	/* matching "$" */
2503 		}
2504 	    }
2505 	    if (!last) {
2506 		/* at one point this block contained a comment which was probably
2507 		 * incorrect, which said that this was a "should not happen" case.
2508 		 * Even if it was true when it was written I am pretty sure it is
2509 		 * not anymore, so I have removed the comment and replaced it with
2510 		 * this one. Yves */
2511 		DEBUG_EXECUTE_r(
2512 		    PerlIO_printf(Perl_debug_log,
2513 			"String does not contain required substring, cannot match.\n"
2514 	            ));
2515 		goto phooey;
2516 	    }
2517 	    dontbother = strend - last + prog->float_min_offset;
2518 	}
2519 	if (minlen && (dontbother < minlen))
2520 	    dontbother = minlen - 1;
2521 	strend -= dontbother; 		   /* this one's always in bytes! */
2522 	/* We don't know much -- general case. */
2523 	if (utf8_target) {
2524 	    for (;;) {
2525 		if (regtry(&reginfo, &s))
2526 		    goto got_it;
2527 		if (s >= strend)
2528 		    break;
2529 		s += UTF8SKIP(s);
2530 	    };
2531 	}
2532 	else {
2533 	    do {
2534 		if (regtry(&reginfo, &s))
2535 		    goto got_it;
2536 	    } while (s++ < strend);
2537 	}
2538     }
2539 
2540     /* Failure. */
2541     goto phooey;
2542 
2543 got_it:
2544     Safefree(swap);
2545     RX_MATCH_TAINTED_set(rx, PL_reg_flags & RF_tainted);
2546 
2547     if (PL_reg_eval_set)
2548 	restore_pos(aTHX_ prog);
2549     if (RXp_PAREN_NAMES(prog))
2550         (void)hv_iterinit(RXp_PAREN_NAMES(prog));
2551 
2552     /* make sure $`, $&, $', and $digit will work later */
2553     if ( !(flags & REXEC_NOT_FIRST) ) {
2554 	RX_MATCH_COPY_FREE(rx);
2555 	if (flags & REXEC_COPY_STR) {
2556 	    const I32 i = PL_regeol - startpos + (stringarg - strbeg);
2557 #ifdef PERL_OLD_COPY_ON_WRITE
2558 	    if ((SvIsCOW(sv)
2559 		 || (SvFLAGS(sv) & CAN_COW_MASK) == CAN_COW_FLAGS)) {
2560 		if (DEBUG_C_TEST) {
2561 		    PerlIO_printf(Perl_debug_log,
2562 				  "Copy on write: regexp capture, type %d\n",
2563 				  (int) SvTYPE(sv));
2564 		}
2565 		prog->saved_copy = sv_setsv_cow(prog->saved_copy, sv);
2566 		prog->subbeg = (char *)SvPVX_const(prog->saved_copy);
2567 		assert (SvPOKp(prog->saved_copy));
2568 	    } else
2569 #endif
2570 	    {
2571 		RX_MATCH_COPIED_on(rx);
2572 		s = savepvn(strbeg, i);
2573 		prog->subbeg = s;
2574 	    }
2575 	    prog->sublen = i;
2576 	}
2577 	else {
2578 	    prog->subbeg = strbeg;
2579 	    prog->sublen = PL_regeol - strbeg;	/* strend may have been modified */
2580 	}
2581     }
2582 
2583     return 1;
2584 
2585 phooey:
2586     DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "%sMatch failed%s\n",
2587 			  PL_colors[4], PL_colors[5]));
2588     if (PL_reg_eval_set)
2589 	restore_pos(aTHX_ prog);
2590     if (swap) {
2591         /* we failed :-( roll it back */
2592         Safefree(prog->offs);
2593         prog->offs = swap;
2594     }
2595 
2596     return 0;
2597 }
2598 
2599 
2600 /*
2601  - regtry - try match at specific point
2602  */
2603 STATIC I32			/* 0 failure, 1 success */
2604 S_regtry(pTHX_ regmatch_info *reginfo, char **startpos)
2605 {
2606     dVAR;
2607     CHECKPOINT lastcp;
2608     REGEXP *const rx = reginfo->prog;
2609     regexp *const prog = (struct regexp *)SvANY(rx);
2610     RXi_GET_DECL(prog,progi);
2611     GET_RE_DEBUG_FLAGS_DECL;
2612 
2613     PERL_ARGS_ASSERT_REGTRY;
2614 
2615     reginfo->cutpoint=NULL;
2616 
2617     if ((prog->extflags & RXf_EVAL_SEEN) && !PL_reg_eval_set) {
2618 	MAGIC *mg;
2619 
2620 	PL_reg_eval_set = RS_init;
2621 	DEBUG_EXECUTE_r(DEBUG_s(
2622 	    PerlIO_printf(Perl_debug_log, "  setting stack tmpbase at %"IVdf"\n",
2623 			  (IV)(PL_stack_sp - PL_stack_base));
2624 	    ));
2625 	SAVESTACK_CXPOS();
2626 	cxstack[cxstack_ix].blk_oldsp = PL_stack_sp - PL_stack_base;
2627 	/* Otherwise OP_NEXTSTATE will free whatever on stack now.  */
2628 	SAVETMPS;
2629 	/* Apparently this is not needed, judging by wantarray. */
2630 	/* SAVEI8(cxstack[cxstack_ix].blk_gimme);
2631 	   cxstack[cxstack_ix].blk_gimme = G_SCALAR; */
2632 
2633 	if (reginfo->sv) {
2634 	    /* Make $_ available to executed code. */
2635 	    if (reginfo->sv != DEFSV) {
2636 		SAVE_DEFSV;
2637 		DEFSV_set(reginfo->sv);
2638 	    }
2639 
2640 	    if (!(SvTYPE(reginfo->sv) >= SVt_PVMG && SvMAGIC(reginfo->sv)
2641 		  && (mg = mg_find(reginfo->sv, PERL_MAGIC_regex_global)))) {
2642 		/* prepare for quick setting of pos */
2643 #ifdef PERL_OLD_COPY_ON_WRITE
2644 		if (SvIsCOW(reginfo->sv))
2645 		    sv_force_normal_flags(reginfo->sv, 0);
2646 #endif
2647 		mg = sv_magicext(reginfo->sv, NULL, PERL_MAGIC_regex_global,
2648 				 &PL_vtbl_mglob, NULL, 0);
2649 		mg->mg_len = -1;
2650 	    }
2651 	    PL_reg_magic    = mg;
2652 	    PL_reg_oldpos   = mg->mg_len;
2653 	    SAVEDESTRUCTOR_X(restore_pos, prog);
2654         }
2655         if (!PL_reg_curpm) {
2656 	    Newxz(PL_reg_curpm, 1, PMOP);
2657 #ifdef USE_ITHREADS
2658             {
2659 		SV* const repointer = &PL_sv_undef;
2660                 /* this regexp is also owned by the new PL_reg_curpm, which
2661 		   will try to free it.  */
2662                 av_push(PL_regex_padav, repointer);
2663                 PL_reg_curpm->op_pmoffset = av_len(PL_regex_padav);
2664                 PL_regex_pad = AvARRAY(PL_regex_padav);
2665             }
2666 #endif
2667         }
2668 #ifdef USE_ITHREADS
2669 	/* It seems that non-ithreads works both with and without this code.
2670 	   So for efficiency reasons it seems best not to have the code
2671 	   compiled when it is not needed.  */
2672 	/* This is safe against NULLs: */
2673 	ReREFCNT_dec(PM_GETRE(PL_reg_curpm));
2674 	/* PM_reg_curpm owns a reference to this regexp.  */
2675 	(void)ReREFCNT_inc(rx);
2676 #endif
2677 	PM_SETRE(PL_reg_curpm, rx);
2678 	PL_reg_oldcurpm = PL_curpm;
2679 	PL_curpm = PL_reg_curpm;
2680 	if (RXp_MATCH_COPIED(prog)) {
2681 	    /*  Here is a serious problem: we cannot rewrite subbeg,
2682 		since it may be needed if this match fails.  Thus
2683 		$` inside (?{}) could fail... */
2684 	    PL_reg_oldsaved = prog->subbeg;
2685 	    PL_reg_oldsavedlen = prog->sublen;
2686 #ifdef PERL_OLD_COPY_ON_WRITE
2687 	    PL_nrs = prog->saved_copy;
2688 #endif
2689 	    RXp_MATCH_COPIED_off(prog);
2690 	}
2691 	else
2692 	    PL_reg_oldsaved = NULL;
2693 	prog->subbeg = PL_bostr;
2694 	prog->sublen = PL_regeol - PL_bostr; /* strend may have been modified */
2695     }
2696     DEBUG_EXECUTE_r(PL_reg_starttry = *startpos);
2697     prog->offs[0].start = *startpos - PL_bostr;
2698     PL_reginput = *startpos;
2699     PL_reglastparen = &prog->lastparen;
2700     PL_reglastcloseparen = &prog->lastcloseparen;
2701     prog->lastparen = 0;
2702     prog->lastcloseparen = 0;
2703     PL_regsize = 0;
2704     PL_regoffs = prog->offs;
2705     if (PL_reg_start_tmpl <= prog->nparens) {
2706 	PL_reg_start_tmpl = prog->nparens*3/2 + 3;
2707         if(PL_reg_start_tmp)
2708             Renew(PL_reg_start_tmp, PL_reg_start_tmpl, char*);
2709         else
2710             Newx(PL_reg_start_tmp, PL_reg_start_tmpl, char*);
2711     }
2712 
2713     /* XXXX What this code is doing here?!!!  There should be no need
2714        to do this again and again, PL_reglastparen should take care of
2715        this!  --ilya*/
2716 
2717     /* Tests pat.t#187 and split.t#{13,14} seem to depend on this code.
2718      * Actually, the code in regcppop() (which Ilya may be meaning by
2719      * PL_reglastparen), is not needed at all by the test suite
2720      * (op/regexp, op/pat, op/split), but that code is needed otherwise
2721      * this erroneously leaves $1 defined: "1" =~ /^(?:(\d)x)?\d$/
2722      * Meanwhile, this code *is* needed for the
2723      * above-mentioned test suite tests to succeed.  The common theme
2724      * on those tests seems to be returning null fields from matches.
2725      * --jhi updated by dapm */
2726 #if 1
2727     if (prog->nparens) {
2728 	regexp_paren_pair *pp = PL_regoffs;
2729 	register I32 i;
2730 	for (i = prog->nparens; i > (I32)*PL_reglastparen; i--) {
2731 	    ++pp;
2732 	    pp->start = -1;
2733 	    pp->end = -1;
2734 	}
2735     }
2736 #endif
2737     REGCP_SET(lastcp);
2738     if (regmatch(reginfo, progi->program + 1)) {
2739 	PL_regoffs[0].end = PL_reginput - PL_bostr;
2740 	return 1;
2741     }
2742     if (reginfo->cutpoint)
2743         *startpos= reginfo->cutpoint;
2744     REGCP_UNWIND(lastcp);
2745     return 0;
2746 }
2747 
2748 
2749 #define sayYES goto yes
2750 #define sayNO goto no
2751 #define sayNO_SILENT goto no_silent
2752 
2753 /* we dont use STMT_START/END here because it leads to
2754    "unreachable code" warnings, which are bogus, but distracting. */
2755 #define CACHEsayNO \
2756     if (ST.cache_mask) \
2757        PL_reg_poscache[ST.cache_offset] |= ST.cache_mask; \
2758     sayNO
2759 
2760 /* this is used to determine how far from the left messages like
2761    'failed...' are printed. It should be set such that messages
2762    are inline with the regop output that created them.
2763 */
2764 #define REPORT_CODE_OFF 32
2765 
2766 
2767 #define CHRTEST_UNINIT -1001 /* c1/c2 haven't been calculated yet */
2768 #define CHRTEST_VOID   -1000 /* the c1/c2 "next char" test should be skipped */
2769 
2770 #define SLAB_FIRST(s) (&(s)->states[0])
2771 #define SLAB_LAST(s)  (&(s)->states[PERL_REGMATCH_SLAB_SLOTS-1])
2772 
2773 /* grab a new slab and return the first slot in it */
2774 
2775 STATIC regmatch_state *
2776 S_push_slab(pTHX)
2777 {
2778 #if PERL_VERSION < 9 && !defined(PERL_CORE)
2779     dMY_CXT;
2780 #endif
2781     regmatch_slab *s = PL_regmatch_slab->next;
2782     if (!s) {
2783 	Newx(s, 1, regmatch_slab);
2784 	s->prev = PL_regmatch_slab;
2785 	s->next = NULL;
2786 	PL_regmatch_slab->next = s;
2787     }
2788     PL_regmatch_slab = s;
2789     return SLAB_FIRST(s);
2790 }
2791 
2792 
2793 /* push a new state then goto it */
2794 
2795 #define PUSH_STATE_GOTO(state, node) \
2796     scan = node; \
2797     st->resume_state = state; \
2798     goto push_state;
2799 
2800 /* push a new state with success backtracking, then goto it */
2801 
2802 #define PUSH_YES_STATE_GOTO(state, node) \
2803     scan = node; \
2804     st->resume_state = state; \
2805     goto push_yes_state;
2806 
2807 
2808 
2809 /*
2810 
2811 regmatch() - main matching routine
2812 
2813 This is basically one big switch statement in a loop. We execute an op,
2814 set 'next' to point the next op, and continue. If we come to a point which
2815 we may need to backtrack to on failure such as (A|B|C), we push a
2816 backtrack state onto the backtrack stack. On failure, we pop the top
2817 state, and re-enter the loop at the state indicated. If there are no more
2818 states to pop, we return failure.
2819 
2820 Sometimes we also need to backtrack on success; for example /A+/, where
2821 after successfully matching one A, we need to go back and try to
2822 match another one; similarly for lookahead assertions: if the assertion
2823 completes successfully, we backtrack to the state just before the assertion
2824 and then carry on.  In these cases, the pushed state is marked as
2825 'backtrack on success too'. This marking is in fact done by a chain of
2826 pointers, each pointing to the previous 'yes' state. On success, we pop to
2827 the nearest yes state, discarding any intermediate failure-only states.
2828 Sometimes a yes state is pushed just to force some cleanup code to be
2829 called at the end of a successful match or submatch; e.g. (??{$re}) uses
2830 it to free the inner regex.
2831 
2832 Note that failure backtracking rewinds the cursor position, while
2833 success backtracking leaves it alone.
2834 
2835 A pattern is complete when the END op is executed, while a subpattern
2836 such as (?=foo) is complete when the SUCCESS op is executed. Both of these
2837 ops trigger the "pop to last yes state if any, otherwise return true"
2838 behaviour.
2839 
2840 A common convention in this function is to use A and B to refer to the two
2841 subpatterns (or to the first nodes thereof) in patterns like /A*B/: so A is
2842 the subpattern to be matched possibly multiple times, while B is the entire
2843 rest of the pattern. Variable and state names reflect this convention.
2844 
2845 The states in the main switch are the union of ops and failure/success of
2846 substates associated with with that op.  For example, IFMATCH is the op
2847 that does lookahead assertions /(?=A)B/ and so the IFMATCH state means
2848 'execute IFMATCH'; while IFMATCH_A is a state saying that we have just
2849 successfully matched A and IFMATCH_A_fail is a state saying that we have
2850 just failed to match A. Resume states always come in pairs. The backtrack
2851 state we push is marked as 'IFMATCH_A', but when that is popped, we resume
2852 at IFMATCH_A or IFMATCH_A_fail, depending on whether we are backtracking
2853 on success or failure.
2854 
2855 The struct that holds a backtracking state is actually a big union, with
2856 one variant for each major type of op. The variable st points to the
2857 top-most backtrack struct. To make the code clearer, within each
2858 block of code we #define ST to alias the relevant union.
2859 
2860 Here's a concrete example of a (vastly oversimplified) IFMATCH
2861 implementation:
2862 
2863     switch (state) {
2864     ....
2865 
2866 #define ST st->u.ifmatch
2867 
2868     case IFMATCH: // we are executing the IFMATCH op, (?=A)B
2869 	ST.foo = ...; // some state we wish to save
2870 	...
2871 	// push a yes backtrack state with a resume value of
2872 	// IFMATCH_A/IFMATCH_A_fail, then continue execution at the
2873 	// first node of A:
2874 	PUSH_YES_STATE_GOTO(IFMATCH_A, A);
2875 	// NOTREACHED
2876 
2877     case IFMATCH_A: // we have successfully executed A; now continue with B
2878 	next = B;
2879 	bar = ST.foo; // do something with the preserved value
2880 	break;
2881 
2882     case IFMATCH_A_fail: // A failed, so the assertion failed
2883 	...;   // do some housekeeping, then ...
2884 	sayNO; // propagate the failure
2885 
2886 #undef ST
2887 
2888     ...
2889     }
2890 
2891 For any old-timers reading this who are familiar with the old recursive
2892 approach, the code above is equivalent to:
2893 
2894     case IFMATCH: // we are executing the IFMATCH op, (?=A)B
2895     {
2896 	int foo = ...
2897 	...
2898 	if (regmatch(A)) {
2899 	    next = B;
2900 	    bar = foo;
2901 	    break;
2902 	}
2903 	...;   // do some housekeeping, then ...
2904 	sayNO; // propagate the failure
2905     }
2906 
2907 The topmost backtrack state, pointed to by st, is usually free. If you
2908 want to claim it, populate any ST.foo fields in it with values you wish to
2909 save, then do one of
2910 
2911 	PUSH_STATE_GOTO(resume_state, node);
2912 	PUSH_YES_STATE_GOTO(resume_state, node);
2913 
2914 which sets that backtrack state's resume value to 'resume_state', pushes a
2915 new free entry to the top of the backtrack stack, then goes to 'node'.
2916 On backtracking, the free slot is popped, and the saved state becomes the
2917 new free state. An ST.foo field in this new top state can be temporarily
2918 accessed to retrieve values, but once the main loop is re-entered, it
2919 becomes available for reuse.
2920 
2921 Note that the depth of the backtrack stack constantly increases during the
2922 left-to-right execution of the pattern, rather than going up and down with
2923 the pattern nesting. For example the stack is at its maximum at Z at the
2924 end of the pattern, rather than at X in the following:
2925 
2926     /(((X)+)+)+....(Y)+....Z/
2927 
2928 The only exceptions to this are lookahead/behind assertions and the cut,
2929 (?>A), which pop all the backtrack states associated with A before
2930 continuing.
2931 
2932 Backtrack state structs are allocated in slabs of about 4K in size.
2933 PL_regmatch_state and st always point to the currently active state,
2934 and PL_regmatch_slab points to the slab currently containing
2935 PL_regmatch_state.  The first time regmatch() is called, the first slab is
2936 allocated, and is never freed until interpreter destruction. When the slab
2937 is full, a new one is allocated and chained to the end. At exit from
2938 regmatch(), slabs allocated since entry are freed.
2939 
2940 */
2941 
2942 
2943 #define DEBUG_STATE_pp(pp)				    \
2944     DEBUG_STATE_r({					    \
2945 	DUMP_EXEC_POS(locinput, scan, utf8_target);		    \
2946 	PerlIO_printf(Perl_debug_log,			    \
2947 	    "    %*s"pp" %s%s%s%s%s\n",			    \
2948 	    depth*2, "",				    \
2949 	    PL_reg_name[st->resume_state],                     \
2950 	    ((st==yes_state||st==mark_state) ? "[" : ""),   \
2951 	    ((st==yes_state) ? "Y" : ""),                   \
2952 	    ((st==mark_state) ? "M" : ""),                  \
2953 	    ((st==yes_state||st==mark_state) ? "]" : "")    \
2954 	);                                                  \
2955     });
2956 
2957 
2958 #define REG_NODE_NUM(x) ((x) ? (int)((x)-prog) : -1)
2959 
2960 #ifdef DEBUGGING
2961 
2962 STATIC void
2963 S_debug_start_match(pTHX_ const REGEXP *prog, const bool utf8_target,
2964     const char *start, const char *end, const char *blurb)
2965 {
2966     const bool utf8_pat = RX_UTF8(prog) ? 1 : 0;
2967 
2968     PERL_ARGS_ASSERT_DEBUG_START_MATCH;
2969 
2970     if (!PL_colorset)
2971             reginitcolors();
2972     {
2973         RE_PV_QUOTED_DECL(s0, utf8_pat, PERL_DEBUG_PAD_ZERO(0),
2974             RX_PRECOMP_const(prog), RX_PRELEN(prog), 60);
2975 
2976         RE_PV_QUOTED_DECL(s1, utf8_target, PERL_DEBUG_PAD_ZERO(1),
2977             start, end - start, 60);
2978 
2979         PerlIO_printf(Perl_debug_log,
2980             "%s%s REx%s %s against %s\n",
2981 		       PL_colors[4], blurb, PL_colors[5], s0, s1);
2982 
2983         if (utf8_target||utf8_pat)
2984             PerlIO_printf(Perl_debug_log, "UTF-8 %s%s%s...\n",
2985                 utf8_pat ? "pattern" : "",
2986                 utf8_pat && utf8_target ? " and " : "",
2987                 utf8_target ? "string" : ""
2988             );
2989     }
2990 }
2991 
2992 STATIC void
2993 S_dump_exec_pos(pTHX_ const char *locinput,
2994                       const regnode *scan,
2995                       const char *loc_regeol,
2996                       const char *loc_bostr,
2997                       const char *loc_reg_starttry,
2998                       const bool utf8_target)
2999 {
3000     const int docolor = *PL_colors[0] || *PL_colors[2] || *PL_colors[4];
3001     const int taill = (docolor ? 10 : 7); /* 3 chars for "> <" */
3002     int l = (loc_regeol - locinput) > taill ? taill : (loc_regeol - locinput);
3003     /* The part of the string before starttry has one color
3004        (pref0_len chars), between starttry and current
3005        position another one (pref_len - pref0_len chars),
3006        after the current position the third one.
3007        We assume that pref0_len <= pref_len, otherwise we
3008        decrease pref0_len.  */
3009     int pref_len = (locinput - loc_bostr) > (5 + taill) - l
3010 	? (5 + taill) - l : locinput - loc_bostr;
3011     int pref0_len;
3012 
3013     PERL_ARGS_ASSERT_DUMP_EXEC_POS;
3014 
3015     while (utf8_target && UTF8_IS_CONTINUATION(*(U8*)(locinput - pref_len)))
3016 	pref_len++;
3017     pref0_len = pref_len  - (locinput - loc_reg_starttry);
3018     if (l + pref_len < (5 + taill) && l < loc_regeol - locinput)
3019 	l = ( loc_regeol - locinput > (5 + taill) - pref_len
3020 	      ? (5 + taill) - pref_len : loc_regeol - locinput);
3021     while (utf8_target && UTF8_IS_CONTINUATION(*(U8*)(locinput + l)))
3022 	l--;
3023     if (pref0_len < 0)
3024 	pref0_len = 0;
3025     if (pref0_len > pref_len)
3026 	pref0_len = pref_len;
3027     {
3028 	const int is_uni = (utf8_target && OP(scan) != CANY) ? 1 : 0;
3029 
3030 	RE_PV_COLOR_DECL(s0,len0,is_uni,PERL_DEBUG_PAD(0),
3031 	    (locinput - pref_len),pref0_len, 60, 4, 5);
3032 
3033 	RE_PV_COLOR_DECL(s1,len1,is_uni,PERL_DEBUG_PAD(1),
3034 		    (locinput - pref_len + pref0_len),
3035 		    pref_len - pref0_len, 60, 2, 3);
3036 
3037 	RE_PV_COLOR_DECL(s2,len2,is_uni,PERL_DEBUG_PAD(2),
3038 		    locinput, loc_regeol - locinput, 10, 0, 1);
3039 
3040 	const STRLEN tlen=len0+len1+len2;
3041 	PerlIO_printf(Perl_debug_log,
3042 		    "%4"IVdf" <%.*s%.*s%s%.*s>%*s|",
3043 		    (IV)(locinput - loc_bostr),
3044 		    len0, s0,
3045 		    len1, s1,
3046 		    (docolor ? "" : "> <"),
3047 		    len2, s2,
3048 		    (int)(tlen > 19 ? 0 :  19 - tlen),
3049 		    "");
3050     }
3051 }
3052 
3053 #endif
3054 
3055 /* reg_check_named_buff_matched()
3056  * Checks to see if a named buffer has matched. The data array of
3057  * buffer numbers corresponding to the buffer is expected to reside
3058  * in the regexp->data->data array in the slot stored in the ARG() of
3059  * node involved. Note that this routine doesn't actually care about the
3060  * name, that information is not preserved from compilation to execution.
3061  * Returns the index of the leftmost defined buffer with the given name
3062  * or 0 if non of the buffers matched.
3063  */
3064 STATIC I32
3065 S_reg_check_named_buff_matched(pTHX_ const regexp *rex, const regnode *scan)
3066 {
3067     I32 n;
3068     RXi_GET_DECL(rex,rexi);
3069     SV *sv_dat= MUTABLE_SV(rexi->data->data[ ARG( scan ) ]);
3070     I32 *nums=(I32*)SvPVX(sv_dat);
3071 
3072     PERL_ARGS_ASSERT_REG_CHECK_NAMED_BUFF_MATCHED;
3073 
3074     for ( n=0; n<SvIVX(sv_dat); n++ ) {
3075         if ((I32)*PL_reglastparen >= nums[n] &&
3076             PL_regoffs[nums[n]].end != -1)
3077         {
3078             return nums[n];
3079         }
3080     }
3081     return 0;
3082 }
3083 
3084 
3085 /* free all slabs above current one  - called during LEAVE_SCOPE */
3086 
3087 STATIC void
3088 S_clear_backtrack_stack(pTHX_ void *p)
3089 {
3090     regmatch_slab *s = PL_regmatch_slab->next;
3091     PERL_UNUSED_ARG(p);
3092 
3093     if (!s)
3094 	return;
3095     PL_regmatch_slab->next = NULL;
3096     while (s) {
3097 	regmatch_slab * const osl = s;
3098 	s = s->next;
3099 	Safefree(osl);
3100     }
3101 }
3102 
3103 
3104 #define SETREX(Re1,Re2) \
3105     if (PL_reg_eval_set) PM_SETRE((PL_reg_curpm), (Re2)); \
3106     Re1 = (Re2)
3107 
3108 STATIC I32			/* 0 failure, 1 success */
3109 S_regmatch(pTHX_ regmatch_info *reginfo, regnode *prog)
3110 {
3111 #if PERL_VERSION < 9 && !defined(PERL_CORE)
3112     dMY_CXT;
3113 #endif
3114     dVAR;
3115     register const bool utf8_target = PL_reg_match_utf8;
3116     const U32 uniflags = UTF8_ALLOW_DEFAULT;
3117     REGEXP *rex_sv = reginfo->prog;
3118     regexp *rex = (struct regexp *)SvANY(rex_sv);
3119     RXi_GET_DECL(rex,rexi);
3120     I32	oldsave;
3121     /* the current state. This is a cached copy of PL_regmatch_state */
3122     register regmatch_state *st;
3123     /* cache heavy used fields of st in registers */
3124     register regnode *scan;
3125     register regnode *next;
3126     register U32 n = 0;	/* general value; init to avoid compiler warning */
3127     register I32 ln = 0; /* len or last;  init to avoid compiler warning */
3128     register char *locinput = PL_reginput;
3129     register I32 nextchr;   /* is always set to UCHARAT(locinput) */
3130 
3131     bool result = 0;	    /* return value of S_regmatch */
3132     int depth = 0;	    /* depth of backtrack stack */
3133     U32 nochange_depth = 0; /* depth of GOSUB recursion with nochange */
3134     const U32 max_nochange_depth =
3135         (3 * rex->nparens > MAX_RECURSE_EVAL_NOCHANGE_DEPTH) ?
3136         3 * rex->nparens : MAX_RECURSE_EVAL_NOCHANGE_DEPTH;
3137     regmatch_state *yes_state = NULL; /* state to pop to on success of
3138 							    subpattern */
3139     /* mark_state piggy backs on the yes_state logic so that when we unwind
3140        the stack on success we can update the mark_state as we go */
3141     regmatch_state *mark_state = NULL; /* last mark state we have seen */
3142     regmatch_state *cur_eval = NULL; /* most recent EVAL_AB state */
3143     struct regmatch_state  *cur_curlyx = NULL; /* most recent curlyx */
3144     U32 state_num;
3145     bool no_final = 0;      /* prevent failure from backtracking? */
3146     bool do_cutgroup = 0;   /* no_final only until next branch/trie entry */
3147     char *startpoint = PL_reginput;
3148     SV *popmark = NULL;     /* are we looking for a mark? */
3149     SV *sv_commit = NULL;   /* last mark name seen in failure */
3150     SV *sv_yes_mark = NULL; /* last mark name we have seen
3151                                during a successful match */
3152     U32 lastopen = 0;       /* last open we saw */
3153     bool has_cutgroup = RX_HAS_CUTGROUP(rex) ? 1 : 0;
3154     SV* const oreplsv = GvSV(PL_replgv);
3155     /* these three flags are set by various ops to signal information to
3156      * the very next op. They have a useful lifetime of exactly one loop
3157      * iteration, and are not preserved or restored by state pushes/pops
3158      */
3159     bool sw = 0;	    /* the condition value in (?(cond)a|b) */
3160     bool minmod = 0;	    /* the next "{n,m}" is a "{n,m}?" */
3161     int logical = 0;	    /* the following EVAL is:
3162 				0: (?{...})
3163 				1: (?(?{...})X|Y)
3164 				2: (??{...})
3165 			       or the following IFMATCH/UNLESSM is:
3166 			        false: plain (?=foo)
3167 				true:  used as a condition: (?(?=foo))
3168 			    */
3169 #ifdef DEBUGGING
3170     GET_RE_DEBUG_FLAGS_DECL;
3171 #endif
3172 
3173     PERL_ARGS_ASSERT_REGMATCH;
3174 
3175     DEBUG_OPTIMISE_r( DEBUG_EXECUTE_r({
3176 	    PerlIO_printf(Perl_debug_log,"regmatch start\n");
3177     }));
3178     /* on first ever call to regmatch, allocate first slab */
3179     if (!PL_regmatch_slab) {
3180 	Newx(PL_regmatch_slab, 1, regmatch_slab);
3181 	PL_regmatch_slab->prev = NULL;
3182 	PL_regmatch_slab->next = NULL;
3183 	PL_regmatch_state = SLAB_FIRST(PL_regmatch_slab);
3184     }
3185 
3186     oldsave = PL_savestack_ix;
3187     SAVEDESTRUCTOR_X(S_clear_backtrack_stack, NULL);
3188     SAVEVPTR(PL_regmatch_slab);
3189     SAVEVPTR(PL_regmatch_state);
3190 
3191     /* grab next free state slot */
3192     st = ++PL_regmatch_state;
3193     if (st >  SLAB_LAST(PL_regmatch_slab))
3194 	st = PL_regmatch_state = S_push_slab(aTHX);
3195 
3196     /* Note that nextchr is a byte even in UTF */
3197     nextchr = UCHARAT(locinput);
3198     scan = prog;
3199     while (scan != NULL) {
3200 
3201         DEBUG_EXECUTE_r( {
3202 	    SV * const prop = sv_newmortal();
3203 	    regnode *rnext=regnext(scan);
3204 	    DUMP_EXEC_POS( locinput, scan, utf8_target );
3205 	    regprop(rex, prop, scan);
3206 
3207 	    PerlIO_printf(Perl_debug_log,
3208 		    "%3"IVdf":%*s%s(%"IVdf")\n",
3209 		    (IV)(scan - rexi->program), depth*2, "",
3210 		    SvPVX_const(prop),
3211 		    (PL_regkind[OP(scan)] == END || !rnext) ?
3212 		        0 : (IV)(rnext - rexi->program));
3213 	});
3214 
3215 	next = scan + NEXT_OFF(scan);
3216 	if (next == scan)
3217 	    next = NULL;
3218 	state_num = OP(scan);
3219 
3220       reenter_switch:
3221 
3222 	assert(PL_reglastparen == &rex->lastparen);
3223 	assert(PL_reglastcloseparen == &rex->lastcloseparen);
3224 	assert(PL_regoffs == rex->offs);
3225 
3226 	switch (state_num) {
3227 	case BOL:
3228 	    if (locinput == PL_bostr)
3229 	    {
3230 		/* reginfo->till = reginfo->bol; */
3231 		break;
3232 	    }
3233 	    sayNO;
3234 	case MBOL:
3235 	    if (locinput == PL_bostr ||
3236 		((nextchr || locinput < PL_regeol) && locinput[-1] == '\n'))
3237 	    {
3238 		break;
3239 	    }
3240 	    sayNO;
3241 	case SBOL:
3242 	    if (locinput == PL_bostr)
3243 		break;
3244 	    sayNO;
3245 	case GPOS:
3246 	    if (locinput == reginfo->ganch)
3247 		break;
3248 	    sayNO;
3249 
3250 	case KEEPS:
3251 	    /* update the startpoint */
3252 	    st->u.keeper.val = PL_regoffs[0].start;
3253 	    PL_reginput = locinput;
3254 	    PL_regoffs[0].start = locinput - PL_bostr;
3255 	    PUSH_STATE_GOTO(KEEPS_next, next);
3256 	    /*NOT-REACHED*/
3257 	case KEEPS_next_fail:
3258 	    /* rollback the start point change */
3259 	    PL_regoffs[0].start = st->u.keeper.val;
3260 	    sayNO_SILENT;
3261 	    /*NOT-REACHED*/
3262 	case EOL:
3263 		goto seol;
3264 	case MEOL:
3265 	    if ((nextchr || locinput < PL_regeol) && nextchr != '\n')
3266 		sayNO;
3267 	    break;
3268 	case SEOL:
3269 	  seol:
3270 	    if ((nextchr || locinput < PL_regeol) && nextchr != '\n')
3271 		sayNO;
3272 	    if (PL_regeol - locinput > 1)
3273 		sayNO;
3274 	    break;
3275 	case EOS:
3276 	    if (PL_regeol != locinput)
3277 		sayNO;
3278 	    break;
3279 	case SANY:
3280 	    if (!nextchr && locinput >= PL_regeol)
3281 		sayNO;
3282  	    if (utf8_target) {
3283 	        locinput += PL_utf8skip[nextchr];
3284 		if (locinput > PL_regeol)
3285  		    sayNO;
3286  		nextchr = UCHARAT(locinput);
3287  	    }
3288  	    else
3289  		nextchr = UCHARAT(++locinput);
3290 	    break;
3291 	case CANY:
3292 	    if (!nextchr && locinput >= PL_regeol)
3293 		sayNO;
3294 	    nextchr = UCHARAT(++locinput);
3295 	    break;
3296 	case REG_ANY:
3297 	    if ((!nextchr && locinput >= PL_regeol) || nextchr == '\n')
3298 		sayNO;
3299 	    if (utf8_target) {
3300 		locinput += PL_utf8skip[nextchr];
3301 		if (locinput > PL_regeol)
3302 		    sayNO;
3303 		nextchr = UCHARAT(locinput);
3304 	    }
3305 	    else
3306 		nextchr = UCHARAT(++locinput);
3307 	    break;
3308 
3309 #undef  ST
3310 #define ST st->u.trie
3311         case TRIEC:
3312             /* In this case the charclass data is available inline so
3313                we can fail fast without a lot of extra overhead.
3314              */
3315             if(!ANYOF_BITMAP_TEST(scan, *locinput)) {
3316                 DEBUG_EXECUTE_r(
3317                     PerlIO_printf(Perl_debug_log,
3318                               "%*s  %sfailed to match trie start class...%s\n",
3319                               REPORT_CODE_OFF+depth*2, "", PL_colors[4], PL_colors[5])
3320                 );
3321                 sayNO_SILENT;
3322                 /* NOTREACHED */
3323             }
3324             /* FALL THROUGH */
3325 	case TRIE:
3326 	    /* the basic plan of execution of the trie is:
3327 	     * At the beginning, run though all the states, and
3328 	     * find the longest-matching word. Also remember the position
3329 	     * of the shortest matching word. For example, this pattern:
3330 	     *    1  2 3 4    5
3331 	     *    ab|a|x|abcd|abc
3332 	     * when matched against the string "abcde", will generate
3333 	     * accept states for all words except 3, with the longest
3334 	     * matching word being 4, and the shortest being 1 (with
3335 	     * the position being after char 1 of the string).
3336 	     *
3337 	     * Then for each matching word, in word order (i.e. 1,2,4,5),
3338 	     * we run the remainder of the pattern; on each try setting
3339 	     * the current position to the character following the word,
3340 	     * returning to try the next word on failure.
3341 	     *
3342 	     * We avoid having to build a list of words at runtime by
3343 	     * using a compile-time structure, wordinfo[].prev, which
3344 	     * gives, for each word, the previous accepting word (if any).
3345 	     * In the case above it would contain the mappings 1->2, 2->0,
3346 	     * 3->0, 4->5, 5->1.  We can use this table to generate, from
3347 	     * the longest word (4 above), a list of all words, by
3348 	     * following the list of prev pointers; this gives us the
3349 	     * unordered list 4,5,1,2. Then given the current word we have
3350 	     * just tried, we can go through the list and find the
3351 	     * next-biggest word to try (so if we just failed on word 2,
3352 	     * the next in the list is 4).
3353 	     *
3354 	     * Since at runtime we don't record the matching position in
3355 	     * the string for each word, we have to work that out for
3356 	     * each word we're about to process. The wordinfo table holds
3357 	     * the character length of each word; given that we recorded
3358 	     * at the start: the position of the shortest word and its
3359 	     * length in chars, we just need to move the pointer the
3360 	     * difference between the two char lengths. Depending on
3361 	     * Unicode status and folding, that's cheap or expensive.
3362 	     *
3363 	     * This algorithm is optimised for the case where are only a
3364 	     * small number of accept states, i.e. 0,1, or maybe 2.
3365 	     * With lots of accepts states, and having to try all of them,
3366 	     * it becomes quadratic on number of accept states to find all
3367 	     * the next words.
3368 	     */
3369 
3370 	    {
3371                 /* what type of TRIE am I? (utf8 makes this contextual) */
3372                 DECL_TRIE_TYPE(scan);
3373 
3374                 /* what trie are we using right now */
3375 		reg_trie_data * const trie
3376         	    = (reg_trie_data*)rexi->data->data[ ARG( scan ) ];
3377 		HV * widecharmap = MUTABLE_HV(rexi->data->data[ ARG( scan ) + 1 ]);
3378                 U32 state = trie->startstate;
3379 
3380                 if (trie->bitmap && !TRIE_BITMAP_TEST(trie,*locinput) ) {
3381         	    if (trie->states[ state ].wordnum) {
3382         	         DEBUG_EXECUTE_r(
3383                             PerlIO_printf(Perl_debug_log,
3384                         	          "%*s  %smatched empty string...%s\n",
3385                         	          REPORT_CODE_OFF+depth*2, "", PL_colors[4], PL_colors[5])
3386                         );
3387 			if (!trie->jump)
3388 			    break;
3389         	    } else {
3390         	        DEBUG_EXECUTE_r(
3391                             PerlIO_printf(Perl_debug_log,
3392                         	          "%*s  %sfailed to match trie start class...%s\n",
3393                         	          REPORT_CODE_OFF+depth*2, "", PL_colors[4], PL_colors[5])
3394                         );
3395         	        sayNO_SILENT;
3396         	   }
3397                 }
3398 
3399             {
3400 		U8 *uc = ( U8* )locinput;
3401 
3402 		STRLEN len = 0;
3403 		STRLEN foldlen = 0;
3404 		U8 *uscan = (U8*)NULL;
3405 		U8 foldbuf[ UTF8_MAXBYTES_CASE + 1 ];
3406 		U32 charcount = 0; /* how many input chars we have matched */
3407 		U32 accepted = 0; /* have we seen any accepting states? */
3408 
3409 		ST.B = next;
3410 		ST.jump = trie->jump;
3411 		ST.me = scan;
3412 		ST.firstpos = NULL;
3413 		ST.longfold = FALSE; /* char longer if folded => it's harder */
3414 		ST.nextword = 0;
3415 
3416 		/* fully traverse the TRIE; note the position of the
3417 		   shortest accept state and the wordnum of the longest
3418 		   accept state */
3419 
3420 		while ( state && uc <= (U8*)PL_regeol ) {
3421                     U32 base = trie->states[ state ].trans.base;
3422                     UV uvc = 0;
3423                     U16 charid = 0;
3424 		    U16 wordnum;
3425                     wordnum = trie->states[ state ].wordnum;
3426 
3427 		    if (wordnum) { /* it's an accept state */
3428 			if (!accepted) {
3429 			    accepted = 1;
3430 			    /* record first match position */
3431 			    if (ST.longfold) {
3432 				ST.firstpos = (U8*)locinput;
3433 				ST.firstchars = 0;
3434 			    }
3435 			    else {
3436 				ST.firstpos = uc;
3437 				ST.firstchars = charcount;
3438 			    }
3439 			}
3440 			if (!ST.nextword || wordnum < ST.nextword)
3441 			    ST.nextword = wordnum;
3442 			ST.topword = wordnum;
3443 		    }
3444 
3445 		    DEBUG_TRIE_EXECUTE_r({
3446 		                DUMP_EXEC_POS( (char *)uc, scan, utf8_target );
3447 			        PerlIO_printf( Perl_debug_log,
3448 			            "%*s  %sState: %4"UVxf" Accepted: %c ",
3449 			            2+depth * 2, "", PL_colors[4],
3450 			            (UV)state, (accepted ? 'Y' : 'N'));
3451 		    });
3452 
3453 		    /* read a char and goto next state */
3454 		    if ( base ) {
3455 			I32 offset;
3456 			REXEC_TRIE_READ_CHAR(trie_type, trie, widecharmap, uc,
3457 					     uscan, len, uvc, charid, foldlen,
3458 					     foldbuf, uniflags);
3459 			charcount++;
3460 			if (foldlen>0)
3461 			    ST.longfold = TRUE;
3462 			if (charid &&
3463 			     ( ((offset =
3464 			      base + charid - 1 - trie->uniquecharcount)) >= 0)
3465 
3466 			     && ((U32)offset < trie->lasttrans)
3467 			     && trie->trans[offset].check == state)
3468 			{
3469 			    state = trie->trans[offset].next;
3470 			}
3471 			else {
3472 			    state = 0;
3473 			}
3474 			uc += len;
3475 
3476 		    }
3477 		    else {
3478 			state = 0;
3479 		    }
3480 		    DEBUG_TRIE_EXECUTE_r(
3481 		        PerlIO_printf( Perl_debug_log,
3482 		            "Charid:%3x CP:%4"UVxf" After State: %4"UVxf"%s\n",
3483 		            charid, uvc, (UV)state, PL_colors[5] );
3484 		    );
3485 		}
3486 		if (!accepted)
3487 		   sayNO;
3488 
3489 		/* calculate total number of accept states */
3490 		{
3491 		    U16 w = ST.topword;
3492 		    accepted = 0;
3493 		    while (w) {
3494 			w = trie->wordinfo[w].prev;
3495 			accepted++;
3496 		    }
3497 		    ST.accepted = accepted;
3498 		}
3499 
3500 		DEBUG_EXECUTE_r(
3501 		    PerlIO_printf( Perl_debug_log,
3502 			"%*s  %sgot %"IVdf" possible matches%s\n",
3503 			REPORT_CODE_OFF + depth * 2, "",
3504 			PL_colors[4], (IV)ST.accepted, PL_colors[5] );
3505 		);
3506 		goto trie_first_try; /* jump into the fail handler */
3507 	    }}
3508 	    /* NOTREACHED */
3509 
3510 	case TRIE_next_fail: /* we failed - try next alternative */
3511             if ( ST.jump) {
3512                 REGCP_UNWIND(ST.cp);
3513 	        for (n = *PL_reglastparen; n > ST.lastparen; n--)
3514 		    PL_regoffs[n].end = -1;
3515 	        *PL_reglastparen = n;
3516 	    }
3517 	    if (!--ST.accepted) {
3518 	        DEBUG_EXECUTE_r({
3519 		    PerlIO_printf( Perl_debug_log,
3520 			"%*s  %sTRIE failed...%s\n",
3521 			REPORT_CODE_OFF+depth*2, "",
3522 			PL_colors[4],
3523 			PL_colors[5] );
3524 		});
3525 		sayNO_SILENT;
3526 	    }
3527 	    {
3528 		/* Find next-highest word to process.  Note that this code
3529 		 * is O(N^2) per trie run (O(N) per branch), so keep tight */
3530 		register U16 min = 0;
3531 		register U16 word;
3532 		register U16 const nextword = ST.nextword;
3533 		register reg_trie_wordinfo * const wordinfo
3534 		    = ((reg_trie_data*)rexi->data->data[ARG(ST.me)])->wordinfo;
3535 		for (word=ST.topword; word; word=wordinfo[word].prev) {
3536 		    if (word > nextword && (!min || word < min))
3537 			min = word;
3538 		}
3539 		ST.nextword = min;
3540 	    }
3541 
3542           trie_first_try:
3543             if (do_cutgroup) {
3544                 do_cutgroup = 0;
3545                 no_final = 0;
3546             }
3547 
3548             if ( ST.jump) {
3549                 ST.lastparen = *PL_reglastparen;
3550 	        REGCP_SET(ST.cp);
3551             }
3552 
3553 	    /* find start char of end of current word */
3554 	    {
3555 		U32 chars; /* how many chars to skip */
3556 		U8 *uc = ST.firstpos;
3557 		reg_trie_data * const trie
3558 		    = (reg_trie_data*)rexi->data->data[ARG(ST.me)];
3559 
3560 		assert((trie->wordinfo[ST.nextword].len - trie->prefixlen)
3561 			    >=  ST.firstchars);
3562 		chars = (trie->wordinfo[ST.nextword].len - trie->prefixlen)
3563 			    - ST.firstchars;
3564 
3565 		if (ST.longfold) {
3566 		    /* the hard option - fold each char in turn and find
3567 		     * its folded length (which may be different */
3568 		    U8 foldbuf[UTF8_MAXBYTES_CASE + 1];
3569 		    STRLEN foldlen;
3570 		    STRLEN len;
3571 		    UV uvc;
3572 		    U8 *uscan;
3573 
3574 		    while (chars) {
3575 			if (utf8_target) {
3576 			    uvc = utf8n_to_uvuni((U8*)uc, UTF8_MAXLEN, &len,
3577 						    uniflags);
3578 			    uc += len;
3579 			}
3580 			else {
3581 			    uvc = *uc;
3582 			    uc++;
3583 			}
3584 			uvc = to_uni_fold(uvc, foldbuf, &foldlen);
3585 			uscan = foldbuf;
3586 			while (foldlen) {
3587 			    if (!--chars)
3588 				break;
3589 			    uvc = utf8n_to_uvuni(uscan, UTF8_MAXLEN, &len,
3590 					    uniflags);
3591 			    uscan += len;
3592 			    foldlen -= len;
3593 			}
3594 		    }
3595 		}
3596 		else {
3597 		    if (utf8_target)
3598 			while (chars--)
3599 			    uc += UTF8SKIP(uc);
3600 		    else
3601 			uc += chars;
3602 		}
3603 		PL_reginput = (char *)uc;
3604 	    }
3605 
3606 	    scan = (ST.jump && ST.jump[ST.nextword])
3607 			? ST.me + ST.jump[ST.nextword]
3608 			: ST.B;
3609 
3610 	    DEBUG_EXECUTE_r({
3611 		PerlIO_printf( Perl_debug_log,
3612 		    "%*s  %sTRIE matched word #%d, continuing%s\n",
3613 		    REPORT_CODE_OFF+depth*2, "",
3614 		    PL_colors[4],
3615 		    ST.nextword,
3616 		    PL_colors[5]
3617 		    );
3618 	    });
3619 
3620 	    if (ST.accepted > 1 || has_cutgroup) {
3621 		PUSH_STATE_GOTO(TRIE_next, scan);
3622 		/* NOTREACHED */
3623 	    }
3624 	    /* only one choice left - just continue */
3625 	    DEBUG_EXECUTE_r({
3626 		AV *const trie_words
3627 		    = MUTABLE_AV(rexi->data->data[ARG(ST.me)+TRIE_WORDS_OFFSET]);
3628 		SV ** const tmp = av_fetch( trie_words,
3629 		    ST.nextword-1, 0 );
3630 		SV *sv= tmp ? sv_newmortal() : NULL;
3631 
3632 		PerlIO_printf( Perl_debug_log,
3633 		    "%*s  %sonly one match left, short-circuiting: #%d <%s>%s\n",
3634 		    REPORT_CODE_OFF+depth*2, "", PL_colors[4],
3635 		    ST.nextword,
3636 		    tmp ? pv_pretty(sv, SvPV_nolen_const(*tmp), SvCUR(*tmp), 0,
3637 			    PL_colors[0], PL_colors[1],
3638 			    (SvUTF8(*tmp) ? PERL_PV_ESCAPE_UNI : 0)|PERL_PV_ESCAPE_NONASCII
3639 			)
3640 		    : "not compiled under -Dr",
3641 		    PL_colors[5] );
3642 	    });
3643 
3644 	    locinput = PL_reginput;
3645 	    nextchr = UCHARAT(locinput);
3646 	    continue; /* execute rest of RE */
3647 	    /* NOTREACHED */
3648 #undef  ST
3649 
3650 	case EXACT: {
3651 	    char *s = STRING(scan);
3652 	    ln = STR_LEN(scan);
3653 	    if (utf8_target != UTF_PATTERN) {
3654 		/* The target and the pattern have differing utf8ness. */
3655 		char *l = locinput;
3656 		const char * const e = s + ln;
3657 
3658 		if (utf8_target) {
3659 		    /* The target is utf8, the pattern is not utf8. */
3660 		    while (s < e) {
3661 			STRLEN ulen;
3662 			if (l >= PL_regeol)
3663 			     sayNO;
3664 			if (NATIVE_TO_UNI(*(U8*)s) !=
3665 			    utf8n_to_uvuni((U8*)l, UTF8_MAXBYTES, &ulen,
3666 					    uniflags))
3667 			     sayNO;
3668 			l += ulen;
3669 			s ++;
3670 		    }
3671 		}
3672 		else {
3673 		    /* The target is not utf8, the pattern is utf8. */
3674 		    while (s < e) {
3675 			STRLEN ulen;
3676 			if (l >= PL_regeol)
3677 			    sayNO;
3678 			if (NATIVE_TO_UNI(*((U8*)l)) !=
3679 			    utf8n_to_uvuni((U8*)s, UTF8_MAXBYTES, &ulen,
3680 					   uniflags))
3681 			    sayNO;
3682 			s += ulen;
3683 			l ++;
3684 		    }
3685 		}
3686 		locinput = l;
3687 		nextchr = UCHARAT(locinput);
3688 		break;
3689 	    }
3690 	    /* The target and the pattern have the same utf8ness. */
3691 	    /* Inline the first character, for speed. */
3692 	    if (UCHARAT(s) != nextchr)
3693 		sayNO;
3694 	    if (PL_regeol - locinput < ln)
3695 		sayNO;
3696 	    if (ln > 1 && memNE(s, locinput, ln))
3697 		sayNO;
3698 	    locinput += ln;
3699 	    nextchr = UCHARAT(locinput);
3700 	    break;
3701 	    }
3702 	case EXACTFL: {
3703 	    re_fold_t folder;
3704 	    const U8 * fold_array;
3705 	    const char * s;
3706 	    U32 fold_utf8_flags;
3707 
3708 	    PL_reg_flags |= RF_tainted;
3709 	    folder = foldEQ_locale;
3710 	    fold_array = PL_fold_locale;
3711 	    fold_utf8_flags = FOLDEQ_UTF8_LOCALE;
3712 	    goto do_exactf;
3713 
3714 	case EXACTFU_SS:
3715 	case EXACTFU_TRICKYFOLD:
3716 	case EXACTFU:
3717 	    folder = foldEQ_latin1;
3718 	    fold_array = PL_fold_latin1;
3719 	    fold_utf8_flags = (UTF_PATTERN) ? FOLDEQ_S1_ALREADY_FOLDED : 0;
3720 	    goto do_exactf;
3721 
3722 	case EXACTFA:
3723 	    folder = foldEQ_latin1;
3724 	    fold_array = PL_fold_latin1;
3725 	    fold_utf8_flags = FOLDEQ_UTF8_NOMIX_ASCII;
3726 	    goto do_exactf;
3727 
3728 	case EXACTF:
3729 	    folder = foldEQ;
3730 	    fold_array = PL_fold;
3731 	    fold_utf8_flags = 0;
3732 
3733 	  do_exactf:
3734 	    s = STRING(scan);
3735 	    ln = STR_LEN(scan);
3736 
3737 	    if (utf8_target || UTF_PATTERN || state_num == EXACTFU_SS) {
3738 	      /* Either target or the pattern are utf8, or has the issue where
3739 	       * the fold lengths may differ. */
3740 		const char * const l = locinput;
3741 		char *e = PL_regeol;
3742 
3743 		if (! foldEQ_utf8_flags(s, 0,  ln, cBOOL(UTF_PATTERN),
3744 			                l, &e, 0,  utf8_target, fold_utf8_flags))
3745 		{
3746 		    sayNO;
3747 		}
3748 		locinput = e;
3749 		nextchr = UCHARAT(locinput);
3750 		break;
3751 	    }
3752 
3753 	    /* Neither the target nor the pattern are utf8 */
3754 	    if (UCHARAT(s) != nextchr &&
3755 		UCHARAT(s) != fold_array[nextchr])
3756 	    {
3757 		sayNO;
3758 	    }
3759 	    if (PL_regeol - locinput < ln)
3760 		sayNO;
3761 	    if (ln > 1 && ! folder(s, locinput, ln))
3762 		sayNO;
3763 	    locinput += ln;
3764 	    nextchr = UCHARAT(locinput);
3765 	    break;
3766 	}
3767 
3768 	/* XXX Could improve efficiency by separating these all out using a
3769 	 * macro or in-line function.  At that point regcomp.c would no longer
3770 	 * have to set the FLAGS fields of these */
3771 	case BOUNDL:
3772 	case NBOUNDL:
3773 	    PL_reg_flags |= RF_tainted;
3774 	    /* FALL THROUGH */
3775 	case BOUND:
3776 	case BOUNDU:
3777 	case BOUNDA:
3778 	case NBOUND:
3779 	case NBOUNDU:
3780 	case NBOUNDA:
3781 	    /* was last char in word? */
3782 	    if (utf8_target
3783 		&& FLAGS(scan) != REGEX_ASCII_RESTRICTED_CHARSET
3784 		&& FLAGS(scan) != REGEX_ASCII_MORE_RESTRICTED_CHARSET)
3785 	    {
3786 		if (locinput == PL_bostr)
3787 		    ln = '\n';
3788 		else {
3789 		    const U8 * const r = reghop3((U8*)locinput, -1, (U8*)PL_bostr);
3790 
3791 		    ln = utf8n_to_uvchr(r, UTF8SKIP(r), 0, uniflags);
3792 		}
3793 		if (FLAGS(scan) != REGEX_LOCALE_CHARSET) {
3794 		    ln = isALNUM_uni(ln);
3795 		    LOAD_UTF8_CHARCLASS_ALNUM();
3796 		    n = swash_fetch(PL_utf8_alnum, (U8*)locinput, utf8_target);
3797 		}
3798 		else {
3799 		    ln = isALNUM_LC_uvchr(UNI_TO_NATIVE(ln));
3800 		    n = isALNUM_LC_utf8((U8*)locinput);
3801 		}
3802 	    }
3803 	    else {
3804 
3805 		/* Here the string isn't utf8, or is utf8 and only ascii
3806 		 * characters are to match \w.  In the latter case looking at
3807 		 * the byte just prior to the current one may be just the final
3808 		 * byte of a multi-byte character.  This is ok.  There are two
3809 		 * cases:
3810 		 * 1) it is a single byte character, and then the test is doing
3811 		 *	just what it's supposed to.
3812 		 * 2) it is a multi-byte character, in which case the final
3813 		 *	byte is never mistakable for ASCII, and so the test
3814 		 *	will say it is not a word character, which is the
3815 		 *	correct answer. */
3816 		ln = (locinput != PL_bostr) ?
3817 		    UCHARAT(locinput - 1) : '\n';
3818 		switch (FLAGS(scan)) {
3819 		    case REGEX_UNICODE_CHARSET:
3820 			ln = isWORDCHAR_L1(ln);
3821 			n = isWORDCHAR_L1(nextchr);
3822 			break;
3823 		    case REGEX_LOCALE_CHARSET:
3824 			ln = isALNUM_LC(ln);
3825 			n = isALNUM_LC(nextchr);
3826 			break;
3827 		    case REGEX_DEPENDS_CHARSET:
3828 			ln = isALNUM(ln);
3829 			n = isALNUM(nextchr);
3830 			break;
3831 		    case REGEX_ASCII_RESTRICTED_CHARSET:
3832 		    case REGEX_ASCII_MORE_RESTRICTED_CHARSET:
3833 			ln = isWORDCHAR_A(ln);
3834 			n = isWORDCHAR_A(nextchr);
3835 			break;
3836 		    default:
3837 			Perl_croak(aTHX_ "panic: Unexpected FLAGS %u in op %u", FLAGS(scan), OP(scan));
3838 			break;
3839 		}
3840 	    }
3841 	    /* Note requires that all BOUNDs be lower than all NBOUNDs in
3842 	     * regcomp.sym */
3843 	    if (((!ln) == (!n)) == (OP(scan) < NBOUND))
3844 		    sayNO;
3845 	    break;
3846 	case ANYOFV:
3847 	case ANYOF:
3848 	    if (utf8_target || state_num == ANYOFV) {
3849 	        STRLEN inclasslen = PL_regeol - locinput;
3850 		if (locinput >= PL_regeol)
3851 		    sayNO;
3852 
3853 	        if (!reginclass(rex, scan, (U8*)locinput, &inclasslen, utf8_target))
3854 		    sayNO;
3855 		locinput += inclasslen;
3856 		nextchr = UCHARAT(locinput);
3857 		break;
3858 	    }
3859 	    else {
3860 		if (nextchr < 0)
3861 		    nextchr = UCHARAT(locinput);
3862 		if (!nextchr && locinput >= PL_regeol)
3863 		    sayNO;
3864 		if (!REGINCLASS(rex, scan, (U8*)locinput))
3865 		    sayNO;
3866 		nextchr = UCHARAT(++locinput);
3867 		break;
3868 	    }
3869 	    break;
3870 	/* Special char classes - The defines start on line 129 or so */
3871         CCC_TRY_U(ALNUM,  NALNUM,  isWORDCHAR,
3872 		  ALNUML, NALNUML, isALNUM_LC, isALNUM_LC_utf8,
3873 		  ALNUMU, NALNUMU, isWORDCHAR_L1,
3874 		  ALNUMA, NALNUMA, isWORDCHAR_A,
3875 		  alnum, "a");
3876 
3877         CCC_TRY_U(SPACE,  NSPACE,  isSPACE,
3878 		  SPACEL, NSPACEL, isSPACE_LC, isSPACE_LC_utf8,
3879 		  SPACEU, NSPACEU, isSPACE_L1,
3880 		  SPACEA, NSPACEA, isSPACE_A,
3881 		  space, " ");
3882 
3883         CCC_TRY(DIGIT,  NDIGIT,  isDIGIT,
3884 		DIGITL, NDIGITL, isDIGIT_LC, isDIGIT_LC_utf8,
3885 		DIGITA, NDIGITA, isDIGIT_A,
3886 		digit, "0");
3887 
3888 	case CLUMP: /* Match \X: logical Unicode character.  This is defined as
3889 		       a Unicode extended Grapheme Cluster */
3890 	    /* From http://www.unicode.org/reports/tr29 (5.2 version).  An
3891 	      extended Grapheme Cluster is:
3892 
3893 	       CR LF
3894 	       | Prepend* Begin Extend*
3895 	       | .
3896 
3897 	       Begin is (Hangul-syllable | ! Control)
3898 	       Extend is (Grapheme_Extend | Spacing_Mark)
3899 	       Control is [ GCB_Control CR LF ]
3900 
3901 	       The discussion below shows how the code for CLUMP is derived
3902 	       from this regex.  Note that most of these concepts are from
3903 	       property values of the Grapheme Cluster Boundary (GCB) property.
3904 	       No code point can have multiple property values for a given
3905 	       property.  Thus a code point in Prepend can't be in Control, but
3906 	       it must be in !Control.  This is why Control above includes
3907 	       GCB_Control plus CR plus LF.  The latter two are used in the GCB
3908 	       property separately, and so can't be in GCB_Control, even though
3909 	       they logically are controls.  Control is not the same as gc=cc,
3910 	       but includes format and other characters as well.
3911 
3912 	       The Unicode definition of Hangul-syllable is:
3913 		   L+
3914 		   | (L* ( ( V | LV ) V* | LVT ) T*)
3915 		   | T+
3916 		  )
3917 	       Each of these is a value for the GCB property, and hence must be
3918 	       disjoint, so the order they are tested is immaterial, so the
3919 	       above can safely be changed to
3920 		   T+
3921 		   | L+
3922 		   | (L* ( LVT | ( V | LV ) V*) T*)
3923 
3924 	       The last two terms can be combined like this:
3925 		   L* ( L
3926 		        | (( LVT | ( V | LV ) V*) T*))
3927 
3928 	       And refactored into this:
3929 		   L* (L | LVT T* | V  V* T* | LV  V* T*)
3930 
3931 	       That means that if we have seen any L's at all we can quit
3932 	       there, but if the next character is an LVT, a V, or an LV we
3933 	       should keep going.
3934 
3935 	       There is a subtlety with Prepend* which showed up in testing.
3936 	       Note that the Begin, and only the Begin is required in:
3937 	        | Prepend* Begin Extend*
3938 	       Also, Begin contains '! Control'.  A Prepend must be a
3939 	       '!  Control', which means it must also be a Begin.  What it
3940 	       comes down to is that if we match Prepend* and then find no
3941 	       suitable Begin afterwards, that if we backtrack the last
3942 	       Prepend, that one will be a suitable Begin.
3943 	    */
3944 
3945 	    if (locinput >= PL_regeol)
3946 		sayNO;
3947 	    if  (! utf8_target) {
3948 
3949 		/* Match either CR LF  or '.', as all the other possibilities
3950 		 * require utf8 */
3951 		locinput++;	    /* Match the . or CR */
3952 		if (nextchr == '\r' /* And if it was CR, and the next is LF,
3953 				       match the LF */
3954 		    && locinput < PL_regeol
3955 		    && UCHARAT(locinput) == '\n') locinput++;
3956 	    }
3957 	    else {
3958 
3959 		/* Utf8: See if is ( CR LF ); already know that locinput <
3960 		 * PL_regeol, so locinput+1 is in bounds */
3961 		if (nextchr == '\r' && UCHARAT(locinput + 1) == '\n') {
3962 		    locinput += 2;
3963 		}
3964 		else {
3965 		    /* In case have to backtrack to beginning, then match '.' */
3966 		    char *starting = locinput;
3967 
3968 		    /* In case have to backtrack the last prepend */
3969 		    char *previous_prepend = 0;
3970 
3971 		    LOAD_UTF8_CHARCLASS_GCB();
3972 
3973 		    /* Match (prepend)* */
3974 		    while (locinput < PL_regeol
3975 			   && swash_fetch(PL_utf8_X_prepend,
3976 					  (U8*)locinput, utf8_target))
3977 		    {
3978 			previous_prepend = locinput;
3979 			locinput += UTF8SKIP(locinput);
3980 		    }
3981 
3982 		    /* As noted above, if we matched a prepend character, but
3983 		     * the next thing won't match, back off the last prepend we
3984 		     * matched, as it is guaranteed to match the begin */
3985 		    if (previous_prepend
3986 			&& (locinput >=  PL_regeol
3987 			    || ! swash_fetch(PL_utf8_X_begin,
3988 					     (U8*)locinput, utf8_target)))
3989 		    {
3990 			locinput = previous_prepend;
3991 		    }
3992 
3993 		    /* Note that here we know PL_regeol > locinput, as we
3994 		     * tested that upon input to this switch case, and if we
3995 		     * moved locinput forward, we tested the result just above
3996 		     * and it either passed, or we backed off so that it will
3997 		     * now pass */
3998 		    if (! swash_fetch(PL_utf8_X_begin, (U8*)locinput, utf8_target)) {
3999 
4000 			/* Here did not match the required 'Begin' in the
4001 			 * second term.  So just match the very first
4002 			 * character, the '.' of the final term of the regex */
4003 			locinput = starting + UTF8SKIP(starting);
4004 		    } else {
4005 
4006 			/* Here is the beginning of a character that can have
4007 			 * an extender.  It is either a hangul syllable, or a
4008 			 * non-control */
4009 			if (swash_fetch(PL_utf8_X_non_hangul,
4010 					(U8*)locinput, utf8_target))
4011 			{
4012 
4013 			    /* Here not a Hangul syllable, must be a
4014 			     * ('!  * Control') */
4015 			    locinput += UTF8SKIP(locinput);
4016 			} else {
4017 
4018 			    /* Here is a Hangul syllable.  It can be composed
4019 			     * of several individual characters.  One
4020 			     * possibility is T+ */
4021 			    if (swash_fetch(PL_utf8_X_T,
4022 					    (U8*)locinput, utf8_target))
4023 			    {
4024 				while (locinput < PL_regeol
4025 					&& swash_fetch(PL_utf8_X_T,
4026 							(U8*)locinput, utf8_target))
4027 				{
4028 				    locinput += UTF8SKIP(locinput);
4029 				}
4030 			    } else {
4031 
4032 				/* Here, not T+, but is a Hangul.  That means
4033 				 * it is one of the others: L, LV, LVT or V,
4034 				 * and matches:
4035 				 * L* (L | LVT T* | V  V* T* | LV  V* T*) */
4036 
4037 				/* Match L*           */
4038 				while (locinput < PL_regeol
4039 					&& swash_fetch(PL_utf8_X_L,
4040 							(U8*)locinput, utf8_target))
4041 				{
4042 				    locinput += UTF8SKIP(locinput);
4043 				}
4044 
4045 				/* Here, have exhausted L*.  If the next
4046 				 * character is not an LV, LVT nor V, it means
4047 				 * we had to have at least one L, so matches L+
4048 				 * in the original equation, we have a complete
4049 				 * hangul syllable.  Are done. */
4050 
4051 				if (locinput < PL_regeol
4052 				    && swash_fetch(PL_utf8_X_LV_LVT_V,
4053 						    (U8*)locinput, utf8_target))
4054 				{
4055 
4056 				    /* Otherwise keep going.  Must be LV, LVT
4057 				     * or V.  See if LVT */
4058 				    if (swash_fetch(PL_utf8_X_LVT,
4059 						    (U8*)locinput, utf8_target))
4060 				    {
4061 					locinput += UTF8SKIP(locinput);
4062 				    } else {
4063 
4064 					/* Must be  V or LV.  Take it, then
4065 					 * match V*     */
4066 					locinput += UTF8SKIP(locinput);
4067 					while (locinput < PL_regeol
4068 						&& swash_fetch(PL_utf8_X_V,
4069 							 (U8*)locinput, utf8_target))
4070 					{
4071 					    locinput += UTF8SKIP(locinput);
4072 					}
4073 				    }
4074 
4075 				    /* And any of LV, LVT, or V can be followed
4076 				     * by T*            */
4077 				    while (locinput < PL_regeol
4078 					   && swash_fetch(PL_utf8_X_T,
4079 							   (U8*)locinput,
4080 							   utf8_target))
4081 				    {
4082 					locinput += UTF8SKIP(locinput);
4083 				    }
4084 				}
4085 			    }
4086 			}
4087 
4088 			/* Match any extender */
4089 			while (locinput < PL_regeol
4090 				&& swash_fetch(PL_utf8_X_extend,
4091 						(U8*)locinput, utf8_target))
4092 			{
4093 			    locinput += UTF8SKIP(locinput);
4094 			}
4095 		    }
4096 		}
4097 		if (locinput > PL_regeol) sayNO;
4098 	    }
4099 	    nextchr = UCHARAT(locinput);
4100 	    break;
4101 
4102 	case NREFFL:
4103 	{   /* The capture buffer cases.  The ones beginning with N for the
4104 	       named buffers just convert to the equivalent numbered and
4105 	       pretend they were called as the corresponding numbered buffer
4106 	       op.  */
4107 	    /* don't initialize these in the declaration, it makes C++
4108 	       unhappy */
4109 	    char *s;
4110 	    char type;
4111 	    re_fold_t folder;
4112 	    const U8 *fold_array;
4113 	    UV utf8_fold_flags;
4114 
4115 	    PL_reg_flags |= RF_tainted;
4116 	    folder = foldEQ_locale;
4117 	    fold_array = PL_fold_locale;
4118 	    type = REFFL;
4119 	    utf8_fold_flags = FOLDEQ_UTF8_LOCALE;
4120 	    goto do_nref;
4121 
4122 	case NREFFA:
4123 	    folder = foldEQ_latin1;
4124 	    fold_array = PL_fold_latin1;
4125 	    type = REFFA;
4126 	    utf8_fold_flags = FOLDEQ_UTF8_NOMIX_ASCII;
4127 	    goto do_nref;
4128 
4129 	case NREFFU:
4130 	    folder = foldEQ_latin1;
4131 	    fold_array = PL_fold_latin1;
4132 	    type = REFFU;
4133 	    utf8_fold_flags = 0;
4134 	    goto do_nref;
4135 
4136 	case NREFF:
4137 	    folder = foldEQ;
4138 	    fold_array = PL_fold;
4139 	    type = REFF;
4140 	    utf8_fold_flags = 0;
4141 	    goto do_nref;
4142 
4143 	case NREF:
4144 	    type = REF;
4145 	    folder = NULL;
4146 	    fold_array = NULL;
4147 	    utf8_fold_flags = 0;
4148 	  do_nref:
4149 
4150 	    /* For the named back references, find the corresponding buffer
4151 	     * number */
4152 	    n = reg_check_named_buff_matched(rex,scan);
4153 
4154             if ( ! n ) {
4155                 sayNO;
4156 	    }
4157 	    goto do_nref_ref_common;
4158 
4159 	case REFFL:
4160 	    PL_reg_flags |= RF_tainted;
4161 	    folder = foldEQ_locale;
4162 	    fold_array = PL_fold_locale;
4163 	    utf8_fold_flags = FOLDEQ_UTF8_LOCALE;
4164 	    goto do_ref;
4165 
4166 	case REFFA:
4167 	    folder = foldEQ_latin1;
4168 	    fold_array = PL_fold_latin1;
4169 	    utf8_fold_flags = FOLDEQ_UTF8_NOMIX_ASCII;
4170 	    goto do_ref;
4171 
4172 	case REFFU:
4173 	    folder = foldEQ_latin1;
4174 	    fold_array = PL_fold_latin1;
4175 	    utf8_fold_flags = 0;
4176 	    goto do_ref;
4177 
4178 	case REFF:
4179 	    folder = foldEQ;
4180 	    fold_array = PL_fold;
4181 	    utf8_fold_flags = 0;
4182 	    goto do_ref;
4183 
4184         case REF:
4185 	    folder = NULL;
4186 	    fold_array = NULL;
4187 	    utf8_fold_flags = 0;
4188 
4189 	  do_ref:
4190 	    type = OP(scan);
4191 	    n = ARG(scan);  /* which paren pair */
4192 
4193 	  do_nref_ref_common:
4194 	    ln = PL_regoffs[n].start;
4195 	    PL_reg_leftiter = PL_reg_maxiter;		/* Void cache */
4196 	    if (*PL_reglastparen < n || ln == -1)
4197 		sayNO;			/* Do not match unless seen CLOSEn. */
4198 	    if (ln == PL_regoffs[n].end)
4199 		break;
4200 
4201 	    s = PL_bostr + ln;
4202 	    if (type != REF	/* REF can do byte comparison */
4203 		&& (utf8_target || type == REFFU))
4204 	    { /* XXX handle REFFL better */
4205 		char * limit = PL_regeol;
4206 
4207 		/* This call case insensitively compares the entire buffer
4208 		    * at s, with the current input starting at locinput, but
4209 		    * not going off the end given by PL_regeol, and returns in
4210 		    * limit upon success, how much of the current input was
4211 		    * matched */
4212 		if (! foldEQ_utf8_flags(s, NULL, PL_regoffs[n].end - ln, utf8_target,
4213 				    locinput, &limit, 0, utf8_target, utf8_fold_flags))
4214 		{
4215 		    sayNO;
4216 		}
4217 		locinput = limit;
4218 		nextchr = UCHARAT(locinput);
4219 		break;
4220 	    }
4221 
4222 	    /* Not utf8:  Inline the first character, for speed. */
4223 	    if (UCHARAT(s) != nextchr &&
4224 		(type == REF ||
4225 		 UCHARAT(s) != fold_array[nextchr]))
4226 		sayNO;
4227 	    ln = PL_regoffs[n].end - ln;
4228 	    if (locinput + ln > PL_regeol)
4229 		sayNO;
4230 	    if (ln > 1 && (type == REF
4231 			   ? memNE(s, locinput, ln)
4232 			   : ! folder(s, locinput, ln)))
4233 		sayNO;
4234 	    locinput += ln;
4235 	    nextchr = UCHARAT(locinput);
4236 	    break;
4237 	}
4238 	case NOTHING:
4239 	case TAIL:
4240 	    break;
4241 	case BACK:
4242 	    break;
4243 
4244 #undef  ST
4245 #define ST st->u.eval
4246 	{
4247 	    SV *ret;
4248 	    REGEXP *re_sv;
4249             regexp *re;
4250             regexp_internal *rei;
4251             regnode *startpoint;
4252 
4253 	case GOSTART:
4254 	case GOSUB: /*    /(...(?1))/   /(...(?&foo))/   */
4255 	    if (cur_eval && cur_eval->locinput==locinput) {
4256                 if (cur_eval->u.eval.close_paren == (U32)ARG(scan))
4257                     Perl_croak(aTHX_ "Infinite recursion in regex");
4258                 if ( ++nochange_depth > max_nochange_depth )
4259                     Perl_croak(aTHX_
4260                         "Pattern subroutine nesting without pos change"
4261                         " exceeded limit in regex");
4262             } else {
4263                 nochange_depth = 0;
4264             }
4265 	    re_sv = rex_sv;
4266             re = rex;
4267             rei = rexi;
4268             (void)ReREFCNT_inc(rex_sv);
4269             if (OP(scan)==GOSUB) {
4270                 startpoint = scan + ARG2L(scan);
4271                 ST.close_paren = ARG(scan);
4272             } else {
4273                 startpoint = rei->program+1;
4274                 ST.close_paren = 0;
4275             }
4276             goto eval_recurse_doit;
4277             /* NOTREACHED */
4278         case EVAL:  /*   /(?{A})B/   /(??{A})B/  and /(?(?{A})X|Y)B/   */
4279             if (cur_eval && cur_eval->locinput==locinput) {
4280 		if ( ++nochange_depth > max_nochange_depth )
4281                     Perl_croak(aTHX_ "EVAL without pos change exceeded limit in regex");
4282             } else {
4283                 nochange_depth = 0;
4284             }
4285 	    {
4286 		/* execute the code in the {...} */
4287 		dSP;
4288 		SV ** const before = SP;
4289 		OP_4tree * const oop = PL_op;
4290 		COP * const ocurcop = PL_curcop;
4291 		PAD *old_comppad;
4292 		char *saved_regeol = PL_regeol;
4293 		struct re_save_state saved_state;
4294 
4295 		/* To not corrupt the existing regex state while executing the
4296 		 * eval we would normally put it on the save stack, like with
4297 		 * save_re_context. However, re-evals have a weird scoping so we
4298 		 * can't just add ENTER/LEAVE here. With that, things like
4299 		 *
4300 		 *    (?{$a=2})(a(?{local$a=$a+1}))*aak*c(?{$b=$a})
4301 		 *
4302 		 * would break, as they expect the localisation to be unwound
4303 		 * only when the re-engine backtracks through the bit that
4304 		 * localised it.
4305 		 *
4306 		 * What we do instead is just saving the state in a local c
4307 		 * variable.
4308 		 */
4309 		Copy(&PL_reg_state, &saved_state, 1, struct re_save_state);
4310 
4311 		n = ARG(scan);
4312 		PL_op = (OP_4tree*)rexi->data->data[n];
4313 		DEBUG_STATE_r( PerlIO_printf(Perl_debug_log,
4314 		    "  re_eval 0x%"UVxf"\n", PTR2UV(PL_op)) );
4315 		/* wrap the call in two SAVECOMPPADs. This ensures that
4316 		 * when the save stack is eventually unwound, all the
4317 		 * accumulated SAVEt_CLEARSV's will be processed with
4318 		 * interspersed SAVEt_COMPPAD's to ensure that lexicals
4319 		 * are cleared in the right pad */
4320 		SAVECOMPPAD();
4321 		PAD_SAVE_LOCAL(old_comppad, (PAD*)rexi->data->data[n + 2]);
4322 		PL_regoffs[0].end = PL_reg_magic->mg_len = locinput - PL_bostr;
4323 
4324                 if (sv_yes_mark) {
4325                     SV *sv_mrk = get_sv("REGMARK", 1);
4326                     sv_setsv(sv_mrk, sv_yes_mark);
4327                 }
4328 
4329 		CALLRUNOPS(aTHX);			/* Scalar context. */
4330 		SPAGAIN;
4331 		if (SP == before)
4332 		    ret = &PL_sv_undef;   /* protect against empty (?{}) blocks. */
4333 		else {
4334 		    ret = POPs;
4335 		    PUTBACK;
4336 		}
4337 
4338 		Copy(&saved_state, &PL_reg_state, 1, struct re_save_state);
4339 
4340 		PL_op = oop;
4341 		SAVECOMPPAD();
4342 		PAD_RESTORE_LOCAL(old_comppad);
4343 		PL_curcop = ocurcop;
4344 		PL_regeol = saved_regeol;
4345 		if (!logical) {
4346 		    /* /(?{...})/ */
4347 		    sv_setsv(save_scalar(PL_replgv), ret);
4348 		    break;
4349 		}
4350 	    }
4351 	    if (logical == 2) { /* Postponed subexpression: /(??{...})/ */
4352 		logical = 0;
4353 		{
4354 		    /* extract RE object from returned value; compiling if
4355 		     * necessary */
4356 		    MAGIC *mg = NULL;
4357 		    REGEXP *rx = NULL;
4358 
4359 		    if (SvROK(ret)) {
4360 			SV *const sv = SvRV(ret);
4361 
4362 			if (SvTYPE(sv) == SVt_REGEXP) {
4363 			    rx = (REGEXP*) sv;
4364 			} else if (SvSMAGICAL(sv)) {
4365 			    mg = mg_find(sv, PERL_MAGIC_qr);
4366 			    assert(mg);
4367 			}
4368 		    } else if (SvTYPE(ret) == SVt_REGEXP) {
4369 			rx = (REGEXP*) ret;
4370 		    } else if (SvSMAGICAL(ret)) {
4371 			if (SvGMAGICAL(ret)) {
4372 			    /* I don't believe that there is ever qr magic
4373 			       here.  */
4374 			    assert(!mg_find(ret, PERL_MAGIC_qr));
4375 			    sv_unmagic(ret, PERL_MAGIC_qr);
4376 			}
4377 			else {
4378 			    mg = mg_find(ret, PERL_MAGIC_qr);
4379 			    /* testing suggests mg only ends up non-NULL for
4380 			       scalars who were upgraded and compiled in the
4381 			       else block below. In turn, this is only
4382 			       triggered in the "postponed utf8 string" tests
4383 			       in t/op/pat.t  */
4384 			}
4385 		    }
4386 
4387 		    if (mg) {
4388 			rx = (REGEXP *) mg->mg_obj; /*XXX:dmq*/
4389 			assert(rx);
4390 		    }
4391 		    if (rx) {
4392 			rx = reg_temp_copy(NULL, rx);
4393 		    }
4394 		    else {
4395 			U32 pm_flags = 0;
4396 			const I32 osize = PL_regsize;
4397 
4398 			if (DO_UTF8(ret)) {
4399 			    assert (SvUTF8(ret));
4400 			} else if (SvUTF8(ret)) {
4401 			    /* Not doing UTF-8, despite what the SV says. Is
4402 			       this only if we're trapped in use 'bytes'?  */
4403 			    /* Make a copy of the octet sequence, but without
4404 			       the flag on, as the compiler now honours the
4405 			       SvUTF8 flag on ret.  */
4406 			    STRLEN len;
4407 			    const char *const p = SvPV(ret, len);
4408 			    ret = newSVpvn_flags(p, len, SVs_TEMP);
4409 			}
4410 			rx = CALLREGCOMP(ret, pm_flags);
4411 			if (!(SvFLAGS(ret)
4412 			      & (SVs_TEMP | SVs_PADTMP | SVf_READONLY
4413 				 | SVs_GMG))) {
4414 			    /* This isn't a first class regexp. Instead, it's
4415 			       caching a regexp onto an existing, Perl visible
4416 			       scalar.  */
4417 			    sv_magic(ret, MUTABLE_SV(rx), PERL_MAGIC_qr, 0, 0);
4418 			}
4419 			PL_regsize = osize;
4420 		    }
4421 		    re_sv = rx;
4422 		    re = (struct regexp *)SvANY(rx);
4423 		}
4424                 RXp_MATCH_COPIED_off(re);
4425                 re->subbeg = rex->subbeg;
4426                 re->sublen = rex->sublen;
4427 		rei = RXi_GET(re);
4428                 DEBUG_EXECUTE_r(
4429                     debug_start_match(re_sv, utf8_target, locinput, PL_regeol,
4430                         "Matching embedded");
4431 		);
4432 		startpoint = rei->program + 1;
4433                	ST.close_paren = 0; /* only used for GOSUB */
4434                	/* borrowed from regtry */
4435                 if (PL_reg_start_tmpl <= re->nparens) {
4436                     PL_reg_start_tmpl = re->nparens*3/2 + 3;
4437                     if(PL_reg_start_tmp)
4438                         Renew(PL_reg_start_tmp, PL_reg_start_tmpl, char*);
4439                     else
4440                         Newx(PL_reg_start_tmp, PL_reg_start_tmpl, char*);
4441                 }
4442 
4443         eval_recurse_doit: /* Share code with GOSUB below this line */
4444 		/* run the pattern returned from (??{...}) */
4445 		ST.cp = regcppush(0);	/* Save *all* the positions. */
4446 		REGCP_SET(ST.lastcp);
4447 
4448 		PL_regoffs = re->offs; /* essentially NOOP on GOSUB */
4449 
4450 		/* see regtry, specifically PL_reglast(?:close)?paren is a pointer! (i dont know why) :dmq */
4451 		PL_reglastparen = &re->lastparen;
4452 		PL_reglastcloseparen = &re->lastcloseparen;
4453 		re->lastparen = 0;
4454 		re->lastcloseparen = 0;
4455 
4456 		PL_reginput = locinput;
4457 		PL_regsize = 0;
4458 
4459 		/* XXXX This is too dramatic a measure... */
4460 		PL_reg_maxiter = 0;
4461 
4462 		ST.toggle_reg_flags = PL_reg_flags;
4463 		if (RX_UTF8(re_sv))
4464 		    PL_reg_flags |= RF_utf8;
4465 		else
4466 		    PL_reg_flags &= ~RF_utf8;
4467 		ST.toggle_reg_flags ^= PL_reg_flags; /* diff of old and new */
4468 
4469 		ST.prev_rex = rex_sv;
4470 		ST.prev_curlyx = cur_curlyx;
4471 		SETREX(rex_sv,re_sv);
4472 		rex = re;
4473 		rexi = rei;
4474 		cur_curlyx = NULL;
4475 		ST.B = next;
4476 		ST.prev_eval = cur_eval;
4477 		cur_eval = st;
4478 		/* now continue from first node in postoned RE */
4479 		PUSH_YES_STATE_GOTO(EVAL_AB, startpoint);
4480 		/* NOTREACHED */
4481 	    }
4482 	    /* logical is 1,   /(?(?{...})X|Y)/ */
4483 	    sw = cBOOL(SvTRUE(ret));
4484 	    logical = 0;
4485 	    break;
4486 	}
4487 
4488 	case EVAL_AB: /* cleanup after a successful (??{A})B */
4489 	    /* note: this is called twice; first after popping B, then A */
4490 	    PL_reg_flags ^= ST.toggle_reg_flags;
4491 	    ReREFCNT_dec(rex_sv);
4492 	    SETREX(rex_sv,ST.prev_rex);
4493 	    rex = (struct regexp *)SvANY(rex_sv);
4494 	    rexi = RXi_GET(rex);
4495 	    regcpblow(ST.cp);
4496 	    cur_eval = ST.prev_eval;
4497 	    cur_curlyx = ST.prev_curlyx;
4498 
4499 	    /* rex was changed so update the pointer in PL_reglastparen and PL_reglastcloseparen */
4500 	    PL_reglastparen = &rex->lastparen;
4501 	    PL_reglastcloseparen = &rex->lastcloseparen;
4502 	    /* also update PL_regoffs */
4503 	    PL_regoffs = rex->offs;
4504 
4505 	    /* XXXX This is too dramatic a measure... */
4506 	    PL_reg_maxiter = 0;
4507             if ( nochange_depth )
4508 	        nochange_depth--;
4509 	    sayYES;
4510 
4511 
4512 	case EVAL_AB_fail: /* unsuccessfully ran A or B in (??{A})B */
4513 	    /* note: this is called twice; first after popping B, then A */
4514 	    PL_reg_flags ^= ST.toggle_reg_flags;
4515 	    ReREFCNT_dec(rex_sv);
4516 	    SETREX(rex_sv,ST.prev_rex);
4517 	    rex = (struct regexp *)SvANY(rex_sv);
4518 	    rexi = RXi_GET(rex);
4519 	    /* rex was changed so update the pointer in PL_reglastparen and PL_reglastcloseparen */
4520 	    PL_reglastparen = &rex->lastparen;
4521 	    PL_reglastcloseparen = &rex->lastcloseparen;
4522 
4523 	    PL_reginput = locinput;
4524 	    REGCP_UNWIND(ST.lastcp);
4525 	    regcppop(rex);
4526 	    cur_eval = ST.prev_eval;
4527 	    cur_curlyx = ST.prev_curlyx;
4528 	    /* XXXX This is too dramatic a measure... */
4529 	    PL_reg_maxiter = 0;
4530 	    if ( nochange_depth )
4531 	        nochange_depth--;
4532 	    sayNO_SILENT;
4533 #undef ST
4534 
4535 	case OPEN:
4536 	    n = ARG(scan);  /* which paren pair */
4537 	    PL_reg_start_tmp[n] = locinput;
4538 	    if (n > PL_regsize)
4539 		PL_regsize = n;
4540             lastopen = n;
4541 	    break;
4542 	case CLOSE:
4543 	    n = ARG(scan);  /* which paren pair */
4544 	    PL_regoffs[n].start = PL_reg_start_tmp[n] - PL_bostr;
4545 	    PL_regoffs[n].end = locinput - PL_bostr;
4546 	    /*if (n > PL_regsize)
4547 		PL_regsize = n;*/
4548 	    if (n > *PL_reglastparen)
4549 		*PL_reglastparen = n;
4550 	    *PL_reglastcloseparen = n;
4551             if (cur_eval && cur_eval->u.eval.close_paren == n) {
4552 	        goto fake_end;
4553 	    }
4554 	    break;
4555         case ACCEPT:
4556             if (ARG(scan)){
4557                 regnode *cursor;
4558                 for (cursor=scan;
4559                      cursor && OP(cursor)!=END;
4560                      cursor=regnext(cursor))
4561                 {
4562                     if ( OP(cursor)==CLOSE ){
4563                         n = ARG(cursor);
4564                         if ( n <= lastopen ) {
4565                             PL_regoffs[n].start
4566 				= PL_reg_start_tmp[n] - PL_bostr;
4567                             PL_regoffs[n].end = locinput - PL_bostr;
4568                             /*if (n > PL_regsize)
4569                             PL_regsize = n;*/
4570                             if (n > *PL_reglastparen)
4571                                 *PL_reglastparen = n;
4572                             *PL_reglastcloseparen = n;
4573                             if ( n == ARG(scan) || (cur_eval &&
4574                                 cur_eval->u.eval.close_paren == n))
4575                                 break;
4576                         }
4577                     }
4578                 }
4579             }
4580 	    goto fake_end;
4581 	    /*NOTREACHED*/
4582 	case GROUPP:
4583 	    n = ARG(scan);  /* which paren pair */
4584 	    sw = cBOOL(*PL_reglastparen >= n && PL_regoffs[n].end != -1);
4585 	    break;
4586 	case NGROUPP:
4587 	    /* reg_check_named_buff_matched returns 0 for no match */
4588 	    sw = cBOOL(0 < reg_check_named_buff_matched(rex,scan));
4589 	    break;
4590         case INSUBP:
4591             n = ARG(scan);
4592             sw = (cur_eval && (!n || cur_eval->u.eval.close_paren == n));
4593             break;
4594         case DEFINEP:
4595             sw = 0;
4596             break;
4597 	case IFTHEN:
4598 	    PL_reg_leftiter = PL_reg_maxiter;		/* Void cache */
4599 	    if (sw)
4600 		next = NEXTOPER(NEXTOPER(scan));
4601 	    else {
4602 		next = scan + ARG(scan);
4603 		if (OP(next) == IFTHEN) /* Fake one. */
4604 		    next = NEXTOPER(NEXTOPER(next));
4605 	    }
4606 	    break;
4607 	case LOGICAL:
4608 	    logical = scan->flags;
4609 	    break;
4610 
4611 /*******************************************************************
4612 
4613 The CURLYX/WHILEM pair of ops handle the most generic case of the /A*B/
4614 pattern, where A and B are subpatterns. (For simple A, CURLYM or
4615 STAR/PLUS/CURLY/CURLYN are used instead.)
4616 
4617 A*B is compiled as <CURLYX><A><WHILEM><B>
4618 
4619 On entry to the subpattern, CURLYX is called. This pushes a CURLYX
4620 state, which contains the current count, initialised to -1. It also sets
4621 cur_curlyx to point to this state, with any previous value saved in the
4622 state block.
4623 
4624 CURLYX then jumps straight to the WHILEM op, rather than executing A,
4625 since the pattern may possibly match zero times (i.e. it's a while {} loop
4626 rather than a do {} while loop).
4627 
4628 Each entry to WHILEM represents a successful match of A. The count in the
4629 CURLYX block is incremented, another WHILEM state is pushed, and execution
4630 passes to A or B depending on greediness and the current count.
4631 
4632 For example, if matching against the string a1a2a3b (where the aN are
4633 substrings that match /A/), then the match progresses as follows: (the
4634 pushed states are interspersed with the bits of strings matched so far):
4635 
4636     <CURLYX cnt=-1>
4637     <CURLYX cnt=0><WHILEM>
4638     <CURLYX cnt=1><WHILEM> a1 <WHILEM>
4639     <CURLYX cnt=2><WHILEM> a1 <WHILEM> a2 <WHILEM>
4640     <CURLYX cnt=3><WHILEM> a1 <WHILEM> a2 <WHILEM> a3 <WHILEM>
4641     <CURLYX cnt=3><WHILEM> a1 <WHILEM> a2 <WHILEM> a3 <WHILEM> b
4642 
4643 (Contrast this with something like CURLYM, which maintains only a single
4644 backtrack state:
4645 
4646     <CURLYM cnt=0> a1
4647     a1 <CURLYM cnt=1> a2
4648     a1 a2 <CURLYM cnt=2> a3
4649     a1 a2 a3 <CURLYM cnt=3> b
4650 )
4651 
4652 Each WHILEM state block marks a point to backtrack to upon partial failure
4653 of A or B, and also contains some minor state data related to that
4654 iteration.  The CURLYX block, pointed to by cur_curlyx, contains the
4655 overall state, such as the count, and pointers to the A and B ops.
4656 
4657 This is complicated slightly by nested CURLYX/WHILEM's. Since cur_curlyx
4658 must always point to the *current* CURLYX block, the rules are:
4659 
4660 When executing CURLYX, save the old cur_curlyx in the CURLYX state block,
4661 and set cur_curlyx to point the new block.
4662 
4663 When popping the CURLYX block after a successful or unsuccessful match,
4664 restore the previous cur_curlyx.
4665 
4666 When WHILEM is about to execute B, save the current cur_curlyx, and set it
4667 to the outer one saved in the CURLYX block.
4668 
4669 When popping the WHILEM block after a successful or unsuccessful B match,
4670 restore the previous cur_curlyx.
4671 
4672 Here's an example for the pattern (AI* BI)*BO
4673 I and O refer to inner and outer, C and W refer to CURLYX and WHILEM:
4674 
4675 cur_
4676 curlyx backtrack stack
4677 ------ ---------------
4678 NULL
4679 CO     <CO prev=NULL> <WO>
4680 CI     <CO prev=NULL> <WO> <CI prev=CO> <WI> ai
4681 CO     <CO prev=NULL> <WO> <CI prev=CO> <WI> ai <WI prev=CI> bi
4682 NULL   <CO prev=NULL> <WO> <CI prev=CO> <WI> ai <WI prev=CI> bi <WO prev=CO> bo
4683 
4684 At this point the pattern succeeds, and we work back down the stack to
4685 clean up, restoring as we go:
4686 
4687 CO     <CO prev=NULL> <WO> <CI prev=CO> <WI> ai <WI prev=CI> bi
4688 CI     <CO prev=NULL> <WO> <CI prev=CO> <WI> ai
4689 CO     <CO prev=NULL> <WO>
4690 NULL
4691 
4692 *******************************************************************/
4693 
4694 #define ST st->u.curlyx
4695 
4696 	case CURLYX:    /* start of /A*B/  (for complex A) */
4697 	{
4698 	    /* No need to save/restore up to this paren */
4699 	    I32 parenfloor = scan->flags;
4700 
4701 	    assert(next); /* keep Coverity happy */
4702 	    if (OP(PREVOPER(next)) == NOTHING) /* LONGJMP */
4703 		next += ARG(next);
4704 
4705 	    /* XXXX Probably it is better to teach regpush to support
4706 	       parenfloor > PL_regsize... */
4707 	    if (parenfloor > (I32)*PL_reglastparen)
4708 		parenfloor = *PL_reglastparen; /* Pessimization... */
4709 
4710 	    ST.prev_curlyx= cur_curlyx;
4711 	    cur_curlyx = st;
4712 	    ST.cp = PL_savestack_ix;
4713 
4714 	    /* these fields contain the state of the current curly.
4715 	     * they are accessed by subsequent WHILEMs */
4716 	    ST.parenfloor = parenfloor;
4717 	    ST.me = scan;
4718 	    ST.B = next;
4719 	    ST.minmod = minmod;
4720 	    minmod = 0;
4721 	    ST.count = -1;	/* this will be updated by WHILEM */
4722 	    ST.lastloc = NULL;  /* this will be updated by WHILEM */
4723 
4724 	    PL_reginput = locinput;
4725 	    PUSH_YES_STATE_GOTO(CURLYX_end, PREVOPER(next));
4726 	    /* NOTREACHED */
4727 	}
4728 
4729 	case CURLYX_end: /* just finished matching all of A*B */
4730 	    cur_curlyx = ST.prev_curlyx;
4731 	    sayYES;
4732 	    /* NOTREACHED */
4733 
4734 	case CURLYX_end_fail: /* just failed to match all of A*B */
4735 	    regcpblow(ST.cp);
4736 	    cur_curlyx = ST.prev_curlyx;
4737 	    sayNO;
4738 	    /* NOTREACHED */
4739 
4740 
4741 #undef ST
4742 #define ST st->u.whilem
4743 
4744 	case WHILEM:     /* just matched an A in /A*B/  (for complex A) */
4745 	{
4746 	    /* see the discussion above about CURLYX/WHILEM */
4747 	    I32 n;
4748 	    int min = ARG1(cur_curlyx->u.curlyx.me);
4749 	    int max = ARG2(cur_curlyx->u.curlyx.me);
4750 	    regnode *A = NEXTOPER(cur_curlyx->u.curlyx.me) + EXTRA_STEP_2ARGS;
4751 
4752 	    assert(cur_curlyx); /* keep Coverity happy */
4753 	    n = ++cur_curlyx->u.curlyx.count; /* how many A's matched */
4754 	    ST.save_lastloc = cur_curlyx->u.curlyx.lastloc;
4755 	    ST.cache_offset = 0;
4756 	    ST.cache_mask = 0;
4757 
4758 	    PL_reginput = locinput;
4759 
4760 	    DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
4761 		  "%*s  whilem: matched %ld out of %d..%d\n",
4762 		  REPORT_CODE_OFF+depth*2, "", (long)n, min, max)
4763 	    );
4764 
4765 	    /* First just match a string of min A's. */
4766 
4767 	    if (n < min) {
4768 		ST.cp = regcppush(cur_curlyx->u.curlyx.parenfloor);
4769 		cur_curlyx->u.curlyx.lastloc = locinput;
4770 		REGCP_SET(ST.lastcp);
4771 
4772 		PUSH_STATE_GOTO(WHILEM_A_pre, A);
4773 		/* NOTREACHED */
4774 	    }
4775 
4776 	    /* If degenerate A matches "", assume A done. */
4777 
4778 	    if (locinput == cur_curlyx->u.curlyx.lastloc) {
4779 		DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
4780 		   "%*s  whilem: empty match detected, trying continuation...\n",
4781 		   REPORT_CODE_OFF+depth*2, "")
4782 		);
4783 		goto do_whilem_B_max;
4784 	    }
4785 
4786 	    /* super-linear cache processing */
4787 
4788 	    if (scan->flags) {
4789 
4790 		if (!PL_reg_maxiter) {
4791 		    /* start the countdown: Postpone detection until we
4792 		     * know the match is not *that* much linear. */
4793 		    PL_reg_maxiter = (PL_regeol - PL_bostr + 1) * (scan->flags>>4);
4794 		    /* possible overflow for long strings and many CURLYX's */
4795 		    if (PL_reg_maxiter < 0)
4796 			PL_reg_maxiter = I32_MAX;
4797 		    PL_reg_leftiter = PL_reg_maxiter;
4798 		}
4799 
4800 		if (PL_reg_leftiter-- == 0) {
4801 		    /* initialise cache */
4802 		    const I32 size = (PL_reg_maxiter + 7)/8;
4803 		    if (PL_reg_poscache) {
4804 			if ((I32)PL_reg_poscache_size < size) {
4805 			    Renew(PL_reg_poscache, size, char);
4806 			    PL_reg_poscache_size = size;
4807 			}
4808 			Zero(PL_reg_poscache, size, char);
4809 		    }
4810 		    else {
4811 			PL_reg_poscache_size = size;
4812 			Newxz(PL_reg_poscache, size, char);
4813 		    }
4814 		    DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
4815       "%swhilem: Detected a super-linear match, switching on caching%s...\n",
4816 			      PL_colors[4], PL_colors[5])
4817 		    );
4818 		}
4819 
4820 		if (PL_reg_leftiter < 0) {
4821 		    /* have we already failed at this position? */
4822 		    I32 offset, mask;
4823 		    offset  = (scan->flags & 0xf) - 1
4824 		  		+ (locinput - PL_bostr)  * (scan->flags>>4);
4825 		    mask    = 1 << (offset % 8);
4826 		    offset /= 8;
4827 		    if (PL_reg_poscache[offset] & mask) {
4828 			DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
4829 			    "%*s  whilem: (cache) already tried at this position...\n",
4830 			    REPORT_CODE_OFF+depth*2, "")
4831 			);
4832 			sayNO; /* cache records failure */
4833 		    }
4834 		    ST.cache_offset = offset;
4835 		    ST.cache_mask   = mask;
4836 		}
4837 	    }
4838 
4839 	    /* Prefer B over A for minimal matching. */
4840 
4841 	    if (cur_curlyx->u.curlyx.minmod) {
4842 		ST.save_curlyx = cur_curlyx;
4843 		cur_curlyx = cur_curlyx->u.curlyx.prev_curlyx;
4844 		ST.cp = regcppush(ST.save_curlyx->u.curlyx.parenfloor);
4845 		REGCP_SET(ST.lastcp);
4846 		PUSH_YES_STATE_GOTO(WHILEM_B_min, ST.save_curlyx->u.curlyx.B);
4847 		/* NOTREACHED */
4848 	    }
4849 
4850 	    /* Prefer A over B for maximal matching. */
4851 
4852 	    if (n < max) { /* More greed allowed? */
4853 		ST.cp = regcppush(cur_curlyx->u.curlyx.parenfloor);
4854 		cur_curlyx->u.curlyx.lastloc = locinput;
4855 		REGCP_SET(ST.lastcp);
4856 		PUSH_STATE_GOTO(WHILEM_A_max, A);
4857 		/* NOTREACHED */
4858 	    }
4859 	    goto do_whilem_B_max;
4860 	}
4861 	/* NOTREACHED */
4862 
4863 	case WHILEM_B_min: /* just matched B in a minimal match */
4864 	case WHILEM_B_max: /* just matched B in a maximal match */
4865 	    cur_curlyx = ST.save_curlyx;
4866 	    sayYES;
4867 	    /* NOTREACHED */
4868 
4869 	case WHILEM_B_max_fail: /* just failed to match B in a maximal match */
4870 	    cur_curlyx = ST.save_curlyx;
4871 	    cur_curlyx->u.curlyx.lastloc = ST.save_lastloc;
4872 	    cur_curlyx->u.curlyx.count--;
4873 	    CACHEsayNO;
4874 	    /* NOTREACHED */
4875 
4876 	case WHILEM_A_min_fail: /* just failed to match A in a minimal match */
4877 	    /* FALL THROUGH */
4878 	case WHILEM_A_pre_fail: /* just failed to match even minimal A */
4879 	    REGCP_UNWIND(ST.lastcp);
4880 	    regcppop(rex);
4881 	    cur_curlyx->u.curlyx.lastloc = ST.save_lastloc;
4882 	    cur_curlyx->u.curlyx.count--;
4883 	    CACHEsayNO;
4884 	    /* NOTREACHED */
4885 
4886 	case WHILEM_A_max_fail: /* just failed to match A in a maximal match */
4887 	    REGCP_UNWIND(ST.lastcp);
4888 	    regcppop(rex);	/* Restore some previous $<digit>s? */
4889 	    PL_reginput = locinput;
4890 	    DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
4891 		"%*s  whilem: failed, trying continuation...\n",
4892 		REPORT_CODE_OFF+depth*2, "")
4893 	    );
4894 	  do_whilem_B_max:
4895 	    if (cur_curlyx->u.curlyx.count >= REG_INFTY
4896 		&& ckWARN(WARN_REGEXP)
4897 		&& !(PL_reg_flags & RF_warned))
4898 	    {
4899 		PL_reg_flags |= RF_warned;
4900 		Perl_warner(aTHX_ packWARN(WARN_REGEXP),
4901 		     "Complex regular subexpression recursion limit (%d) "
4902 		     "exceeded",
4903 		     REG_INFTY - 1);
4904 	    }
4905 
4906 	    /* now try B */
4907 	    ST.save_curlyx = cur_curlyx;
4908 	    cur_curlyx = cur_curlyx->u.curlyx.prev_curlyx;
4909 	    PUSH_YES_STATE_GOTO(WHILEM_B_max, ST.save_curlyx->u.curlyx.B);
4910 	    /* NOTREACHED */
4911 
4912 	case WHILEM_B_min_fail: /* just failed to match B in a minimal match */
4913 	    cur_curlyx = ST.save_curlyx;
4914 	    REGCP_UNWIND(ST.lastcp);
4915 	    regcppop(rex);
4916 
4917 	    if (cur_curlyx->u.curlyx.count >= /*max*/ARG2(cur_curlyx->u.curlyx.me)) {
4918 		/* Maximum greed exceeded */
4919 		if (cur_curlyx->u.curlyx.count >= REG_INFTY
4920 		    && ckWARN(WARN_REGEXP)
4921 		    && !(PL_reg_flags & RF_warned))
4922 		{
4923 		    PL_reg_flags |= RF_warned;
4924 		    Perl_warner(aTHX_ packWARN(WARN_REGEXP),
4925 			"Complex regular subexpression recursion "
4926 			"limit (%d) exceeded",
4927 			REG_INFTY - 1);
4928 		}
4929 		cur_curlyx->u.curlyx.count--;
4930 		CACHEsayNO;
4931 	    }
4932 
4933 	    DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
4934 		"%*s  trying longer...\n", REPORT_CODE_OFF+depth*2, "")
4935 	    );
4936 	    /* Try grabbing another A and see if it helps. */
4937 	    PL_reginput = locinput;
4938 	    cur_curlyx->u.curlyx.lastloc = locinput;
4939 	    ST.cp = regcppush(cur_curlyx->u.curlyx.parenfloor);
4940 	    REGCP_SET(ST.lastcp);
4941 	    PUSH_STATE_GOTO(WHILEM_A_min,
4942 		/*A*/ NEXTOPER(ST.save_curlyx->u.curlyx.me) + EXTRA_STEP_2ARGS);
4943 	    /* NOTREACHED */
4944 
4945 #undef  ST
4946 #define ST st->u.branch
4947 
4948 	case BRANCHJ:	    /*  /(...|A|...)/ with long next pointer */
4949 	    next = scan + ARG(scan);
4950 	    if (next == scan)
4951 		next = NULL;
4952 	    scan = NEXTOPER(scan);
4953 	    /* FALL THROUGH */
4954 
4955 	case BRANCH:	    /*  /(...|A|...)/ */
4956 	    scan = NEXTOPER(scan); /* scan now points to inner node */
4957 	    ST.lastparen = *PL_reglastparen;
4958 	    ST.next_branch = next;
4959 	    REGCP_SET(ST.cp);
4960 	    PL_reginput = locinput;
4961 
4962 	    /* Now go into the branch */
4963 	    if (has_cutgroup) {
4964 	        PUSH_YES_STATE_GOTO(BRANCH_next, scan);
4965 	    } else {
4966 	        PUSH_STATE_GOTO(BRANCH_next, scan);
4967 	    }
4968 	    /* NOTREACHED */
4969         case CUTGROUP:
4970             PL_reginput = locinput;
4971             sv_yes_mark = st->u.mark.mark_name = scan->flags ? NULL :
4972                 MUTABLE_SV(rexi->data->data[ ARG( scan ) ]);
4973             PUSH_STATE_GOTO(CUTGROUP_next,next);
4974             /* NOTREACHED */
4975         case CUTGROUP_next_fail:
4976             do_cutgroup = 1;
4977             no_final = 1;
4978             if (st->u.mark.mark_name)
4979                 sv_commit = st->u.mark.mark_name;
4980             sayNO;
4981             /* NOTREACHED */
4982         case BRANCH_next:
4983             sayYES;
4984             /* NOTREACHED */
4985 	case BRANCH_next_fail: /* that branch failed; try the next, if any */
4986 	    if (do_cutgroup) {
4987 	        do_cutgroup = 0;
4988 	        no_final = 0;
4989 	    }
4990 	    REGCP_UNWIND(ST.cp);
4991 	    for (n = *PL_reglastparen; n > ST.lastparen; n--)
4992 		PL_regoffs[n].end = -1;
4993 	    *PL_reglastparen = n;
4994 	    /*dmq: *PL_reglastcloseparen = n; */
4995 	    scan = ST.next_branch;
4996 	    /* no more branches? */
4997 	    if (!scan || (OP(scan) != BRANCH && OP(scan) != BRANCHJ)) {
4998 	        DEBUG_EXECUTE_r({
4999 		    PerlIO_printf( Perl_debug_log,
5000 			"%*s  %sBRANCH failed...%s\n",
5001 			REPORT_CODE_OFF+depth*2, "",
5002 			PL_colors[4],
5003 			PL_colors[5] );
5004 		});
5005 		sayNO_SILENT;
5006             }
5007 	    continue; /* execute next BRANCH[J] op */
5008 	    /* NOTREACHED */
5009 
5010 	case MINMOD:
5011 	    minmod = 1;
5012 	    break;
5013 
5014 #undef  ST
5015 #define ST st->u.curlym
5016 
5017 	case CURLYM:	/* /A{m,n}B/ where A is fixed-length */
5018 
5019 	    /* This is an optimisation of CURLYX that enables us to push
5020 	     * only a single backtracking state, no matter how many matches
5021 	     * there are in {m,n}. It relies on the pattern being constant
5022 	     * length, with no parens to influence future backrefs
5023 	     */
5024 
5025 	    ST.me = scan;
5026 	    scan = NEXTOPER(scan) + NODE_STEP_REGNODE;
5027 
5028 	    /* if paren positive, emulate an OPEN/CLOSE around A */
5029 	    if (ST.me->flags) {
5030 		U32 paren = ST.me->flags;
5031 		if (paren > PL_regsize)
5032 		    PL_regsize = paren;
5033 		if (paren > *PL_reglastparen)
5034 		    *PL_reglastparen = paren;
5035 		scan += NEXT_OFF(scan); /* Skip former OPEN. */
5036 	    }
5037 	    ST.A = scan;
5038 	    ST.B = next;
5039 	    ST.alen = 0;
5040 	    ST.count = 0;
5041 	    ST.minmod = minmod;
5042 	    minmod = 0;
5043 	    ST.c1 = CHRTEST_UNINIT;
5044 	    REGCP_SET(ST.cp);
5045 
5046 	    if (!(ST.minmod ? ARG1(ST.me) : ARG2(ST.me))) /* min/max */
5047 		goto curlym_do_B;
5048 
5049 	  curlym_do_A: /* execute the A in /A{m,n}B/  */
5050 	    PL_reginput = locinput;
5051 	    PUSH_YES_STATE_GOTO(CURLYM_A, ST.A); /* match A */
5052 	    /* NOTREACHED */
5053 
5054 	case CURLYM_A: /* we've just matched an A */
5055 	    locinput = st->locinput;
5056 	    nextchr = UCHARAT(locinput);
5057 
5058 	    ST.count++;
5059 	    /* after first match, determine A's length: u.curlym.alen */
5060 	    if (ST.count == 1) {
5061 		if (PL_reg_match_utf8) {
5062 		    char *s = locinput;
5063 		    while (s < PL_reginput) {
5064 			ST.alen++;
5065 			s += UTF8SKIP(s);
5066 		    }
5067 		}
5068 		else {
5069 		    ST.alen = PL_reginput - locinput;
5070 		}
5071 		if (ST.alen == 0)
5072 		    ST.count = ST.minmod ? ARG1(ST.me) : ARG2(ST.me);
5073 	    }
5074 	    DEBUG_EXECUTE_r(
5075 		PerlIO_printf(Perl_debug_log,
5076 			  "%*s  CURLYM now matched %"IVdf" times, len=%"IVdf"...\n",
5077 			  (int)(REPORT_CODE_OFF+(depth*2)), "",
5078 			  (IV) ST.count, (IV)ST.alen)
5079 	    );
5080 
5081 	    locinput = PL_reginput;
5082 
5083 	    if (cur_eval && cur_eval->u.eval.close_paren &&
5084 	        cur_eval->u.eval.close_paren == (U32)ST.me->flags)
5085 	        goto fake_end;
5086 
5087 	    {
5088 		I32 max = (ST.minmod ? ARG1(ST.me) : ARG2(ST.me));
5089 		if ( max == REG_INFTY || ST.count < max )
5090 		    goto curlym_do_A; /* try to match another A */
5091 	    }
5092 	    goto curlym_do_B; /* try to match B */
5093 
5094 	case CURLYM_A_fail: /* just failed to match an A */
5095 	    REGCP_UNWIND(ST.cp);
5096 
5097 	    if (ST.minmod || ST.count < ARG1(ST.me) /* min*/
5098 	        || (cur_eval && cur_eval->u.eval.close_paren &&
5099 	            cur_eval->u.eval.close_paren == (U32)ST.me->flags))
5100 		sayNO;
5101 
5102 	  curlym_do_B: /* execute the B in /A{m,n}B/  */
5103 	    PL_reginput = locinput;
5104 	    if (ST.c1 == CHRTEST_UNINIT) {
5105 		/* calculate c1 and c2 for possible match of 1st char
5106 		 * following curly */
5107 		ST.c1 = ST.c2 = CHRTEST_VOID;
5108 		if (HAS_TEXT(ST.B) || JUMPABLE(ST.B)) {
5109 		    regnode *text_node = ST.B;
5110 		    if (! HAS_TEXT(text_node))
5111 			FIND_NEXT_IMPT(text_node);
5112 	            /* this used to be
5113 
5114 	                (HAS_TEXT(text_node) && PL_regkind[OP(text_node)] == EXACT)
5115 
5116 	            	But the former is redundant in light of the latter.
5117 
5118 	            	if this changes back then the macro for
5119 	            	IS_TEXT and friends need to change.
5120 	             */
5121 		    if (PL_regkind[OP(text_node)] == EXACT)
5122 		    {
5123 
5124 			ST.c1 = (U8)*STRING(text_node);
5125 			switch (OP(text_node)) {
5126 			    case EXACTF: ST.c2 = PL_fold[ST.c1]; break;
5127 			    case EXACTFA:
5128 			    case EXACTFU_SS:
5129 			    case EXACTFU_TRICKYFOLD:
5130 			    case EXACTFU: ST.c2 = PL_fold_latin1[ST.c1]; break;
5131 			    case EXACTFL: ST.c2 = PL_fold_locale[ST.c1]; break;
5132 			    default: ST.c2 = ST.c1;
5133 			}
5134 		    }
5135 		}
5136 	    }
5137 
5138 	    DEBUG_EXECUTE_r(
5139 		PerlIO_printf(Perl_debug_log,
5140 		    "%*s  CURLYM trying tail with matches=%"IVdf"...\n",
5141 		    (int)(REPORT_CODE_OFF+(depth*2)),
5142 		    "", (IV)ST.count)
5143 		);
5144 	    if (ST.c1 != CHRTEST_VOID
5145 		    && UCHARAT(PL_reginput) != ST.c1
5146 		    && UCHARAT(PL_reginput) != ST.c2)
5147 	    {
5148 		/* simulate B failing */
5149 		DEBUG_OPTIMISE_r(
5150 		    PerlIO_printf(Perl_debug_log,
5151 		        "%*s  CURLYM Fast bail c1=%"IVdf" c2=%"IVdf"\n",
5152 		        (int)(REPORT_CODE_OFF+(depth*2)),"",
5153 		        (IV)ST.c1,(IV)ST.c2
5154 		));
5155 		state_num = CURLYM_B_fail;
5156 		goto reenter_switch;
5157 	    }
5158 
5159 	    if (ST.me->flags) {
5160 		/* mark current A as captured */
5161 		I32 paren = ST.me->flags;
5162 		if (ST.count) {
5163 		    PL_regoffs[paren].start
5164 			= HOPc(PL_reginput, -ST.alen) - PL_bostr;
5165 		    PL_regoffs[paren].end = PL_reginput - PL_bostr;
5166 		    /*dmq: *PL_reglastcloseparen = paren; */
5167 		}
5168 		else
5169 		    PL_regoffs[paren].end = -1;
5170 		if (cur_eval && cur_eval->u.eval.close_paren &&
5171 		    cur_eval->u.eval.close_paren == (U32)ST.me->flags)
5172 		{
5173 		    if (ST.count)
5174 	                goto fake_end;
5175 	            else
5176 	                sayNO;
5177 	        }
5178 	    }
5179 
5180 	    PUSH_STATE_GOTO(CURLYM_B, ST.B); /* match B */
5181 	    /* NOTREACHED */
5182 
5183 	case CURLYM_B_fail: /* just failed to match a B */
5184 	    REGCP_UNWIND(ST.cp);
5185 	    if (ST.minmod) {
5186 		I32 max = ARG2(ST.me);
5187 		if (max != REG_INFTY && ST.count == max)
5188 		    sayNO;
5189 		goto curlym_do_A; /* try to match a further A */
5190 	    }
5191 	    /* backtrack one A */
5192 	    if (ST.count == ARG1(ST.me) /* min */)
5193 		sayNO;
5194 	    ST.count--;
5195 	    locinput = HOPc(locinput, -ST.alen);
5196 	    goto curlym_do_B; /* try to match B */
5197 
5198 #undef ST
5199 #define ST st->u.curly
5200 
5201 #define CURLY_SETPAREN(paren, success) \
5202     if (paren) { \
5203 	if (success) { \
5204 	    PL_regoffs[paren].start = HOPc(locinput, -1) - PL_bostr; \
5205 	    PL_regoffs[paren].end = locinput - PL_bostr; \
5206 	    *PL_reglastcloseparen = paren; \
5207 	} \
5208 	else \
5209 	    PL_regoffs[paren].end = -1; \
5210     }
5211 
5212 	case STAR:		/*  /A*B/ where A is width 1 */
5213 	    ST.paren = 0;
5214 	    ST.min = 0;
5215 	    ST.max = REG_INFTY;
5216 	    scan = NEXTOPER(scan);
5217 	    goto repeat;
5218 	case PLUS:		/*  /A+B/ where A is width 1 */
5219 	    ST.paren = 0;
5220 	    ST.min = 1;
5221 	    ST.max = REG_INFTY;
5222 	    scan = NEXTOPER(scan);
5223 	    goto repeat;
5224 	case CURLYN:		/*  /(A){m,n}B/ where A is width 1 */
5225 	    ST.paren = scan->flags;	/* Which paren to set */
5226 	    if (ST.paren > PL_regsize)
5227 		PL_regsize = ST.paren;
5228 	    if (ST.paren > *PL_reglastparen)
5229 		*PL_reglastparen = ST.paren;
5230 	    ST.min = ARG1(scan);  /* min to match */
5231 	    ST.max = ARG2(scan);  /* max to match */
5232 	    if (cur_eval && cur_eval->u.eval.close_paren &&
5233 	        cur_eval->u.eval.close_paren == (U32)ST.paren) {
5234 	        ST.min=1;
5235 	        ST.max=1;
5236 	    }
5237             scan = regnext(NEXTOPER(scan) + NODE_STEP_REGNODE);
5238 	    goto repeat;
5239 	case CURLY:		/*  /A{m,n}B/ where A is width 1 */
5240 	    ST.paren = 0;
5241 	    ST.min = ARG1(scan);  /* min to match */
5242 	    ST.max = ARG2(scan);  /* max to match */
5243 	    scan = NEXTOPER(scan) + NODE_STEP_REGNODE;
5244 	  repeat:
5245 	    /*
5246 	    * Lookahead to avoid useless match attempts
5247 	    * when we know what character comes next.
5248 	    *
5249 	    * Used to only do .*x and .*?x, but now it allows
5250 	    * for )'s, ('s and (?{ ... })'s to be in the way
5251 	    * of the quantifier and the EXACT-like node.  -- japhy
5252 	    */
5253 
5254 	    if (ST.min > ST.max) /* XXX make this a compile-time check? */
5255 		sayNO;
5256 	    if (HAS_TEXT(next) || JUMPABLE(next)) {
5257 		U8 *s;
5258 		regnode *text_node = next;
5259 
5260 		if (! HAS_TEXT(text_node))
5261 		    FIND_NEXT_IMPT(text_node);
5262 
5263 		if (! HAS_TEXT(text_node))
5264 		    ST.c1 = ST.c2 = CHRTEST_VOID;
5265 		else {
5266 		    if ( PL_regkind[OP(text_node)] != EXACT ) {
5267 			ST.c1 = ST.c2 = CHRTEST_VOID;
5268 			goto assume_ok_easy;
5269 		    }
5270 		    else
5271 			s = (U8*)STRING(text_node);
5272 
5273                     /*  Currently we only get here when
5274 
5275                         PL_rekind[OP(text_node)] == EXACT
5276 
5277                         if this changes back then the macro for IS_TEXT and
5278                         friends need to change. */
5279 		    if (!UTF_PATTERN) {
5280 			ST.c1 = *s;
5281 			switch (OP(text_node)) {
5282 			    case EXACTF: ST.c2 = PL_fold[ST.c1]; break;
5283 			    case EXACTFA:
5284 			    case EXACTFU_SS:
5285 			    case EXACTFU_TRICKYFOLD:
5286 			    case EXACTFU: ST.c2 = PL_fold_latin1[ST.c1]; break;
5287 			    case EXACTFL: ST.c2 = PL_fold_locale[ST.c1]; break;
5288 			    default: ST.c2 = ST.c1; break;
5289 			}
5290 		    }
5291 		    else { /* UTF_PATTERN */
5292 			if (IS_TEXTFU(text_node) || IS_TEXTF(text_node)) {
5293 			     STRLEN ulen1, ulen2;
5294 			     U8 tmpbuf1[UTF8_MAXBYTES_CASE+1];
5295 			     U8 tmpbuf2[UTF8_MAXBYTES_CASE+1];
5296 
5297 			     to_utf8_lower((U8*)s, tmpbuf1, &ulen1);
5298 			     to_utf8_upper((U8*)s, tmpbuf2, &ulen2);
5299 #ifdef EBCDIC
5300 			     ST.c1 = utf8n_to_uvchr(tmpbuf1, UTF8_MAXLEN, 0,
5301 						    ckWARN(WARN_UTF8) ?
5302                                                     0 : UTF8_ALLOW_ANY);
5303 			     ST.c2 = utf8n_to_uvchr(tmpbuf2, UTF8_MAXLEN, 0,
5304                                                     ckWARN(WARN_UTF8) ?
5305                                                     0 : UTF8_ALLOW_ANY);
5306 #else
5307 			     ST.c1 = utf8n_to_uvuni(tmpbuf1, UTF8_MAXBYTES, 0,
5308 						    uniflags);
5309 			     ST.c2 = utf8n_to_uvuni(tmpbuf2, UTF8_MAXBYTES, 0,
5310 						    uniflags);
5311 #endif
5312 			}
5313 			else {
5314 			    ST.c2 = ST.c1 = utf8n_to_uvchr(s, UTF8_MAXBYTES, 0,
5315 						     uniflags);
5316 			}
5317 		    }
5318 		}
5319 	    }
5320 	    else
5321 		ST.c1 = ST.c2 = CHRTEST_VOID;
5322 	assume_ok_easy:
5323 
5324 	    ST.A = scan;
5325 	    ST.B = next;
5326 	    PL_reginput = locinput;
5327 	    if (minmod) {
5328 		minmod = 0;
5329 		if (ST.min && regrepeat(rex, ST.A, ST.min, depth) < ST.min)
5330 		    sayNO;
5331 		ST.count = ST.min;
5332 		locinput = PL_reginput;
5333 		REGCP_SET(ST.cp);
5334 		if (ST.c1 == CHRTEST_VOID)
5335 		    goto curly_try_B_min;
5336 
5337 		ST.oldloc = locinput;
5338 
5339 		/* set ST.maxpos to the furthest point along the
5340 		 * string that could possibly match */
5341 		if  (ST.max == REG_INFTY) {
5342 		    ST.maxpos = PL_regeol - 1;
5343 		    if (utf8_target)
5344 			while (UTF8_IS_CONTINUATION(*(U8*)ST.maxpos))
5345 			    ST.maxpos--;
5346 		}
5347 		else if (utf8_target) {
5348 		    int m = ST.max - ST.min;
5349 		    for (ST.maxpos = locinput;
5350 			 m >0 && ST.maxpos + UTF8SKIP(ST.maxpos) <= PL_regeol; m--)
5351 			ST.maxpos += UTF8SKIP(ST.maxpos);
5352 		}
5353 		else {
5354 		    ST.maxpos = locinput + ST.max - ST.min;
5355 		    if (ST.maxpos >= PL_regeol)
5356 			ST.maxpos = PL_regeol - 1;
5357 		}
5358 		goto curly_try_B_min_known;
5359 
5360 	    }
5361 	    else {
5362 		ST.count = regrepeat(rex, ST.A, ST.max, depth);
5363 		locinput = PL_reginput;
5364 		if (ST.count < ST.min)
5365 		    sayNO;
5366 		if ((ST.count > ST.min)
5367 		    && (PL_regkind[OP(ST.B)] == EOL) && (OP(ST.B) != MEOL))
5368 		{
5369 		    /* A{m,n} must come at the end of the string, there's
5370 		     * no point in backing off ... */
5371 		    ST.min = ST.count;
5372 		    /* ...except that $ and \Z can match before *and* after
5373 		       newline at the end.  Consider "\n\n" =~ /\n+\Z\n/.
5374 		       We may back off by one in this case. */
5375 		    if (UCHARAT(PL_reginput - 1) == '\n' && OP(ST.B) != EOS)
5376 			ST.min--;
5377 		}
5378 		REGCP_SET(ST.cp);
5379 		goto curly_try_B_max;
5380 	    }
5381 	    /* NOTREACHED */
5382 
5383 
5384 	case CURLY_B_min_known_fail:
5385 	    /* failed to find B in a non-greedy match where c1,c2 valid */
5386 	    if (ST.paren && ST.count)
5387 		PL_regoffs[ST.paren].end = -1;
5388 
5389 	    PL_reginput = locinput;	/* Could be reset... */
5390 	    REGCP_UNWIND(ST.cp);
5391 	    /* Couldn't or didn't -- move forward. */
5392 	    ST.oldloc = locinput;
5393 	    if (utf8_target)
5394 		locinput += UTF8SKIP(locinput);
5395 	    else
5396 		locinput++;
5397 	    ST.count++;
5398 	  curly_try_B_min_known:
5399 	     /* find the next place where 'B' could work, then call B */
5400 	    {
5401 		int n;
5402 		if (utf8_target) {
5403 		    n = (ST.oldloc == locinput) ? 0 : 1;
5404 		    if (ST.c1 == ST.c2) {
5405 			STRLEN len;
5406 			/* set n to utf8_distance(oldloc, locinput) */
5407 			while (locinput <= ST.maxpos &&
5408 			       utf8n_to_uvchr((U8*)locinput,
5409 					      UTF8_MAXBYTES, &len,
5410 					      uniflags) != (UV)ST.c1) {
5411 			    locinput += len;
5412 			    n++;
5413 			}
5414 		    }
5415 		    else {
5416 			/* set n to utf8_distance(oldloc, locinput) */
5417 			while (locinput <= ST.maxpos) {
5418 			    STRLEN len;
5419 			    const UV c = utf8n_to_uvchr((U8*)locinput,
5420 						  UTF8_MAXBYTES, &len,
5421 						  uniflags);
5422 			    if (c == (UV)ST.c1 || c == (UV)ST.c2)
5423 				break;
5424 			    locinput += len;
5425 			    n++;
5426 			}
5427 		    }
5428 		}
5429 		else {
5430 		    if (ST.c1 == ST.c2) {
5431 			while (locinput <= ST.maxpos &&
5432 			       UCHARAT(locinput) != ST.c1)
5433 			    locinput++;
5434 		    }
5435 		    else {
5436 			while (locinput <= ST.maxpos
5437 			       && UCHARAT(locinput) != ST.c1
5438 			       && UCHARAT(locinput) != ST.c2)
5439 			    locinput++;
5440 		    }
5441 		    n = locinput - ST.oldloc;
5442 		}
5443 		if (locinput > ST.maxpos)
5444 		    sayNO;
5445 		/* PL_reginput == oldloc now */
5446 		if (n) {
5447 		    ST.count += n;
5448 		    if (regrepeat(rex, ST.A, n, depth) < n)
5449 			sayNO;
5450 		}
5451 		PL_reginput = locinput;
5452 		CURLY_SETPAREN(ST.paren, ST.count);
5453 		if (cur_eval && cur_eval->u.eval.close_paren &&
5454 		    cur_eval->u.eval.close_paren == (U32)ST.paren) {
5455 		    goto fake_end;
5456 	        }
5457 		PUSH_STATE_GOTO(CURLY_B_min_known, ST.B);
5458 	    }
5459 	    /* NOTREACHED */
5460 
5461 
5462 	case CURLY_B_min_fail:
5463 	    /* failed to find B in a non-greedy match where c1,c2 invalid */
5464 	    if (ST.paren && ST.count)
5465 		PL_regoffs[ST.paren].end = -1;
5466 
5467 	    REGCP_UNWIND(ST.cp);
5468 	    /* failed -- move forward one */
5469 	    PL_reginput = locinput;
5470 	    if (regrepeat(rex, ST.A, 1, depth)) {
5471 		ST.count++;
5472 		locinput = PL_reginput;
5473 		if (ST.count <= ST.max || (ST.max == REG_INFTY &&
5474 			ST.count > 0)) /* count overflow ? */
5475 		{
5476 		  curly_try_B_min:
5477 		    CURLY_SETPAREN(ST.paren, ST.count);
5478 		    if (cur_eval && cur_eval->u.eval.close_paren &&
5479 		        cur_eval->u.eval.close_paren == (U32)ST.paren) {
5480                         goto fake_end;
5481                     }
5482 		    PUSH_STATE_GOTO(CURLY_B_min, ST.B);
5483 		}
5484 	    }
5485 	    sayNO;
5486 	    /* NOTREACHED */
5487 
5488 
5489 	curly_try_B_max:
5490 	    /* a successful greedy match: now try to match B */
5491             if (cur_eval && cur_eval->u.eval.close_paren &&
5492                 cur_eval->u.eval.close_paren == (U32)ST.paren) {
5493                 goto fake_end;
5494             }
5495 	    {
5496 		UV c = 0;
5497 		if (ST.c1 != CHRTEST_VOID)
5498 		    c = utf8_target ? utf8n_to_uvchr((U8*)PL_reginput,
5499 					   UTF8_MAXBYTES, 0, uniflags)
5500 				: (UV) UCHARAT(PL_reginput);
5501 		/* If it could work, try it. */
5502 		if (ST.c1 == CHRTEST_VOID || c == (UV)ST.c1 || c == (UV)ST.c2) {
5503 		    CURLY_SETPAREN(ST.paren, ST.count);
5504 		    PUSH_STATE_GOTO(CURLY_B_max, ST.B);
5505 		    /* NOTREACHED */
5506 		}
5507 	    }
5508 	    /* FALL THROUGH */
5509 	case CURLY_B_max_fail:
5510 	    /* failed to find B in a greedy match */
5511 	    if (ST.paren && ST.count)
5512 		PL_regoffs[ST.paren].end = -1;
5513 
5514 	    REGCP_UNWIND(ST.cp);
5515 	    /*  back up. */
5516 	    if (--ST.count < ST.min)
5517 		sayNO;
5518 	    PL_reginput = locinput = HOPc(locinput, -1);
5519 	    goto curly_try_B_max;
5520 
5521 #undef ST
5522 
5523 	case END:
5524 	    fake_end:
5525 	    if (cur_eval) {
5526 		/* we've just finished A in /(??{A})B/; now continue with B */
5527 		I32 tmpix;
5528 		st->u.eval.toggle_reg_flags
5529 			    = cur_eval->u.eval.toggle_reg_flags;
5530 		PL_reg_flags ^= st->u.eval.toggle_reg_flags;
5531 
5532 		st->u.eval.prev_rex = rex_sv;		/* inner */
5533 		SETREX(rex_sv,cur_eval->u.eval.prev_rex);
5534 		rex = (struct regexp *)SvANY(rex_sv);
5535 		rexi = RXi_GET(rex);
5536 		cur_curlyx = cur_eval->u.eval.prev_curlyx;
5537 		(void)ReREFCNT_inc(rex_sv);
5538 		st->u.eval.cp = regcppush(0);	/* Save *all* the positions. */
5539 
5540 		/* rex was changed so update the pointer in PL_reglastparen and PL_reglastcloseparen */
5541 		PL_reglastparen = &rex->lastparen;
5542 		PL_reglastcloseparen = &rex->lastcloseparen;
5543 
5544 		REGCP_SET(st->u.eval.lastcp);
5545 		PL_reginput = locinput;
5546 
5547 		/* Restore parens of the outer rex without popping the
5548 		 * savestack */
5549 		tmpix = PL_savestack_ix;
5550 		PL_savestack_ix = cur_eval->u.eval.lastcp;
5551 		regcppop(rex);
5552 		PL_savestack_ix = tmpix;
5553 
5554 		st->u.eval.prev_eval = cur_eval;
5555 		cur_eval = cur_eval->u.eval.prev_eval;
5556 		DEBUG_EXECUTE_r(
5557 		    PerlIO_printf(Perl_debug_log, "%*s  EVAL trying tail ... %"UVxf"\n",
5558 				      REPORT_CODE_OFF+depth*2, "",PTR2UV(cur_eval)););
5559                 if ( nochange_depth )
5560 	            nochange_depth--;
5561 
5562                 PUSH_YES_STATE_GOTO(EVAL_AB,
5563 			st->u.eval.prev_eval->u.eval.B); /* match B */
5564 	    }
5565 
5566 	    if (locinput < reginfo->till) {
5567 		DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
5568 				      "%sMatch possible, but length=%ld is smaller than requested=%ld, failing!%s\n",
5569 				      PL_colors[4],
5570 				      (long)(locinput - PL_reg_starttry),
5571 				      (long)(reginfo->till - PL_reg_starttry),
5572 				      PL_colors[5]));
5573 
5574 		sayNO_SILENT;		/* Cannot match: too short. */
5575 	    }
5576 	    PL_reginput = locinput;	/* put where regtry can find it */
5577 	    sayYES;			/* Success! */
5578 
5579 	case SUCCEED: /* successful SUSPEND/UNLESSM/IFMATCH/CURLYM */
5580 	    DEBUG_EXECUTE_r(
5581 	    PerlIO_printf(Perl_debug_log,
5582 		"%*s  %ssubpattern success...%s\n",
5583 		REPORT_CODE_OFF+depth*2, "", PL_colors[4], PL_colors[5]));
5584 	    PL_reginput = locinput;	/* put where regtry can find it */
5585 	    sayYES;			/* Success! */
5586 
5587 #undef  ST
5588 #define ST st->u.ifmatch
5589 
5590 	case SUSPEND:	/* (?>A) */
5591 	    ST.wanted = 1;
5592 	    PL_reginput = locinput;
5593 	    goto do_ifmatch;
5594 
5595 	case UNLESSM:	/* -ve lookaround: (?!A), or with flags, (?<!A) */
5596 	    ST.wanted = 0;
5597 	    goto ifmatch_trivial_fail_test;
5598 
5599 	case IFMATCH:	/* +ve lookaround: (?=A), or with flags, (?<=A) */
5600 	    ST.wanted = 1;
5601 	  ifmatch_trivial_fail_test:
5602 	    if (scan->flags) {
5603 		char * const s = HOPBACKc(locinput, scan->flags);
5604 		if (!s) {
5605 		    /* trivial fail */
5606 		    if (logical) {
5607 			logical = 0;
5608 			sw = 1 - cBOOL(ST.wanted);
5609 		    }
5610 		    else if (ST.wanted)
5611 			sayNO;
5612 		    next = scan + ARG(scan);
5613 		    if (next == scan)
5614 			next = NULL;
5615 		    break;
5616 		}
5617 		PL_reginput = s;
5618 	    }
5619 	    else
5620 		PL_reginput = locinput;
5621 
5622 	  do_ifmatch:
5623 	    ST.me = scan;
5624 	    ST.logical = logical;
5625 	    logical = 0; /* XXX: reset state of logical once it has been saved into ST */
5626 
5627 	    /* execute body of (?...A) */
5628 	    PUSH_YES_STATE_GOTO(IFMATCH_A, NEXTOPER(NEXTOPER(scan)));
5629 	    /* NOTREACHED */
5630 
5631 	case IFMATCH_A_fail: /* body of (?...A) failed */
5632 	    ST.wanted = !ST.wanted;
5633 	    /* FALL THROUGH */
5634 
5635 	case IFMATCH_A: /* body of (?...A) succeeded */
5636 	    if (ST.logical) {
5637 		sw = cBOOL(ST.wanted);
5638 	    }
5639 	    else if (!ST.wanted)
5640 		sayNO;
5641 
5642 	    if (OP(ST.me) == SUSPEND)
5643 		locinput = PL_reginput;
5644 	    else {
5645 		locinput = PL_reginput = st->locinput;
5646 		nextchr = UCHARAT(locinput);
5647 	    }
5648 	    scan = ST.me + ARG(ST.me);
5649 	    if (scan == ST.me)
5650 		scan = NULL;
5651 	    continue; /* execute B */
5652 
5653 #undef ST
5654 
5655 	case LONGJMP:
5656 	    next = scan + ARG(scan);
5657 	    if (next == scan)
5658 		next = NULL;
5659 	    break;
5660 	case COMMIT:
5661 	    reginfo->cutpoint = PL_regeol;
5662 	    /* FALLTHROUGH */
5663 	case PRUNE:
5664 	    PL_reginput = locinput;
5665 	    if (!scan->flags)
5666 	        sv_yes_mark = sv_commit = MUTABLE_SV(rexi->data->data[ ARG( scan ) ]);
5667 	    PUSH_STATE_GOTO(COMMIT_next,next);
5668 	    /* NOTREACHED */
5669 	case COMMIT_next_fail:
5670 	    no_final = 1;
5671 	    /* FALLTHROUGH */
5672 	case OPFAIL:
5673 	    sayNO;
5674 	    /* NOTREACHED */
5675 
5676 #define ST st->u.mark
5677         case MARKPOINT:
5678             ST.prev_mark = mark_state;
5679             ST.mark_name = sv_commit = sv_yes_mark
5680                 = MUTABLE_SV(rexi->data->data[ ARG( scan ) ]);
5681             mark_state = st;
5682             ST.mark_loc = PL_reginput = locinput;
5683             PUSH_YES_STATE_GOTO(MARKPOINT_next,next);
5684             /* NOTREACHED */
5685         case MARKPOINT_next:
5686             mark_state = ST.prev_mark;
5687             sayYES;
5688             /* NOTREACHED */
5689         case MARKPOINT_next_fail:
5690             if (popmark && sv_eq(ST.mark_name,popmark))
5691             {
5692                 if (ST.mark_loc > startpoint)
5693 	            reginfo->cutpoint = HOPBACKc(ST.mark_loc, 1);
5694                 popmark = NULL; /* we found our mark */
5695                 sv_commit = ST.mark_name;
5696 
5697                 DEBUG_EXECUTE_r({
5698                         PerlIO_printf(Perl_debug_log,
5699 		            "%*s  %ssetting cutpoint to mark:%"SVf"...%s\n",
5700 		            REPORT_CODE_OFF+depth*2, "",
5701 		            PL_colors[4], SVfARG(sv_commit), PL_colors[5]);
5702 		});
5703             }
5704             mark_state = ST.prev_mark;
5705             sv_yes_mark = mark_state ?
5706                 mark_state->u.mark.mark_name : NULL;
5707             sayNO;
5708             /* NOTREACHED */
5709         case SKIP:
5710             PL_reginput = locinput;
5711             if (scan->flags) {
5712                 /* (*SKIP) : if we fail we cut here*/
5713                 ST.mark_name = NULL;
5714                 ST.mark_loc = locinput;
5715                 PUSH_STATE_GOTO(SKIP_next,next);
5716             } else {
5717                 /* (*SKIP:NAME) : if there is a (*MARK:NAME) fail where it was,
5718                    otherwise do nothing.  Meaning we need to scan
5719                  */
5720                 regmatch_state *cur = mark_state;
5721                 SV *find = MUTABLE_SV(rexi->data->data[ ARG( scan ) ]);
5722 
5723                 while (cur) {
5724                     if ( sv_eq( cur->u.mark.mark_name,
5725                                 find ) )
5726                     {
5727                         ST.mark_name = find;
5728                         PUSH_STATE_GOTO( SKIP_next, next );
5729                     }
5730                     cur = cur->u.mark.prev_mark;
5731                 }
5732             }
5733             /* Didn't find our (*MARK:NAME) so ignore this (*SKIP:NAME) */
5734             break;
5735 	case SKIP_next_fail:
5736 	    if (ST.mark_name) {
5737 	        /* (*CUT:NAME) - Set up to search for the name as we
5738 	           collapse the stack*/
5739 	        popmark = ST.mark_name;
5740 	    } else {
5741 	        /* (*CUT) - No name, we cut here.*/
5742 	        if (ST.mark_loc > startpoint)
5743 	            reginfo->cutpoint = HOPBACKc(ST.mark_loc, 1);
5744 	        /* but we set sv_commit to latest mark_name if there
5745 	           is one so they can test to see how things lead to this
5746 	           cut */
5747                 if (mark_state)
5748                     sv_commit=mark_state->u.mark.mark_name;
5749             }
5750             no_final = 1;
5751             sayNO;
5752             /* NOTREACHED */
5753 #undef ST
5754         case LNBREAK:
5755             if ((n=is_LNBREAK(locinput,utf8_target))) {
5756                 locinput += n;
5757                 nextchr = UCHARAT(locinput);
5758             } else
5759                 sayNO;
5760             break;
5761 
5762 #define CASE_CLASS(nAmE)                              \
5763         case nAmE:                                    \
5764 	    if (locinput >= PL_regeol)                \
5765 		sayNO;                                \
5766             if ((n=is_##nAmE(locinput,utf8_target))) {    \
5767                 locinput += n;                        \
5768                 nextchr = UCHARAT(locinput);          \
5769             } else                                    \
5770                 sayNO;                                \
5771             break;                                    \
5772         case N##nAmE:                                 \
5773 	    if (locinput >= PL_regeol)                \
5774 		sayNO;                                \
5775             if ((n=is_##nAmE(locinput,utf8_target))) {    \
5776                 sayNO;                                \
5777             } else {                                  \
5778                 locinput += UTF8SKIP(locinput);       \
5779                 nextchr = UCHARAT(locinput);          \
5780             }                                         \
5781             break
5782 
5783         CASE_CLASS(VERTWS);
5784         CASE_CLASS(HORIZWS);
5785 #undef CASE_CLASS
5786 
5787 	default:
5788 	    PerlIO_printf(Perl_error_log, "%"UVxf" %d\n",
5789 			  PTR2UV(scan), OP(scan));
5790 	    Perl_croak(aTHX_ "regexp memory corruption");
5791 
5792 	} /* end switch */
5793 
5794         /* switch break jumps here */
5795 	scan = next; /* prepare to execute the next op and ... */
5796 	continue;    /* ... jump back to the top, reusing st */
5797 	/* NOTREACHED */
5798 
5799       push_yes_state:
5800 	/* push a state that backtracks on success */
5801 	st->u.yes.prev_yes_state = yes_state;
5802 	yes_state = st;
5803 	/* FALL THROUGH */
5804       push_state:
5805 	/* push a new regex state, then continue at scan  */
5806 	{
5807 	    regmatch_state *newst;
5808 
5809 	    DEBUG_STACK_r({
5810 	        regmatch_state *cur = st;
5811 	        regmatch_state *curyes = yes_state;
5812 	        int curd = depth;
5813 	        regmatch_slab *slab = PL_regmatch_slab;
5814                 for (;curd > -1;cur--,curd--) {
5815                     if (cur < SLAB_FIRST(slab)) {
5816                 	slab = slab->prev;
5817                 	cur = SLAB_LAST(slab);
5818                     }
5819                     PerlIO_printf(Perl_error_log, "%*s#%-3d %-10s %s\n",
5820                         REPORT_CODE_OFF + 2 + depth * 2,"",
5821                         curd, PL_reg_name[cur->resume_state],
5822                         (curyes == cur) ? "yes" : ""
5823                     );
5824                     if (curyes == cur)
5825 	                curyes = cur->u.yes.prev_yes_state;
5826                 }
5827             } else
5828                 DEBUG_STATE_pp("push")
5829             );
5830 	    depth++;
5831 	    st->locinput = locinput;
5832 	    newst = st+1;
5833 	    if (newst >  SLAB_LAST(PL_regmatch_slab))
5834 		newst = S_push_slab(aTHX);
5835 	    PL_regmatch_state = newst;
5836 
5837 	    locinput = PL_reginput;
5838 	    nextchr = UCHARAT(locinput);
5839 	    st = newst;
5840 	    continue;
5841 	    /* NOTREACHED */
5842 	}
5843     }
5844 
5845     /*
5846     * We get here only if there's trouble -- normally "case END" is
5847     * the terminating point.
5848     */
5849     Perl_croak(aTHX_ "corrupted regexp pointers");
5850     /*NOTREACHED*/
5851     sayNO;
5852 
5853 yes:
5854     if (yes_state) {
5855 	/* we have successfully completed a subexpression, but we must now
5856 	 * pop to the state marked by yes_state and continue from there */
5857 	assert(st != yes_state);
5858 #ifdef DEBUGGING
5859 	while (st != yes_state) {
5860 	    st--;
5861 	    if (st < SLAB_FIRST(PL_regmatch_slab)) {
5862 		PL_regmatch_slab = PL_regmatch_slab->prev;
5863 		st = SLAB_LAST(PL_regmatch_slab);
5864 	    }
5865 	    DEBUG_STATE_r({
5866 	        if (no_final) {
5867 	            DEBUG_STATE_pp("pop (no final)");
5868 	        } else {
5869 	            DEBUG_STATE_pp("pop (yes)");
5870 	        }
5871 	    });
5872 	    depth--;
5873 	}
5874 #else
5875 	while (yes_state < SLAB_FIRST(PL_regmatch_slab)
5876 	    || yes_state > SLAB_LAST(PL_regmatch_slab))
5877 	{
5878 	    /* not in this slab, pop slab */
5879 	    depth -= (st - SLAB_FIRST(PL_regmatch_slab) + 1);
5880 	    PL_regmatch_slab = PL_regmatch_slab->prev;
5881 	    st = SLAB_LAST(PL_regmatch_slab);
5882 	}
5883 	depth -= (st - yes_state);
5884 #endif
5885 	st = yes_state;
5886 	yes_state = st->u.yes.prev_yes_state;
5887 	PL_regmatch_state = st;
5888 
5889         if (no_final) {
5890             locinput= st->locinput;
5891             nextchr = UCHARAT(locinput);
5892         }
5893 	state_num = st->resume_state + no_final;
5894 	goto reenter_switch;
5895     }
5896 
5897     DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "%sMatch successful!%s\n",
5898 			  PL_colors[4], PL_colors[5]));
5899 
5900     if (PL_reg_eval_set) {
5901 	/* each successfully executed (?{...}) block does the equivalent of
5902 	 *   local $^R = do {...}
5903 	 * When popping the save stack, all these locals would be undone;
5904 	 * bypass this by setting the outermost saved $^R to the latest
5905 	 * value */
5906 	if (oreplsv != GvSV(PL_replgv))
5907 	    sv_setsv(oreplsv, GvSV(PL_replgv));
5908     }
5909     result = 1;
5910     goto final_exit;
5911 
5912 no:
5913     DEBUG_EXECUTE_r(
5914 	PerlIO_printf(Perl_debug_log,
5915             "%*s  %sfailed...%s\n",
5916             REPORT_CODE_OFF+depth*2, "",
5917             PL_colors[4], PL_colors[5])
5918 	);
5919 
5920 no_silent:
5921     if (no_final) {
5922         if (yes_state) {
5923             goto yes;
5924         } else {
5925             goto final_exit;
5926         }
5927     }
5928     if (depth) {
5929 	/* there's a previous state to backtrack to */
5930 	st--;
5931 	if (st < SLAB_FIRST(PL_regmatch_slab)) {
5932 	    PL_regmatch_slab = PL_regmatch_slab->prev;
5933 	    st = SLAB_LAST(PL_regmatch_slab);
5934 	}
5935 	PL_regmatch_state = st;
5936 	locinput= st->locinput;
5937 	nextchr = UCHARAT(locinput);
5938 
5939 	DEBUG_STATE_pp("pop");
5940 	depth--;
5941 	if (yes_state == st)
5942 	    yes_state = st->u.yes.prev_yes_state;
5943 
5944 	state_num = st->resume_state + 1; /* failure = success + 1 */
5945 	goto reenter_switch;
5946     }
5947     result = 0;
5948 
5949   final_exit:
5950     if (rex->intflags & PREGf_VERBARG_SEEN) {
5951         SV *sv_err = get_sv("REGERROR", 1);
5952         SV *sv_mrk = get_sv("REGMARK", 1);
5953         if (result) {
5954             sv_commit = &PL_sv_no;
5955             if (!sv_yes_mark)
5956                 sv_yes_mark = &PL_sv_yes;
5957         } else {
5958             if (!sv_commit)
5959                 sv_commit = &PL_sv_yes;
5960             sv_yes_mark = &PL_sv_no;
5961         }
5962         sv_setsv(sv_err, sv_commit);
5963         sv_setsv(sv_mrk, sv_yes_mark);
5964     }
5965 
5966     /* clean up; in particular, free all slabs above current one */
5967     LEAVE_SCOPE(oldsave);
5968 
5969     return result;
5970 }
5971 
5972 /*
5973  - regrepeat - repeatedly match something simple, report how many
5974  */
5975 /*
5976  * [This routine now assumes that it will only match on things of length 1.
5977  * That was true before, but now we assume scan - reginput is the count,
5978  * rather than incrementing count on every character.  [Er, except utf8.]]
5979  */
5980 STATIC I32
5981 S_regrepeat(pTHX_ const regexp *prog, const regnode *p, I32 max, int depth)
5982 {
5983     dVAR;
5984     register char *scan;
5985     register I32 c;
5986     register char *loceol = PL_regeol;
5987     register I32 hardcount = 0;
5988     register bool utf8_target = PL_reg_match_utf8;
5989     UV utf8_flags;
5990 #ifndef DEBUGGING
5991     PERL_UNUSED_ARG(depth);
5992 #endif
5993 
5994     PERL_ARGS_ASSERT_REGREPEAT;
5995 
5996     scan = PL_reginput;
5997     if (max == REG_INFTY)
5998 	max = I32_MAX;
5999     else if (max < loceol - scan)
6000 	loceol = scan + max;
6001     switch (OP(p)) {
6002     case REG_ANY:
6003 	if (utf8_target) {
6004 	    loceol = PL_regeol;
6005 	    while (scan < loceol && hardcount < max && *scan != '\n') {
6006 		scan += UTF8SKIP(scan);
6007 		hardcount++;
6008 	    }
6009 	} else {
6010 	    while (scan < loceol && *scan != '\n')
6011 		scan++;
6012 	}
6013 	break;
6014     case SANY:
6015         if (utf8_target) {
6016 	    loceol = PL_regeol;
6017 	    while (scan < loceol && hardcount < max) {
6018 	        scan += UTF8SKIP(scan);
6019 		hardcount++;
6020 	    }
6021 	}
6022 	else
6023 	    scan = loceol;
6024 	break;
6025     case CANY:
6026 	scan = loceol;
6027 	break;
6028     case EXACT:
6029 	/* To get here, EXACTish nodes must have *byte* length == 1.  That
6030 	 * means they match only characters in the string that can be expressed
6031 	 * as a single byte.  For non-utf8 strings, that means a simple match.
6032 	 * For utf8 strings, the character matched must be an invariant, or
6033 	 * downgradable to a single byte.  The pattern's utf8ness is
6034 	 * irrelevant, as since it's a single byte, it either isn't utf8, or if
6035 	 * it is, it's an invariant */
6036 
6037 	c = (U8)*STRING(p);
6038 	assert(! UTF_PATTERN || UNI_IS_INVARIANT(c));
6039 
6040 	if (! utf8_target || UNI_IS_INVARIANT(c)) {
6041 	    while (scan < loceol && UCHARAT(scan) == c) {
6042 		scan++;
6043 	    }
6044 	}
6045 	else {
6046 
6047 	    /* Here, the string is utf8, and the pattern char is different
6048 	     * in utf8 than not, so can't compare them directly.  Outside the
6049 	     * loop, find the two utf8 bytes that represent c, and then
6050 	     * look for those in sequence in the utf8 string */
6051 	    U8 high = UTF8_TWO_BYTE_HI(c);
6052 	    U8 low = UTF8_TWO_BYTE_LO(c);
6053 	    loceol = PL_regeol;
6054 
6055 	    while (hardcount < max
6056 		    && scan + 1 < loceol
6057 		    && UCHARAT(scan) == high
6058 		    && UCHARAT(scan + 1) == low)
6059 	    {
6060 		scan += 2;
6061 		hardcount++;
6062 	    }
6063 	}
6064 	break;
6065     case EXACTFA:
6066 	utf8_flags = FOLDEQ_UTF8_NOMIX_ASCII;
6067 	goto do_exactf;
6068 
6069     case EXACTFL:
6070 	PL_reg_flags |= RF_tainted;
6071 	utf8_flags = FOLDEQ_UTF8_LOCALE;
6072 	goto do_exactf;
6073 
6074     case EXACTF:
6075 	    utf8_flags = 0;
6076 	    goto do_exactf;
6077 
6078     case EXACTFU_SS:
6079     case EXACTFU_TRICKYFOLD:
6080     case EXACTFU:
6081 	utf8_flags = (UTF_PATTERN) ? FOLDEQ_S2_ALREADY_FOLDED : 0;
6082 
6083 	/* The comments for the EXACT case above apply as well to these fold
6084 	 * ones */
6085 
6086     do_exactf:
6087 	c = (U8)*STRING(p);
6088 	assert(! UTF_PATTERN || UNI_IS_INVARIANT(c));
6089 
6090 	if (utf8_target || OP(p) == EXACTFU_SS) { /* Use full Unicode fold matching */
6091 	    char *tmpeol = loceol;
6092 	    while (hardcount < max
6093 		    && foldEQ_utf8_flags(scan, &tmpeol, 0, utf8_target,
6094 				   STRING(p), NULL, 1, cBOOL(UTF_PATTERN), utf8_flags))
6095 	    {
6096 		scan = tmpeol;
6097 		tmpeol = loceol;
6098 		hardcount++;
6099 	    }
6100 
6101 	    /* XXX Note that the above handles properly the German sharp s in
6102 	     * the pattern matching ss in the string.  But it doesn't handle
6103 	     * properly cases where the string contains say 'LIGATURE ff' and
6104 	     * the pattern is 'f+'.  This would require, say, a new function or
6105 	     * revised interface to foldEQ_utf8(), in which the maximum number
6106 	     * of characters to match could be passed and it would return how
6107 	     * many actually did.  This is just one of many cases where
6108 	     * multi-char folds don't work properly, and so the fix is being
6109 	     * deferred */
6110 	}
6111 	else {
6112 	    U8 folded;
6113 
6114 	    /* Here, the string isn't utf8 and c is a single byte; and either
6115 	     * the pattern isn't utf8 or c is an invariant, so its utf8ness
6116 	     * doesn't affect c.  Can just do simple comparisons for exact or
6117 	     * fold matching. */
6118 	    switch (OP(p)) {
6119 		case EXACTF: folded = PL_fold[c]; break;
6120 		case EXACTFA:
6121 		case EXACTFU_TRICKYFOLD:
6122 		case EXACTFU: folded = PL_fold_latin1[c]; break;
6123 		case EXACTFL: folded = PL_fold_locale[c]; break;
6124 		default: Perl_croak(aTHX_ "panic: Unexpected op %u", OP(p));
6125 	    }
6126 	    while (scan < loceol &&
6127 		   (UCHARAT(scan) == c || UCHARAT(scan) == folded))
6128 	    {
6129 		scan++;
6130 	    }
6131 	}
6132 	break;
6133     case ANYOFV:
6134     case ANYOF:
6135 	if (utf8_target || OP(p) == ANYOFV) {
6136 	    STRLEN inclasslen;
6137 	    loceol = PL_regeol;
6138 	    inclasslen = loceol - scan;
6139 	    while (hardcount < max
6140 		   && ((inclasslen = loceol - scan) > 0)
6141 		   && reginclass(prog, p, (U8*)scan, &inclasslen, utf8_target))
6142 	    {
6143 		scan += inclasslen;
6144 		hardcount++;
6145 	    }
6146 	} else {
6147 	    while (scan < loceol && REGINCLASS(prog, p, (U8*)scan))
6148 		scan++;
6149 	}
6150 	break;
6151     case ALNUMU:
6152 	if (utf8_target) {
6153     utf8_wordchar:
6154 	    loceol = PL_regeol;
6155 	    LOAD_UTF8_CHARCLASS_ALNUM();
6156 	    while (hardcount < max && scan < loceol &&
6157                    swash_fetch(PL_utf8_alnum, (U8*)scan, utf8_target))
6158             {
6159 		scan += UTF8SKIP(scan);
6160 		hardcount++;
6161 	    }
6162         } else {
6163             while (scan < loceol && isWORDCHAR_L1((U8) *scan)) {
6164                 scan++;
6165             }
6166 	}
6167 	break;
6168     case ALNUM:
6169 	if (utf8_target)
6170 	    goto utf8_wordchar;
6171 	while (scan < loceol && isALNUM((U8) *scan)) {
6172 	    scan++;
6173 	}
6174 	break;
6175     case ALNUMA:
6176 	while (scan < loceol && isWORDCHAR_A((U8) *scan)) {
6177 	    scan++;
6178 	}
6179 	break;
6180     case ALNUML:
6181 	PL_reg_flags |= RF_tainted;
6182 	if (utf8_target) {
6183 	    loceol = PL_regeol;
6184 	    while (hardcount < max && scan < loceol &&
6185 		   isALNUM_LC_utf8((U8*)scan)) {
6186 		scan += UTF8SKIP(scan);
6187 		hardcount++;
6188 	    }
6189 	} else {
6190 	    while (scan < loceol && isALNUM_LC(*scan))
6191 		scan++;
6192 	}
6193 	break;
6194     case NALNUMU:
6195 	if (utf8_target) {
6196 
6197     utf8_Nwordchar:
6198 
6199 	    loceol = PL_regeol;
6200 	    LOAD_UTF8_CHARCLASS_ALNUM();
6201 	    while (hardcount < max && scan < loceol &&
6202                    ! swash_fetch(PL_utf8_alnum, (U8*)scan, utf8_target))
6203             {
6204 		scan += UTF8SKIP(scan);
6205 		hardcount++;
6206 	    }
6207         } else {
6208             while (scan < loceol && ! isWORDCHAR_L1((U8) *scan)) {
6209                 scan++;
6210             }
6211 	}
6212 	break;
6213     case NALNUM:
6214 	if (utf8_target)
6215 	    goto utf8_Nwordchar;
6216 	while (scan < loceol && ! isALNUM((U8) *scan)) {
6217 	    scan++;
6218 	}
6219 	break;
6220     case NALNUMA:
6221 	if (utf8_target) {
6222 	    while (scan < loceol && ! isWORDCHAR_A((U8) *scan)) {
6223 		scan += UTF8SKIP(scan);
6224 	    }
6225 	}
6226 	else {
6227 	    while (scan < loceol && ! isWORDCHAR_A((U8) *scan)) {
6228 		scan++;
6229 	    }
6230 	}
6231 	break;
6232     case NALNUML:
6233 	PL_reg_flags |= RF_tainted;
6234 	if (utf8_target) {
6235 	    loceol = PL_regeol;
6236 	    while (hardcount < max && scan < loceol &&
6237 		   !isALNUM_LC_utf8((U8*)scan)) {
6238 		scan += UTF8SKIP(scan);
6239 		hardcount++;
6240 	    }
6241 	} else {
6242 	    while (scan < loceol && !isALNUM_LC(*scan))
6243 		scan++;
6244 	}
6245 	break;
6246     case SPACEU:
6247 	if (utf8_target) {
6248 
6249     utf8_space:
6250 
6251 	    loceol = PL_regeol;
6252 	    LOAD_UTF8_CHARCLASS_SPACE();
6253 	    while (hardcount < max && scan < loceol &&
6254 		   (*scan == ' ' ||
6255                     swash_fetch(PL_utf8_space,(U8*)scan, utf8_target)))
6256             {
6257 		scan += UTF8SKIP(scan);
6258 		hardcount++;
6259 	    }
6260 	    break;
6261 	}
6262 	else {
6263             while (scan < loceol && isSPACE_L1((U8) *scan)) {
6264                 scan++;
6265             }
6266 	    break;
6267 	}
6268     case SPACE:
6269 	if (utf8_target)
6270 	    goto utf8_space;
6271 
6272 	while (scan < loceol && isSPACE((U8) *scan)) {
6273 	    scan++;
6274 	}
6275 	break;
6276     case SPACEA:
6277 	while (scan < loceol && isSPACE_A((U8) *scan)) {
6278 	    scan++;
6279 	}
6280 	break;
6281     case SPACEL:
6282 	PL_reg_flags |= RF_tainted;
6283 	if (utf8_target) {
6284 	    loceol = PL_regeol;
6285 	    while (hardcount < max && scan < loceol &&
6286 		   isSPACE_LC_utf8((U8*)scan)) {
6287 		scan += UTF8SKIP(scan);
6288 		hardcount++;
6289 	    }
6290 	} else {
6291 	    while (scan < loceol && isSPACE_LC(*scan))
6292 		scan++;
6293 	}
6294 	break;
6295     case NSPACEU:
6296 	if (utf8_target) {
6297 
6298     utf8_Nspace:
6299 
6300 	    loceol = PL_regeol;
6301 	    LOAD_UTF8_CHARCLASS_SPACE();
6302 	    while (hardcount < max && scan < loceol &&
6303 		   ! (*scan == ' ' ||
6304                       swash_fetch(PL_utf8_space,(U8*)scan, utf8_target)))
6305             {
6306 		scan += UTF8SKIP(scan);
6307 		hardcount++;
6308 	    }
6309 	    break;
6310 	}
6311 	else {
6312             while (scan < loceol && ! isSPACE_L1((U8) *scan)) {
6313                 scan++;
6314             }
6315 	}
6316 	break;
6317     case NSPACE:
6318 	if (utf8_target)
6319 	    goto utf8_Nspace;
6320 
6321 	while (scan < loceol && ! isSPACE((U8) *scan)) {
6322 	    scan++;
6323 	}
6324 	break;
6325     case NSPACEA:
6326 	if (utf8_target) {
6327 	    while (scan < loceol && ! isSPACE_A((U8) *scan)) {
6328 		scan += UTF8SKIP(scan);
6329 	    }
6330 	}
6331 	else {
6332 	    while (scan < loceol && ! isSPACE_A((U8) *scan)) {
6333 		scan++;
6334 	    }
6335 	}
6336 	break;
6337     case NSPACEL:
6338 	PL_reg_flags |= RF_tainted;
6339 	if (utf8_target) {
6340 	    loceol = PL_regeol;
6341 	    while (hardcount < max && scan < loceol &&
6342 		   !isSPACE_LC_utf8((U8*)scan)) {
6343 		scan += UTF8SKIP(scan);
6344 		hardcount++;
6345 	    }
6346 	} else {
6347 	    while (scan < loceol && !isSPACE_LC(*scan))
6348 		scan++;
6349 	}
6350 	break;
6351     case DIGIT:
6352 	if (utf8_target) {
6353 	    loceol = PL_regeol;
6354 	    LOAD_UTF8_CHARCLASS_DIGIT();
6355 	    while (hardcount < max && scan < loceol &&
6356 		   swash_fetch(PL_utf8_digit, (U8*)scan, utf8_target)) {
6357 		scan += UTF8SKIP(scan);
6358 		hardcount++;
6359 	    }
6360 	} else {
6361 	    while (scan < loceol && isDIGIT(*scan))
6362 		scan++;
6363 	}
6364 	break;
6365     case DIGITA:
6366 	while (scan < loceol && isDIGIT_A((U8) *scan)) {
6367 	    scan++;
6368 	}
6369 	break;
6370     case DIGITL:
6371 	PL_reg_flags |= RF_tainted;
6372 	if (utf8_target) {
6373 	    loceol = PL_regeol;
6374 	    while (hardcount < max && scan < loceol &&
6375 		   isDIGIT_LC_utf8((U8*)scan)) {
6376 		scan += UTF8SKIP(scan);
6377 		hardcount++;
6378 	    }
6379 	} else {
6380 	    while (scan < loceol && isDIGIT_LC(*scan))
6381 		scan++;
6382 	}
6383 	break;
6384     case NDIGIT:
6385 	if (utf8_target) {
6386 	    loceol = PL_regeol;
6387 	    LOAD_UTF8_CHARCLASS_DIGIT();
6388 	    while (hardcount < max && scan < loceol &&
6389 		   !swash_fetch(PL_utf8_digit, (U8*)scan, utf8_target)) {
6390 		scan += UTF8SKIP(scan);
6391 		hardcount++;
6392 	    }
6393 	} else {
6394 	    while (scan < loceol && !isDIGIT(*scan))
6395 		scan++;
6396 	}
6397 	break;
6398     case NDIGITA:
6399 	if (utf8_target) {
6400 	    while (scan < loceol && ! isDIGIT_A((U8) *scan)) {
6401 		scan += UTF8SKIP(scan);
6402 	    }
6403 	}
6404 	else {
6405 	    while (scan < loceol && ! isDIGIT_A((U8) *scan)) {
6406 		scan++;
6407 	    }
6408 	}
6409 	break;
6410     case NDIGITL:
6411 	PL_reg_flags |= RF_tainted;
6412 	if (utf8_target) {
6413 	    loceol = PL_regeol;
6414 	    while (hardcount < max && scan < loceol &&
6415 		   !isDIGIT_LC_utf8((U8*)scan)) {
6416 		scan += UTF8SKIP(scan);
6417 		hardcount++;
6418 	    }
6419 	} else {
6420 	    while (scan < loceol && !isDIGIT_LC(*scan))
6421 		scan++;
6422 	}
6423 	break;
6424     case LNBREAK:
6425         if (utf8_target) {
6426 	    loceol = PL_regeol;
6427 	    while (hardcount < max && scan < loceol && (c=is_LNBREAK_utf8(scan))) {
6428 		scan += c;
6429 		hardcount++;
6430 	    }
6431 	} else {
6432 	    /*
6433 	      LNBREAK can match two latin chars, which is ok,
6434 	      because we have a null terminated string, but we
6435 	      have to use hardcount in this situation
6436 	    */
6437 	    while (scan < loceol && (c=is_LNBREAK_latin1(scan)))  {
6438 		scan+=c;
6439 		hardcount++;
6440 	    }
6441 	}
6442 	break;
6443     case HORIZWS:
6444         if (utf8_target) {
6445 	    loceol = PL_regeol;
6446 	    while (hardcount < max && scan < loceol && (c=is_HORIZWS_utf8(scan))) {
6447 		scan += c;
6448 		hardcount++;
6449 	    }
6450 	} else {
6451 	    while (scan < loceol && is_HORIZWS_latin1(scan))
6452 		scan++;
6453 	}
6454 	break;
6455     case NHORIZWS:
6456         if (utf8_target) {
6457 	    loceol = PL_regeol;
6458 	    while (hardcount < max && scan < loceol && !is_HORIZWS_utf8(scan)) {
6459 		scan += UTF8SKIP(scan);
6460 		hardcount++;
6461 	    }
6462 	} else {
6463 	    while (scan < loceol && !is_HORIZWS_latin1(scan))
6464 		scan++;
6465 
6466 	}
6467 	break;
6468     case VERTWS:
6469         if (utf8_target) {
6470 	    loceol = PL_regeol;
6471 	    while (hardcount < max && scan < loceol && (c=is_VERTWS_utf8(scan))) {
6472 		scan += c;
6473 		hardcount++;
6474 	    }
6475 	} else {
6476 	    while (scan < loceol && is_VERTWS_latin1(scan))
6477 		scan++;
6478 
6479 	}
6480 	break;
6481     case NVERTWS:
6482         if (utf8_target) {
6483 	    loceol = PL_regeol;
6484 	    while (hardcount < max && scan < loceol && !is_VERTWS_utf8(scan)) {
6485 		scan += UTF8SKIP(scan);
6486 		hardcount++;
6487 	    }
6488 	} else {
6489 	    while (scan < loceol && !is_VERTWS_latin1(scan))
6490 		scan++;
6491 
6492 	}
6493 	break;
6494 
6495     default:		/* Called on something of 0 width. */
6496 	break;		/* So match right here or not at all. */
6497     }
6498 
6499     if (hardcount)
6500 	c = hardcount;
6501     else
6502 	c = scan - PL_reginput;
6503     PL_reginput = scan;
6504 
6505     DEBUG_r({
6506 	GET_RE_DEBUG_FLAGS_DECL;
6507 	DEBUG_EXECUTE_r({
6508 	    SV * const prop = sv_newmortal();
6509 	    regprop(prog, prop, p);
6510 	    PerlIO_printf(Perl_debug_log,
6511 			"%*s  %s can match %"IVdf" times out of %"IVdf"...\n",
6512 			REPORT_CODE_OFF + depth*2, "", SvPVX_const(prop),(IV)c,(IV)max);
6513 	});
6514     });
6515 
6516     return(c);
6517 }
6518 
6519 
6520 #if !defined(PERL_IN_XSUB_RE) || defined(PLUGGABLE_RE_EXTENSION)
6521 /*
6522 - regclass_swash - prepare the utf8 swash.  Wraps the shared core version to
6523 create a copy so that changes the caller makes won't change the shared one
6524  */
6525 SV *
6526 Perl_regclass_swash(pTHX_ const regexp *prog, register const regnode* node, bool doinit, SV** listsvp, SV **altsvp)
6527 {
6528     PERL_ARGS_ASSERT_REGCLASS_SWASH;
6529     return newSVsv(core_regclass_swash(prog, node, doinit, listsvp, altsvp));
6530 }
6531 #endif
6532 
6533 STATIC SV *
6534 S_core_regclass_swash(pTHX_ const regexp *prog, register const regnode* node, bool doinit, SV** listsvp, SV **altsvp)
6535 {
6536     /* Returns the swash for the input 'node' in the regex 'prog'.
6537      * If <doinit> is true, will attempt to create the swash if not already
6538      *	  done.
6539      * If <listsvp> is non-null, will return the swash initialization string in
6540      *	  it.
6541      * If <altsvp> is non-null, will return the alternates to the regular swash
6542      *	  in it
6543      * Tied intimately to how regcomp.c sets up the data structure */
6544 
6545     dVAR;
6546     SV *sw  = NULL;
6547     SV *si  = NULL;
6548     SV *alt = NULL;
6549     SV*  invlist = NULL;
6550 
6551     RXi_GET_DECL(prog,progi);
6552     const struct reg_data * const data = prog ? progi->data : NULL;
6553 
6554     PERL_ARGS_ASSERT_CORE_REGCLASS_SWASH;
6555 
6556     assert(ANYOF_NONBITMAP(node));
6557 
6558     if (data && data->count) {
6559 	const U32 n = ARG(node);
6560 
6561 	if (data->what[n] == 's') {
6562 	    SV * const rv = MUTABLE_SV(data->data[n]);
6563 	    AV * const av = MUTABLE_AV(SvRV(rv));
6564 	    SV **const ary = AvARRAY(av);
6565 	    bool invlist_has_user_defined_property;
6566 
6567 	    si = *ary;	/* ary[0] = the string to initialize the swash with */
6568 
6569 	    /* Elements 3 and 4 are either both present or both absent. [3] is
6570 	     * any inversion list generated at compile time; [4] indicates if
6571 	     * that inversion list has any user-defined properties in it. */
6572 	    if (av_len(av) >= 3) {
6573 		invlist = ary[3];
6574 		invlist_has_user_defined_property = cBOOL(SvUV(ary[4]));
6575 	    }
6576 	    else {
6577 		invlist = NULL;
6578 		invlist_has_user_defined_property = FALSE;
6579 	    }
6580 
6581 	    /* Element [1] is reserved for the set-up swash.  If already there,
6582 	     * return it; if not, create it and store it there */
6583 	    if (SvROK(ary[1])) {
6584 		sw = ary[1];
6585 	    }
6586 	    else if (si && doinit) {
6587 
6588 		sw = _core_swash_init("utf8", /* the utf8 package */
6589 				      "", /* nameless */
6590 				      si,
6591 				      1, /* binary */
6592 				      0, /* not from tr/// */
6593 				      FALSE, /* is error if can't find
6594 						property */
6595 				      invlist,
6596 				      invlist_has_user_defined_property);
6597 		(void)av_store(av, 1, sw);
6598 	    }
6599 
6600 	    /* Element [2] is for any multi-char folds.  Note that is a
6601 	     * fundamentally flawed design, because can't backtrack and try
6602 	     * again.  See [perl #89774] */
6603 	    if (SvTYPE(ary[2]) == SVt_PVAV) {
6604 	        alt = ary[2];
6605 	    }
6606 	}
6607     }
6608 
6609     if (listsvp) {
6610 	SV* matches_string = newSVpvn("", 0);
6611 	SV** invlistsvp;
6612 
6613 	/* Use the swash, if any, which has to have incorporated into it all
6614 	 * possibilities */
6615 	if (   sw
6616 	    && SvROK(sw)
6617 	    && SvTYPE(SvRV(sw)) == SVt_PVHV
6618 	    && (invlistsvp = hv_fetchs(MUTABLE_HV(SvRV(sw)), "INVLIST", FALSE)))
6619 	{
6620 	    invlist = *invlistsvp;
6621 	}
6622 	else if (si && si != &PL_sv_undef) {
6623 
6624 	    /* If no swash, use the input nitialization string, if available */
6625 	    sv_catsv(matches_string, si);
6626 	}
6627 
6628 	/* Add the inversion list to whatever we have.  This may have come from
6629 	 * the swash, or from an input parameter */
6630 	if (invlist) {
6631 	    sv_catsv(matches_string, _invlist_contents(invlist));
6632 	}
6633 	*listsvp = matches_string;
6634     }
6635 
6636     if (altsvp)
6637 	*altsvp  = alt;
6638 
6639     return sw;
6640 }
6641 
6642 /*
6643  - reginclass - determine if a character falls into a character class
6644 
6645   n is the ANYOF regnode
6646   p is the target string
6647   lenp is pointer to the maximum number of bytes of how far to go in p
6648     (This is assumed wthout checking to always be at least the current
6649     character's size)
6650   utf8_target tells whether p is in UTF-8.
6651 
6652   Returns true if matched; false otherwise.  If lenp is not NULL, on return
6653   from a successful match, the value it points to will be updated to how many
6654   bytes in p were matched.  If there was no match, the value is undefined,
6655   possibly changed from the input.
6656 
6657   Note that this can be a synthetic start class, a combination of various
6658   nodes, so things you think might be mutually exclusive, such as locale,
6659   aren't.  It can match both locale and non-locale
6660 
6661  */
6662 
6663 STATIC bool
6664 S_reginclass(pTHX_ const regexp * const prog, register const regnode * const n, register const U8* const p, STRLEN* lenp, register const bool utf8_target)
6665 {
6666     dVAR;
6667     const char flags = ANYOF_FLAGS(n);
6668     bool match = FALSE;
6669     UV c = *p;
6670     STRLEN c_len = 0;
6671     STRLEN maxlen;
6672 
6673     PERL_ARGS_ASSERT_REGINCLASS;
6674 
6675     /* If c is not already the code point, get it */
6676     if (utf8_target && !UTF8_IS_INVARIANT(c)) {
6677 	c = utf8n_to_uvchr(p, UTF8_MAXBYTES, &c_len,
6678 		(UTF8_ALLOW_DEFAULT & UTF8_ALLOW_ANYUV)
6679 		| UTF8_ALLOW_FFFF | UTF8_CHECK_ONLY);
6680 		/* see [perl #37836] for UTF8_ALLOW_ANYUV; [perl #38293] for
6681 		 * UTF8_ALLOW_FFFF */
6682 	if (c_len == (STRLEN)-1)
6683 	    Perl_croak(aTHX_ "Malformed UTF-8 character (fatal)");
6684     }
6685     else {
6686 	c_len = 1;
6687     }
6688 
6689     /* Use passed in max length, or one character if none passed in or less
6690      * than one character.  And assume will match just one character.  This is
6691      * overwritten later if matched more. */
6692     if (lenp) {
6693 	maxlen = (*lenp > c_len) ? *lenp : c_len;
6694 	*lenp = c_len;
6695 
6696     }
6697     else {
6698 	maxlen = c_len;
6699     }
6700 
6701     /* If this character is potentially in the bitmap, check it */
6702     if (c < 256) {
6703 	if (ANYOF_BITMAP_TEST(n, c))
6704 	    match = TRUE;
6705 	else if (flags & ANYOF_NON_UTF8_LATIN1_ALL
6706 		&& ! utf8_target
6707 		&& ! isASCII(c))
6708 	{
6709 	    match = TRUE;
6710 	}
6711 
6712 	else if (flags & ANYOF_LOCALE) {
6713 	    PL_reg_flags |= RF_tainted;
6714 
6715 	    if ((flags & ANYOF_LOC_NONBITMAP_FOLD)
6716 		 && ANYOF_BITMAP_TEST(n, PL_fold_locale[c]))
6717 	    {
6718 		match = TRUE;
6719 	    }
6720 	    else if (ANYOF_CLASS_TEST_ANY_SET(n) &&
6721 		     ((ANYOF_CLASS_TEST(n, ANYOF_ALNUM)   &&  isALNUM_LC(c))  ||
6722 		      (ANYOF_CLASS_TEST(n, ANYOF_NALNUM)  && !isALNUM_LC(c))  ||
6723 		      (ANYOF_CLASS_TEST(n, ANYOF_SPACE)   &&  isSPACE_LC(c))  ||
6724 		      (ANYOF_CLASS_TEST(n, ANYOF_NSPACE)  && !isSPACE_LC(c))  ||
6725 		      (ANYOF_CLASS_TEST(n, ANYOF_DIGIT)   &&  isDIGIT_LC(c))  ||
6726 		      (ANYOF_CLASS_TEST(n, ANYOF_NDIGIT)  && !isDIGIT_LC(c))  ||
6727 		      (ANYOF_CLASS_TEST(n, ANYOF_ALNUMC)  &&  isALNUMC_LC(c)) ||
6728 		      (ANYOF_CLASS_TEST(n, ANYOF_NALNUMC) && !isALNUMC_LC(c)) ||
6729 		      (ANYOF_CLASS_TEST(n, ANYOF_ALPHA)   &&  isALPHA_LC(c))  ||
6730 		      (ANYOF_CLASS_TEST(n, ANYOF_NALPHA)  && !isALPHA_LC(c))  ||
6731 		      (ANYOF_CLASS_TEST(n, ANYOF_ASCII)   &&  isASCII_LC(c))  ||
6732 		      (ANYOF_CLASS_TEST(n, ANYOF_NASCII)  && !isASCII_LC(c))  ||
6733 		      (ANYOF_CLASS_TEST(n, ANYOF_CNTRL)   &&  isCNTRL_LC(c))  ||
6734 		      (ANYOF_CLASS_TEST(n, ANYOF_NCNTRL)  && !isCNTRL_LC(c))  ||
6735 		      (ANYOF_CLASS_TEST(n, ANYOF_GRAPH)   &&  isGRAPH_LC(c))  ||
6736 		      (ANYOF_CLASS_TEST(n, ANYOF_NGRAPH)  && !isGRAPH_LC(c))  ||
6737 		      (ANYOF_CLASS_TEST(n, ANYOF_LOWER)   &&  isLOWER_LC(c))  ||
6738 		      (ANYOF_CLASS_TEST(n, ANYOF_NLOWER)  && !isLOWER_LC(c))  ||
6739 		      (ANYOF_CLASS_TEST(n, ANYOF_PRINT)   &&  isPRINT_LC(c))  ||
6740 		      (ANYOF_CLASS_TEST(n, ANYOF_NPRINT)  && !isPRINT_LC(c))  ||
6741 		      (ANYOF_CLASS_TEST(n, ANYOF_PUNCT)   &&  isPUNCT_LC(c))  ||
6742 		      (ANYOF_CLASS_TEST(n, ANYOF_NPUNCT)  && !isPUNCT_LC(c))  ||
6743 		      (ANYOF_CLASS_TEST(n, ANYOF_UPPER)   &&  isUPPER_LC(c))  ||
6744 		      (ANYOF_CLASS_TEST(n, ANYOF_NUPPER)  && !isUPPER_LC(c))  ||
6745 		      (ANYOF_CLASS_TEST(n, ANYOF_XDIGIT)  &&  isXDIGIT(c))    ||
6746 		      (ANYOF_CLASS_TEST(n, ANYOF_NXDIGIT) && !isXDIGIT(c))    ||
6747 		      (ANYOF_CLASS_TEST(n, ANYOF_PSXSPC)  &&  isPSXSPC(c))    ||
6748 		      (ANYOF_CLASS_TEST(n, ANYOF_NPSXSPC) && !isPSXSPC(c))    ||
6749 		      (ANYOF_CLASS_TEST(n, ANYOF_BLANK)   &&  isBLANK_LC(c))  ||
6750 		      (ANYOF_CLASS_TEST(n, ANYOF_NBLANK)  && !isBLANK_LC(c))
6751 		     ) /* How's that for a conditional? */
6752 	    ) {
6753 		match = TRUE;
6754 	    }
6755 	}
6756     }
6757 
6758     /* If the bitmap didn't (or couldn't) match, and something outside the
6759      * bitmap could match, try that.  Locale nodes specifiy completely the
6760      * behavior of code points in the bit map (otherwise, a utf8 target would
6761      * cause them to be treated as Unicode and not locale), except in
6762      * the very unlikely event when this node is a synthetic start class, which
6763      * could be a combination of locale and non-locale nodes.  So allow locale
6764      * to match for the synthetic start class, which will give a false
6765      * positive that will be resolved when the match is done again as not part
6766      * of the synthetic start class */
6767     if (!match) {
6768 	if (utf8_target && (flags & ANYOF_UNICODE_ALL) && c >= 256) {
6769 	    match = TRUE;	/* Everything above 255 matches */
6770 	}
6771 	else if (ANYOF_NONBITMAP(n)
6772 		 && ((flags & ANYOF_NONBITMAP_NON_UTF8)
6773 		     || (utf8_target
6774 		         && (c >=256
6775 			     || (! (flags & ANYOF_LOCALE))
6776 			     || (flags & ANYOF_IS_SYNTHETIC)))))
6777 	{
6778 	    AV *av;
6779 	    SV * const sw = core_regclass_swash(prog, n, TRUE, 0, (SV**)&av);
6780 
6781 	    if (sw) {
6782 		U8 * utf8_p;
6783 		if (utf8_target) {
6784 		    utf8_p = (U8 *) p;
6785 		} else {
6786 
6787 		    /* Not utf8.  Convert as much of the string as available up
6788 		     * to the limit of how far the (single) character in the
6789 		     * pattern can possibly match (no need to go further).  If
6790 		     * the node is a straight ANYOF or not folding, it can't
6791 		     * match more than one.  Otherwise, It can match up to how
6792 		     * far a single char can fold to.  Since not utf8, each
6793 		     * character is a single byte, so the max it can be in
6794 		     * bytes is the same as the max it can be in characters */
6795 		    STRLEN len = (OP(n) == ANYOF
6796 				  || ! (flags & ANYOF_LOC_NONBITMAP_FOLD))
6797 				  ? 1
6798 				  : (maxlen < UTF8_MAX_FOLD_CHAR_EXPAND)
6799 				    ? maxlen
6800 				    : UTF8_MAX_FOLD_CHAR_EXPAND;
6801 		    utf8_p = bytes_to_utf8(p, &len);
6802 		}
6803 
6804 		if (swash_fetch(sw, utf8_p, TRUE))
6805 		    match = TRUE;
6806 		else if (flags & ANYOF_LOC_NONBITMAP_FOLD) {
6807 
6808 		    /* Here, we need to test if the fold of the target string
6809 		     * matches.  The non-multi char folds have all been moved to
6810                      * the compilation phase, and the multi-char folds have
6811                      * been stored by regcomp into 'av'; we linearly check to
6812                      * see if any match the target string (folded).   We know
6813                      * that the originals were each one character, but we don't
6814                      * currently know how many characters/bytes each folded to,
6815                      * except we do know that there are small limits imposed by
6816                      * Unicode.  XXX A performance enhancement would be to have
6817                      * regcomp.c store the max number of chars/bytes that are
6818                      * in an av entry, as, say the 0th element.  Even better
6819                      * would be to have a hash of the few characters that can
6820                      * start a multi-char fold to the max number of chars of
6821                      * those folds.
6822 		     *
6823 		     * If there is a match, we will need to advance (if lenp is
6824 		     * specified) the match pointer in the target string.  But
6825 		     * what we are comparing here isn't that string directly,
6826 		     * but its fold, whose length may differ from the original.
6827 		     * As we go along in constructing the fold, therefore, we
6828 		     * create a map so that we know how many bytes in the
6829 		     * source to advance given that we have matched a certain
6830 		     * number of bytes in the fold.  This map is stored in
6831 		     * 'map_fold_len_back'.  Let n mean the number of bytes in
6832 		     * the fold of the first character that we are folding.
6833 		     * Then map_fold_len_back[n] is set to the number of bytes
6834 		     * in that first character.  Similarly let m be the
6835 		     * corresponding number for the second character to be
6836 		     * folded.  Then map_fold_len_back[n+m] is set to the
6837 		     * number of bytes occupied by the first two source
6838 		     * characters. ... */
6839 		    U8 map_fold_len_back[UTF8_MAXBYTES_CASE+1] = { 0 };
6840 		    U8 folded[UTF8_MAXBYTES_CASE+1];
6841 		    STRLEN foldlen = 0; /* num bytes in fold of 1st char */
6842 		    STRLEN total_foldlen = 0; /* num bytes in fold of all
6843 						  chars */
6844 
6845 		    if (OP(n) == ANYOF || maxlen == 1 || ! lenp || ! av) {
6846 
6847 			/* Here, only need to fold the first char of the target
6848 			 * string.  It the source wasn't utf8, is 1 byte long */
6849 			to_utf8_fold(utf8_p, folded, &foldlen);
6850 			total_foldlen = foldlen;
6851 			map_fold_len_back[foldlen] = (utf8_target)
6852 						     ? UTF8SKIP(utf8_p)
6853 						     : 1;
6854 		    }
6855 		    else {
6856 
6857 			/* Here, need to fold more than the first char.  Do so
6858 			 * up to the limits */
6859 			U8* source_ptr = utf8_p;    /* The source for the fold
6860 						       is the regex target
6861 						       string */
6862 			U8* folded_ptr = folded;
6863 			U8* e = utf8_p + maxlen;    /* Can't go beyond last
6864 						       available byte in the
6865 						       target string */
6866 			U8 i;
6867 			for (i = 0;
6868 			     i < UTF8_MAX_FOLD_CHAR_EXPAND && source_ptr < e;
6869 			     i++)
6870 			{
6871 
6872 			    /* Fold the next character */
6873 			    U8 this_char_folded[UTF8_MAXBYTES_CASE+1];
6874 			    STRLEN this_char_foldlen;
6875 			    to_utf8_fold(source_ptr,
6876 				         this_char_folded,
6877 					 &this_char_foldlen);
6878 
6879 			    /* Bail if it would exceed the byte limit for
6880 			     * folding a single char. */
6881 			    if (this_char_foldlen + folded_ptr - folded >
6882 							    UTF8_MAXBYTES_CASE)
6883 			    {
6884 				break;
6885 			    }
6886 
6887 			    /* Add the fold of this character */
6888 			    Copy(this_char_folded,
6889 				 folded_ptr,
6890 				 this_char_foldlen,
6891 				 U8);
6892 			    source_ptr += UTF8SKIP(source_ptr);
6893 			    folded_ptr += this_char_foldlen;
6894 			    total_foldlen = folded_ptr - folded;
6895 
6896 			    /* Create map from the number of bytes in the fold
6897 			     * back to the number of bytes in the source.  If
6898 			     * the source isn't utf8, the byte count is just
6899 			     * the number of characters so far */
6900 			    map_fold_len_back[total_foldlen]
6901 						      = (utf8_target)
6902 							? source_ptr - utf8_p
6903 							: i + 1;
6904 			}
6905 			*folded_ptr = '\0';
6906 		    }
6907 
6908 
6909 		    /* Do the linear search to see if the fold is in the list
6910 		     * of multi-char folds. */
6911 		    if (av) {
6912 		        I32 i;
6913 			for (i = 0; i <= av_len(av); i++) {
6914 			    SV* const sv = *av_fetch(av, i, FALSE);
6915 			    STRLEN len;
6916 			    const char * const s = SvPV_const(sv, len);
6917 
6918 			    if (len <= total_foldlen
6919 				&& memEQ(s, (char*)folded, len)
6920 
6921 				   /* If 0, means matched a partial char. See
6922 				    * [perl #90536] */
6923 				&& map_fold_len_back[len])
6924 			    {
6925 
6926 				/* Advance the target string ptr to account for
6927 				 * this fold, but have to translate from the
6928 				 * folded length to the corresponding source
6929 				 * length. */
6930 				if (lenp) {
6931 				    *lenp = map_fold_len_back[len];
6932 				}
6933 				match = TRUE;
6934 				break;
6935 			    }
6936 			}
6937 		    }
6938 		}
6939 
6940 		/* If we allocated a string above, free it */
6941 		if (! utf8_target) Safefree(utf8_p);
6942 	    }
6943 	}
6944     }
6945 
6946     return (flags & ANYOF_INVERT) ? !match : match;
6947 }
6948 
6949 STATIC U8 *
6950 S_reghop3(U8 *s, I32 off, const U8* lim)
6951 {
6952     /* return the position 'off' UTF-8 characters away from 's', forward if
6953      * 'off' >= 0, backwards if negative.  But don't go outside of position
6954      * 'lim', which better be < s  if off < 0 */
6955 
6956     dVAR;
6957 
6958     PERL_ARGS_ASSERT_REGHOP3;
6959 
6960     if (off >= 0) {
6961 	while (off-- && s < lim) {
6962 	    /* XXX could check well-formedness here */
6963 	    s += UTF8SKIP(s);
6964 	}
6965     }
6966     else {
6967         while (off++ && s > lim) {
6968             s--;
6969             if (UTF8_IS_CONTINUED(*s)) {
6970                 while (s > lim && UTF8_IS_CONTINUATION(*s))
6971                     s--;
6972 	    }
6973             /* XXX could check well-formedness here */
6974 	}
6975     }
6976     return s;
6977 }
6978 
6979 #ifdef XXX_dmq
6980 /* there are a bunch of places where we use two reghop3's that should
6981    be replaced with this routine. but since thats not done yet
6982    we ifdef it out - dmq
6983 */
6984 STATIC U8 *
6985 S_reghop4(U8 *s, I32 off, const U8* llim, const U8* rlim)
6986 {
6987     dVAR;
6988 
6989     PERL_ARGS_ASSERT_REGHOP4;
6990 
6991     if (off >= 0) {
6992         while (off-- && s < rlim) {
6993             /* XXX could check well-formedness here */
6994             s += UTF8SKIP(s);
6995         }
6996     }
6997     else {
6998         while (off++ && s > llim) {
6999             s--;
7000             if (UTF8_IS_CONTINUED(*s)) {
7001                 while (s > llim && UTF8_IS_CONTINUATION(*s))
7002                     s--;
7003             }
7004             /* XXX could check well-formedness here */
7005         }
7006     }
7007     return s;
7008 }
7009 #endif
7010 
7011 STATIC U8 *
7012 S_reghopmaybe3(U8* s, I32 off, const U8* lim)
7013 {
7014     dVAR;
7015 
7016     PERL_ARGS_ASSERT_REGHOPMAYBE3;
7017 
7018     if (off >= 0) {
7019 	while (off-- && s < lim) {
7020 	    /* XXX could check well-formedness here */
7021 	    s += UTF8SKIP(s);
7022 	}
7023 	if (off >= 0)
7024 	    return NULL;
7025     }
7026     else {
7027         while (off++ && s > lim) {
7028             s--;
7029             if (UTF8_IS_CONTINUED(*s)) {
7030                 while (s > lim && UTF8_IS_CONTINUATION(*s))
7031                     s--;
7032 	    }
7033             /* XXX could check well-formedness here */
7034 	}
7035 	if (off <= 0)
7036 	    return NULL;
7037     }
7038     return s;
7039 }
7040 
7041 static void
7042 restore_pos(pTHX_ void *arg)
7043 {
7044     dVAR;
7045     regexp * const rex = (regexp *)arg;
7046     if (PL_reg_eval_set) {
7047 	if (PL_reg_oldsaved) {
7048 	    rex->subbeg = PL_reg_oldsaved;
7049 	    rex->sublen = PL_reg_oldsavedlen;
7050 #ifdef PERL_OLD_COPY_ON_WRITE
7051 	    rex->saved_copy = PL_nrs;
7052 #endif
7053 	    RXp_MATCH_COPIED_on(rex);
7054 	}
7055 	PL_reg_magic->mg_len = PL_reg_oldpos;
7056 	PL_reg_eval_set = 0;
7057 	PL_curpm = PL_reg_oldcurpm;
7058     }
7059 }
7060 
7061 STATIC void
7062 S_to_utf8_substr(pTHX_ register regexp *prog)
7063 {
7064     int i = 1;
7065 
7066     PERL_ARGS_ASSERT_TO_UTF8_SUBSTR;
7067 
7068     do {
7069 	if (prog->substrs->data[i].substr
7070 	    && !prog->substrs->data[i].utf8_substr) {
7071 	    SV* const sv = newSVsv(prog->substrs->data[i].substr);
7072 	    prog->substrs->data[i].utf8_substr = sv;
7073 	    sv_utf8_upgrade(sv);
7074 	    if (SvVALID(prog->substrs->data[i].substr)) {
7075 		if (SvTAIL(prog->substrs->data[i].substr)) {
7076 		    /* Trim the trailing \n that fbm_compile added last
7077 		       time.  */
7078 		    SvCUR_set(sv, SvCUR(sv) - 1);
7079 		    /* Whilst this makes the SV technically "invalid" (as its
7080 		       buffer is no longer followed by "\0") when fbm_compile()
7081 		       adds the "\n" back, a "\0" is restored.  */
7082 		    fbm_compile(sv, FBMcf_TAIL);
7083 		} else
7084 		    fbm_compile(sv, 0);
7085 	    }
7086 	    if (prog->substrs->data[i].substr == prog->check_substr)
7087 		prog->check_utf8 = sv;
7088 	}
7089     } while (i--);
7090 }
7091 
7092 STATIC void
7093 S_to_byte_substr(pTHX_ register regexp *prog)
7094 {
7095     dVAR;
7096     int i = 1;
7097 
7098     PERL_ARGS_ASSERT_TO_BYTE_SUBSTR;
7099 
7100     do {
7101 	if (prog->substrs->data[i].utf8_substr
7102 	    && !prog->substrs->data[i].substr) {
7103 	    SV* sv = newSVsv(prog->substrs->data[i].utf8_substr);
7104 	    if (sv_utf8_downgrade(sv, TRUE)) {
7105 		if (SvVALID(prog->substrs->data[i].utf8_substr)) {
7106 		    if (SvTAIL(prog->substrs->data[i].utf8_substr)) {
7107 			/* Trim the trailing \n that fbm_compile added last
7108 			   time.  */
7109 			SvCUR_set(sv, SvCUR(sv) - 1);
7110 			fbm_compile(sv, FBMcf_TAIL);
7111 		    } else
7112 			fbm_compile(sv, 0);
7113 		}
7114 	    } else {
7115 		SvREFCNT_dec(sv);
7116 		sv = &PL_sv_undef;
7117 	    }
7118 	    prog->substrs->data[i].substr = sv;
7119 	    if (prog->substrs->data[i].utf8_substr == prog->check_utf8)
7120 		prog->check_substr = sv;
7121 	}
7122     } while (i--);
7123 }
7124 
7125 /*
7126  * Local variables:
7127  * c-indentation-style: bsd
7128  * c-basic-offset: 4
7129  * indent-tabs-mode: t
7130  * End:
7131  *
7132  * ex: set ts=8 sts=4 sw=4 noet:
7133  */
7134