xref: /openbsd-src/gnu/usr.bin/perl/regexec.c (revision ac9b4aacc1da35008afea06a5d23c2f2dea9b93e)
1 /*    regexec.c
2  */
3 
4 /*
5  * 	One Ring to rule them all, One Ring to find them
6  &
7  *     [p.v of _The Lord of the Rings_, opening poem]
8  *     [p.50 of _The Lord of the Rings_, I/iii: "The Shadow of the Past"]
9  *     [p.254 of _The Lord of the Rings_, II/ii: "The Council of Elrond"]
10  */
11 
12 /* This file contains functions for executing a regular expression.  See
13  * also regcomp.c which funnily enough, contains functions for compiling
14  * a regular expression.
15  *
16  * This file is also copied at build time to ext/re/re_exec.c, where
17  * it's built with -DPERL_EXT_RE_BUILD -DPERL_EXT_RE_DEBUG -DPERL_EXT.
18  * This causes the main functions to be compiled under new names and with
19  * debugging support added, which makes "use re 'debug'" work.
20  */
21 
22 /* NOTE: this is derived from Henry Spencer's regexp code, and should not
23  * confused with the original package (see point 3 below).  Thanks, Henry!
24  */
25 
26 /* Additional note: this code is very heavily munged from Henry's version
27  * in places.  In some spots I've traded clarity for efficiency, so don't
28  * blame Henry for some of the lack of readability.
29  */
30 
31 /* The names of the functions have been changed from regcomp and
32  * regexec to  pregcomp and pregexec in order to avoid conflicts
33  * with the POSIX routines of the same names.
34 */
35 
36 #ifdef PERL_EXT_RE_BUILD
37 #include "re_top.h"
38 #endif
39 
40 /*
41  * pregcomp and pregexec -- regsub and regerror are not used in perl
42  *
43  *	Copyright (c) 1986 by University of Toronto.
44  *	Written by Henry Spencer.  Not derived from licensed software.
45  *
46  *	Permission is granted to anyone to use this software for any
47  *	purpose on any computer system, and to redistribute it freely,
48  *	subject to the following restrictions:
49  *
50  *	1. The author is not responsible for the consequences of use of
51  *		this software, no matter how awful, even if they arise
52  *		from defects in it.
53  *
54  *	2. The origin of this software must not be misrepresented, either
55  *		by explicit claim or by omission.
56  *
57  *	3. Altered versions must be plainly marked as such, and must not
58  *		be misrepresented as being the original software.
59  *
60  ****    Alterations to Henry's code are...
61  ****
62  ****    Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999,
63  ****    2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008
64  ****    by Larry Wall and others
65  ****
66  ****    You may distribute under the terms of either the GNU General Public
67  ****    License or the Artistic License, as specified in the README file.
68  *
69  * Beware that some of this code is subtly aware of the way operator
70  * precedence is structured in regular expressions.  Serious changes in
71  * regular-expression syntax might require a total rethink.
72  */
73 #include "EXTERN.h"
74 #define PERL_IN_REGEXEC_C
75 #include "perl.h"
76 
77 #ifdef PERL_IN_XSUB_RE
78 #  include "re_comp.h"
79 #else
80 #  include "regcomp.h"
81 #endif
82 
83 #define RF_tainted	1		/* tainted information used? */
84 #define RF_warned	2		/* warned about big count? */
85 
86 #define RF_utf8		8		/* Pattern contains multibyte chars? */
87 
88 #define UTF ((PL_reg_flags & RF_utf8) != 0)
89 
90 #define RS_init		1		/* eval environment created */
91 #define RS_set		2		/* replsv value is set */
92 
93 #ifndef STATIC
94 #define	STATIC	static
95 #endif
96 
97 #define REGINCLASS(prog,p,c)  (ANYOF_FLAGS(p) ? reginclass(prog,p,c,0,0) : ANYOF_BITMAP_TEST(p,*(c)))
98 
99 /*
100  * Forwards.
101  */
102 
103 #define CHR_SVLEN(sv) (do_utf8 ? sv_len_utf8(sv) : SvCUR(sv))
104 #define CHR_DIST(a,b) (PL_reg_match_utf8 ? utf8_distance(a,b) : a - b)
105 
106 #define HOPc(pos,off) \
107 	(char *)(PL_reg_match_utf8 \
108 	    ? reghop3((U8*)pos, off, (U8*)(off >= 0 ? PL_regeol : PL_bostr)) \
109 	    : (U8*)(pos + off))
110 #define HOPBACKc(pos, off) \
111 	(char*)(PL_reg_match_utf8\
112 	    ? reghopmaybe3((U8*)pos, -off, (U8*)PL_bostr) \
113 	    : (pos - off >= PL_bostr)		\
114 		? (U8*)pos - off		\
115 		: NULL)
116 
117 #define HOP3(pos,off,lim) (PL_reg_match_utf8 ? reghop3((U8*)(pos), off, (U8*)(lim)) : (U8*)(pos + off))
118 #define HOP3c(pos,off,lim) ((char*)HOP3(pos,off,lim))
119 
120 /* these are unrolled below in the CCC_TRY_XXX defined */
121 #define LOAD_UTF8_CHARCLASS(class,str) STMT_START { \
122     if (!CAT2(PL_utf8_,class)) { bool ok; ENTER; save_re_context(); ok=CAT2(is_utf8_,class)((const U8*)str); assert(ok); LEAVE; } } STMT_END
123 
124 /* Doesn't do an assert to verify that is correct */
125 #define LOAD_UTF8_CHARCLASS_NO_CHECK(class) STMT_START { \
126     if (!CAT2(PL_utf8_,class)) { bool ok; ENTER; save_re_context(); ok=CAT2(is_utf8_,class)((const U8*)" "); LEAVE; } } STMT_END
127 
128 #define LOAD_UTF8_CHARCLASS_ALNUM() LOAD_UTF8_CHARCLASS(alnum,"a")
129 #define LOAD_UTF8_CHARCLASS_DIGIT() LOAD_UTF8_CHARCLASS(digit,"0")
130 #define LOAD_UTF8_CHARCLASS_SPACE() LOAD_UTF8_CHARCLASS(space," ")
131 
132 #define LOAD_UTF8_CHARCLASS_GCB()  /* Grapheme cluster boundaries */        \
133 	LOAD_UTF8_CHARCLASS(X_begin, " ");                                  \
134 	LOAD_UTF8_CHARCLASS(X_non_hangul, "A");                             \
135 	/* These are utf8 constants, and not utf-ebcdic constants, so the   \
136 	    * assert should likely and hopefully fail on an EBCDIC machine */ \
137 	LOAD_UTF8_CHARCLASS(X_extend, "\xcc\x80"); /* U+0300 */             \
138 									    \
139 	/* No asserts are done for these, in case called on an early        \
140 	    * Unicode version in which they map to nothing */               \
141 	LOAD_UTF8_CHARCLASS_NO_CHECK(X_prepend);/* U+0E40 "\xe0\xb9\x80" */ \
142 	LOAD_UTF8_CHARCLASS_NO_CHECK(X_L);	    /* U+1100 "\xe1\x84\x80" */ \
143 	LOAD_UTF8_CHARCLASS_NO_CHECK(X_LV);     /* U+AC00 "\xea\xb0\x80" */ \
144 	LOAD_UTF8_CHARCLASS_NO_CHECK(X_LVT);    /* U+AC01 "\xea\xb0\x81" */ \
145 	LOAD_UTF8_CHARCLASS_NO_CHECK(X_LV_LVT_V);/* U+AC01 "\xea\xb0\x81" */\
146 	LOAD_UTF8_CHARCLASS_NO_CHECK(X_T);      /* U+11A8 "\xe1\x86\xa8" */ \
147 	LOAD_UTF8_CHARCLASS_NO_CHECK(X_V)       /* U+1160 "\xe1\x85\xa0" */
148 
149 /*
150    We dont use PERL_LEGACY_UNICODE_CHARCLASS_MAPPINGS as the direct test
151    so that it is possible to override the option here without having to
152    rebuild the entire core. as we are required to do if we change regcomp.h
153    which is where PERL_LEGACY_UNICODE_CHARCLASS_MAPPINGS is defined.
154 */
155 #if PERL_LEGACY_UNICODE_CHARCLASS_MAPPINGS
156 #define BROKEN_UNICODE_CHARCLASS_MAPPINGS
157 #endif
158 
159 #ifdef BROKEN_UNICODE_CHARCLASS_MAPPINGS
160 #define LOAD_UTF8_CHARCLASS_PERL_WORD()   LOAD_UTF8_CHARCLASS_ALNUM()
161 #define LOAD_UTF8_CHARCLASS_PERL_SPACE()  LOAD_UTF8_CHARCLASS_SPACE()
162 #define LOAD_UTF8_CHARCLASS_POSIX_DIGIT() LOAD_UTF8_CHARCLASS_DIGIT()
163 #define RE_utf8_perl_word   PL_utf8_alnum
164 #define RE_utf8_perl_space  PL_utf8_space
165 #define RE_utf8_posix_digit PL_utf8_digit
166 #define perl_word  alnum
167 #define perl_space space
168 #define posix_digit digit
169 #else
170 #define LOAD_UTF8_CHARCLASS_PERL_WORD()   LOAD_UTF8_CHARCLASS(perl_word,"a")
171 #define LOAD_UTF8_CHARCLASS_PERL_SPACE()  LOAD_UTF8_CHARCLASS(perl_space," ")
172 #define LOAD_UTF8_CHARCLASS_POSIX_DIGIT() LOAD_UTF8_CHARCLASS(posix_digit,"0")
173 #define RE_utf8_perl_word   PL_utf8_perl_word
174 #define RE_utf8_perl_space  PL_utf8_perl_space
175 #define RE_utf8_posix_digit PL_utf8_posix_digit
176 #endif
177 
178 
179 #define CCC_TRY_AFF(NAME,NAMEL,CLASS,STR,LCFUNC_utf8,FUNC,LCFUNC)                          \
180         case NAMEL:                                                              \
181             PL_reg_flags |= RF_tainted;                                                 \
182             /* FALL THROUGH */                                                          \
183         case NAME:                                                                     \
184             if (!nextchr)                                                               \
185                 sayNO;                                                                  \
186             if (do_utf8 && UTF8_IS_CONTINUED(nextchr)) {                                \
187                 if (!CAT2(PL_utf8_,CLASS)) {                                            \
188                     bool ok;                                                            \
189                     ENTER;                                                              \
190                     save_re_context();                                                  \
191                     ok=CAT2(is_utf8_,CLASS)((const U8*)STR);                            \
192                     assert(ok);                                                         \
193                     LEAVE;                                                              \
194                 }                                                                       \
195                 if (!(OP(scan) == NAME                                                  \
196                     ? (bool)swash_fetch(CAT2(PL_utf8_,CLASS), (U8*)locinput, do_utf8)   \
197                     : LCFUNC_utf8((U8*)locinput)))                                      \
198                 {                                                                       \
199                     sayNO;                                                              \
200                 }                                                                       \
201                 locinput += PL_utf8skip[nextchr];                                       \
202                 nextchr = UCHARAT(locinput);                                            \
203                 break;                                                                  \
204             }                                                                           \
205             if (!(OP(scan) == NAME ? FUNC(nextchr) : LCFUNC(nextchr)))                  \
206                 sayNO;                                                                  \
207             nextchr = UCHARAT(++locinput);                                              \
208             break
209 
210 #define CCC_TRY_NEG(NAME,NAMEL,CLASS,STR,LCFUNC_utf8,FUNC,LCFUNC)                        \
211         case NAMEL:                                                              \
212             PL_reg_flags |= RF_tainted;                                                 \
213             /* FALL THROUGH */                                                          \
214         case NAME :                                                                     \
215             if (!nextchr && locinput >= PL_regeol)                                      \
216                 sayNO;                                                                  \
217             if (do_utf8 && UTF8_IS_CONTINUED(nextchr)) {                                \
218                 if (!CAT2(PL_utf8_,CLASS)) {                                            \
219                     bool ok;                                                            \
220                     ENTER;                                                              \
221                     save_re_context();                                                  \
222                     ok=CAT2(is_utf8_,CLASS)((const U8*)STR);                            \
223                     assert(ok);                                                         \
224                     LEAVE;                                                              \
225                 }                                                                       \
226                 if ((OP(scan) == NAME                                                  \
227                     ? (bool)swash_fetch(CAT2(PL_utf8_,CLASS), (U8*)locinput, do_utf8)    \
228                     : LCFUNC_utf8((U8*)locinput)))                                      \
229                 {                                                                       \
230                     sayNO;                                                              \
231                 }                                                                       \
232                 locinput += PL_utf8skip[nextchr];                                       \
233                 nextchr = UCHARAT(locinput);                                            \
234                 break;                                                                  \
235             }                                                                           \
236             if ((OP(scan) == NAME ? FUNC(nextchr) : LCFUNC(nextchr)))                   \
237                 sayNO;                                                                  \
238             nextchr = UCHARAT(++locinput);                                              \
239             break
240 
241 
242 
243 
244 
245 /* TODO: Combine JUMPABLE and HAS_TEXT to cache OP(rn) */
246 
247 /* for use after a quantifier and before an EXACT-like node -- japhy */
248 /* it would be nice to rework regcomp.sym to generate this stuff. sigh */
249 #define JUMPABLE(rn) (      \
250     OP(rn) == OPEN ||       \
251     (OP(rn) == CLOSE && (!cur_eval || cur_eval->u.eval.close_paren != ARG(rn))) || \
252     OP(rn) == EVAL ||   \
253     OP(rn) == SUSPEND || OP(rn) == IFMATCH || \
254     OP(rn) == PLUS || OP(rn) == MINMOD || \
255     OP(rn) == KEEPS || (PL_regkind[OP(rn)] == VERB) || \
256     (PL_regkind[OP(rn)] == CURLY && ARG1(rn) > 0) \
257 )
258 #define IS_EXACT(rn) (PL_regkind[OP(rn)] == EXACT)
259 
260 #define HAS_TEXT(rn) ( IS_EXACT(rn) || PL_regkind[OP(rn)] == REF )
261 
262 #if 0
263 /* Currently these are only used when PL_regkind[OP(rn)] == EXACT so
264    we don't need this definition. */
265 #define IS_TEXT(rn)   ( OP(rn)==EXACT   || OP(rn)==REF   || OP(rn)==NREF   )
266 #define IS_TEXTF(rn)  ( OP(rn)==EXACTF  || OP(rn)==REFF  || OP(rn)==NREFF  )
267 #define IS_TEXTFL(rn) ( OP(rn)==EXACTFL || OP(rn)==REFFL || OP(rn)==NREFFL )
268 
269 #else
270 /* ... so we use this as its faster. */
271 #define IS_TEXT(rn)   ( OP(rn)==EXACT   )
272 #define IS_TEXTF(rn)  ( OP(rn)==EXACTF  )
273 #define IS_TEXTFL(rn) ( OP(rn)==EXACTFL )
274 
275 #endif
276 
277 /*
278   Search for mandatory following text node; for lookahead, the text must
279   follow but for lookbehind (rn->flags != 0) we skip to the next step.
280 */
281 #define FIND_NEXT_IMPT(rn) STMT_START { \
282     while (JUMPABLE(rn)) { \
283 	const OPCODE type = OP(rn); \
284 	if (type == SUSPEND || PL_regkind[type] == CURLY) \
285 	    rn = NEXTOPER(NEXTOPER(rn)); \
286 	else if (type == PLUS) \
287 	    rn = NEXTOPER(rn); \
288 	else if (type == IFMATCH) \
289 	    rn = (rn->flags == 0) ? NEXTOPER(NEXTOPER(rn)) : rn + ARG(rn); \
290 	else rn += NEXT_OFF(rn); \
291     } \
292 } STMT_END
293 
294 
295 static void restore_pos(pTHX_ void *arg);
296 
297 STATIC CHECKPOINT
298 S_regcppush(pTHX_ I32 parenfloor)
299 {
300     dVAR;
301     const int retval = PL_savestack_ix;
302 #define REGCP_PAREN_ELEMS 4
303     const int paren_elems_to_push = (PL_regsize - parenfloor) * REGCP_PAREN_ELEMS;
304     int p;
305     GET_RE_DEBUG_FLAGS_DECL;
306 
307     if (paren_elems_to_push < 0)
308 	Perl_croak(aTHX_ "panic: paren_elems_to_push < 0");
309 
310 #define REGCP_OTHER_ELEMS 7
311     SSGROW(paren_elems_to_push + REGCP_OTHER_ELEMS);
312 
313     for (p = PL_regsize; p > parenfloor; p--) {
314 /* REGCP_PARENS_ELEMS are pushed per pairs of parentheses. */
315 	SSPUSHINT(PL_regoffs[p].end);
316 	SSPUSHINT(PL_regoffs[p].start);
317 	SSPUSHPTR(PL_reg_start_tmp[p]);
318 	SSPUSHINT(p);
319 	DEBUG_BUFFERS_r(PerlIO_printf(Perl_debug_log,
320 	  "     saving \\%"UVuf" %"IVdf"(%"IVdf")..%"IVdf"\n",
321 		      (UV)p, (IV)PL_regoffs[p].start,
322 		      (IV)(PL_reg_start_tmp[p] - PL_bostr),
323 		      (IV)PL_regoffs[p].end
324 	));
325     }
326 /* REGCP_OTHER_ELEMS are pushed in any case, parentheses or no. */
327     SSPUSHPTR(PL_regoffs);
328     SSPUSHINT(PL_regsize);
329     SSPUSHINT(*PL_reglastparen);
330     SSPUSHINT(*PL_reglastcloseparen);
331     SSPUSHPTR(PL_reginput);
332 #define REGCP_FRAME_ELEMS 2
333 /* REGCP_FRAME_ELEMS are part of the REGCP_OTHER_ELEMS and
334  * are needed for the regexp context stack bookkeeping. */
335     SSPUSHINT(paren_elems_to_push + REGCP_OTHER_ELEMS - REGCP_FRAME_ELEMS);
336     SSPUSHINT(SAVEt_REGCONTEXT); /* Magic cookie. */
337 
338     return retval;
339 }
340 
341 /* These are needed since we do not localize EVAL nodes: */
342 #define REGCP_SET(cp)                                           \
343     DEBUG_STATE_r(                                              \
344             PerlIO_printf(Perl_debug_log,		        \
345 	        "  Setting an EVAL scope, savestack=%"IVdf"\n",	\
346 	        (IV)PL_savestack_ix));                          \
347     cp = PL_savestack_ix
348 
349 #define REGCP_UNWIND(cp)                                        \
350     DEBUG_STATE_r(                                              \
351         if (cp != PL_savestack_ix) 		                \
352     	    PerlIO_printf(Perl_debug_log,		        \
353 		"  Clearing an EVAL scope, savestack=%"IVdf"..%"IVdf"\n", \
354 	        (IV)(cp), (IV)PL_savestack_ix));                \
355     regcpblow(cp)
356 
357 STATIC char *
358 S_regcppop(pTHX_ const regexp *rex)
359 {
360     dVAR;
361     U32 i;
362     char *input;
363     GET_RE_DEBUG_FLAGS_DECL;
364 
365     PERL_ARGS_ASSERT_REGCPPOP;
366 
367     /* Pop REGCP_OTHER_ELEMS before the parentheses loop starts. */
368     i = SSPOPINT;
369     assert(i == SAVEt_REGCONTEXT); /* Check that the magic cookie is there. */
370     i = SSPOPINT; /* Parentheses elements to pop. */
371     input = (char *) SSPOPPTR;
372     *PL_reglastcloseparen = SSPOPINT;
373     *PL_reglastparen = SSPOPINT;
374     PL_regsize = SSPOPINT;
375     PL_regoffs=(regexp_paren_pair *) SSPOPPTR;
376 
377 
378     /* Now restore the parentheses context. */
379     for (i -= (REGCP_OTHER_ELEMS - REGCP_FRAME_ELEMS);
380 	 i > 0; i -= REGCP_PAREN_ELEMS) {
381 	I32 tmps;
382 	U32 paren = (U32)SSPOPINT;
383 	PL_reg_start_tmp[paren] = (char *) SSPOPPTR;
384 	PL_regoffs[paren].start = SSPOPINT;
385 	tmps = SSPOPINT;
386 	if (paren <= *PL_reglastparen)
387 	    PL_regoffs[paren].end = tmps;
388 	DEBUG_BUFFERS_r(
389 	    PerlIO_printf(Perl_debug_log,
390 			  "     restoring \\%"UVuf" to %"IVdf"(%"IVdf")..%"IVdf"%s\n",
391 			  (UV)paren, (IV)PL_regoffs[paren].start,
392 			  (IV)(PL_reg_start_tmp[paren] - PL_bostr),
393 			  (IV)PL_regoffs[paren].end,
394 			  (paren > *PL_reglastparen ? "(no)" : ""));
395 	);
396     }
397     DEBUG_BUFFERS_r(
398 	if (*PL_reglastparen + 1 <= rex->nparens) {
399 	    PerlIO_printf(Perl_debug_log,
400 			  "     restoring \\%"IVdf"..\\%"IVdf" to undef\n",
401 			  (IV)(*PL_reglastparen + 1), (IV)rex->nparens);
402 	}
403     );
404 #if 1
405     /* It would seem that the similar code in regtry()
406      * already takes care of this, and in fact it is in
407      * a better location to since this code can #if 0-ed out
408      * but the code in regtry() is needed or otherwise tests
409      * requiring null fields (pat.t#187 and split.t#{13,14}
410      * (as of patchlevel 7877)  will fail.  Then again,
411      * this code seems to be necessary or otherwise
412      * this erroneously leaves $1 defined: "1" =~ /^(?:(\d)x)?\d$/
413      * --jhi updated by dapm */
414     for (i = *PL_reglastparen + 1; i <= rex->nparens; i++) {
415 	if (i > PL_regsize)
416 	    PL_regoffs[i].start = -1;
417 	PL_regoffs[i].end = -1;
418     }
419 #endif
420     return input;
421 }
422 
423 #define regcpblow(cp) LEAVE_SCOPE(cp)	/* Ignores regcppush()ed data. */
424 
425 /*
426  * pregexec and friends
427  */
428 
429 #ifndef PERL_IN_XSUB_RE
430 /*
431  - pregexec - match a regexp against a string
432  */
433 I32
434 Perl_pregexec(pTHX_ REGEXP * const prog, char* stringarg, register char *strend,
435 	 char *strbeg, I32 minend, SV *screamer, U32 nosave)
436 /* strend: pointer to null at end of string */
437 /* strbeg: real beginning of string */
438 /* minend: end of match must be >=minend after stringarg. */
439 /* nosave: For optimizations. */
440 {
441     PERL_ARGS_ASSERT_PREGEXEC;
442 
443     return
444 	regexec_flags(prog, stringarg, strend, strbeg, minend, screamer, NULL,
445 		      nosave ? 0 : REXEC_COPY_STR);
446 }
447 #endif
448 
449 /*
450  * Need to implement the following flags for reg_anch:
451  *
452  * USE_INTUIT_NOML		- Useful to call re_intuit_start() first
453  * USE_INTUIT_ML
454  * INTUIT_AUTORITATIVE_NOML	- Can trust a positive answer
455  * INTUIT_AUTORITATIVE_ML
456  * INTUIT_ONCE_NOML		- Intuit can match in one location only.
457  * INTUIT_ONCE_ML
458  *
459  * Another flag for this function: SECOND_TIME (so that float substrs
460  * with giant delta may be not rechecked).
461  */
462 
463 /* Assumptions: if ANCH_GPOS, then strpos is anchored. XXXX Check GPOS logic */
464 
465 /* If SCREAM, then SvPVX_const(sv) should be compatible with strpos and strend.
466    Otherwise, only SvCUR(sv) is used to get strbeg. */
467 
468 /* XXXX We assume that strpos is strbeg unless sv. */
469 
470 /* XXXX Some places assume that there is a fixed substring.
471 	An update may be needed if optimizer marks as "INTUITable"
472 	RExen without fixed substrings.  Similarly, it is assumed that
473 	lengths of all the strings are no more than minlen, thus they
474 	cannot come from lookahead.
475 	(Or minlen should take into account lookahead.)
476   NOTE: Some of this comment is not correct. minlen does now take account
477   of lookahead/behind. Further research is required. -- demerphq
478 
479 */
480 
481 /* A failure to find a constant substring means that there is no need to make
482    an expensive call to REx engine, thus we celebrate a failure.  Similarly,
483    finding a substring too deep into the string means that less calls to
484    regtry() should be needed.
485 
486    REx compiler's optimizer found 4 possible hints:
487 	a) Anchored substring;
488 	b) Fixed substring;
489 	c) Whether we are anchored (beginning-of-line or \G);
490 	d) First node (of those at offset 0) which may distingush positions;
491    We use a)b)d) and multiline-part of c), and try to find a position in the
492    string which does not contradict any of them.
493  */
494 
495 /* Most of decisions we do here should have been done at compile time.
496    The nodes of the REx which we used for the search should have been
497    deleted from the finite automaton. */
498 
499 char *
500 Perl_re_intuit_start(pTHX_ REGEXP * const rx, SV *sv, char *strpos,
501 		     char *strend, const U32 flags, re_scream_pos_data *data)
502 {
503     dVAR;
504     struct regexp *const prog = (struct regexp *)SvANY(rx);
505     register I32 start_shift = 0;
506     /* Should be nonnegative! */
507     register I32 end_shift   = 0;
508     register char *s;
509     register SV *check;
510     char *strbeg;
511     char *t;
512     const bool do_utf8 = (sv && SvUTF8(sv)) ? 1 : 0; /* if no sv we have to assume bytes */
513     I32 ml_anch;
514     register char *other_last = NULL;	/* other substr checked before this */
515     char *check_at = NULL;		/* check substr found at this pos */
516     const I32 multiline = prog->extflags & RXf_PMf_MULTILINE;
517     RXi_GET_DECL(prog,progi);
518 #ifdef DEBUGGING
519     const char * const i_strpos = strpos;
520 #endif
521     GET_RE_DEBUG_FLAGS_DECL;
522 
523     PERL_ARGS_ASSERT_RE_INTUIT_START;
524 
525     RX_MATCH_UTF8_set(rx,do_utf8);
526 
527     if (RX_UTF8(rx)) {
528 	PL_reg_flags |= RF_utf8;
529     }
530     DEBUG_EXECUTE_r(
531         debug_start_match(rx, do_utf8, strpos, strend,
532             sv ? "Guessing start of match in sv for"
533                : "Guessing start of match in string for");
534 	      );
535 
536     /* CHR_DIST() would be more correct here but it makes things slow. */
537     if (prog->minlen > strend - strpos) {
538 	DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
539 			      "String too short... [re_intuit_start]\n"));
540 	goto fail;
541     }
542 
543     strbeg = (sv && SvPOK(sv)) ? strend - SvCUR(sv) : strpos;
544     PL_regeol = strend;
545     if (do_utf8) {
546 	if (!prog->check_utf8 && prog->check_substr)
547 	    to_utf8_substr(prog);
548 	check = prog->check_utf8;
549     } else {
550 	if (!prog->check_substr && prog->check_utf8)
551 	    to_byte_substr(prog);
552 	check = prog->check_substr;
553     }
554     if (check == &PL_sv_undef) {
555 	DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
556 		"Non-utf8 string cannot match utf8 check string\n"));
557 	goto fail;
558     }
559     if (prog->extflags & RXf_ANCH) {	/* Match at beg-of-str or after \n */
560 	ml_anch = !( (prog->extflags & RXf_ANCH_SINGLE)
561 		     || ( (prog->extflags & RXf_ANCH_BOL)
562 			  && !multiline ) );	/* Check after \n? */
563 
564 	if (!ml_anch) {
565 	  if ( !(prog->extflags & RXf_ANCH_GPOS) /* Checked by the caller */
566 		&& !(prog->intflags & PREGf_IMPLICIT) /* not a real BOL */
567 	       /* SvCUR is not set on references: SvRV and SvPVX_const overlap */
568 	       && sv && !SvROK(sv)
569 	       && (strpos != strbeg)) {
570 	      DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "Not at start...\n"));
571 	      goto fail;
572 	  }
573 	  if (prog->check_offset_min == prog->check_offset_max &&
574 	      !(prog->extflags & RXf_CANY_SEEN)) {
575 	    /* Substring at constant offset from beg-of-str... */
576 	    I32 slen;
577 
578 	    s = HOP3c(strpos, prog->check_offset_min, strend);
579 
580 	    if (SvTAIL(check)) {
581 		slen = SvCUR(check);	/* >= 1 */
582 
583 		if ( strend - s > slen || strend - s < slen - 1
584 		     || (strend - s == slen && strend[-1] != '\n')) {
585 		    DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "String too long...\n"));
586 		    goto fail_finish;
587 		}
588 		/* Now should match s[0..slen-2] */
589 		slen--;
590 		if (slen && (*SvPVX_const(check) != *s
591 			     || (slen > 1
592 				 && memNE(SvPVX_const(check), s, slen)))) {
593 		  report_neq:
594 		    DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "String not equal...\n"));
595 		    goto fail_finish;
596 		}
597 	    }
598 	    else if (*SvPVX_const(check) != *s
599 		     || ((slen = SvCUR(check)) > 1
600 			 && memNE(SvPVX_const(check), s, slen)))
601 		goto report_neq;
602 	    check_at = s;
603 	    goto success_at_start;
604 	  }
605 	}
606 	/* Match is anchored, but substr is not anchored wrt beg-of-str. */
607 	s = strpos;
608 	start_shift = prog->check_offset_min; /* okay to underestimate on CC */
609 	end_shift = prog->check_end_shift;
610 
611 	if (!ml_anch) {
612 	    const I32 end = prog->check_offset_max + CHR_SVLEN(check)
613 					 - (SvTAIL(check) != 0);
614 	    const I32 eshift = CHR_DIST((U8*)strend, (U8*)s) - end;
615 
616 	    if (end_shift < eshift)
617 		end_shift = eshift;
618 	}
619     }
620     else {				/* Can match at random position */
621 	ml_anch = 0;
622 	s = strpos;
623 	start_shift = prog->check_offset_min;  /* okay to underestimate on CC */
624 	end_shift = prog->check_end_shift;
625 
626 	/* end shift should be non negative here */
627     }
628 
629 #ifdef QDEBUGGING	/* 7/99: reports of failure (with the older version) */
630     if (end_shift < 0)
631 	Perl_croak(aTHX_ "panic: end_shift: %"IVdf" pattern:\n%s\n ",
632 		   (IV)end_shift, RX_PRECOMP(prog));
633 #endif
634 
635   restart:
636     /* Find a possible match in the region s..strend by looking for
637        the "check" substring in the region corrected by start/end_shift. */
638 
639     {
640         I32 srch_start_shift = start_shift;
641         I32 srch_end_shift = end_shift;
642         if (srch_start_shift < 0 && strbeg - s > srch_start_shift) {
643 	    srch_end_shift -= ((strbeg - s) - srch_start_shift);
644 	    srch_start_shift = strbeg - s;
645 	}
646     DEBUG_OPTIMISE_MORE_r({
647         PerlIO_printf(Perl_debug_log, "Check offset min: %"IVdf" Start shift: %"IVdf" End shift %"IVdf" Real End Shift: %"IVdf"\n",
648             (IV)prog->check_offset_min,
649             (IV)srch_start_shift,
650             (IV)srch_end_shift,
651             (IV)prog->check_end_shift);
652     });
653 
654     if (flags & REXEC_SCREAM) {
655 	I32 p = -1;			/* Internal iterator of scream. */
656 	I32 * const pp = data ? data->scream_pos : &p;
657 
658 	if (PL_screamfirst[BmRARE(check)] >= 0
659 	    || ( BmRARE(check) == '\n'
660 		 && (BmPREVIOUS(check) == SvCUR(check) - 1)
661 		 && SvTAIL(check) ))
662 	    s = screaminstr(sv, check,
663 			    srch_start_shift + (s - strbeg), srch_end_shift, pp, 0);
664 	else
665 	    goto fail_finish;
666 	/* we may be pointing at the wrong string */
667 	if (s && RXp_MATCH_COPIED(prog))
668 	    s = strbeg + (s - SvPVX_const(sv));
669 	if (data)
670 	    *data->scream_olds = s;
671     }
672     else {
673         U8* start_point;
674         U8* end_point;
675         if (prog->extflags & RXf_CANY_SEEN) {
676             start_point= (U8*)(s + srch_start_shift);
677             end_point= (U8*)(strend - srch_end_shift);
678         } else {
679 	    start_point= HOP3(s, srch_start_shift, srch_start_shift < 0 ? strbeg : strend);
680             end_point= HOP3(strend, -srch_end_shift, strbeg);
681 	}
682 	DEBUG_OPTIMISE_MORE_r({
683             PerlIO_printf(Perl_debug_log, "fbm_instr len=%d str=<%.*s>\n",
684                 (int)(end_point - start_point),
685                 (int)(end_point - start_point) > 20 ? 20 : (int)(end_point - start_point),
686                 start_point);
687         });
688 
689 	s = fbm_instr( start_point, end_point,
690 		      check, multiline ? FBMrf_MULTILINE : 0);
691     }
692     }
693     /* Update the count-of-usability, remove useless subpatterns,
694 	unshift s.  */
695 
696     DEBUG_EXECUTE_r({
697         RE_PV_QUOTED_DECL(quoted, do_utf8, PERL_DEBUG_PAD_ZERO(0),
698             SvPVX_const(check), RE_SV_DUMPLEN(check), 30);
699         PerlIO_printf(Perl_debug_log, "%s %s substr %s%s%s",
700 			  (s ? "Found" : "Did not find"),
701 	    (check == (do_utf8 ? prog->anchored_utf8 : prog->anchored_substr)
702 	        ? "anchored" : "floating"),
703 	    quoted,
704 	    RE_SV_TAIL(check),
705 	    (s ? " at offset " : "...\n") );
706     });
707 
708     if (!s)
709 	goto fail_finish;
710     /* Finish the diagnostic message */
711     DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "%ld...\n", (long)(s - i_strpos)) );
712 
713     /* XXX dmq: first branch is for positive lookbehind...
714        Our check string is offset from the beginning of the pattern.
715        So we need to do any stclass tests offset forward from that
716        point. I think. :-(
717      */
718 
719 
720 
721     check_at=s;
722 
723 
724     /* Got a candidate.  Check MBOL anchoring, and the *other* substr.
725        Start with the other substr.
726        XXXX no SCREAM optimization yet - and a very coarse implementation
727        XXXX /ttx+/ results in anchored="ttx", floating="x".  floating will
728 		*always* match.  Probably should be marked during compile...
729        Probably it is right to do no SCREAM here...
730      */
731 
732     if (do_utf8 ? (prog->float_utf8 && prog->anchored_utf8)
733                 : (prog->float_substr && prog->anchored_substr))
734     {
735 	/* Take into account the "other" substring. */
736 	/* XXXX May be hopelessly wrong for UTF... */
737 	if (!other_last)
738 	    other_last = strpos;
739 	if (check == (do_utf8 ? prog->float_utf8 : prog->float_substr)) {
740 	  do_other_anchored:
741 	    {
742 		char * const last = HOP3c(s, -start_shift, strbeg);
743 		char *last1, *last2;
744 		char * const saved_s = s;
745 		SV* must;
746 
747 		t = s - prog->check_offset_max;
748 		if (s - strpos > prog->check_offset_max  /* signed-corrected t > strpos */
749 		    && (!do_utf8
750 			|| ((t = (char*)reghopmaybe3((U8*)s, -(prog->check_offset_max), (U8*)strpos))
751 			    && t > strpos)))
752 		    NOOP;
753 		else
754 		    t = strpos;
755 		t = HOP3c(t, prog->anchored_offset, strend);
756 		if (t < other_last)	/* These positions already checked */
757 		    t = other_last;
758 		last2 = last1 = HOP3c(strend, -prog->minlen, strbeg);
759 		if (last < last1)
760 		    last1 = last;
761                 /* XXXX It is not documented what units *_offsets are in.
762                    We assume bytes, but this is clearly wrong.
763                    Meaning this code needs to be carefully reviewed for errors.
764                    dmq.
765                   */
766 
767 		/* On end-of-str: see comment below. */
768 		must = do_utf8 ? prog->anchored_utf8 : prog->anchored_substr;
769 		if (must == &PL_sv_undef) {
770 		    s = (char*)NULL;
771 		    DEBUG_r(must = prog->anchored_utf8);	/* for debug */
772 		}
773 		else
774 		    s = fbm_instr(
775 			(unsigned char*)t,
776 			HOP3(HOP3(last1, prog->anchored_offset, strend)
777 				+ SvCUR(must), -(SvTAIL(must)!=0), strbeg),
778 			must,
779 			multiline ? FBMrf_MULTILINE : 0
780 		    );
781                 DEBUG_EXECUTE_r({
782                     RE_PV_QUOTED_DECL(quoted, do_utf8, PERL_DEBUG_PAD_ZERO(0),
783                         SvPVX_const(must), RE_SV_DUMPLEN(must), 30);
784                     PerlIO_printf(Perl_debug_log, "%s anchored substr %s%s",
785 			(s ? "Found" : "Contradicts"),
786                         quoted, RE_SV_TAIL(must));
787                 });
788 
789 
790 		if (!s) {
791 		    if (last1 >= last2) {
792 			DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
793 						", giving up...\n"));
794 			goto fail_finish;
795 		    }
796 		    DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
797 			", trying floating at offset %ld...\n",
798 			(long)(HOP3c(saved_s, 1, strend) - i_strpos)));
799 		    other_last = HOP3c(last1, prog->anchored_offset+1, strend);
800 		    s = HOP3c(last, 1, strend);
801 		    goto restart;
802 		}
803 		else {
804 		    DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, " at offset %ld...\n",
805 			  (long)(s - i_strpos)));
806 		    t = HOP3c(s, -prog->anchored_offset, strbeg);
807 		    other_last = HOP3c(s, 1, strend);
808 		    s = saved_s;
809 		    if (t == strpos)
810 			goto try_at_start;
811 		    goto try_at_offset;
812 		}
813 	    }
814 	}
815 	else {		/* Take into account the floating substring. */
816 	    char *last, *last1;
817 	    char * const saved_s = s;
818 	    SV* must;
819 
820 	    t = HOP3c(s, -start_shift, strbeg);
821 	    last1 = last =
822 		HOP3c(strend, -prog->minlen + prog->float_min_offset, strbeg);
823 	    if (CHR_DIST((U8*)last, (U8*)t) > prog->float_max_offset)
824 		last = HOP3c(t, prog->float_max_offset, strend);
825 	    s = HOP3c(t, prog->float_min_offset, strend);
826 	    if (s < other_last)
827 		s = other_last;
828  /* XXXX It is not documented what units *_offsets are in.  Assume bytes.  */
829 	    must = do_utf8 ? prog->float_utf8 : prog->float_substr;
830 	    /* fbm_instr() takes into account exact value of end-of-str
831 	       if the check is SvTAIL(ed).  Since false positives are OK,
832 	       and end-of-str is not later than strend we are OK. */
833 	    if (must == &PL_sv_undef) {
834 		s = (char*)NULL;
835 		DEBUG_r(must = prog->float_utf8);	/* for debug message */
836 	    }
837 	    else
838 		s = fbm_instr((unsigned char*)s,
839 			      (unsigned char*)last + SvCUR(must)
840 				  - (SvTAIL(must)!=0),
841 			      must, multiline ? FBMrf_MULTILINE : 0);
842 	    DEBUG_EXECUTE_r({
843 	        RE_PV_QUOTED_DECL(quoted, do_utf8, PERL_DEBUG_PAD_ZERO(0),
844 	            SvPVX_const(must), RE_SV_DUMPLEN(must), 30);
845 	        PerlIO_printf(Perl_debug_log, "%s floating substr %s%s",
846 		    (s ? "Found" : "Contradicts"),
847 		    quoted, RE_SV_TAIL(must));
848             });
849 	    if (!s) {
850 		if (last1 == last) {
851 		    DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
852 					    ", giving up...\n"));
853 		    goto fail_finish;
854 		}
855 		DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
856 		    ", trying anchored starting at offset %ld...\n",
857 		    (long)(saved_s + 1 - i_strpos)));
858 		other_last = last;
859 		s = HOP3c(t, 1, strend);
860 		goto restart;
861 	    }
862 	    else {
863 		DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, " at offset %ld...\n",
864 		      (long)(s - i_strpos)));
865 		other_last = s; /* Fix this later. --Hugo */
866 		s = saved_s;
867 		if (t == strpos)
868 		    goto try_at_start;
869 		goto try_at_offset;
870 	    }
871 	}
872     }
873 
874 
875     t= (char*)HOP3( s, -prog->check_offset_max, (prog->check_offset_max<0) ? strend : strpos);
876 
877     DEBUG_OPTIMISE_MORE_r(
878         PerlIO_printf(Perl_debug_log,
879             "Check offset min:%"IVdf" max:%"IVdf" S:%"IVdf" t:%"IVdf" D:%"IVdf" end:%"IVdf"\n",
880             (IV)prog->check_offset_min,
881             (IV)prog->check_offset_max,
882             (IV)(s-strpos),
883             (IV)(t-strpos),
884             (IV)(t-s),
885             (IV)(strend-strpos)
886         )
887     );
888 
889     if (s - strpos > prog->check_offset_max  /* signed-corrected t > strpos */
890         && (!do_utf8
891 	    || ((t = (char*)reghopmaybe3((U8*)s, -prog->check_offset_max, (U8*) ((prog->check_offset_max<0) ? strend : strpos)))
892 		 && t > strpos)))
893     {
894 	/* Fixed substring is found far enough so that the match
895 	   cannot start at strpos. */
896       try_at_offset:
897 	if (ml_anch && t[-1] != '\n') {
898 	    /* Eventually fbm_*() should handle this, but often
899 	       anchored_offset is not 0, so this check will not be wasted. */
900 	    /* XXXX In the code below we prefer to look for "^" even in
901 	       presence of anchored substrings.  And we search even
902 	       beyond the found float position.  These pessimizations
903 	       are historical artefacts only.  */
904 	  find_anchor:
905 	    while (t < strend - prog->minlen) {
906 		if (*t == '\n') {
907 		    if (t < check_at - prog->check_offset_min) {
908 			if (do_utf8 ? prog->anchored_utf8 : prog->anchored_substr) {
909 			    /* Since we moved from the found position,
910 			       we definitely contradict the found anchored
911 			       substr.  Due to the above check we do not
912 			       contradict "check" substr.
913 			       Thus we can arrive here only if check substr
914 			       is float.  Redo checking for "other"=="fixed".
915 			     */
916 			    strpos = t + 1;
917 			    DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "Found /%s^%s/m at offset %ld, rescanning for anchored from offset %ld...\n",
918 				PL_colors[0], PL_colors[1], (long)(strpos - i_strpos), (long)(strpos - i_strpos + prog->anchored_offset)));
919 			    goto do_other_anchored;
920 			}
921 			/* We don't contradict the found floating substring. */
922 			/* XXXX Why not check for STCLASS? */
923 			s = t + 1;
924 			DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "Found /%s^%s/m at offset %ld...\n",
925 			    PL_colors[0], PL_colors[1], (long)(s - i_strpos)));
926 			goto set_useful;
927 		    }
928 		    /* Position contradicts check-string */
929 		    /* XXXX probably better to look for check-string
930 		       than for "\n", so one should lower the limit for t? */
931 		    DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "Found /%s^%s/m, restarting lookup for check-string at offset %ld...\n",
932 			PL_colors[0], PL_colors[1], (long)(t + 1 - i_strpos)));
933 		    other_last = strpos = s = t + 1;
934 		    goto restart;
935 		}
936 		t++;
937 	    }
938 	    DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "Did not find /%s^%s/m...\n",
939 			PL_colors[0], PL_colors[1]));
940 	    goto fail_finish;
941 	}
942 	else {
943 	    DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "Starting position does not contradict /%s^%s/m...\n",
944 			PL_colors[0], PL_colors[1]));
945 	}
946 	s = t;
947       set_useful:
948 	++BmUSEFUL(do_utf8 ? prog->check_utf8 : prog->check_substr);	/* hooray/5 */
949     }
950     else {
951 	/* The found string does not prohibit matching at strpos,
952 	   - no optimization of calling REx engine can be performed,
953 	   unless it was an MBOL and we are not after MBOL,
954 	   or a future STCLASS check will fail this. */
955       try_at_start:
956 	/* Even in this situation we may use MBOL flag if strpos is offset
957 	   wrt the start of the string. */
958 	if (ml_anch && sv && !SvROK(sv)	/* See prev comment on SvROK */
959 	    && (strpos != strbeg) && strpos[-1] != '\n'
960 	    /* May be due to an implicit anchor of m{.*foo}  */
961 	    && !(prog->intflags & PREGf_IMPLICIT))
962 	{
963 	    t = strpos;
964 	    goto find_anchor;
965 	}
966 	DEBUG_EXECUTE_r( if (ml_anch)
967 	    PerlIO_printf(Perl_debug_log, "Position at offset %ld does not contradict /%s^%s/m...\n",
968 			  (long)(strpos - i_strpos), PL_colors[0], PL_colors[1]);
969 	);
970       success_at_start:
971 	if (!(prog->intflags & PREGf_NAUGHTY)	/* XXXX If strpos moved? */
972 	    && (do_utf8 ? (
973 		prog->check_utf8		/* Could be deleted already */
974 		&& --BmUSEFUL(prog->check_utf8) < 0
975 		&& (prog->check_utf8 == prog->float_utf8)
976 	    ) : (
977 		prog->check_substr		/* Could be deleted already */
978 		&& --BmUSEFUL(prog->check_substr) < 0
979 		&& (prog->check_substr == prog->float_substr)
980 	    )))
981 	{
982 	    /* If flags & SOMETHING - do not do it many times on the same match */
983 	    DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "... Disabling check substring...\n"));
984 	    /* XXX Does the destruction order has to change with do_utf8? */
985 	    SvREFCNT_dec(do_utf8 ? prog->check_utf8 : prog->check_substr);
986 	    SvREFCNT_dec(do_utf8 ? prog->check_substr : prog->check_utf8);
987 	    prog->check_substr = prog->check_utf8 = NULL;	/* disable */
988 	    prog->float_substr = prog->float_utf8 = NULL;	/* clear */
989 	    check = NULL;			/* abort */
990 	    s = strpos;
991 	    /* XXXX This is a remnant of the old implementation.  It
992 	            looks wasteful, since now INTUIT can use many
993 	            other heuristics. */
994 	    prog->extflags &= ~RXf_USE_INTUIT;
995 	}
996 	else
997 	    s = strpos;
998     }
999 
1000     /* Last resort... */
1001     /* XXXX BmUSEFUL already changed, maybe multiple change is meaningful... */
1002     /* trie stclasses are too expensive to use here, we are better off to
1003        leave it to regmatch itself */
1004     if (progi->regstclass && PL_regkind[OP(progi->regstclass)]!=TRIE) {
1005 	/* minlen == 0 is possible if regstclass is \b or \B,
1006 	   and the fixed substr is ''$.
1007 	   Since minlen is already taken into account, s+1 is before strend;
1008 	   accidentally, minlen >= 1 guaranties no false positives at s + 1
1009 	   even for \b or \B.  But (minlen? 1 : 0) below assumes that
1010 	   regstclass does not come from lookahead...  */
1011 	/* If regstclass takes bytelength more than 1: If charlength==1, OK.
1012 	   This leaves EXACTF only, which is dealt with in find_byclass().  */
1013         const U8* const str = (U8*)STRING(progi->regstclass);
1014         const int cl_l = (PL_regkind[OP(progi->regstclass)] == EXACT
1015 		    ? CHR_DIST(str+STR_LEN(progi->regstclass), str)
1016 		    : 1);
1017 	char * endpos;
1018 	if (prog->anchored_substr || prog->anchored_utf8 || ml_anch)
1019             endpos= HOP3c(s, (prog->minlen ? cl_l : 0), strend);
1020         else if (prog->float_substr || prog->float_utf8)
1021 	    endpos= HOP3c(HOP3c(check_at, -start_shift, strbeg), cl_l, strend);
1022         else
1023             endpos= strend;
1024 
1025         DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "start_shift: %"IVdf" check_at: %"IVdf" s: %"IVdf" endpos: %"IVdf"\n",
1026 				      (IV)start_shift, (IV)(check_at - strbeg), (IV)(s - strbeg), (IV)(endpos - strbeg)));
1027 
1028 	t = s;
1029         s = find_byclass(prog, progi->regstclass, s, endpos, NULL);
1030 	if (!s) {
1031 #ifdef DEBUGGING
1032 	    const char *what = NULL;
1033 #endif
1034 	    if (endpos == strend) {
1035 		DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
1036 				"Could not match STCLASS...\n") );
1037 		goto fail;
1038 	    }
1039 	    DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
1040 				   "This position contradicts STCLASS...\n") );
1041 	    if ((prog->extflags & RXf_ANCH) && !ml_anch)
1042 		goto fail;
1043 	    /* Contradict one of substrings */
1044 	    if (prog->anchored_substr || prog->anchored_utf8) {
1045 		if ((do_utf8 ? prog->anchored_utf8 : prog->anchored_substr) == check) {
1046 		    DEBUG_EXECUTE_r( what = "anchored" );
1047 		  hop_and_restart:
1048 		    s = HOP3c(t, 1, strend);
1049 		    if (s + start_shift + end_shift > strend) {
1050 			/* XXXX Should be taken into account earlier? */
1051 			DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
1052 					       "Could not match STCLASS...\n") );
1053 			goto fail;
1054 		    }
1055 		    if (!check)
1056 			goto giveup;
1057 		    DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
1058 				"Looking for %s substr starting at offset %ld...\n",
1059 				 what, (long)(s + start_shift - i_strpos)) );
1060 		    goto restart;
1061 		}
1062 		/* Have both, check_string is floating */
1063 		if (t + start_shift >= check_at) /* Contradicts floating=check */
1064 		    goto retry_floating_check;
1065 		/* Recheck anchored substring, but not floating... */
1066 		s = check_at;
1067 		if (!check)
1068 		    goto giveup;
1069 		DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
1070 			  "Looking for anchored substr starting at offset %ld...\n",
1071 			  (long)(other_last - i_strpos)) );
1072 		goto do_other_anchored;
1073 	    }
1074 	    /* Another way we could have checked stclass at the
1075                current position only: */
1076 	    if (ml_anch) {
1077 		s = t = t + 1;
1078 		if (!check)
1079 		    goto giveup;
1080 		DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
1081 			  "Looking for /%s^%s/m starting at offset %ld...\n",
1082 			  PL_colors[0], PL_colors[1], (long)(t - i_strpos)) );
1083 		goto try_at_offset;
1084 	    }
1085 	    if (!(do_utf8 ? prog->float_utf8 : prog->float_substr))	/* Could have been deleted */
1086 		goto fail;
1087 	    /* Check is floating subtring. */
1088 	  retry_floating_check:
1089 	    t = check_at - start_shift;
1090 	    DEBUG_EXECUTE_r( what = "floating" );
1091 	    goto hop_and_restart;
1092 	}
1093 	if (t != s) {
1094             DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
1095 			"By STCLASS: moving %ld --> %ld\n",
1096                                   (long)(t - i_strpos), (long)(s - i_strpos))
1097                    );
1098         }
1099         else {
1100             DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
1101                                   "Does not contradict STCLASS...\n");
1102                    );
1103         }
1104     }
1105   giveup:
1106     DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "%s%s:%s match at offset %ld\n",
1107 			  PL_colors[4], (check ? "Guessed" : "Giving up"),
1108 			  PL_colors[5], (long)(s - i_strpos)) );
1109     return s;
1110 
1111   fail_finish:				/* Substring not found */
1112     if (prog->check_substr || prog->check_utf8)		/* could be removed already */
1113 	BmUSEFUL(do_utf8 ? prog->check_utf8 : prog->check_substr) += 5; /* hooray */
1114   fail:
1115     DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "%sMatch rejected by optimizer%s\n",
1116 			  PL_colors[4], PL_colors[5]));
1117     return NULL;
1118 }
1119 
1120 #define DECL_TRIE_TYPE(scan) \
1121     const enum { trie_plain, trie_utf8, trie_utf8_fold, trie_latin_utf8_fold } \
1122 		    trie_type = (scan->flags != EXACT) \
1123 		              ? (do_utf8 ? trie_utf8_fold : (UTF ? trie_latin_utf8_fold : trie_plain)) \
1124                               : (do_utf8 ? trie_utf8 : trie_plain)
1125 
1126 #define REXEC_TRIE_READ_CHAR(trie_type, trie, widecharmap, uc, uscan, len,  \
1127 uvc, charid, foldlen, foldbuf, uniflags) STMT_START {                       \
1128     switch (trie_type) {                                                    \
1129     case trie_utf8_fold:                                                    \
1130 	if ( foldlen>0 ) {                                                  \
1131 	    uvc = utf8n_to_uvuni( uscan, UTF8_MAXLEN, &len, uniflags ); \
1132 	    foldlen -= len;                                                 \
1133 	    uscan += len;                                                   \
1134 	    len=0;                                                          \
1135 	} else {                                                            \
1136 	    uvc = utf8n_to_uvuni( (U8*)uc, UTF8_MAXLEN, &len, uniflags ); \
1137 	    uvc = to_uni_fold( uvc, foldbuf, &foldlen );                    \
1138 	    foldlen -= UNISKIP( uvc );                                      \
1139 	    uscan = foldbuf + UNISKIP( uvc );                               \
1140 	}                                                                   \
1141 	break;                                                              \
1142     case trie_latin_utf8_fold:                                              \
1143 	if ( foldlen>0 ) {                                                  \
1144 	    uvc = utf8n_to_uvuni( uscan, UTF8_MAXLEN, &len, uniflags );     \
1145 	    foldlen -= len;                                                 \
1146 	    uscan += len;                                                   \
1147 	    len=0;                                                          \
1148 	} else {                                                            \
1149 	    len = 1;                                                        \
1150 	    uvc = to_uni_fold( *(U8*)uc, foldbuf, &foldlen );               \
1151 	    foldlen -= UNISKIP( uvc );                                      \
1152 	    uscan = foldbuf + UNISKIP( uvc );                               \
1153 	}                                                                   \
1154 	break;                                                              \
1155     case trie_utf8:                                                         \
1156 	uvc = utf8n_to_uvuni( (U8*)uc, UTF8_MAXLEN, &len, uniflags );       \
1157 	break;                                                              \
1158     case trie_plain:                                                        \
1159 	uvc = (UV)*uc;                                                      \
1160 	len = 1;                                                            \
1161     }                                                                       \
1162     if (uvc < 256) {                                                        \
1163 	charid = trie->charmap[ uvc ];                                      \
1164     }                                                                       \
1165     else {                                                                  \
1166 	charid = 0;                                                         \
1167 	if (widecharmap) {                                                  \
1168 	    SV** const svpp = hv_fetch(widecharmap,                         \
1169 			(char*)&uvc, sizeof(UV), 0);                        \
1170 	    if (svpp)                                                       \
1171 		charid = (U16)SvIV(*svpp);                                  \
1172 	}                                                                   \
1173     }                                                                       \
1174 } STMT_END
1175 
1176 #define REXEC_FBC_EXACTISH_CHECK(CoNd)                 \
1177 {                                                      \
1178     char *my_strend= (char *)strend;                   \
1179     if ( (CoNd)                                        \
1180 	 && (ln == len ||                              \
1181 	     !ibcmp_utf8(s, &my_strend, 0,  do_utf8,   \
1182 			m, NULL, ln, (bool)UTF))       \
1183 	 && (!reginfo || regtry(reginfo, &s)) )        \
1184 	goto got_it;                                   \
1185     else {                                             \
1186 	 U8 foldbuf[UTF8_MAXBYTES_CASE+1];             \
1187 	 uvchr_to_utf8(tmpbuf, c);                     \
1188 	 f = to_utf8_fold(tmpbuf, foldbuf, &foldlen);  \
1189 	 if ( f != c                                   \
1190 	      && (f == c1 || f == c2)                  \
1191 	      && (ln == len ||                         \
1192 	        !ibcmp_utf8(s, &my_strend, 0,  do_utf8,\
1193 			      m, NULL, ln, (bool)UTF)) \
1194 	      && (!reginfo || regtry(reginfo, &s)) )   \
1195 	      goto got_it;                             \
1196     }                                                  \
1197 }                                                      \
1198 s += len
1199 
1200 #define REXEC_FBC_EXACTISH_SCAN(CoNd)                     \
1201 STMT_START {                                              \
1202     while (s <= e) {                                      \
1203 	if ( (CoNd)                                       \
1204 	     && (ln == 1 || !(OP(c) == EXACTF             \
1205 			      ? ibcmp(s, m, ln)           \
1206 			      : ibcmp_locale(s, m, ln)))  \
1207 	     && (!reginfo || regtry(reginfo, &s)) )        \
1208 	    goto got_it;                                  \
1209 	s++;                                              \
1210     }                                                     \
1211 } STMT_END
1212 
1213 #define REXEC_FBC_UTF8_SCAN(CoDe)                     \
1214 STMT_START {                                          \
1215     while (s + (uskip = UTF8SKIP(s)) <= strend) {     \
1216 	CoDe                                          \
1217 	s += uskip;                                   \
1218     }                                                 \
1219 } STMT_END
1220 
1221 #define REXEC_FBC_SCAN(CoDe)                          \
1222 STMT_START {                                          \
1223     while (s < strend) {                              \
1224 	CoDe                                          \
1225 	s++;                                          \
1226     }                                                 \
1227 } STMT_END
1228 
1229 #define REXEC_FBC_UTF8_CLASS_SCAN(CoNd)               \
1230 REXEC_FBC_UTF8_SCAN(                                  \
1231     if (CoNd) {                                       \
1232 	if (tmp && (!reginfo || regtry(reginfo, &s)))  \
1233 	    goto got_it;                              \
1234 	else                                          \
1235 	    tmp = doevery;                            \
1236     }                                                 \
1237     else                                              \
1238 	tmp = 1;                                      \
1239 )
1240 
1241 #define REXEC_FBC_CLASS_SCAN(CoNd)                    \
1242 REXEC_FBC_SCAN(                                       \
1243     if (CoNd) {                                       \
1244 	if (tmp && (!reginfo || regtry(reginfo, &s)))  \
1245 	    goto got_it;                              \
1246 	else                                          \
1247 	    tmp = doevery;                            \
1248     }                                                 \
1249     else                                              \
1250 	tmp = 1;                                      \
1251 )
1252 
1253 #define REXEC_FBC_TRYIT               \
1254 if ((!reginfo || regtry(reginfo, &s))) \
1255     goto got_it
1256 
1257 #define REXEC_FBC_CSCAN(CoNdUtF8,CoNd)                         \
1258     if (do_utf8) {                                             \
1259 	REXEC_FBC_UTF8_CLASS_SCAN(CoNdUtF8);                   \
1260     }                                                          \
1261     else {                                                     \
1262 	REXEC_FBC_CLASS_SCAN(CoNd);                            \
1263     }                                                          \
1264     break
1265 
1266 #define REXEC_FBC_CSCAN_PRELOAD(UtFpReLoAd,CoNdUtF8,CoNd)      \
1267     if (do_utf8) {                                             \
1268 	UtFpReLoAd;                                            \
1269 	REXEC_FBC_UTF8_CLASS_SCAN(CoNdUtF8);                   \
1270     }                                                          \
1271     else {                                                     \
1272 	REXEC_FBC_CLASS_SCAN(CoNd);                            \
1273     }                                                          \
1274     break
1275 
1276 #define REXEC_FBC_CSCAN_TAINT(CoNdUtF8,CoNd)                   \
1277     PL_reg_flags |= RF_tainted;                                \
1278     if (do_utf8) {                                             \
1279 	REXEC_FBC_UTF8_CLASS_SCAN(CoNdUtF8);                   \
1280     }                                                          \
1281     else {                                                     \
1282 	REXEC_FBC_CLASS_SCAN(CoNd);                            \
1283     }                                                          \
1284     break
1285 
1286 #define DUMP_EXEC_POS(li,s,doutf8) \
1287     dump_exec_pos(li,s,(PL_regeol),(PL_bostr),(PL_reg_starttry),doutf8)
1288 
1289 /* We know what class REx starts with.  Try to find this position... */
1290 /* if reginfo is NULL, its a dryrun */
1291 /* annoyingly all the vars in this routine have different names from their counterparts
1292    in regmatch. /grrr */
1293 
1294 STATIC char *
1295 S_find_byclass(pTHX_ regexp * prog, const regnode *c, char *s,
1296     const char *strend, regmatch_info *reginfo)
1297 {
1298 	dVAR;
1299 	const I32 doevery = (prog->intflags & PREGf_SKIP) == 0;
1300 	char *m;
1301 	STRLEN ln;
1302 	STRLEN lnc;
1303 	register STRLEN uskip;
1304 	unsigned int c1;
1305 	unsigned int c2;
1306 	char *e;
1307 	register I32 tmp = 1;	/* Scratch variable? */
1308 	register const bool do_utf8 = PL_reg_match_utf8;
1309         RXi_GET_DECL(prog,progi);
1310 
1311 	PERL_ARGS_ASSERT_FIND_BYCLASS;
1312 
1313 	/* We know what class it must start with. */
1314 	switch (OP(c)) {
1315 	case ANYOF:
1316 	    if (do_utf8) {
1317 		 REXEC_FBC_UTF8_CLASS_SCAN((ANYOF_FLAGS(c) & ANYOF_UNICODE) ||
1318 			  !UTF8_IS_INVARIANT((U8)s[0]) ?
1319 			  reginclass(prog, c, (U8*)s, 0, do_utf8) :
1320 			  REGINCLASS(prog, c, (U8*)s));
1321 	    }
1322 	    else {
1323 		 while (s < strend) {
1324 		      STRLEN skip = 1;
1325 
1326 		      if (REGINCLASS(prog, c, (U8*)s) ||
1327 			  (ANYOF_FOLD_SHARP_S(c, s, strend) &&
1328 			   /* The assignment of 2 is intentional:
1329 			    * for the folded sharp s, the skip is 2. */
1330 			   (skip = SHARP_S_SKIP))) {
1331 			   if (tmp && (!reginfo || regtry(reginfo, &s)))
1332 				goto got_it;
1333 			   else
1334 				tmp = doevery;
1335 		      }
1336 		      else
1337 			   tmp = 1;
1338 		      s += skip;
1339 		 }
1340 	    }
1341 	    break;
1342 	case CANY:
1343 	    REXEC_FBC_SCAN(
1344 	        if (tmp && (!reginfo || regtry(reginfo, &s)))
1345 		    goto got_it;
1346 		else
1347 		    tmp = doevery;
1348 	    );
1349 	    break;
1350 	case EXACTF:
1351 	    m   = STRING(c);
1352 	    ln  = STR_LEN(c);	/* length to match in octets/bytes */
1353 	    lnc = (I32) ln;	/* length to match in characters */
1354 	    if (UTF) {
1355 	        STRLEN ulen1, ulen2;
1356 		U8 *sm = (U8 *) m;
1357 		U8 tmpbuf1[UTF8_MAXBYTES_CASE+1];
1358 		U8 tmpbuf2[UTF8_MAXBYTES_CASE+1];
1359 		/* used by commented-out code below */
1360 		/*const U32 uniflags = UTF8_ALLOW_DEFAULT;*/
1361 
1362                 /* XXX: Since the node will be case folded at compile
1363                    time this logic is a little odd, although im not
1364                    sure that its actually wrong. --dmq */
1365 
1366 		c1 = to_utf8_lower((U8*)m, tmpbuf1, &ulen1);
1367 		c2 = to_utf8_upper((U8*)m, tmpbuf2, &ulen2);
1368 
1369 		/* XXX: This is kinda strange. to_utf8_XYZ returns the
1370                    codepoint of the first character in the converted
1371                    form, yet originally we did the extra step.
1372                    No tests fail by commenting this code out however
1373                    so Ive left it out. -- dmq.
1374 
1375 		c1 = utf8n_to_uvchr(tmpbuf1, UTF8_MAXBYTES_CASE,
1376 				    0, uniflags);
1377 		c2 = utf8n_to_uvchr(tmpbuf2, UTF8_MAXBYTES_CASE,
1378 				    0, uniflags);
1379                 */
1380 
1381 		lnc = 0;
1382 		while (sm < ((U8 *) m + ln)) {
1383 		    lnc++;
1384 		    sm += UTF8SKIP(sm);
1385 		}
1386 	    }
1387 	    else {
1388 		c1 = *(U8*)m;
1389 		c2 = PL_fold[c1];
1390 	    }
1391 	    goto do_exactf;
1392 	case EXACTFL:
1393 	    m   = STRING(c);
1394 	    ln  = STR_LEN(c);
1395 	    lnc = (I32) ln;
1396 	    c1 = *(U8*)m;
1397 	    c2 = PL_fold_locale[c1];
1398 	  do_exactf:
1399 	    e = HOP3c(strend, -((I32)lnc), s);
1400 
1401 	    if (!reginfo && e < s)
1402 		e = s;			/* Due to minlen logic of intuit() */
1403 
1404 	    /* The idea in the EXACTF* cases is to first find the
1405 	     * first character of the EXACTF* node and then, if
1406 	     * necessary, case-insensitively compare the full
1407 	     * text of the node.  The c1 and c2 are the first
1408 	     * characters (though in Unicode it gets a bit
1409 	     * more complicated because there are more cases
1410 	     * than just upper and lower: one needs to use
1411 	     * the so-called folding case for case-insensitive
1412 	     * matching (called "loose matching" in Unicode).
1413 	     * ibcmp_utf8() will do just that. */
1414 
1415 	    if (do_utf8 || UTF) {
1416 	        UV c, f;
1417 	        U8 tmpbuf [UTF8_MAXBYTES+1];
1418 		STRLEN len = 1;
1419 		STRLEN foldlen;
1420 		const U32 uniflags = UTF8_ALLOW_DEFAULT;
1421 		if (c1 == c2) {
1422 		    /* Upper and lower of 1st char are equal -
1423 		     * probably not a "letter". */
1424 		    while (s <= e) {
1425 		        if (do_utf8) {
1426 		            c = utf8n_to_uvchr((U8*)s, UTF8_MAXBYTES, &len,
1427 					   uniflags);
1428                         } else {
1429                             c = *((U8*)s);
1430                         }
1431 			REXEC_FBC_EXACTISH_CHECK(c == c1);
1432 		    }
1433 		}
1434 		else {
1435 		    while (s <= e) {
1436 		        if (do_utf8) {
1437 		            c = utf8n_to_uvchr((U8*)s, UTF8_MAXBYTES, &len,
1438 					   uniflags);
1439                         } else {
1440                             c = *((U8*)s);
1441                         }
1442 
1443 			/* Handle some of the three Greek sigmas cases.
1444 			 * Note that not all the possible combinations
1445 			 * are handled here: some of them are handled
1446 			 * by the standard folding rules, and some of
1447 			 * them (the character class or ANYOF cases)
1448 			 * are handled during compiletime in
1449 			 * regexec.c:S_regclass(). */
1450 			if (c == (UV)UNICODE_GREEK_CAPITAL_LETTER_SIGMA ||
1451 			    c == (UV)UNICODE_GREEK_SMALL_LETTER_FINAL_SIGMA)
1452 			    c = (UV)UNICODE_GREEK_SMALL_LETTER_SIGMA;
1453 
1454 			REXEC_FBC_EXACTISH_CHECK(c == c1 || c == c2);
1455 		    }
1456 		}
1457 	    }
1458 	    else {
1459 	        /* Neither pattern nor string are UTF8 */
1460 		if (c1 == c2)
1461 		    REXEC_FBC_EXACTISH_SCAN(*(U8*)s == c1);
1462 		else
1463 		    REXEC_FBC_EXACTISH_SCAN(*(U8*)s == c1 || *(U8*)s == c2);
1464 	    }
1465 	    break;
1466 	case BOUNDL:
1467 	    PL_reg_flags |= RF_tainted;
1468 	    /* FALL THROUGH */
1469 	case BOUND:
1470 	    if (do_utf8) {
1471 		if (s == PL_bostr)
1472 		    tmp = '\n';
1473 		else {
1474 		    U8 * const r = reghop3((U8*)s, -1, (U8*)PL_bostr);
1475 		    tmp = utf8n_to_uvchr(r, UTF8SKIP(r), 0, UTF8_ALLOW_DEFAULT);
1476 		}
1477 		tmp = ((OP(c) == BOUND ?
1478 			isALNUM_uni(tmp) : isALNUM_LC_uvchr(UNI_TO_NATIVE(tmp))) != 0);
1479 		LOAD_UTF8_CHARCLASS_ALNUM();
1480 		REXEC_FBC_UTF8_SCAN(
1481 		    if (tmp == !(OP(c) == BOUND ?
1482 				 (bool)swash_fetch(PL_utf8_alnum, (U8*)s, do_utf8) :
1483 				 isALNUM_LC_utf8((U8*)s)))
1484 		    {
1485 			tmp = !tmp;
1486 			REXEC_FBC_TRYIT;
1487 		}
1488 		);
1489 	    }
1490 	    else {
1491 		tmp = (s != PL_bostr) ? UCHARAT(s - 1) : '\n';
1492 		tmp = ((OP(c) == BOUND ? isALNUM(tmp) : isALNUM_LC(tmp)) != 0);
1493 		REXEC_FBC_SCAN(
1494 		    if (tmp ==
1495 			!(OP(c) == BOUND ? isALNUM(*s) : isALNUM_LC(*s))) {
1496 			tmp = !tmp;
1497 			REXEC_FBC_TRYIT;
1498 		}
1499 		);
1500 	    }
1501 	    if ((!prog->minlen && tmp) && (!reginfo || regtry(reginfo, &s)))
1502 		goto got_it;
1503 	    break;
1504 	case NBOUNDL:
1505 	    PL_reg_flags |= RF_tainted;
1506 	    /* FALL THROUGH */
1507 	case NBOUND:
1508 	    if (do_utf8) {
1509 		if (s == PL_bostr)
1510 		    tmp = '\n';
1511 		else {
1512 		    U8 * const r = reghop3((U8*)s, -1, (U8*)PL_bostr);
1513 		    tmp = utf8n_to_uvchr(r, UTF8SKIP(r), 0, UTF8_ALLOW_DEFAULT);
1514 		}
1515 		tmp = ((OP(c) == NBOUND ?
1516 			isALNUM_uni(tmp) : isALNUM_LC_uvchr(UNI_TO_NATIVE(tmp))) != 0);
1517 		LOAD_UTF8_CHARCLASS_ALNUM();
1518 		REXEC_FBC_UTF8_SCAN(
1519 		    if (tmp == !(OP(c) == NBOUND ?
1520 				 (bool)swash_fetch(PL_utf8_alnum, (U8*)s, do_utf8) :
1521 				 isALNUM_LC_utf8((U8*)s)))
1522 			tmp = !tmp;
1523 		    else REXEC_FBC_TRYIT;
1524 		);
1525 	    }
1526 	    else {
1527 		tmp = (s != PL_bostr) ? UCHARAT(s - 1) : '\n';
1528 		tmp = ((OP(c) == NBOUND ?
1529 			isALNUM(tmp) : isALNUM_LC(tmp)) != 0);
1530 		REXEC_FBC_SCAN(
1531 		    if (tmp ==
1532 			!(OP(c) == NBOUND ? isALNUM(*s) : isALNUM_LC(*s)))
1533 			tmp = !tmp;
1534 		    else REXEC_FBC_TRYIT;
1535 		);
1536 	    }
1537 	    if ((!prog->minlen && !tmp) && (!reginfo || regtry(reginfo, &s)))
1538 		goto got_it;
1539 	    break;
1540 	case ALNUM:
1541 	    REXEC_FBC_CSCAN_PRELOAD(
1542 		LOAD_UTF8_CHARCLASS_PERL_WORD(),
1543 		swash_fetch(RE_utf8_perl_word, (U8*)s, do_utf8),
1544 		isALNUM(*s)
1545 	    );
1546 	case ALNUML:
1547 	    REXEC_FBC_CSCAN_TAINT(
1548 		isALNUM_LC_utf8((U8*)s),
1549 		isALNUM_LC(*s)
1550 	    );
1551 	case NALNUM:
1552 	    REXEC_FBC_CSCAN_PRELOAD(
1553 		LOAD_UTF8_CHARCLASS_PERL_WORD(),
1554 		!swash_fetch(RE_utf8_perl_word, (U8*)s, do_utf8),
1555 		!isALNUM(*s)
1556 	    );
1557 	case NALNUML:
1558 	    REXEC_FBC_CSCAN_TAINT(
1559 		!isALNUM_LC_utf8((U8*)s),
1560 		!isALNUM_LC(*s)
1561 	    );
1562 	case SPACE:
1563 	    REXEC_FBC_CSCAN_PRELOAD(
1564 		LOAD_UTF8_CHARCLASS_PERL_SPACE(),
1565 		*s == ' ' || swash_fetch(RE_utf8_perl_space,(U8*)s, do_utf8),
1566 		isSPACE(*s)
1567 	    );
1568 	case SPACEL:
1569 	    REXEC_FBC_CSCAN_TAINT(
1570 		*s == ' ' || isSPACE_LC_utf8((U8*)s),
1571 		isSPACE_LC(*s)
1572 	    );
1573 	case NSPACE:
1574 	    REXEC_FBC_CSCAN_PRELOAD(
1575 		LOAD_UTF8_CHARCLASS_PERL_SPACE(),
1576 		!(*s == ' ' || swash_fetch(RE_utf8_perl_space,(U8*)s, do_utf8)),
1577 		!isSPACE(*s)
1578 	    );
1579 	case NSPACEL:
1580 	    REXEC_FBC_CSCAN_TAINT(
1581 		!(*s == ' ' || isSPACE_LC_utf8((U8*)s)),
1582 		!isSPACE_LC(*s)
1583 	    );
1584 	case DIGIT:
1585 	    REXEC_FBC_CSCAN_PRELOAD(
1586 		LOAD_UTF8_CHARCLASS_POSIX_DIGIT(),
1587 		swash_fetch(RE_utf8_posix_digit,(U8*)s, do_utf8),
1588 		isDIGIT(*s)
1589 	    );
1590 	case DIGITL:
1591 	    REXEC_FBC_CSCAN_TAINT(
1592 		isDIGIT_LC_utf8((U8*)s),
1593 		isDIGIT_LC(*s)
1594 	    );
1595 	case NDIGIT:
1596 	    REXEC_FBC_CSCAN_PRELOAD(
1597 		LOAD_UTF8_CHARCLASS_POSIX_DIGIT(),
1598 		!swash_fetch(RE_utf8_posix_digit,(U8*)s, do_utf8),
1599 		!isDIGIT(*s)
1600 	    );
1601 	case NDIGITL:
1602 	    REXEC_FBC_CSCAN_TAINT(
1603 		!isDIGIT_LC_utf8((U8*)s),
1604 		!isDIGIT_LC(*s)
1605 	    );
1606 	case LNBREAK:
1607 	    REXEC_FBC_CSCAN(
1608 		is_LNBREAK_utf8(s),
1609 		is_LNBREAK_latin1(s)
1610 	    );
1611 	case VERTWS:
1612 	    REXEC_FBC_CSCAN(
1613 		is_VERTWS_utf8(s),
1614 		is_VERTWS_latin1(s)
1615 	    );
1616 	case NVERTWS:
1617 	    REXEC_FBC_CSCAN(
1618 		!is_VERTWS_utf8(s),
1619 		!is_VERTWS_latin1(s)
1620 	    );
1621 	case HORIZWS:
1622 	    REXEC_FBC_CSCAN(
1623 		is_HORIZWS_utf8(s),
1624 		is_HORIZWS_latin1(s)
1625 	    );
1626 	case NHORIZWS:
1627 	    REXEC_FBC_CSCAN(
1628 		!is_HORIZWS_utf8(s),
1629 		!is_HORIZWS_latin1(s)
1630 	    );
1631 	case AHOCORASICKC:
1632 	case AHOCORASICK:
1633 	    {
1634 	        DECL_TRIE_TYPE(c);
1635                 /* what trie are we using right now */
1636         	reg_ac_data *aho
1637         	    = (reg_ac_data*)progi->data->data[ ARG( c ) ];
1638         	reg_trie_data *trie
1639 		    = (reg_trie_data*)progi->data->data[ aho->trie ];
1640 		HV *widecharmap = MUTABLE_HV(progi->data->data[ aho->trie + 1 ]);
1641 
1642 		const char *last_start = strend - trie->minlen;
1643 #ifdef DEBUGGING
1644 		const char *real_start = s;
1645 #endif
1646 		STRLEN maxlen = trie->maxlen;
1647 		SV *sv_points;
1648 		U8 **points; /* map of where we were in the input string
1649 		                when reading a given char. For ASCII this
1650 		                is unnecessary overhead as the relationship
1651 		                is always 1:1, but for Unicode, especially
1652 		                case folded Unicode this is not true. */
1653 		U8 foldbuf[ UTF8_MAXBYTES_CASE + 1 ];
1654 		U8 *bitmap=NULL;
1655 
1656 
1657                 GET_RE_DEBUG_FLAGS_DECL;
1658 
1659                 /* We can't just allocate points here. We need to wrap it in
1660                  * an SV so it gets freed properly if there is a croak while
1661                  * running the match */
1662                 ENTER;
1663 	        SAVETMPS;
1664                 sv_points=newSV(maxlen * sizeof(U8 *));
1665                 SvCUR_set(sv_points,
1666                     maxlen * sizeof(U8 *));
1667                 SvPOK_on(sv_points);
1668                 sv_2mortal(sv_points);
1669                 points=(U8**)SvPV_nolen(sv_points );
1670                 if ( trie_type != trie_utf8_fold
1671                      && (trie->bitmap || OP(c)==AHOCORASICKC) )
1672                 {
1673                     if (trie->bitmap)
1674                         bitmap=(U8*)trie->bitmap;
1675                     else
1676                         bitmap=(U8*)ANYOF_BITMAP(c);
1677                 }
1678                 /* this is the Aho-Corasick algorithm modified a touch
1679                    to include special handling for long "unknown char"
1680                    sequences. The basic idea being that we use AC as long
1681                    as we are dealing with a possible matching char, when
1682                    we encounter an unknown char (and we have not encountered
1683                    an accepting state) we scan forward until we find a legal
1684                    starting char.
1685                    AC matching is basically that of trie matching, except
1686                    that when we encounter a failing transition, we fall back
1687                    to the current states "fail state", and try the current char
1688                    again, a process we repeat until we reach the root state,
1689                    state 1, or a legal transition. If we fail on the root state
1690                    then we can either terminate if we have reached an accepting
1691                    state previously, or restart the entire process from the beginning
1692                    if we have not.
1693 
1694                  */
1695                 while (s <= last_start) {
1696                     const U32 uniflags = UTF8_ALLOW_DEFAULT;
1697                     U8 *uc = (U8*)s;
1698                     U16 charid = 0;
1699                     U32 base = 1;
1700                     U32 state = 1;
1701                     UV uvc = 0;
1702                     STRLEN len = 0;
1703                     STRLEN foldlen = 0;
1704                     U8 *uscan = (U8*)NULL;
1705                     U8 *leftmost = NULL;
1706 #ifdef DEBUGGING
1707                     U32 accepted_word= 0;
1708 #endif
1709                     U32 pointpos = 0;
1710 
1711                     while ( state && uc <= (U8*)strend ) {
1712                         int failed=0;
1713                         U32 word = aho->states[ state ].wordnum;
1714 
1715                         if( state==1 ) {
1716                             if ( bitmap ) {
1717                                 DEBUG_TRIE_EXECUTE_r(
1718                                     if ( uc <= (U8*)last_start && !BITMAP_TEST(bitmap,*uc) ) {
1719                                         dump_exec_pos( (char *)uc, c, strend, real_start,
1720                                             (char *)uc, do_utf8 );
1721                                         PerlIO_printf( Perl_debug_log,
1722                                             " Scanning for legal start char...\n");
1723                                     }
1724                                 );
1725                                 while ( uc <= (U8*)last_start  && !BITMAP_TEST(bitmap,*uc) ) {
1726                                     uc++;
1727                                 }
1728                                 s= (char *)uc;
1729                             }
1730                             if (uc >(U8*)last_start) break;
1731                         }
1732 
1733                         if ( word ) {
1734                             U8 *lpos= points[ (pointpos - trie->wordlen[word-1] ) % maxlen ];
1735                             if (!leftmost || lpos < leftmost) {
1736                                 DEBUG_r(accepted_word=word);
1737                                 leftmost= lpos;
1738                             }
1739                             if (base==0) break;
1740 
1741                         }
1742                         points[pointpos++ % maxlen]= uc;
1743 			REXEC_TRIE_READ_CHAR(trie_type, trie, widecharmap, uc,
1744 					     uscan, len, uvc, charid, foldlen,
1745 					     foldbuf, uniflags);
1746                         DEBUG_TRIE_EXECUTE_r({
1747                             dump_exec_pos( (char *)uc, c, strend, real_start,
1748                                 s,   do_utf8 );
1749                             PerlIO_printf(Perl_debug_log,
1750                                 " Charid:%3u CP:%4"UVxf" ",
1751                                  charid, uvc);
1752                         });
1753 
1754                         do {
1755 #ifdef DEBUGGING
1756                             word = aho->states[ state ].wordnum;
1757 #endif
1758                             base = aho->states[ state ].trans.base;
1759 
1760                             DEBUG_TRIE_EXECUTE_r({
1761                                 if (failed)
1762                                     dump_exec_pos( (char *)uc, c, strend, real_start,
1763                                         s,   do_utf8 );
1764                                 PerlIO_printf( Perl_debug_log,
1765                                     "%sState: %4"UVxf", word=%"UVxf,
1766                                     failed ? " Fail transition to " : "",
1767                                     (UV)state, (UV)word);
1768                             });
1769                             if ( base ) {
1770                                 U32 tmp;
1771                                 if (charid &&
1772                                      (base + charid > trie->uniquecharcount )
1773                                      && (base + charid - 1 - trie->uniquecharcount
1774                                             < trie->lasttrans)
1775                                      && trie->trans[base + charid - 1 -
1776                                             trie->uniquecharcount].check == state
1777                                      && (tmp=trie->trans[base + charid - 1 -
1778                                         trie->uniquecharcount ].next))
1779                                 {
1780                                     DEBUG_TRIE_EXECUTE_r(
1781                                         PerlIO_printf( Perl_debug_log," - legal\n"));
1782                                     state = tmp;
1783                                     break;
1784                                 }
1785                                 else {
1786                                     DEBUG_TRIE_EXECUTE_r(
1787                                         PerlIO_printf( Perl_debug_log," - fail\n"));
1788                                     failed = 1;
1789                                     state = aho->fail[state];
1790                                 }
1791                             }
1792                             else {
1793                                 /* we must be accepting here */
1794                                 DEBUG_TRIE_EXECUTE_r(
1795                                         PerlIO_printf( Perl_debug_log," - accepting\n"));
1796                                 failed = 1;
1797                                 break;
1798                             }
1799                         } while(state);
1800                         uc += len;
1801                         if (failed) {
1802                             if (leftmost)
1803                                 break;
1804                             if (!state) state = 1;
1805                         }
1806                     }
1807                     if ( aho->states[ state ].wordnum ) {
1808                         U8 *lpos = points[ (pointpos - trie->wordlen[aho->states[ state ].wordnum-1]) % maxlen ];
1809                         if (!leftmost || lpos < leftmost) {
1810                             DEBUG_r(accepted_word=aho->states[ state ].wordnum);
1811                             leftmost = lpos;
1812                         }
1813                     }
1814                     if (leftmost) {
1815                         s = (char*)leftmost;
1816                         DEBUG_TRIE_EXECUTE_r({
1817                             PerlIO_printf(
1818                                 Perl_debug_log,"Matches word #%"UVxf" at position %"IVdf". Trying full pattern...\n",
1819                                 (UV)accepted_word, (IV)(s - real_start)
1820                             );
1821                         });
1822                         if (!reginfo || regtry(reginfo, &s)) {
1823                             FREETMPS;
1824 		            LEAVE;
1825                             goto got_it;
1826                         }
1827                         s = HOPc(s,1);
1828                         DEBUG_TRIE_EXECUTE_r({
1829                             PerlIO_printf( Perl_debug_log,"Pattern failed. Looking for new start point...\n");
1830                         });
1831                     } else {
1832                         DEBUG_TRIE_EXECUTE_r(
1833                             PerlIO_printf( Perl_debug_log,"No match.\n"));
1834                         break;
1835                     }
1836                 }
1837                 FREETMPS;
1838                 LEAVE;
1839 	    }
1840 	    break;
1841 	default:
1842 	    Perl_croak(aTHX_ "panic: unknown regstclass %d", (int)OP(c));
1843 	    break;
1844 	}
1845 	return 0;
1846       got_it:
1847 	return s;
1848 }
1849 
1850 
1851 /*
1852  - regexec_flags - match a regexp against a string
1853  */
1854 I32
1855 Perl_regexec_flags(pTHX_ REGEXP * const rx, char *stringarg, register char *strend,
1856 	      char *strbeg, I32 minend, SV *sv, void *data, U32 flags)
1857 /* strend: pointer to null at end of string */
1858 /* strbeg: real beginning of string */
1859 /* minend: end of match must be >=minend after stringarg. */
1860 /* data: May be used for some additional optimizations.
1861          Currently its only used, with a U32 cast, for transmitting
1862          the ganch offset when doing a /g match. This will change */
1863 /* nosave: For optimizations. */
1864 {
1865     dVAR;
1866     struct regexp *const prog = (struct regexp *)SvANY(rx);
1867     /*register*/ char *s;
1868     register regnode *c;
1869     /*register*/ char *startpos = stringarg;
1870     I32 minlen;		/* must match at least this many chars */
1871     I32 dontbother = 0;	/* how many characters not to try at end */
1872     I32 end_shift = 0;			/* Same for the end. */		/* CC */
1873     I32 scream_pos = -1;		/* Internal iterator of scream. */
1874     char *scream_olds = NULL;
1875     const bool do_utf8 = (bool)DO_UTF8(sv);
1876     I32 multiline;
1877     RXi_GET_DECL(prog,progi);
1878     regmatch_info reginfo;  /* create some info to pass to regtry etc */
1879     regexp_paren_pair *swap = NULL;
1880     GET_RE_DEBUG_FLAGS_DECL;
1881 
1882     PERL_ARGS_ASSERT_REGEXEC_FLAGS;
1883     PERL_UNUSED_ARG(data);
1884 
1885     /* Be paranoid... */
1886     if (prog == NULL || startpos == NULL) {
1887 	Perl_croak(aTHX_ "NULL regexp parameter");
1888 	return 0;
1889     }
1890 
1891     multiline = prog->extflags & RXf_PMf_MULTILINE;
1892     reginfo.prog = rx;	 /* Yes, sorry that this is confusing.  */
1893 
1894     RX_MATCH_UTF8_set(rx, do_utf8);
1895     DEBUG_EXECUTE_r(
1896         debug_start_match(rx, do_utf8, startpos, strend,
1897         "Matching");
1898     );
1899 
1900     minlen = prog->minlen;
1901 
1902     if (strend - startpos < (minlen+(prog->check_offset_min<0?prog->check_offset_min:0))) {
1903         DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
1904 			      "String too short [regexec_flags]...\n"));
1905 	goto phooey;
1906     }
1907 
1908 
1909     /* Check validity of program. */
1910     if (UCHARAT(progi->program) != REG_MAGIC) {
1911 	Perl_croak(aTHX_ "corrupted regexp program");
1912     }
1913 
1914     PL_reg_flags = 0;
1915     PL_reg_eval_set = 0;
1916     PL_reg_maxiter = 0;
1917 
1918     if (RX_UTF8(rx))
1919 	PL_reg_flags |= RF_utf8;
1920 
1921     /* Mark beginning of line for ^ and lookbehind. */
1922     reginfo.bol = startpos; /* XXX not used ??? */
1923     PL_bostr  = strbeg;
1924     reginfo.sv = sv;
1925 
1926     /* Mark end of line for $ (and such) */
1927     PL_regeol = strend;
1928 
1929     /* see how far we have to get to not match where we matched before */
1930     reginfo.till = startpos+minend;
1931 
1932     /* If there is a "must appear" string, look for it. */
1933     s = startpos;
1934 
1935     if (prog->extflags & RXf_GPOS_SEEN) { /* Need to set reginfo->ganch */
1936 	MAGIC *mg;
1937 	if (flags & REXEC_IGNOREPOS){	/* Means: check only at start */
1938 	    reginfo.ganch = startpos + prog->gofs;
1939 	    DEBUG_GPOS_r(PerlIO_printf(Perl_debug_log,
1940 	      "GPOS IGNOREPOS: reginfo.ganch = startpos + %"UVxf"\n",(UV)prog->gofs));
1941 	} else if (sv && SvTYPE(sv) >= SVt_PVMG
1942 		  && SvMAGIC(sv)
1943 		  && (mg = mg_find(sv, PERL_MAGIC_regex_global))
1944 		  && mg->mg_len >= 0) {
1945 	    reginfo.ganch = strbeg + mg->mg_len;	/* Defined pos() */
1946 	    DEBUG_GPOS_r(PerlIO_printf(Perl_debug_log,
1947 		"GPOS MAGIC: reginfo.ganch = strbeg + %"IVdf"\n",(IV)mg->mg_len));
1948 
1949 	    if (prog->extflags & RXf_ANCH_GPOS) {
1950 	        if (s > reginfo.ganch)
1951 		    goto phooey;
1952 		s = reginfo.ganch - prog->gofs;
1953 	        DEBUG_GPOS_r(PerlIO_printf(Perl_debug_log,
1954 		     "GPOS ANCH_GPOS: s = ganch - %"UVxf"\n",(UV)prog->gofs));
1955 		if (s < strbeg)
1956 		    goto phooey;
1957 	    }
1958 	}
1959 	else if (data) {
1960 	    reginfo.ganch = strbeg + PTR2UV(data);
1961             DEBUG_GPOS_r(PerlIO_printf(Perl_debug_log,
1962 		 "GPOS DATA: reginfo.ganch= strbeg + %"UVxf"\n",PTR2UV(data)));
1963 
1964 	} else {				/* pos() not defined */
1965 	    reginfo.ganch = strbeg;
1966             DEBUG_GPOS_r(PerlIO_printf(Perl_debug_log,
1967 		 "GPOS: reginfo.ganch = strbeg\n"));
1968 	}
1969     }
1970     if (PL_curpm && (PM_GETRE(PL_curpm) == rx)) {
1971         /* We have to be careful. If the previous successful match
1972            was from this regex we don't want a subsequent partially
1973            successful match to clobber the old results.
1974            So when we detect this possibility we add a swap buffer
1975            to the re, and switch the buffer each match. If we fail
1976            we switch it back, otherwise we leave it swapped.
1977         */
1978         swap = prog->offs;
1979         /* do we need a save destructor here for eval dies? */
1980         Newxz(prog->offs, (prog->nparens + 1), regexp_paren_pair);
1981     }
1982     if (!(flags & REXEC_CHECKED) && (prog->check_substr != NULL || prog->check_utf8 != NULL)) {
1983 	re_scream_pos_data d;
1984 
1985 	d.scream_olds = &scream_olds;
1986 	d.scream_pos = &scream_pos;
1987 	s = re_intuit_start(rx, sv, s, strend, flags, &d);
1988 	if (!s) {
1989 	    DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "Not present...\n"));
1990 	    goto phooey;	/* not present */
1991 	}
1992     }
1993 
1994 
1995 
1996     /* Simplest case:  anchored match need be tried only once. */
1997     /*  [unless only anchor is BOL and multiline is set] */
1998     if (prog->extflags & (RXf_ANCH & ~RXf_ANCH_GPOS)) {
1999 	if (s == startpos && regtry(&reginfo, &startpos))
2000 	    goto got_it;
2001 	else if (multiline || (prog->intflags & PREGf_IMPLICIT)
2002 		 || (prog->extflags & RXf_ANCH_MBOL)) /* XXXX SBOL? */
2003 	{
2004 	    char *end;
2005 
2006 	    if (minlen)
2007 		dontbother = minlen - 1;
2008 	    end = HOP3c(strend, -dontbother, strbeg) - 1;
2009 	    /* for multiline we only have to try after newlines */
2010 	    if (prog->check_substr || prog->check_utf8) {
2011                 /* because of the goto we can not easily reuse the macros for bifurcating the
2012                    unicode/non-unicode match modes here like we do elsewhere - demerphq */
2013                 if (do_utf8) {
2014                     if (s == startpos)
2015                         goto after_try_utf8;
2016                     while (1) {
2017                         if (regtry(&reginfo, &s)) {
2018                             goto got_it;
2019                         }
2020                       after_try_utf8:
2021                         if (s > end) {
2022                             goto phooey;
2023                         }
2024                         if (prog->extflags & RXf_USE_INTUIT) {
2025                             s = re_intuit_start(rx, sv, s + UTF8SKIP(s), strend, flags, NULL);
2026                             if (!s) {
2027                                 goto phooey;
2028                             }
2029                         }
2030                         else {
2031                             s += UTF8SKIP(s);
2032                         }
2033                     }
2034                 } /* end search for check string in unicode */
2035                 else {
2036                     if (s == startpos) {
2037                         goto after_try_latin;
2038                     }
2039                     while (1) {
2040                         if (regtry(&reginfo, &s)) {
2041                             goto got_it;
2042                         }
2043                       after_try_latin:
2044                         if (s > end) {
2045                             goto phooey;
2046                         }
2047                         if (prog->extflags & RXf_USE_INTUIT) {
2048                             s = re_intuit_start(rx, sv, s + 1, strend, flags, NULL);
2049                             if (!s) {
2050                                 goto phooey;
2051                             }
2052                         }
2053                         else {
2054                             s++;
2055                         }
2056                     }
2057                 } /* end search for check string in latin*/
2058 	    } /* end search for check string */
2059 	    else { /* search for newline */
2060 		if (s > startpos) {
2061                     /*XXX: The s-- is almost definitely wrong here under unicode - demeprhq*/
2062 		    s--;
2063 		}
2064                 /* We can use a more efficient search as newlines are the same in unicode as they are in latin */
2065 		while (s < end) {
2066 		    if (*s++ == '\n') {	/* don't need PL_utf8skip here */
2067 			if (regtry(&reginfo, &s))
2068 			    goto got_it;
2069 		    }
2070 		}
2071 	    } /* end search for newline */
2072 	} /* end anchored/multiline check string search */
2073 	goto phooey;
2074     } else if (RXf_GPOS_CHECK == (prog->extflags & RXf_GPOS_CHECK))
2075     {
2076         /* the warning about reginfo.ganch being used without intialization
2077            is bogus -- we set it above, when prog->extflags & RXf_GPOS_SEEN
2078            and we only enter this block when the same bit is set. */
2079         char *tmp_s = reginfo.ganch - prog->gofs;
2080 
2081 	if (tmp_s >= strbeg && regtry(&reginfo, &tmp_s))
2082 	    goto got_it;
2083 	goto phooey;
2084     }
2085 
2086     /* Messy cases:  unanchored match. */
2087     if ((prog->anchored_substr || prog->anchored_utf8) && prog->intflags & PREGf_SKIP) {
2088 	/* we have /x+whatever/ */
2089 	/* it must be a one character string (XXXX Except UTF?) */
2090 	char ch;
2091 #ifdef DEBUGGING
2092 	int did_match = 0;
2093 #endif
2094 	if (!(do_utf8 ? prog->anchored_utf8 : prog->anchored_substr))
2095 	    do_utf8 ? to_utf8_substr(prog) : to_byte_substr(prog);
2096 	ch = SvPVX_const(do_utf8 ? prog->anchored_utf8 : prog->anchored_substr)[0];
2097 
2098 	if (do_utf8) {
2099 	    REXEC_FBC_SCAN(
2100 		if (*s == ch) {
2101 		    DEBUG_EXECUTE_r( did_match = 1 );
2102 		    if (regtry(&reginfo, &s)) goto got_it;
2103 		    s += UTF8SKIP(s);
2104 		    while (s < strend && *s == ch)
2105 			s += UTF8SKIP(s);
2106 		}
2107 	    );
2108 	}
2109 	else {
2110 	    REXEC_FBC_SCAN(
2111 		if (*s == ch) {
2112 		    DEBUG_EXECUTE_r( did_match = 1 );
2113 		    if (regtry(&reginfo, &s)) goto got_it;
2114 		    s++;
2115 		    while (s < strend && *s == ch)
2116 			s++;
2117 		}
2118 	    );
2119 	}
2120 	DEBUG_EXECUTE_r(if (!did_match)
2121 		PerlIO_printf(Perl_debug_log,
2122                                   "Did not find anchored character...\n")
2123                );
2124     }
2125     else if (prog->anchored_substr != NULL
2126 	      || prog->anchored_utf8 != NULL
2127 	      || ((prog->float_substr != NULL || prog->float_utf8 != NULL)
2128 		  && prog->float_max_offset < strend - s)) {
2129 	SV *must;
2130 	I32 back_max;
2131 	I32 back_min;
2132 	char *last;
2133 	char *last1;		/* Last position checked before */
2134 #ifdef DEBUGGING
2135 	int did_match = 0;
2136 #endif
2137 	if (prog->anchored_substr || prog->anchored_utf8) {
2138 	    if (!(do_utf8 ? prog->anchored_utf8 : prog->anchored_substr))
2139 		do_utf8 ? to_utf8_substr(prog) : to_byte_substr(prog);
2140 	    must = do_utf8 ? prog->anchored_utf8 : prog->anchored_substr;
2141 	    back_max = back_min = prog->anchored_offset;
2142 	} else {
2143 	    if (!(do_utf8 ? prog->float_utf8 : prog->float_substr))
2144 		do_utf8 ? to_utf8_substr(prog) : to_byte_substr(prog);
2145 	    must = do_utf8 ? prog->float_utf8 : prog->float_substr;
2146 	    back_max = prog->float_max_offset;
2147 	    back_min = prog->float_min_offset;
2148 	}
2149 
2150 
2151 	if (must == &PL_sv_undef)
2152 	    /* could not downgrade utf8 check substring, so must fail */
2153 	    goto phooey;
2154 
2155         if (back_min<0) {
2156 	    last = strend;
2157 	} else {
2158             last = HOP3c(strend,	/* Cannot start after this */
2159         	  -(I32)(CHR_SVLEN(must)
2160         		 - (SvTAIL(must) != 0) + back_min), strbeg);
2161         }
2162 	if (s > PL_bostr)
2163 	    last1 = HOPc(s, -1);
2164 	else
2165 	    last1 = s - 1;	/* bogus */
2166 
2167 	/* XXXX check_substr already used to find "s", can optimize if
2168 	   check_substr==must. */
2169 	scream_pos = -1;
2170 	dontbother = end_shift;
2171 	strend = HOPc(strend, -dontbother);
2172 	while ( (s <= last) &&
2173 		((flags & REXEC_SCREAM)
2174 		 ? (s = screaminstr(sv, must, HOP3c(s, back_min, (back_min<0 ? strbeg : strend)) - strbeg,
2175 				    end_shift, &scream_pos, 0))
2176 		 : (s = fbm_instr((unsigned char*)HOP3(s, back_min, (back_min<0 ? strbeg : strend)),
2177 				  (unsigned char*)strend, must,
2178 				  multiline ? FBMrf_MULTILINE : 0))) ) {
2179 	    /* we may be pointing at the wrong string */
2180 	    if ((flags & REXEC_SCREAM) && RXp_MATCH_COPIED(prog))
2181 		s = strbeg + (s - SvPVX_const(sv));
2182 	    DEBUG_EXECUTE_r( did_match = 1 );
2183 	    if (HOPc(s, -back_max) > last1) {
2184 		last1 = HOPc(s, -back_min);
2185 		s = HOPc(s, -back_max);
2186 	    }
2187 	    else {
2188 		char * const t = (last1 >= PL_bostr) ? HOPc(last1, 1) : last1 + 1;
2189 
2190 		last1 = HOPc(s, -back_min);
2191 		s = t;
2192 	    }
2193 	    if (do_utf8) {
2194 		while (s <= last1) {
2195 		    if (regtry(&reginfo, &s))
2196 			goto got_it;
2197 		    s += UTF8SKIP(s);
2198 		}
2199 	    }
2200 	    else {
2201 		while (s <= last1) {
2202 		    if (regtry(&reginfo, &s))
2203 			goto got_it;
2204 		    s++;
2205 		}
2206 	    }
2207 	}
2208 	DEBUG_EXECUTE_r(if (!did_match) {
2209             RE_PV_QUOTED_DECL(quoted, do_utf8, PERL_DEBUG_PAD_ZERO(0),
2210                 SvPVX_const(must), RE_SV_DUMPLEN(must), 30);
2211             PerlIO_printf(Perl_debug_log, "Did not find %s substr %s%s...\n",
2212 			      ((must == prog->anchored_substr || must == prog->anchored_utf8)
2213 			       ? "anchored" : "floating"),
2214                 quoted, RE_SV_TAIL(must));
2215         });
2216 	goto phooey;
2217     }
2218     else if ( (c = progi->regstclass) ) {
2219 	if (minlen) {
2220 	    const OPCODE op = OP(progi->regstclass);
2221 	    /* don't bother with what can't match */
2222 	    if (PL_regkind[op] != EXACT && op != CANY && PL_regkind[op] != TRIE)
2223 	        strend = HOPc(strend, -(minlen - 1));
2224 	}
2225 	DEBUG_EXECUTE_r({
2226 	    SV * const prop = sv_newmortal();
2227 	    regprop(prog, prop, c);
2228 	    {
2229 		RE_PV_QUOTED_DECL(quoted,do_utf8,PERL_DEBUG_PAD_ZERO(1),
2230 		    s,strend-s,60);
2231 		PerlIO_printf(Perl_debug_log,
2232 		    "Matching stclass %.*s against %s (%d chars)\n",
2233 		    (int)SvCUR(prop), SvPVX_const(prop),
2234 		     quoted, (int)(strend - s));
2235 	    }
2236 	});
2237         if (find_byclass(prog, c, s, strend, &reginfo))
2238 	    goto got_it;
2239 	DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "Contradicts stclass... [regexec_flags]\n"));
2240     }
2241     else {
2242 	dontbother = 0;
2243 	if (prog->float_substr != NULL || prog->float_utf8 != NULL) {
2244 	    /* Trim the end. */
2245 	    char *last;
2246 	    SV* float_real;
2247 
2248 	    if (!(do_utf8 ? prog->float_utf8 : prog->float_substr))
2249 		do_utf8 ? to_utf8_substr(prog) : to_byte_substr(prog);
2250 	    float_real = do_utf8 ? prog->float_utf8 : prog->float_substr;
2251 
2252 	    if (flags & REXEC_SCREAM) {
2253 		last = screaminstr(sv, float_real, s - strbeg,
2254 				   end_shift, &scream_pos, 1); /* last one */
2255 		if (!last)
2256 		    last = scream_olds; /* Only one occurrence. */
2257 		/* we may be pointing at the wrong string */
2258 		else if (RXp_MATCH_COPIED(prog))
2259 		    s = strbeg + (s - SvPVX_const(sv));
2260 	    }
2261 	    else {
2262 		STRLEN len;
2263                 const char * const little = SvPV_const(float_real, len);
2264 
2265 		if (SvTAIL(float_real)) {
2266 		    if (memEQ(strend - len + 1, little, len - 1))
2267 			last = strend - len + 1;
2268 		    else if (!multiline)
2269 			last = memEQ(strend - len, little, len)
2270 			    ? strend - len : NULL;
2271 		    else
2272 			goto find_last;
2273 		} else {
2274 		  find_last:
2275 		    if (len)
2276 			last = rninstr(s, strend, little, little + len);
2277 		    else
2278 			last = strend;	/* matching "$" */
2279 		}
2280 	    }
2281 	    if (last == NULL) {
2282 		DEBUG_EXECUTE_r(
2283 		    PerlIO_printf(Perl_debug_log,
2284 			"%sCan't trim the tail, match fails (should not happen)%s\n",
2285 	                PL_colors[4], PL_colors[5]));
2286 		goto phooey; /* Should not happen! */
2287 	    }
2288 	    dontbother = strend - last + prog->float_min_offset;
2289 	}
2290 	if (minlen && (dontbother < minlen))
2291 	    dontbother = minlen - 1;
2292 	strend -= dontbother; 		   /* this one's always in bytes! */
2293 	/* We don't know much -- general case. */
2294 	if (do_utf8) {
2295 	    for (;;) {
2296 		if (regtry(&reginfo, &s))
2297 		    goto got_it;
2298 		if (s >= strend)
2299 		    break;
2300 		s += UTF8SKIP(s);
2301 	    };
2302 	}
2303 	else {
2304 	    do {
2305 		if (regtry(&reginfo, &s))
2306 		    goto got_it;
2307 	    } while (s++ < strend);
2308 	}
2309     }
2310 
2311     /* Failure. */
2312     goto phooey;
2313 
2314 got_it:
2315     Safefree(swap);
2316     RX_MATCH_TAINTED_set(rx, PL_reg_flags & RF_tainted);
2317 
2318     if (PL_reg_eval_set)
2319 	restore_pos(aTHX_ prog);
2320     if (RXp_PAREN_NAMES(prog))
2321         (void)hv_iterinit(RXp_PAREN_NAMES(prog));
2322 
2323     /* make sure $`, $&, $', and $digit will work later */
2324     if ( !(flags & REXEC_NOT_FIRST) ) {
2325 	RX_MATCH_COPY_FREE(rx);
2326 	if (flags & REXEC_COPY_STR) {
2327 	    const I32 i = PL_regeol - startpos + (stringarg - strbeg);
2328 #ifdef PERL_OLD_COPY_ON_WRITE
2329 	    if ((SvIsCOW(sv)
2330 		 || (SvFLAGS(sv) & CAN_COW_MASK) == CAN_COW_FLAGS)) {
2331 		if (DEBUG_C_TEST) {
2332 		    PerlIO_printf(Perl_debug_log,
2333 				  "Copy on write: regexp capture, type %d\n",
2334 				  (int) SvTYPE(sv));
2335 		}
2336 		prog->saved_copy = sv_setsv_cow(prog->saved_copy, sv);
2337 		prog->subbeg = (char *)SvPVX_const(prog->saved_copy);
2338 		assert (SvPOKp(prog->saved_copy));
2339 	    } else
2340 #endif
2341 	    {
2342 		RX_MATCH_COPIED_on(rx);
2343 		s = savepvn(strbeg, i);
2344 		prog->subbeg = s;
2345 	    }
2346 	    prog->sublen = i;
2347 	}
2348 	else {
2349 	    prog->subbeg = strbeg;
2350 	    prog->sublen = PL_regeol - strbeg;	/* strend may have been modified */
2351 	}
2352     }
2353 
2354     return 1;
2355 
2356 phooey:
2357     DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "%sMatch failed%s\n",
2358 			  PL_colors[4], PL_colors[5]));
2359     if (PL_reg_eval_set)
2360 	restore_pos(aTHX_ prog);
2361     if (swap) {
2362         /* we failed :-( roll it back */
2363         Safefree(prog->offs);
2364         prog->offs = swap;
2365     }
2366 
2367     return 0;
2368 }
2369 
2370 
2371 /*
2372  - regtry - try match at specific point
2373  */
2374 STATIC I32			/* 0 failure, 1 success */
2375 S_regtry(pTHX_ regmatch_info *reginfo, char **startpos)
2376 {
2377     dVAR;
2378     CHECKPOINT lastcp;
2379     REGEXP *const rx = reginfo->prog;
2380     regexp *const prog = (struct regexp *)SvANY(rx);
2381     RXi_GET_DECL(prog,progi);
2382     GET_RE_DEBUG_FLAGS_DECL;
2383 
2384     PERL_ARGS_ASSERT_REGTRY;
2385 
2386     reginfo->cutpoint=NULL;
2387 
2388     if ((prog->extflags & RXf_EVAL_SEEN) && !PL_reg_eval_set) {
2389 	MAGIC *mg;
2390 
2391 	PL_reg_eval_set = RS_init;
2392 	DEBUG_EXECUTE_r(DEBUG_s(
2393 	    PerlIO_printf(Perl_debug_log, "  setting stack tmpbase at %"IVdf"\n",
2394 			  (IV)(PL_stack_sp - PL_stack_base));
2395 	    ));
2396 	SAVESTACK_CXPOS();
2397 	cxstack[cxstack_ix].blk_oldsp = PL_stack_sp - PL_stack_base;
2398 	/* Otherwise OP_NEXTSTATE will free whatever on stack now.  */
2399 	SAVETMPS;
2400 	/* Apparently this is not needed, judging by wantarray. */
2401 	/* SAVEI8(cxstack[cxstack_ix].blk_gimme);
2402 	   cxstack[cxstack_ix].blk_gimme = G_SCALAR; */
2403 
2404 	if (reginfo->sv) {
2405 	    /* Make $_ available to executed code. */
2406 	    if (reginfo->sv != DEFSV) {
2407 		SAVE_DEFSV;
2408 		DEFSV_set(reginfo->sv);
2409 	    }
2410 
2411 	    if (!(SvTYPE(reginfo->sv) >= SVt_PVMG && SvMAGIC(reginfo->sv)
2412 		  && (mg = mg_find(reginfo->sv, PERL_MAGIC_regex_global)))) {
2413 		/* prepare for quick setting of pos */
2414 #ifdef PERL_OLD_COPY_ON_WRITE
2415 		if (SvIsCOW(reginfo->sv))
2416 		    sv_force_normal_flags(reginfo->sv, 0);
2417 #endif
2418 		mg = sv_magicext(reginfo->sv, NULL, PERL_MAGIC_regex_global,
2419 				 &PL_vtbl_mglob, NULL, 0);
2420 		mg->mg_len = -1;
2421 	    }
2422 	    PL_reg_magic    = mg;
2423 	    PL_reg_oldpos   = mg->mg_len;
2424 	    SAVEDESTRUCTOR_X(restore_pos, prog);
2425         }
2426         if (!PL_reg_curpm) {
2427 	    Newxz(PL_reg_curpm, 1, PMOP);
2428 #ifdef USE_ITHREADS
2429             {
2430 		SV* const repointer = &PL_sv_undef;
2431                 /* this regexp is also owned by the new PL_reg_curpm, which
2432 		   will try to free it.  */
2433                 av_push(PL_regex_padav, repointer);
2434                 PL_reg_curpm->op_pmoffset = av_len(PL_regex_padav);
2435                 PL_regex_pad = AvARRAY(PL_regex_padav);
2436             }
2437 #endif
2438         }
2439 #ifdef USE_ITHREADS
2440 	/* It seems that non-ithreads works both with and without this code.
2441 	   So for efficiency reasons it seems best not to have the code
2442 	   compiled when it is not needed.  */
2443 	/* This is safe against NULLs: */
2444 	ReREFCNT_dec(PM_GETRE(PL_reg_curpm));
2445 	/* PM_reg_curpm owns a reference to this regexp.  */
2446 	ReREFCNT_inc(rx);
2447 #endif
2448 	PM_SETRE(PL_reg_curpm, rx);
2449 	PL_reg_oldcurpm = PL_curpm;
2450 	PL_curpm = PL_reg_curpm;
2451 	if (RXp_MATCH_COPIED(prog)) {
2452 	    /*  Here is a serious problem: we cannot rewrite subbeg,
2453 		since it may be needed if this match fails.  Thus
2454 		$` inside (?{}) could fail... */
2455 	    PL_reg_oldsaved = prog->subbeg;
2456 	    PL_reg_oldsavedlen = prog->sublen;
2457 #ifdef PERL_OLD_COPY_ON_WRITE
2458 	    PL_nrs = prog->saved_copy;
2459 #endif
2460 	    RXp_MATCH_COPIED_off(prog);
2461 	}
2462 	else
2463 	    PL_reg_oldsaved = NULL;
2464 	prog->subbeg = PL_bostr;
2465 	prog->sublen = PL_regeol - PL_bostr; /* strend may have been modified */
2466     }
2467     DEBUG_EXECUTE_r(PL_reg_starttry = *startpos);
2468     prog->offs[0].start = *startpos - PL_bostr;
2469     PL_reginput = *startpos;
2470     PL_reglastparen = &prog->lastparen;
2471     PL_reglastcloseparen = &prog->lastcloseparen;
2472     prog->lastparen = 0;
2473     prog->lastcloseparen = 0;
2474     PL_regsize = 0;
2475     PL_regoffs = prog->offs;
2476     if (PL_reg_start_tmpl <= prog->nparens) {
2477 	PL_reg_start_tmpl = prog->nparens*3/2 + 3;
2478         if(PL_reg_start_tmp)
2479             Renew(PL_reg_start_tmp, PL_reg_start_tmpl, char*);
2480         else
2481             Newx(PL_reg_start_tmp, PL_reg_start_tmpl, char*);
2482     }
2483 
2484     /* XXXX What this code is doing here?!!!  There should be no need
2485        to do this again and again, PL_reglastparen should take care of
2486        this!  --ilya*/
2487 
2488     /* Tests pat.t#187 and split.t#{13,14} seem to depend on this code.
2489      * Actually, the code in regcppop() (which Ilya may be meaning by
2490      * PL_reglastparen), is not needed at all by the test suite
2491      * (op/regexp, op/pat, op/split), but that code is needed otherwise
2492      * this erroneously leaves $1 defined: "1" =~ /^(?:(\d)x)?\d$/
2493      * Meanwhile, this code *is* needed for the
2494      * above-mentioned test suite tests to succeed.  The common theme
2495      * on those tests seems to be returning null fields from matches.
2496      * --jhi updated by dapm */
2497 #if 1
2498     if (prog->nparens) {
2499 	regexp_paren_pair *pp = PL_regoffs;
2500 	register I32 i;
2501 	for (i = prog->nparens; i > (I32)*PL_reglastparen; i--) {
2502 	    ++pp;
2503 	    pp->start = -1;
2504 	    pp->end = -1;
2505 	}
2506     }
2507 #endif
2508     REGCP_SET(lastcp);
2509     if (regmatch(reginfo, progi->program + 1)) {
2510 	PL_regoffs[0].end = PL_reginput - PL_bostr;
2511 	return 1;
2512     }
2513     if (reginfo->cutpoint)
2514         *startpos= reginfo->cutpoint;
2515     REGCP_UNWIND(lastcp);
2516     return 0;
2517 }
2518 
2519 
2520 #define sayYES goto yes
2521 #define sayNO goto no
2522 #define sayNO_SILENT goto no_silent
2523 
2524 /* we dont use STMT_START/END here because it leads to
2525    "unreachable code" warnings, which are bogus, but distracting. */
2526 #define CACHEsayNO \
2527     if (ST.cache_mask) \
2528        PL_reg_poscache[ST.cache_offset] |= ST.cache_mask; \
2529     sayNO
2530 
2531 /* this is used to determine how far from the left messages like
2532    'failed...' are printed. It should be set such that messages
2533    are inline with the regop output that created them.
2534 */
2535 #define REPORT_CODE_OFF 32
2536 
2537 
2538 /* Make sure there is a test for this +1 options in re_tests */
2539 #define TRIE_INITAL_ACCEPT_BUFFLEN 4;
2540 
2541 #define CHRTEST_UNINIT -1001 /* c1/c2 haven't been calculated yet */
2542 #define CHRTEST_VOID   -1000 /* the c1/c2 "next char" test should be skipped */
2543 
2544 #define SLAB_FIRST(s) (&(s)->states[0])
2545 #define SLAB_LAST(s)  (&(s)->states[PERL_REGMATCH_SLAB_SLOTS-1])
2546 
2547 /* grab a new slab and return the first slot in it */
2548 
2549 STATIC regmatch_state *
2550 S_push_slab(pTHX)
2551 {
2552 #if PERL_VERSION < 9 && !defined(PERL_CORE)
2553     dMY_CXT;
2554 #endif
2555     regmatch_slab *s = PL_regmatch_slab->next;
2556     if (!s) {
2557 	Newx(s, 1, regmatch_slab);
2558 	s->prev = PL_regmatch_slab;
2559 	s->next = NULL;
2560 	PL_regmatch_slab->next = s;
2561     }
2562     PL_regmatch_slab = s;
2563     return SLAB_FIRST(s);
2564 }
2565 
2566 
2567 /* push a new state then goto it */
2568 
2569 #define PUSH_STATE_GOTO(state, node) \
2570     scan = node; \
2571     st->resume_state = state; \
2572     goto push_state;
2573 
2574 /* push a new state with success backtracking, then goto it */
2575 
2576 #define PUSH_YES_STATE_GOTO(state, node) \
2577     scan = node; \
2578     st->resume_state = state; \
2579     goto push_yes_state;
2580 
2581 
2582 
2583 /*
2584 
2585 regmatch() - main matching routine
2586 
2587 This is basically one big switch statement in a loop. We execute an op,
2588 set 'next' to point the next op, and continue. If we come to a point which
2589 we may need to backtrack to on failure such as (A|B|C), we push a
2590 backtrack state onto the backtrack stack. On failure, we pop the top
2591 state, and re-enter the loop at the state indicated. If there are no more
2592 states to pop, we return failure.
2593 
2594 Sometimes we also need to backtrack on success; for example /A+/, where
2595 after successfully matching one A, we need to go back and try to
2596 match another one; similarly for lookahead assertions: if the assertion
2597 completes successfully, we backtrack to the state just before the assertion
2598 and then carry on.  In these cases, the pushed state is marked as
2599 'backtrack on success too'. This marking is in fact done by a chain of
2600 pointers, each pointing to the previous 'yes' state. On success, we pop to
2601 the nearest yes state, discarding any intermediate failure-only states.
2602 Sometimes a yes state is pushed just to force some cleanup code to be
2603 called at the end of a successful match or submatch; e.g. (??{$re}) uses
2604 it to free the inner regex.
2605 
2606 Note that failure backtracking rewinds the cursor position, while
2607 success backtracking leaves it alone.
2608 
2609 A pattern is complete when the END op is executed, while a subpattern
2610 such as (?=foo) is complete when the SUCCESS op is executed. Both of these
2611 ops trigger the "pop to last yes state if any, otherwise return true"
2612 behaviour.
2613 
2614 A common convention in this function is to use A and B to refer to the two
2615 subpatterns (or to the first nodes thereof) in patterns like /A*B/: so A is
2616 the subpattern to be matched possibly multiple times, while B is the entire
2617 rest of the pattern. Variable and state names reflect this convention.
2618 
2619 The states in the main switch are the union of ops and failure/success of
2620 substates associated with with that op.  For example, IFMATCH is the op
2621 that does lookahead assertions /(?=A)B/ and so the IFMATCH state means
2622 'execute IFMATCH'; while IFMATCH_A is a state saying that we have just
2623 successfully matched A and IFMATCH_A_fail is a state saying that we have
2624 just failed to match A. Resume states always come in pairs. The backtrack
2625 state we push is marked as 'IFMATCH_A', but when that is popped, we resume
2626 at IFMATCH_A or IFMATCH_A_fail, depending on whether we are backtracking
2627 on success or failure.
2628 
2629 The struct that holds a backtracking state is actually a big union, with
2630 one variant for each major type of op. The variable st points to the
2631 top-most backtrack struct. To make the code clearer, within each
2632 block of code we #define ST to alias the relevant union.
2633 
2634 Here's a concrete example of a (vastly oversimplified) IFMATCH
2635 implementation:
2636 
2637     switch (state) {
2638     ....
2639 
2640 #define ST st->u.ifmatch
2641 
2642     case IFMATCH: // we are executing the IFMATCH op, (?=A)B
2643 	ST.foo = ...; // some state we wish to save
2644 	...
2645 	// push a yes backtrack state with a resume value of
2646 	// IFMATCH_A/IFMATCH_A_fail, then continue execution at the
2647 	// first node of A:
2648 	PUSH_YES_STATE_GOTO(IFMATCH_A, A);
2649 	// NOTREACHED
2650 
2651     case IFMATCH_A: // we have successfully executed A; now continue with B
2652 	next = B;
2653 	bar = ST.foo; // do something with the preserved value
2654 	break;
2655 
2656     case IFMATCH_A_fail: // A failed, so the assertion failed
2657 	...;   // do some housekeeping, then ...
2658 	sayNO; // propagate the failure
2659 
2660 #undef ST
2661 
2662     ...
2663     }
2664 
2665 For any old-timers reading this who are familiar with the old recursive
2666 approach, the code above is equivalent to:
2667 
2668     case IFMATCH: // we are executing the IFMATCH op, (?=A)B
2669     {
2670 	int foo = ...
2671 	...
2672 	if (regmatch(A)) {
2673 	    next = B;
2674 	    bar = foo;
2675 	    break;
2676 	}
2677 	...;   // do some housekeeping, then ...
2678 	sayNO; // propagate the failure
2679     }
2680 
2681 The topmost backtrack state, pointed to by st, is usually free. If you
2682 want to claim it, populate any ST.foo fields in it with values you wish to
2683 save, then do one of
2684 
2685 	PUSH_STATE_GOTO(resume_state, node);
2686 	PUSH_YES_STATE_GOTO(resume_state, node);
2687 
2688 which sets that backtrack state's resume value to 'resume_state', pushes a
2689 new free entry to the top of the backtrack stack, then goes to 'node'.
2690 On backtracking, the free slot is popped, and the saved state becomes the
2691 new free state. An ST.foo field in this new top state can be temporarily
2692 accessed to retrieve values, but once the main loop is re-entered, it
2693 becomes available for reuse.
2694 
2695 Note that the depth of the backtrack stack constantly increases during the
2696 left-to-right execution of the pattern, rather than going up and down with
2697 the pattern nesting. For example the stack is at its maximum at Z at the
2698 end of the pattern, rather than at X in the following:
2699 
2700     /(((X)+)+)+....(Y)+....Z/
2701 
2702 The only exceptions to this are lookahead/behind assertions and the cut,
2703 (?>A), which pop all the backtrack states associated with A before
2704 continuing.
2705 
2706 Bascktrack state structs are allocated in slabs of about 4K in size.
2707 PL_regmatch_state and st always point to the currently active state,
2708 and PL_regmatch_slab points to the slab currently containing
2709 PL_regmatch_state.  The first time regmatch() is called, the first slab is
2710 allocated, and is never freed until interpreter destruction. When the slab
2711 is full, a new one is allocated and chained to the end. At exit from
2712 regmatch(), slabs allocated since entry are freed.
2713 
2714 */
2715 
2716 
2717 #define DEBUG_STATE_pp(pp)				    \
2718     DEBUG_STATE_r({					    \
2719 	DUMP_EXEC_POS(locinput, scan, do_utf8);		    \
2720 	PerlIO_printf(Perl_debug_log,			    \
2721 	    "    %*s"pp" %s%s%s%s%s\n",			    \
2722 	    depth*2, "",				    \
2723 	    PL_reg_name[st->resume_state],                     \
2724 	    ((st==yes_state||st==mark_state) ? "[" : ""),   \
2725 	    ((st==yes_state) ? "Y" : ""),                   \
2726 	    ((st==mark_state) ? "M" : ""),                  \
2727 	    ((st==yes_state||st==mark_state) ? "]" : "")    \
2728 	);                                                  \
2729     });
2730 
2731 
2732 #define REG_NODE_NUM(x) ((x) ? (int)((x)-prog) : -1)
2733 
2734 #ifdef DEBUGGING
2735 
2736 STATIC void
2737 S_debug_start_match(pTHX_ const REGEXP *prog, const bool do_utf8,
2738     const char *start, const char *end, const char *blurb)
2739 {
2740     const bool utf8_pat = RX_UTF8(prog) ? 1 : 0;
2741 
2742     PERL_ARGS_ASSERT_DEBUG_START_MATCH;
2743 
2744     if (!PL_colorset)
2745             reginitcolors();
2746     {
2747         RE_PV_QUOTED_DECL(s0, utf8_pat, PERL_DEBUG_PAD_ZERO(0),
2748             RX_PRECOMP_const(prog), RX_PRELEN(prog), 60);
2749 
2750         RE_PV_QUOTED_DECL(s1, do_utf8, PERL_DEBUG_PAD_ZERO(1),
2751             start, end - start, 60);
2752 
2753         PerlIO_printf(Perl_debug_log,
2754             "%s%s REx%s %s against %s\n",
2755 		       PL_colors[4], blurb, PL_colors[5], s0, s1);
2756 
2757         if (do_utf8||utf8_pat)
2758             PerlIO_printf(Perl_debug_log, "UTF-8 %s%s%s...\n",
2759                 utf8_pat ? "pattern" : "",
2760                 utf8_pat && do_utf8 ? " and " : "",
2761                 do_utf8 ? "string" : ""
2762             );
2763     }
2764 }
2765 
2766 STATIC void
2767 S_dump_exec_pos(pTHX_ const char *locinput,
2768                       const regnode *scan,
2769                       const char *loc_regeol,
2770                       const char *loc_bostr,
2771                       const char *loc_reg_starttry,
2772                       const bool do_utf8)
2773 {
2774     const int docolor = *PL_colors[0] || *PL_colors[2] || *PL_colors[4];
2775     const int taill = (docolor ? 10 : 7); /* 3 chars for "> <" */
2776     int l = (loc_regeol - locinput) > taill ? taill : (loc_regeol - locinput);
2777     /* The part of the string before starttry has one color
2778        (pref0_len chars), between starttry and current
2779        position another one (pref_len - pref0_len chars),
2780        after the current position the third one.
2781        We assume that pref0_len <= pref_len, otherwise we
2782        decrease pref0_len.  */
2783     int pref_len = (locinput - loc_bostr) > (5 + taill) - l
2784 	? (5 + taill) - l : locinput - loc_bostr;
2785     int pref0_len;
2786 
2787     PERL_ARGS_ASSERT_DUMP_EXEC_POS;
2788 
2789     while (do_utf8 && UTF8_IS_CONTINUATION(*(U8*)(locinput - pref_len)))
2790 	pref_len++;
2791     pref0_len = pref_len  - (locinput - loc_reg_starttry);
2792     if (l + pref_len < (5 + taill) && l < loc_regeol - locinput)
2793 	l = ( loc_regeol - locinput > (5 + taill) - pref_len
2794 	      ? (5 + taill) - pref_len : loc_regeol - locinput);
2795     while (do_utf8 && UTF8_IS_CONTINUATION(*(U8*)(locinput + l)))
2796 	l--;
2797     if (pref0_len < 0)
2798 	pref0_len = 0;
2799     if (pref0_len > pref_len)
2800 	pref0_len = pref_len;
2801     {
2802 	const int is_uni = (do_utf8 && OP(scan) != CANY) ? 1 : 0;
2803 
2804 	RE_PV_COLOR_DECL(s0,len0,is_uni,PERL_DEBUG_PAD(0),
2805 	    (locinput - pref_len),pref0_len, 60, 4, 5);
2806 
2807 	RE_PV_COLOR_DECL(s1,len1,is_uni,PERL_DEBUG_PAD(1),
2808 		    (locinput - pref_len + pref0_len),
2809 		    pref_len - pref0_len, 60, 2, 3);
2810 
2811 	RE_PV_COLOR_DECL(s2,len2,is_uni,PERL_DEBUG_PAD(2),
2812 		    locinput, loc_regeol - locinput, 10, 0, 1);
2813 
2814 	const STRLEN tlen=len0+len1+len2;
2815 	PerlIO_printf(Perl_debug_log,
2816 		    "%4"IVdf" <%.*s%.*s%s%.*s>%*s|",
2817 		    (IV)(locinput - loc_bostr),
2818 		    len0, s0,
2819 		    len1, s1,
2820 		    (docolor ? "" : "> <"),
2821 		    len2, s2,
2822 		    (int)(tlen > 19 ? 0 :  19 - tlen),
2823 		    "");
2824     }
2825 }
2826 
2827 #endif
2828 
2829 /* reg_check_named_buff_matched()
2830  * Checks to see if a named buffer has matched. The data array of
2831  * buffer numbers corresponding to the buffer is expected to reside
2832  * in the regexp->data->data array in the slot stored in the ARG() of
2833  * node involved. Note that this routine doesn't actually care about the
2834  * name, that information is not preserved from compilation to execution.
2835  * Returns the index of the leftmost defined buffer with the given name
2836  * or 0 if non of the buffers matched.
2837  */
2838 STATIC I32
2839 S_reg_check_named_buff_matched(pTHX_ const regexp *rex, const regnode *scan)
2840 {
2841     I32 n;
2842     RXi_GET_DECL(rex,rexi);
2843     SV *sv_dat= MUTABLE_SV(rexi->data->data[ ARG( scan ) ]);
2844     I32 *nums=(I32*)SvPVX(sv_dat);
2845 
2846     PERL_ARGS_ASSERT_REG_CHECK_NAMED_BUFF_MATCHED;
2847 
2848     for ( n=0; n<SvIVX(sv_dat); n++ ) {
2849         if ((I32)*PL_reglastparen >= nums[n] &&
2850             PL_regoffs[nums[n]].end != -1)
2851         {
2852             return nums[n];
2853         }
2854     }
2855     return 0;
2856 }
2857 
2858 
2859 /* free all slabs above current one  - called during LEAVE_SCOPE */
2860 
2861 STATIC void
2862 S_clear_backtrack_stack(pTHX_ void *p)
2863 {
2864     regmatch_slab *s = PL_regmatch_slab->next;
2865     PERL_UNUSED_ARG(p);
2866 
2867     if (!s)
2868 	return;
2869     PL_regmatch_slab->next = NULL;
2870     while (s) {
2871 	regmatch_slab * const osl = s;
2872 	s = s->next;
2873 	Safefree(osl);
2874     }
2875 }
2876 
2877 
2878 #define SETREX(Re1,Re2) \
2879     if (PL_reg_eval_set) PM_SETRE((PL_reg_curpm), (Re2)); \
2880     Re1 = (Re2)
2881 
2882 STATIC I32			/* 0 failure, 1 success */
2883 S_regmatch(pTHX_ regmatch_info *reginfo, regnode *prog)
2884 {
2885 #if PERL_VERSION < 9 && !defined(PERL_CORE)
2886     dMY_CXT;
2887 #endif
2888     dVAR;
2889     register const bool do_utf8 = PL_reg_match_utf8;
2890     const U32 uniflags = UTF8_ALLOW_DEFAULT;
2891     REGEXP *rex_sv = reginfo->prog;
2892     regexp *rex = (struct regexp *)SvANY(rex_sv);
2893     RXi_GET_DECL(rex,rexi);
2894     I32	oldsave;
2895     /* the current state. This is a cached copy of PL_regmatch_state */
2896     register regmatch_state *st;
2897     /* cache heavy used fields of st in registers */
2898     register regnode *scan;
2899     register regnode *next;
2900     register U32 n = 0;	/* general value; init to avoid compiler warning */
2901     register I32 ln = 0; /* len or last;  init to avoid compiler warning */
2902     register char *locinput = PL_reginput;
2903     register I32 nextchr;   /* is always set to UCHARAT(locinput) */
2904 
2905     bool result = 0;	    /* return value of S_regmatch */
2906     int depth = 0;	    /* depth of backtrack stack */
2907     U32 nochange_depth = 0; /* depth of GOSUB recursion with nochange */
2908     const U32 max_nochange_depth =
2909         (3 * rex->nparens > MAX_RECURSE_EVAL_NOCHANGE_DEPTH) ?
2910         3 * rex->nparens : MAX_RECURSE_EVAL_NOCHANGE_DEPTH;
2911     regmatch_state *yes_state = NULL; /* state to pop to on success of
2912 							    subpattern */
2913     /* mark_state piggy backs on the yes_state logic so that when we unwind
2914        the stack on success we can update the mark_state as we go */
2915     regmatch_state *mark_state = NULL; /* last mark state we have seen */
2916     regmatch_state *cur_eval = NULL; /* most recent EVAL_AB state */
2917     struct regmatch_state  *cur_curlyx = NULL; /* most recent curlyx */
2918     U32 state_num;
2919     bool no_final = 0;      /* prevent failure from backtracking? */
2920     bool do_cutgroup = 0;   /* no_final only until next branch/trie entry */
2921     char *startpoint = PL_reginput;
2922     SV *popmark = NULL;     /* are we looking for a mark? */
2923     SV *sv_commit = NULL;   /* last mark name seen in failure */
2924     SV *sv_yes_mark = NULL; /* last mark name we have seen
2925                                during a successfull match */
2926     U32 lastopen = 0;       /* last open we saw */
2927     bool has_cutgroup = RX_HAS_CUTGROUP(rex) ? 1 : 0;
2928     SV* const oreplsv = GvSV(PL_replgv);
2929     /* these three flags are set by various ops to signal information to
2930      * the very next op. They have a useful lifetime of exactly one loop
2931      * iteration, and are not preserved or restored by state pushes/pops
2932      */
2933     bool sw = 0;	    /* the condition value in (?(cond)a|b) */
2934     bool minmod = 0;	    /* the next "{n,m}" is a "{n,m}?" */
2935     int logical = 0;	    /* the following EVAL is:
2936 				0: (?{...})
2937 				1: (?(?{...})X|Y)
2938 				2: (??{...})
2939 			       or the following IFMATCH/UNLESSM is:
2940 			        false: plain (?=foo)
2941 				true:  used as a condition: (?(?=foo))
2942 			    */
2943 #ifdef DEBUGGING
2944     GET_RE_DEBUG_FLAGS_DECL;
2945 #endif
2946 
2947     PERL_ARGS_ASSERT_REGMATCH;
2948 
2949     DEBUG_OPTIMISE_r( DEBUG_EXECUTE_r({
2950 	    PerlIO_printf(Perl_debug_log,"regmatch start\n");
2951     }));
2952     /* on first ever call to regmatch, allocate first slab */
2953     if (!PL_regmatch_slab) {
2954 	Newx(PL_regmatch_slab, 1, regmatch_slab);
2955 	PL_regmatch_slab->prev = NULL;
2956 	PL_regmatch_slab->next = NULL;
2957 	PL_regmatch_state = SLAB_FIRST(PL_regmatch_slab);
2958     }
2959 
2960     oldsave = PL_savestack_ix;
2961     SAVEDESTRUCTOR_X(S_clear_backtrack_stack, NULL);
2962     SAVEVPTR(PL_regmatch_slab);
2963     SAVEVPTR(PL_regmatch_state);
2964 
2965     /* grab next free state slot */
2966     st = ++PL_regmatch_state;
2967     if (st >  SLAB_LAST(PL_regmatch_slab))
2968 	st = PL_regmatch_state = S_push_slab(aTHX);
2969 
2970     /* Note that nextchr is a byte even in UTF */
2971     nextchr = UCHARAT(locinput);
2972     scan = prog;
2973     while (scan != NULL) {
2974 
2975         DEBUG_EXECUTE_r( {
2976 	    SV * const prop = sv_newmortal();
2977 	    regnode *rnext=regnext(scan);
2978 	    DUMP_EXEC_POS( locinput, scan, do_utf8 );
2979 	    regprop(rex, prop, scan);
2980 
2981 	    PerlIO_printf(Perl_debug_log,
2982 		    "%3"IVdf":%*s%s(%"IVdf")\n",
2983 		    (IV)(scan - rexi->program), depth*2, "",
2984 		    SvPVX_const(prop),
2985 		    (PL_regkind[OP(scan)] == END || !rnext) ?
2986 		        0 : (IV)(rnext - rexi->program));
2987 	});
2988 
2989 	next = scan + NEXT_OFF(scan);
2990 	if (next == scan)
2991 	    next = NULL;
2992 	state_num = OP(scan);
2993 
2994       reenter_switch:
2995 
2996 	assert(PL_reglastparen == &rex->lastparen);
2997 	assert(PL_reglastcloseparen == &rex->lastcloseparen);
2998 	assert(PL_regoffs == rex->offs);
2999 
3000 	switch (state_num) {
3001 	case BOL:
3002 	    if (locinput == PL_bostr)
3003 	    {
3004 		/* reginfo->till = reginfo->bol; */
3005 		break;
3006 	    }
3007 	    sayNO;
3008 	case MBOL:
3009 	    if (locinput == PL_bostr ||
3010 		((nextchr || locinput < PL_regeol) && locinput[-1] == '\n'))
3011 	    {
3012 		break;
3013 	    }
3014 	    sayNO;
3015 	case SBOL:
3016 	    if (locinput == PL_bostr)
3017 		break;
3018 	    sayNO;
3019 	case GPOS:
3020 	    if (locinput == reginfo->ganch)
3021 		break;
3022 	    sayNO;
3023 
3024 	case KEEPS:
3025 	    /* update the startpoint */
3026 	    st->u.keeper.val = PL_regoffs[0].start;
3027 	    PL_reginput = locinput;
3028 	    PL_regoffs[0].start = locinput - PL_bostr;
3029 	    PUSH_STATE_GOTO(KEEPS_next, next);
3030 	    /*NOT-REACHED*/
3031 	case KEEPS_next_fail:
3032 	    /* rollback the start point change */
3033 	    PL_regoffs[0].start = st->u.keeper.val;
3034 	    sayNO_SILENT;
3035 	    /*NOT-REACHED*/
3036 	case EOL:
3037 		goto seol;
3038 	case MEOL:
3039 	    if ((nextchr || locinput < PL_regeol) && nextchr != '\n')
3040 		sayNO;
3041 	    break;
3042 	case SEOL:
3043 	  seol:
3044 	    if ((nextchr || locinput < PL_regeol) && nextchr != '\n')
3045 		sayNO;
3046 	    if (PL_regeol - locinput > 1)
3047 		sayNO;
3048 	    break;
3049 	case EOS:
3050 	    if (PL_regeol != locinput)
3051 		sayNO;
3052 	    break;
3053 	case SANY:
3054 	    if (!nextchr && locinput >= PL_regeol)
3055 		sayNO;
3056  	    if (do_utf8) {
3057 	        locinput += PL_utf8skip[nextchr];
3058 		if (locinput > PL_regeol)
3059  		    sayNO;
3060  		nextchr = UCHARAT(locinput);
3061  	    }
3062  	    else
3063  		nextchr = UCHARAT(++locinput);
3064 	    break;
3065 	case CANY:
3066 	    if (!nextchr && locinput >= PL_regeol)
3067 		sayNO;
3068 	    nextchr = UCHARAT(++locinput);
3069 	    break;
3070 	case REG_ANY:
3071 	    if ((!nextchr && locinput >= PL_regeol) || nextchr == '\n')
3072 		sayNO;
3073 	    if (do_utf8) {
3074 		locinput += PL_utf8skip[nextchr];
3075 		if (locinput > PL_regeol)
3076 		    sayNO;
3077 		nextchr = UCHARAT(locinput);
3078 	    }
3079 	    else
3080 		nextchr = UCHARAT(++locinput);
3081 	    break;
3082 
3083 #undef  ST
3084 #define ST st->u.trie
3085         case TRIEC:
3086             /* In this case the charclass data is available inline so
3087                we can fail fast without a lot of extra overhead.
3088              */
3089             if (scan->flags == EXACT || !do_utf8) {
3090                 if(!ANYOF_BITMAP_TEST(scan, *locinput)) {
3091                     DEBUG_EXECUTE_r(
3092                         PerlIO_printf(Perl_debug_log,
3093                     	          "%*s  %sfailed to match trie start class...%s\n",
3094                     	          REPORT_CODE_OFF+depth*2, "", PL_colors[4], PL_colors[5])
3095                     );
3096                     sayNO_SILENT;
3097                     /* NOTREACHED */
3098                 }
3099             }
3100             /* FALL THROUGH */
3101 	case TRIE:
3102 	    {
3103                 /* what type of TRIE am I? (utf8 makes this contextual) */
3104                 DECL_TRIE_TYPE(scan);
3105 
3106                 /* what trie are we using right now */
3107 		reg_trie_data * const trie
3108         	    = (reg_trie_data*)rexi->data->data[ ARG( scan ) ];
3109 		HV * widecharmap = MUTABLE_HV(rexi->data->data[ ARG( scan ) + 1 ]);
3110                 U32 state = trie->startstate;
3111 
3112         	if (trie->bitmap && trie_type != trie_utf8_fold &&
3113         	    !TRIE_BITMAP_TEST(trie,*locinput)
3114         	) {
3115         	    if (trie->states[ state ].wordnum) {
3116         	         DEBUG_EXECUTE_r(
3117                             PerlIO_printf(Perl_debug_log,
3118                         	          "%*s  %smatched empty string...%s\n",
3119                         	          REPORT_CODE_OFF+depth*2, "", PL_colors[4], PL_colors[5])
3120                         );
3121         	        break;
3122         	    } else {
3123         	        DEBUG_EXECUTE_r(
3124                             PerlIO_printf(Perl_debug_log,
3125                         	          "%*s  %sfailed to match trie start class...%s\n",
3126                         	          REPORT_CODE_OFF+depth*2, "", PL_colors[4], PL_colors[5])
3127                         );
3128         	        sayNO_SILENT;
3129         	   }
3130                 }
3131 
3132             {
3133 		U8 *uc = ( U8* )locinput;
3134 
3135 		STRLEN len = 0;
3136 		STRLEN foldlen = 0;
3137 		U8 *uscan = (U8*)NULL;
3138 		STRLEN bufflen=0;
3139 		SV *sv_accept_buff = NULL;
3140 		U8 foldbuf[ UTF8_MAXBYTES_CASE + 1 ];
3141 
3142 	    	ST.accepted = 0; /* how many accepting states we have seen */
3143 		ST.B = next;
3144 		ST.jump = trie->jump;
3145 		ST.me = scan;
3146 	        /*
3147         	   traverse the TRIE keeping track of all accepting states
3148         	   we transition through until we get to a failing node.
3149         	*/
3150 
3151 		while ( state && uc <= (U8*)PL_regeol ) {
3152                     U32 base = trie->states[ state ].trans.base;
3153                     UV uvc = 0;
3154                     U16 charid;
3155                     /* We use charid to hold the wordnum as we don't use it
3156                        for charid until after we have done the wordnum logic.
3157                        We define an alias just so that the wordnum logic reads
3158                        more naturally. */
3159 
3160 #define got_wordnum charid
3161                     got_wordnum = trie->states[ state ].wordnum;
3162 
3163 		    if ( got_wordnum ) {
3164 			if ( ! ST.accepted ) {
3165 			    ENTER;
3166 			    SAVETMPS; /* XXX is this necessary? dmq */
3167 			    bufflen = TRIE_INITAL_ACCEPT_BUFFLEN;
3168 			    sv_accept_buff=newSV(bufflen *
3169 					    sizeof(reg_trie_accepted) - 1);
3170 			    SvCUR_set(sv_accept_buff, 0);
3171 			    SvPOK_on(sv_accept_buff);
3172 			    sv_2mortal(sv_accept_buff);
3173 			    SAVETMPS;
3174 			    ST.accept_buff =
3175 				(reg_trie_accepted*)SvPV_nolen(sv_accept_buff );
3176 			}
3177 			do {
3178 			    if (ST.accepted >= bufflen) {
3179 				bufflen *= 2;
3180 				ST.accept_buff =(reg_trie_accepted*)
3181 				    SvGROW(sv_accept_buff,
3182 				       	bufflen * sizeof(reg_trie_accepted));
3183 			    }
3184 			    SvCUR_set(sv_accept_buff,SvCUR(sv_accept_buff)
3185 				+ sizeof(reg_trie_accepted));
3186 
3187 
3188 			    ST.accept_buff[ST.accepted].wordnum = got_wordnum;
3189 			    ST.accept_buff[ST.accepted].endpos = uc;
3190 			    ++ST.accepted;
3191 		        } while (trie->nextword && (got_wordnum= trie->nextword[got_wordnum]));
3192 		    }
3193 #undef got_wordnum
3194 
3195 		    DEBUG_TRIE_EXECUTE_r({
3196 		                DUMP_EXEC_POS( (char *)uc, scan, do_utf8 );
3197 			        PerlIO_printf( Perl_debug_log,
3198 			            "%*s  %sState: %4"UVxf" Accepted: %4"UVxf" ",
3199 			            2+depth * 2, "", PL_colors[4],
3200 			            (UV)state, (UV)ST.accepted );
3201 		    });
3202 
3203 		    if ( base ) {
3204 			REXEC_TRIE_READ_CHAR(trie_type, trie, widecharmap, uc,
3205 					     uscan, len, uvc, charid, foldlen,
3206 					     foldbuf, uniflags);
3207 
3208 			if (charid &&
3209 			     (base + charid > trie->uniquecharcount )
3210 			     && (base + charid - 1 - trie->uniquecharcount
3211 				    < trie->lasttrans)
3212 			     && trie->trans[base + charid - 1 -
3213 				    trie->uniquecharcount].check == state)
3214 			{
3215 			    state = trie->trans[base + charid - 1 -
3216 				trie->uniquecharcount ].next;
3217 			}
3218 			else {
3219 			    state = 0;
3220 			}
3221 			uc += len;
3222 
3223 		    }
3224 		    else {
3225 			state = 0;
3226 		    }
3227 		    DEBUG_TRIE_EXECUTE_r(
3228 		        PerlIO_printf( Perl_debug_log,
3229 		            "Charid:%3x CP:%4"UVxf" After State: %4"UVxf"%s\n",
3230 		            charid, uvc, (UV)state, PL_colors[5] );
3231 		    );
3232 		}
3233 		if (!ST.accepted )
3234 		   sayNO;
3235 
3236 		DEBUG_EXECUTE_r(
3237 		    PerlIO_printf( Perl_debug_log,
3238 			"%*s  %sgot %"IVdf" possible matches%s\n",
3239 			REPORT_CODE_OFF + depth * 2, "",
3240 			PL_colors[4], (IV)ST.accepted, PL_colors[5] );
3241 		);
3242 	    }}
3243             goto trie_first_try; /* jump into the fail handler */
3244 	    /* NOTREACHED */
3245 	case TRIE_next_fail: /* we failed - try next alterative */
3246             if ( ST.jump) {
3247                 REGCP_UNWIND(ST.cp);
3248 	        for (n = *PL_reglastparen; n > ST.lastparen; n--)
3249 		    PL_regoffs[n].end = -1;
3250 	        *PL_reglastparen = n;
3251 	    }
3252           trie_first_try:
3253             if (do_cutgroup) {
3254                 do_cutgroup = 0;
3255                 no_final = 0;
3256             }
3257 
3258             if ( ST.jump) {
3259                 ST.lastparen = *PL_reglastparen;
3260 	        REGCP_SET(ST.cp);
3261             }
3262 	    if ( ST.accepted == 1 ) {
3263 		/* only one choice left - just continue */
3264 		DEBUG_EXECUTE_r({
3265 		    AV *const trie_words
3266 			= MUTABLE_AV(rexi->data->data[ARG(ST.me)+TRIE_WORDS_OFFSET]);
3267 		    SV ** const tmp = av_fetch( trie_words,
3268 		        ST.accept_buff[ 0 ].wordnum-1, 0 );
3269 		    SV *sv= tmp ? sv_newmortal() : NULL;
3270 
3271 		    PerlIO_printf( Perl_debug_log,
3272 			"%*s  %sonly one match left: #%d <%s>%s\n",
3273 			REPORT_CODE_OFF+depth*2, "", PL_colors[4],
3274 			ST.accept_buff[ 0 ].wordnum,
3275 			tmp ? pv_pretty(sv, SvPV_nolen_const(*tmp), SvCUR(*tmp), 0,
3276 	                        PL_colors[0], PL_colors[1],
3277 	                        (SvUTF8(*tmp) ? PERL_PV_ESCAPE_UNI : 0)
3278                             )
3279 			: "not compiled under -Dr",
3280 			PL_colors[5] );
3281 		});
3282 		PL_reginput = (char *)ST.accept_buff[ 0 ].endpos;
3283 		/* in this case we free tmps/leave before we call regmatch
3284 		   as we wont be using accept_buff again. */
3285 
3286 		locinput = PL_reginput;
3287 		nextchr = UCHARAT(locinput);
3288     		if ( !ST.jump || !ST.jump[ST.accept_buff[0].wordnum])
3289     		    scan = ST.B;
3290     		else
3291     		    scan = ST.me + ST.jump[ST.accept_buff[0].wordnum];
3292 		if (!has_cutgroup) {
3293 		    FREETMPS;
3294 		    LEAVE;
3295                 } else {
3296                     ST.accepted--;
3297                     PUSH_YES_STATE_GOTO(TRIE_next, scan);
3298                 }
3299 
3300 		continue; /* execute rest of RE */
3301 	    }
3302 
3303 	    if ( !ST.accepted-- ) {
3304 	        DEBUG_EXECUTE_r({
3305 		    PerlIO_printf( Perl_debug_log,
3306 			"%*s  %sTRIE failed...%s\n",
3307 			REPORT_CODE_OFF+depth*2, "",
3308 			PL_colors[4],
3309 			PL_colors[5] );
3310 		});
3311 		FREETMPS;
3312 		LEAVE;
3313 		sayNO_SILENT;
3314 		/*NOTREACHED*/
3315 	    }
3316 
3317 	    /*
3318 	       There are at least two accepting states left.  Presumably
3319 	       the number of accepting states is going to be low,
3320 	       typically two. So we simply scan through to find the one
3321 	       with lowest wordnum.  Once we find it, we swap the last
3322 	       state into its place and decrement the size. We then try to
3323 	       match the rest of the pattern at the point where the word
3324 	       ends. If we succeed, control just continues along the
3325 	       regex; if we fail we return here to try the next accepting
3326 	       state
3327 	     */
3328 
3329 	    {
3330 		U32 best = 0;
3331 		U32 cur;
3332 		for( cur = 1 ; cur <= ST.accepted ; cur++ ) {
3333 		    DEBUG_TRIE_EXECUTE_r(
3334 			PerlIO_printf( Perl_debug_log,
3335 			    "%*s  %sgot %"IVdf" (%d) as best, looking at %"IVdf" (%d)%s\n",
3336 			    REPORT_CODE_OFF + depth * 2, "", PL_colors[4],
3337 			    (IV)best, ST.accept_buff[ best ].wordnum, (IV)cur,
3338 			    ST.accept_buff[ cur ].wordnum, PL_colors[5] );
3339 		    );
3340 
3341 		    if (ST.accept_buff[cur].wordnum <
3342 			    ST.accept_buff[best].wordnum)
3343 			best = cur;
3344 		}
3345 
3346 		DEBUG_EXECUTE_r({
3347 		    AV *const trie_words
3348 			= MUTABLE_AV(rexi->data->data[ARG(ST.me)+TRIE_WORDS_OFFSET]);
3349 		    SV ** const tmp = av_fetch( trie_words,
3350 		        ST.accept_buff[ best ].wordnum - 1, 0 );
3351 		    regnode *nextop=(!ST.jump || !ST.jump[ST.accept_buff[best].wordnum]) ?
3352 		                    ST.B :
3353 		                    ST.me + ST.jump[ST.accept_buff[best].wordnum];
3354 		    SV *sv= tmp ? sv_newmortal() : NULL;
3355 
3356 		    PerlIO_printf( Perl_debug_log,
3357 		        "%*s  %strying alternation #%d <%s> at node #%d %s\n",
3358 			REPORT_CODE_OFF+depth*2, "", PL_colors[4],
3359 			ST.accept_buff[best].wordnum,
3360 			tmp ? pv_pretty(sv, SvPV_nolen_const(*tmp), SvCUR(*tmp), 0,
3361 	                        PL_colors[0], PL_colors[1],
3362 	                        (SvUTF8(*tmp) ? PERL_PV_ESCAPE_UNI : 0)
3363                             ) : "not compiled under -Dr",
3364 			    REG_NODE_NUM(nextop),
3365 			PL_colors[5] );
3366 		});
3367 
3368 		if ( best<ST.accepted ) {
3369 		    reg_trie_accepted tmp = ST.accept_buff[ best ];
3370 		    ST.accept_buff[ best ] = ST.accept_buff[ ST.accepted ];
3371 		    ST.accept_buff[ ST.accepted ] = tmp;
3372 		    best = ST.accepted;
3373 		}
3374 		PL_reginput = (char *)ST.accept_buff[ best ].endpos;
3375 		if ( !ST.jump || !ST.jump[ST.accept_buff[best].wordnum]) {
3376 		    scan = ST.B;
3377 		} else {
3378 		    scan = ST.me + ST.jump[ST.accept_buff[best].wordnum];
3379                 }
3380                 PUSH_YES_STATE_GOTO(TRIE_next, scan);
3381                 /* NOTREACHED */
3382 	    }
3383 	    /* NOTREACHED */
3384         case TRIE_next:
3385 	    /* we dont want to throw this away, see bug 57042*/
3386 	    if (oreplsv != GvSV(PL_replgv))
3387 		sv_setsv(oreplsv, GvSV(PL_replgv));
3388             FREETMPS;
3389 	    LEAVE;
3390 	    sayYES;
3391 #undef  ST
3392 
3393 	case EXACT: {
3394 	    char *s = STRING(scan);
3395 	    ln = STR_LEN(scan);
3396 	    if (do_utf8 != UTF) {
3397 		/* The target and the pattern have differing utf8ness. */
3398 		char *l = locinput;
3399 		const char * const e = s + ln;
3400 
3401 		if (do_utf8) {
3402 		    /* The target is utf8, the pattern is not utf8. */
3403 		    while (s < e) {
3404 			STRLEN ulen;
3405 			if (l >= PL_regeol)
3406 			     sayNO;
3407 			if (NATIVE_TO_UNI(*(U8*)s) !=
3408 			    utf8n_to_uvuni((U8*)l, UTF8_MAXBYTES, &ulen,
3409 					    uniflags))
3410 			     sayNO;
3411 			l += ulen;
3412 			s ++;
3413 		    }
3414 		}
3415 		else {
3416 		    /* The target is not utf8, the pattern is utf8. */
3417 		    while (s < e) {
3418 			STRLEN ulen;
3419 			if (l >= PL_regeol)
3420 			    sayNO;
3421 			if (NATIVE_TO_UNI(*((U8*)l)) !=
3422 			    utf8n_to_uvuni((U8*)s, UTF8_MAXBYTES, &ulen,
3423 					   uniflags))
3424 			    sayNO;
3425 			s += ulen;
3426 			l ++;
3427 		    }
3428 		}
3429 		locinput = l;
3430 		nextchr = UCHARAT(locinput);
3431 		break;
3432 	    }
3433 	    /* The target and the pattern have the same utf8ness. */
3434 	    /* Inline the first character, for speed. */
3435 	    if (UCHARAT(s) != nextchr)
3436 		sayNO;
3437 	    if (PL_regeol - locinput < ln)
3438 		sayNO;
3439 	    if (ln > 1 && memNE(s, locinput, ln))
3440 		sayNO;
3441 	    locinput += ln;
3442 	    nextchr = UCHARAT(locinput);
3443 	    break;
3444 	    }
3445 	case EXACTFL:
3446 	    PL_reg_flags |= RF_tainted;
3447 	    /* FALL THROUGH */
3448 	case EXACTF: {
3449 	    char * const s = STRING(scan);
3450 	    ln = STR_LEN(scan);
3451 
3452 	    if (do_utf8 || UTF) {
3453 	      /* Either target or the pattern are utf8. */
3454 		const char * const l = locinput;
3455 		char *e = PL_regeol;
3456 
3457 		if (ibcmp_utf8(s, 0,  ln, (bool)UTF,
3458 			       l, &e, 0,  do_utf8)) {
3459 		     /* One more case for the sharp s:
3460 		      * pack("U0U*", 0xDF) =~ /ss/i,
3461 		      * the 0xC3 0x9F are the UTF-8
3462 		      * byte sequence for the U+00DF. */
3463 
3464 		     if (!(do_utf8 &&
3465 		           toLOWER(s[0]) == 's' &&
3466 			   ln >= 2 &&
3467 			   toLOWER(s[1]) == 's' &&
3468 			   (U8)l[0] == 0xC3 &&
3469 			   e - l >= 2 &&
3470 			   (U8)l[1] == 0x9F))
3471 			  sayNO;
3472 		}
3473 		locinput = e;
3474 		nextchr = UCHARAT(locinput);
3475 		break;
3476 	    }
3477 
3478 	    /* Neither the target and the pattern are utf8. */
3479 
3480 	    /* Inline the first character, for speed. */
3481 	    if (UCHARAT(s) != nextchr &&
3482 		UCHARAT(s) != ((OP(scan) == EXACTF)
3483 			       ? PL_fold : PL_fold_locale)[nextchr])
3484 		sayNO;
3485 	    if (PL_regeol - locinput < ln)
3486 		sayNO;
3487 	    if (ln > 1 && (OP(scan) == EXACTF
3488 			   ? ibcmp(s, locinput, ln)
3489 			   : ibcmp_locale(s, locinput, ln)))
3490 		sayNO;
3491 	    locinput += ln;
3492 	    nextchr = UCHARAT(locinput);
3493 	    break;
3494 	    }
3495 	case BOUNDL:
3496 	case NBOUNDL:
3497 	    PL_reg_flags |= RF_tainted;
3498 	    /* FALL THROUGH */
3499 	case BOUND:
3500 	case NBOUND:
3501 	    /* was last char in word? */
3502 	    if (do_utf8) {
3503 		if (locinput == PL_bostr)
3504 		    ln = '\n';
3505 		else {
3506 		    const U8 * const r = reghop3((U8*)locinput, -1, (U8*)PL_bostr);
3507 
3508 		    ln = utf8n_to_uvchr(r, UTF8SKIP(r), 0, uniflags);
3509 		}
3510 		if (OP(scan) == BOUND || OP(scan) == NBOUND) {
3511 		    ln = isALNUM_uni(ln);
3512 		    LOAD_UTF8_CHARCLASS_ALNUM();
3513 		    n = swash_fetch(PL_utf8_alnum, (U8*)locinput, do_utf8);
3514 		}
3515 		else {
3516 		    ln = isALNUM_LC_uvchr(UNI_TO_NATIVE(ln));
3517 		    n = isALNUM_LC_utf8((U8*)locinput);
3518 		}
3519 	    }
3520 	    else {
3521 		ln = (locinput != PL_bostr) ?
3522 		    UCHARAT(locinput - 1) : '\n';
3523 		if (OP(scan) == BOUND || OP(scan) == NBOUND) {
3524 		    ln = isALNUM(ln);
3525 		    n = isALNUM(nextchr);
3526 		}
3527 		else {
3528 		    ln = isALNUM_LC(ln);
3529 		    n = isALNUM_LC(nextchr);
3530 		}
3531 	    }
3532 	    if (((!ln) == (!n)) == (OP(scan) == BOUND ||
3533 				    OP(scan) == BOUNDL))
3534 		    sayNO;
3535 	    break;
3536 	case ANYOF:
3537 	    if (do_utf8) {
3538 	        STRLEN inclasslen = PL_regeol - locinput;
3539 
3540 	        if (!reginclass(rex, scan, (U8*)locinput, &inclasslen, do_utf8))
3541 		    goto anyof_fail;
3542 		if (locinput >= PL_regeol)
3543 		    sayNO;
3544 		locinput += inclasslen ? inclasslen : UTF8SKIP(locinput);
3545 		nextchr = UCHARAT(locinput);
3546 		break;
3547 	    }
3548 	    else {
3549 		if (nextchr < 0)
3550 		    nextchr = UCHARAT(locinput);
3551 		if (!REGINCLASS(rex, scan, (U8*)locinput))
3552 		    goto anyof_fail;
3553 		if (!nextchr && locinput >= PL_regeol)
3554 		    sayNO;
3555 		nextchr = UCHARAT(++locinput);
3556 		break;
3557 	    }
3558 	anyof_fail:
3559 	    /* If we might have the case of the German sharp s
3560 	     * in a casefolding Unicode character class. */
3561 
3562 	    if (ANYOF_FOLD_SHARP_S(scan, locinput, PL_regeol)) {
3563 		 locinput += SHARP_S_SKIP;
3564 		 nextchr = UCHARAT(locinput);
3565 	    }
3566 	    else
3567 		 sayNO;
3568 	    break;
3569 	/* Special char classes - The defines start on line 129 or so */
3570 	CCC_TRY_AFF( ALNUM,  ALNUML, perl_word,   "a", isALNUM_LC_utf8, isALNUM, isALNUM_LC);
3571 	CCC_TRY_NEG(NALNUM, NALNUML, perl_word,   "a", isALNUM_LC_utf8, isALNUM, isALNUM_LC);
3572 
3573 	CCC_TRY_AFF( SPACE,  SPACEL, perl_space,  " ", isSPACE_LC_utf8, isSPACE, isSPACE_LC);
3574 	CCC_TRY_NEG(NSPACE, NSPACEL, perl_space,  " ", isSPACE_LC_utf8, isSPACE, isSPACE_LC);
3575 
3576 	CCC_TRY_AFF( DIGIT,  DIGITL, posix_digit, "0", isDIGIT_LC_utf8, isDIGIT, isDIGIT_LC);
3577 	CCC_TRY_NEG(NDIGIT, NDIGITL, posix_digit, "0", isDIGIT_LC_utf8, isDIGIT, isDIGIT_LC);
3578 
3579 	case CLUMP: /* Match \X: logical Unicode character.  This is defined as
3580 		       a Unicode extended Grapheme Cluster */
3581 	    /* From http://www.unicode.org/reports/tr29 (5.2 version).  An
3582 	      extended Grapheme Cluster is:
3583 
3584 	       CR LF
3585 	       | Prepend* Begin Extend*
3586 	       | .
3587 
3588 	       Begin is (Hangul-syllable | ! Control)
3589 	       Extend is (Grapheme_Extend | Spacing_Mark)
3590 	       Control is [ GCB_Control CR LF ]
3591 
3592 	       The discussion below shows how the code for CLUMP is derived
3593 	       from this regex.  Note that most of these concepts are from
3594 	       property values of the Grapheme Cluster Boundary (GCB) property.
3595 	       No code point can have multiple property values for a given
3596 	       property.  Thus a code point in Prepend can't be in Control, but
3597 	       it must be in !Control.  This is why Control above includes
3598 	       GCB_Control plus CR plus LF.  The latter two are used in the GCB
3599 	       property separately, and so can't be in GCB_Control, even though
3600 	       they logically are controls.  Control is not the same as gc=cc,
3601 	       but includes format and other characters as well.
3602 
3603 	       The Unicode definition of Hangul-syllable is:
3604 		   L+
3605 		   | (L* ( ( V | LV ) V* | LVT ) T*)
3606 		   | T+
3607 		  )
3608 	       Each of these is a value for the GCB property, and hence must be
3609 	       disjoint, so the order they are tested is immaterial, so the
3610 	       above can safely be changed to
3611 		   T+
3612 		   | L+
3613 		   | (L* ( LVT | ( V | LV ) V*) T*)
3614 
3615 	       The last two terms can be combined like this:
3616 		   L* ( L
3617 		        | (( LVT | ( V | LV ) V*) T*))
3618 
3619 	       And refactored into this:
3620 		   L* (L | LVT T* | V  V* T* | LV  V* T*)
3621 
3622 	       That means that if we have seen any L's at all we can quit
3623 	       there, but if the next character is a LVT, a V or and LV we
3624 	       should keep going.
3625 
3626 	       There is a subtlety with Prepend* which showed up in testing.
3627 	       Note that the Begin, and only the Begin is required in:
3628 	        | Prepend* Begin Extend*
3629 	       Also, Begin contains '! Control'.  A Prepend must be a '!
3630 	       Control', which means it must be a Begin.  What it comes down to
3631 	       is that if we match Prepend* and then find no suitable Begin
3632 	       afterwards, that if we backtrack the last Prepend, that one will
3633 	       be a suitable Begin.
3634 	    */
3635 
3636 	    if (locinput >= PL_regeol)
3637 		sayNO;
3638 	    if  (! do_utf8) {
3639 
3640 		/* Match either CR LF  or '.', as all the other possibilities
3641 		 * require utf8 */
3642 		locinput++;	    /* Match the . or CR */
3643 		if (nextchr == '\r'
3644 		    && locinput < PL_regeol
3645 		    && UCHARAT(locinput) == '\n') locinput++;
3646 	    }
3647 	    else {
3648 
3649 		/* Utf8: See if is ( CR LF ); already know that locinput <
3650 		 * PL_regeol, so locinput+1 is in bounds */
3651 		if (nextchr == '\r' && UCHARAT(locinput + 1) == '\n') {
3652 		    locinput += 2;
3653 		}
3654 		else {
3655 		    /* In case have to backtrack to beginning, then match '.' */
3656 		    char *starting = locinput;
3657 
3658 		    /* In case have to backtrack the last prepend */
3659 		    char *previous_prepend = 0;
3660 
3661 		    LOAD_UTF8_CHARCLASS_GCB();
3662 
3663 		    /* Match (prepend)* */
3664 		    while (locinput < PL_regeol
3665 			   && swash_fetch(PL_utf8_X_prepend,
3666 					  (U8*)locinput, do_utf8))
3667 		    {
3668 			previous_prepend = locinput;
3669 			locinput += UTF8SKIP(locinput);
3670 		    }
3671 
3672 		    /* As noted above, if we matched a prepend character, but
3673 		     * the next thing won't match, back off the last prepend we
3674 		     * matched, as it is guaranteed to match the begin */
3675 		    if (previous_prepend
3676 			&& (locinput >=  PL_regeol
3677 			    || ! swash_fetch(PL_utf8_X_begin,
3678 					     (U8*)locinput, do_utf8)))
3679 		    {
3680 			locinput = previous_prepend;
3681 		    }
3682 
3683 		    /* Note that here we know PL_regeol > locinput, as we
3684 		     * tested that upon input to this switch case, and if we
3685 		     * moved locinput forward, we tested the result just above
3686 		     * and it either passed, or we backed off so that it will
3687 		     * now pass */
3688 		    if (! swash_fetch(PL_utf8_X_begin, (U8*)locinput, do_utf8)) {
3689 
3690 			/* Here did not match the required 'Begin' in the
3691 			 * second term.  So just match the very first
3692 			 * character, the '.' of the final term of the regex */
3693 			locinput = starting + UTF8SKIP(starting);
3694 		    } else {
3695 
3696 			/* Here is the beginning of a character that can have
3697 			 * an extender.  It is either a hangul syllable, or a
3698 			 * non-control */
3699 			if (swash_fetch(PL_utf8_X_non_hangul,
3700 					(U8*)locinput, do_utf8))
3701 			{
3702 
3703 			    /* Here not a Hangul syllable, must be a
3704 			     * ('!  * Control') */
3705 			    locinput += UTF8SKIP(locinput);
3706 			} else {
3707 
3708 			    /* Here is a Hangul syllable.  It can be composed
3709 			     * of several individual characters.  One
3710 			     * possibility is T+ */
3711 			    if (swash_fetch(PL_utf8_X_T,
3712 					    (U8*)locinput, do_utf8))
3713 			    {
3714 				while (locinput < PL_regeol
3715 					&& swash_fetch(PL_utf8_X_T,
3716 							(U8*)locinput, do_utf8))
3717 				{
3718 				    locinput += UTF8SKIP(locinput);
3719 				}
3720 			    } else {
3721 
3722 				/* Here, not T+, but is a Hangul.  That means
3723 				 * it is one of the others: L, LV, LVT or V,
3724 				 * and matches:
3725 				 * L* (L | LVT T* | V  V* T* | LV  V* T*) */
3726 
3727 				/* Match L*           */
3728 				while (locinput < PL_regeol
3729 					&& swash_fetch(PL_utf8_X_L,
3730 							(U8*)locinput, do_utf8))
3731 				{
3732 				    locinput += UTF8SKIP(locinput);
3733 				}
3734 
3735 				/* Here, have exhausted L*.  If the next
3736 				 * character is not an LV, LVT nor V, it means
3737 				 * we had to have at least one L, so matches L+
3738 				 * in the original equation, we have a complete
3739 				 * hangul syllable.  Are done. */
3740 
3741 				if (locinput < PL_regeol
3742 				    && swash_fetch(PL_utf8_X_LV_LVT_V,
3743 						    (U8*)locinput, do_utf8))
3744 				{
3745 
3746 				    /* Otherwise keep going.  Must be LV, LVT
3747 				     * or V.  See if LVT */
3748 				    if (swash_fetch(PL_utf8_X_LVT,
3749 						    (U8*)locinput, do_utf8))
3750 				    {
3751 					locinput += UTF8SKIP(locinput);
3752 				    } else {
3753 
3754 					/* Must be  V or LV.  Take it, then
3755 					 * match V*     */
3756 					locinput += UTF8SKIP(locinput);
3757 					while (locinput < PL_regeol
3758 						&& swash_fetch(PL_utf8_X_V,
3759 							 (U8*)locinput, do_utf8))
3760 					{
3761 					    locinput += UTF8SKIP(locinput);
3762 					}
3763 				    }
3764 
3765 				    /* And any of LV, LVT, or V can be followed
3766 				     * by T*            */
3767 				    while (locinput < PL_regeol
3768 					   && swash_fetch(PL_utf8_X_T,
3769 							   (U8*)locinput,
3770 							   do_utf8))
3771 				    {
3772 					locinput += UTF8SKIP(locinput);
3773 				    }
3774 				}
3775 			    }
3776 			}
3777 
3778 			/* Match any extender */
3779 			while (locinput < PL_regeol
3780 				&& swash_fetch(PL_utf8_X_extend,
3781 						(U8*)locinput, do_utf8))
3782 			{
3783 			    locinput += UTF8SKIP(locinput);
3784 			}
3785 		    }
3786 		}
3787 		if (locinput > PL_regeol) sayNO;
3788 	    }
3789 	    nextchr = UCHARAT(locinput);
3790 	    break;
3791 
3792 	case NREFFL:
3793 	{
3794 	    char *s;
3795 	    char type;
3796 	    PL_reg_flags |= RF_tainted;
3797 	    /* FALL THROUGH */
3798 	case NREF:
3799 	case NREFF:
3800 	    type = OP(scan);
3801 	    n = reg_check_named_buff_matched(rex,scan);
3802 
3803             if ( n ) {
3804                 type = REF + ( type - NREF );
3805                 goto do_ref;
3806             } else {
3807                 sayNO;
3808             }
3809             /* unreached */
3810 	case REFFL:
3811 	    PL_reg_flags |= RF_tainted;
3812 	    /* FALL THROUGH */
3813         case REF:
3814 	case REFF:
3815 	    n = ARG(scan);  /* which paren pair */
3816 	    type = OP(scan);
3817 	  do_ref:
3818 	    ln = PL_regoffs[n].start;
3819 	    PL_reg_leftiter = PL_reg_maxiter;		/* Void cache */
3820 	    if (*PL_reglastparen < n || ln == -1)
3821 		sayNO;			/* Do not match unless seen CLOSEn. */
3822 	    if (ln == PL_regoffs[n].end)
3823 		break;
3824 
3825 	    s = PL_bostr + ln;
3826 	    if (do_utf8 && type != REF) {	/* REF can do byte comparison */
3827 		char *l = locinput;
3828 		const char *e = PL_bostr + PL_regoffs[n].end;
3829 		/*
3830 		 * Note that we can't do the "other character" lookup trick as
3831 		 * in the 8-bit case (no pun intended) because in Unicode we
3832 		 * have to map both upper and title case to lower case.
3833 		 */
3834 		if (type == REFF) {
3835 		    while (s < e) {
3836 			STRLEN ulen1, ulen2;
3837 			U8 tmpbuf1[UTF8_MAXBYTES_CASE+1];
3838 			U8 tmpbuf2[UTF8_MAXBYTES_CASE+1];
3839 
3840 			if (l >= PL_regeol)
3841 			    sayNO;
3842 			toLOWER_utf8((U8*)s, tmpbuf1, &ulen1);
3843 			toLOWER_utf8((U8*)l, tmpbuf2, &ulen2);
3844 			if (ulen1 != ulen2 || memNE((char *)tmpbuf1, (char *)tmpbuf2, ulen1))
3845 			    sayNO;
3846 			s += ulen1;
3847 			l += ulen2;
3848 		    }
3849 		}
3850 		locinput = l;
3851 		nextchr = UCHARAT(locinput);
3852 		break;
3853 	    }
3854 
3855 	    /* Inline the first character, for speed. */
3856 	    if (UCHARAT(s) != nextchr &&
3857 		(type == REF ||
3858 		 (UCHARAT(s) != (type == REFF
3859 				  ? PL_fold : PL_fold_locale)[nextchr])))
3860 		sayNO;
3861 	    ln = PL_regoffs[n].end - ln;
3862 	    if (locinput + ln > PL_regeol)
3863 		sayNO;
3864 	    if (ln > 1 && (type == REF
3865 			   ? memNE(s, locinput, ln)
3866 			   : (type == REFF
3867 			      ? ibcmp(s, locinput, ln)
3868 			      : ibcmp_locale(s, locinput, ln))))
3869 		sayNO;
3870 	    locinput += ln;
3871 	    nextchr = UCHARAT(locinput);
3872 	    break;
3873 	}
3874 	case NOTHING:
3875 	case TAIL:
3876 	    break;
3877 	case BACK:
3878 	    break;
3879 
3880 #undef  ST
3881 #define ST st->u.eval
3882 	{
3883 	    SV *ret;
3884 	    REGEXP *re_sv;
3885             regexp *re;
3886             regexp_internal *rei;
3887             regnode *startpoint;
3888 
3889 	case GOSTART:
3890 	case GOSUB: /*    /(...(?1))/   /(...(?&foo))/   */
3891 	    if (cur_eval && cur_eval->locinput==locinput) {
3892                 if (cur_eval->u.eval.close_paren == (U32)ARG(scan))
3893                     Perl_croak(aTHX_ "Infinite recursion in regex");
3894                 if ( ++nochange_depth > max_nochange_depth )
3895                     Perl_croak(aTHX_
3896                         "Pattern subroutine nesting without pos change"
3897                         " exceeded limit in regex");
3898             } else {
3899                 nochange_depth = 0;
3900             }
3901 	    re_sv = rex_sv;
3902             re = rex;
3903             rei = rexi;
3904             (void)ReREFCNT_inc(rex_sv);
3905             if (OP(scan)==GOSUB) {
3906                 startpoint = scan + ARG2L(scan);
3907                 ST.close_paren = ARG(scan);
3908             } else {
3909                 startpoint = rei->program+1;
3910                 ST.close_paren = 0;
3911             }
3912             goto eval_recurse_doit;
3913             /* NOTREACHED */
3914         case EVAL:  /*   /(?{A})B/   /(??{A})B/  and /(?(?{A})X|Y)B/   */
3915             if (cur_eval && cur_eval->locinput==locinput) {
3916 		if ( ++nochange_depth > max_nochange_depth )
3917                     Perl_croak(aTHX_ "EVAL without pos change exceeded limit in regex");
3918             } else {
3919                 nochange_depth = 0;
3920             }
3921 	    {
3922 		/* execute the code in the {...} */
3923 		dSP;
3924 		SV ** const before = SP;
3925 		OP_4tree * const oop = PL_op;
3926 		COP * const ocurcop = PL_curcop;
3927 		PAD *old_comppad;
3928 		char *saved_regeol = PL_regeol;
3929 
3930 		n = ARG(scan);
3931 		PL_op = (OP_4tree*)rexi->data->data[n];
3932 		DEBUG_STATE_r( PerlIO_printf(Perl_debug_log,
3933 		    "  re_eval 0x%"UVxf"\n", PTR2UV(PL_op)) );
3934 		PAD_SAVE_LOCAL(old_comppad, (PAD*)rexi->data->data[n + 2]);
3935 		PL_regoffs[0].end = PL_reg_magic->mg_len = locinput - PL_bostr;
3936 
3937                 if (sv_yes_mark) {
3938                     SV *sv_mrk = get_sv("REGMARK", 1);
3939                     sv_setsv(sv_mrk, sv_yes_mark);
3940                 }
3941 
3942 		CALLRUNOPS(aTHX);			/* Scalar context. */
3943 		SPAGAIN;
3944 		if (SP == before)
3945 		    ret = &PL_sv_undef;   /* protect against empty (?{}) blocks. */
3946 		else {
3947 		    ret = POPs;
3948 		    PUTBACK;
3949 		}
3950 
3951 		PL_op = oop;
3952 		PAD_RESTORE_LOCAL(old_comppad);
3953 		PL_curcop = ocurcop;
3954 		PL_regeol = saved_regeol;
3955 		if (!logical) {
3956 		    /* /(?{...})/ */
3957 		    sv_setsv(save_scalar(PL_replgv), ret);
3958 		    break;
3959 		}
3960 	    }
3961 	    if (logical == 2) { /* Postponed subexpression: /(??{...})/ */
3962 		logical = 0;
3963 		{
3964 		    /* extract RE object from returned value; compiling if
3965 		     * necessary */
3966 		    MAGIC *mg = NULL;
3967 		    REGEXP *rx = NULL;
3968 
3969 		    if (SvROK(ret)) {
3970 			SV *const sv = SvRV(ret);
3971 
3972 			if (SvTYPE(sv) == SVt_REGEXP) {
3973 			    rx = (REGEXP*) sv;
3974 			} else if (SvSMAGICAL(sv)) {
3975 			    mg = mg_find(sv, PERL_MAGIC_qr);
3976 			    assert(mg);
3977 			}
3978 		    } else if (SvTYPE(ret) == SVt_REGEXP) {
3979 			rx = (REGEXP*) ret;
3980 		    } else if (SvSMAGICAL(ret)) {
3981 			if (SvGMAGICAL(ret)) {
3982 			    /* I don't believe that there is ever qr magic
3983 			       here.  */
3984 			    assert(!mg_find(ret, PERL_MAGIC_qr));
3985 			    sv_unmagic(ret, PERL_MAGIC_qr);
3986 			}
3987 			else {
3988 			    mg = mg_find(ret, PERL_MAGIC_qr);
3989 			    /* testing suggests mg only ends up non-NULL for
3990 			       scalars who were upgraded and compiled in the
3991 			       else block below. In turn, this is only
3992 			       triggered in the "postponed utf8 string" tests
3993 			       in t/op/pat.t  */
3994 			}
3995 		    }
3996 
3997 		    if (mg) {
3998 			rx = (REGEXP *) mg->mg_obj; /*XXX:dmq*/
3999 			assert(rx);
4000 		    }
4001 		    if (rx) {
4002 			rx = reg_temp_copy(NULL, rx);
4003 		    }
4004 		    else {
4005 			U32 pm_flags = 0;
4006 			const I32 osize = PL_regsize;
4007 
4008 			if (DO_UTF8(ret)) {
4009 			    assert (SvUTF8(ret));
4010 			} else if (SvUTF8(ret)) {
4011 			    /* Not doing UTF-8, despite what the SV says. Is
4012 			       this only if we're trapped in use 'bytes'?  */
4013 			    /* Make a copy of the octet sequence, but without
4014 			       the flag on, as the compiler now honours the
4015 			       SvUTF8 flag on ret.  */
4016 			    STRLEN len;
4017 			    const char *const p = SvPV(ret, len);
4018 			    ret = newSVpvn_flags(p, len, SVs_TEMP);
4019 			}
4020 			rx = CALLREGCOMP(ret, pm_flags);
4021 			if (!(SvFLAGS(ret)
4022 			      & (SVs_TEMP | SVs_PADTMP | SVf_READONLY
4023 				 | SVs_GMG))) {
4024 			    /* This isn't a first class regexp. Instead, it's
4025 			       caching a regexp onto an existing, Perl visible
4026 			       scalar.  */
4027 			    sv_magic(ret, MUTABLE_SV(rx), PERL_MAGIC_qr, 0, 0);
4028 			}
4029 			PL_regsize = osize;
4030 		    }
4031 		    re_sv = rx;
4032 		    re = (struct regexp *)SvANY(rx);
4033 		}
4034                 RXp_MATCH_COPIED_off(re);
4035                 re->subbeg = rex->subbeg;
4036                 re->sublen = rex->sublen;
4037 		rei = RXi_GET(re);
4038                 DEBUG_EXECUTE_r(
4039                     debug_start_match(re_sv, do_utf8, locinput, PL_regeol,
4040                         "Matching embedded");
4041 		);
4042 		startpoint = rei->program + 1;
4043                	ST.close_paren = 0; /* only used for GOSUB */
4044                	/* borrowed from regtry */
4045                 if (PL_reg_start_tmpl <= re->nparens) {
4046                     PL_reg_start_tmpl = re->nparens*3/2 + 3;
4047                     if(PL_reg_start_tmp)
4048                         Renew(PL_reg_start_tmp, PL_reg_start_tmpl, char*);
4049                     else
4050                         Newx(PL_reg_start_tmp, PL_reg_start_tmpl, char*);
4051                 }
4052 
4053         eval_recurse_doit: /* Share code with GOSUB below this line */
4054 		/* run the pattern returned from (??{...}) */
4055 		ST.cp = regcppush(0);	/* Save *all* the positions. */
4056 		REGCP_SET(ST.lastcp);
4057 
4058 		PL_regoffs = re->offs; /* essentially NOOP on GOSUB */
4059 
4060 		/* see regtry, specifically PL_reglast(?:close)?paren is a pointer! (i dont know why) :dmq */
4061 		PL_reglastparen = &re->lastparen;
4062 		PL_reglastcloseparen = &re->lastcloseparen;
4063 		re->lastparen = 0;
4064 		re->lastcloseparen = 0;
4065 
4066 		PL_reginput = locinput;
4067 		PL_regsize = 0;
4068 
4069 		/* XXXX This is too dramatic a measure... */
4070 		PL_reg_maxiter = 0;
4071 
4072 		ST.toggle_reg_flags = PL_reg_flags;
4073 		if (RX_UTF8(re_sv))
4074 		    PL_reg_flags |= RF_utf8;
4075 		else
4076 		    PL_reg_flags &= ~RF_utf8;
4077 		ST.toggle_reg_flags ^= PL_reg_flags; /* diff of old and new */
4078 
4079 		ST.prev_rex = rex_sv;
4080 		ST.prev_curlyx = cur_curlyx;
4081 		SETREX(rex_sv,re_sv);
4082 		rex = re;
4083 		rexi = rei;
4084 		cur_curlyx = NULL;
4085 		ST.B = next;
4086 		ST.prev_eval = cur_eval;
4087 		cur_eval = st;
4088 		/* now continue from first node in postoned RE */
4089 		PUSH_YES_STATE_GOTO(EVAL_AB, startpoint);
4090 		/* NOTREACHED */
4091 	    }
4092 	    /* logical is 1,   /(?(?{...})X|Y)/ */
4093 	    sw = (bool)SvTRUE(ret);
4094 	    logical = 0;
4095 	    break;
4096 	}
4097 
4098 	case EVAL_AB: /* cleanup after a successful (??{A})B */
4099 	    /* note: this is called twice; first after popping B, then A */
4100 	    PL_reg_flags ^= ST.toggle_reg_flags;
4101 	    ReREFCNT_dec(rex_sv);
4102 	    SETREX(rex_sv,ST.prev_rex);
4103 	    rex = (struct regexp *)SvANY(rex_sv);
4104 	    rexi = RXi_GET(rex);
4105 	    regcpblow(ST.cp);
4106 	    cur_eval = ST.prev_eval;
4107 	    cur_curlyx = ST.prev_curlyx;
4108 
4109 	    /* rex was changed so update the pointer in PL_reglastparen and PL_reglastcloseparen */
4110 	    PL_reglastparen = &rex->lastparen;
4111 	    PL_reglastcloseparen = &rex->lastcloseparen;
4112 	    /* also update PL_regoffs */
4113 	    PL_regoffs = rex->offs;
4114 
4115 	    /* XXXX This is too dramatic a measure... */
4116 	    PL_reg_maxiter = 0;
4117             if ( nochange_depth )
4118 	        nochange_depth--;
4119 	    sayYES;
4120 
4121 
4122 	case EVAL_AB_fail: /* unsuccessfully ran A or B in (??{A})B */
4123 	    /* note: this is called twice; first after popping B, then A */
4124 	    PL_reg_flags ^= ST.toggle_reg_flags;
4125 	    ReREFCNT_dec(rex_sv);
4126 	    SETREX(rex_sv,ST.prev_rex);
4127 	    rex = (struct regexp *)SvANY(rex_sv);
4128 	    rexi = RXi_GET(rex);
4129 	    /* rex was changed so update the pointer in PL_reglastparen and PL_reglastcloseparen */
4130 	    PL_reglastparen = &rex->lastparen;
4131 	    PL_reglastcloseparen = &rex->lastcloseparen;
4132 
4133 	    PL_reginput = locinput;
4134 	    REGCP_UNWIND(ST.lastcp);
4135 	    regcppop(rex);
4136 	    cur_eval = ST.prev_eval;
4137 	    cur_curlyx = ST.prev_curlyx;
4138 	    /* XXXX This is too dramatic a measure... */
4139 	    PL_reg_maxiter = 0;
4140 	    if ( nochange_depth )
4141 	        nochange_depth--;
4142 	    sayNO_SILENT;
4143 #undef ST
4144 
4145 	case OPEN:
4146 	    n = ARG(scan);  /* which paren pair */
4147 	    PL_reg_start_tmp[n] = locinput;
4148 	    if (n > PL_regsize)
4149 		PL_regsize = n;
4150             lastopen = n;
4151 	    break;
4152 	case CLOSE:
4153 	    n = ARG(scan);  /* which paren pair */
4154 	    PL_regoffs[n].start = PL_reg_start_tmp[n] - PL_bostr;
4155 	    PL_regoffs[n].end = locinput - PL_bostr;
4156 	    /*if (n > PL_regsize)
4157 		PL_regsize = n;*/
4158 	    if (n > *PL_reglastparen)
4159 		*PL_reglastparen = n;
4160 	    *PL_reglastcloseparen = n;
4161             if (cur_eval && cur_eval->u.eval.close_paren == n) {
4162 	        goto fake_end;
4163 	    }
4164 	    break;
4165         case ACCEPT:
4166             if (ARG(scan)){
4167                 regnode *cursor;
4168                 for (cursor=scan;
4169                      cursor && OP(cursor)!=END;
4170                      cursor=regnext(cursor))
4171                 {
4172                     if ( OP(cursor)==CLOSE ){
4173                         n = ARG(cursor);
4174                         if ( n <= lastopen ) {
4175                             PL_regoffs[n].start
4176 				= PL_reg_start_tmp[n] - PL_bostr;
4177                             PL_regoffs[n].end = locinput - PL_bostr;
4178                             /*if (n > PL_regsize)
4179                             PL_regsize = n;*/
4180                             if (n > *PL_reglastparen)
4181                                 *PL_reglastparen = n;
4182                             *PL_reglastcloseparen = n;
4183                             if ( n == ARG(scan) || (cur_eval &&
4184                                 cur_eval->u.eval.close_paren == n))
4185                                 break;
4186                         }
4187                     }
4188                 }
4189             }
4190 	    goto fake_end;
4191 	    /*NOTREACHED*/
4192 	case GROUPP:
4193 	    n = ARG(scan);  /* which paren pair */
4194 	    sw = (bool)(*PL_reglastparen >= n && PL_regoffs[n].end != -1);
4195 	    break;
4196 	case NGROUPP:
4197 	    /* reg_check_named_buff_matched returns 0 for no match */
4198 	    sw = (bool)(0 < reg_check_named_buff_matched(rex,scan));
4199 	    break;
4200         case INSUBP:
4201             n = ARG(scan);
4202             sw = (cur_eval && (!n || cur_eval->u.eval.close_paren == n));
4203             break;
4204         case DEFINEP:
4205             sw = 0;
4206             break;
4207 	case IFTHEN:
4208 	    PL_reg_leftiter = PL_reg_maxiter;		/* Void cache */
4209 	    if (sw)
4210 		next = NEXTOPER(NEXTOPER(scan));
4211 	    else {
4212 		next = scan + ARG(scan);
4213 		if (OP(next) == IFTHEN) /* Fake one. */
4214 		    next = NEXTOPER(NEXTOPER(next));
4215 	    }
4216 	    break;
4217 	case LOGICAL:
4218 	    logical = scan->flags;
4219 	    break;
4220 
4221 /*******************************************************************
4222 
4223 The CURLYX/WHILEM pair of ops handle the most generic case of the /A*B/
4224 pattern, where A and B are subpatterns. (For simple A, CURLYM or
4225 STAR/PLUS/CURLY/CURLYN are used instead.)
4226 
4227 A*B is compiled as <CURLYX><A><WHILEM><B>
4228 
4229 On entry to the subpattern, CURLYX is called. This pushes a CURLYX
4230 state, which contains the current count, initialised to -1. It also sets
4231 cur_curlyx to point to this state, with any previous value saved in the
4232 state block.
4233 
4234 CURLYX then jumps straight to the WHILEM op, rather than executing A,
4235 since the pattern may possibly match zero times (i.e. it's a while {} loop
4236 rather than a do {} while loop).
4237 
4238 Each entry to WHILEM represents a successful match of A. The count in the
4239 CURLYX block is incremented, another WHILEM state is pushed, and execution
4240 passes to A or B depending on greediness and the current count.
4241 
4242 For example, if matching against the string a1a2a3b (where the aN are
4243 substrings that match /A/), then the match progresses as follows: (the
4244 pushed states are interspersed with the bits of strings matched so far):
4245 
4246     <CURLYX cnt=-1>
4247     <CURLYX cnt=0><WHILEM>
4248     <CURLYX cnt=1><WHILEM> a1 <WHILEM>
4249     <CURLYX cnt=2><WHILEM> a1 <WHILEM> a2 <WHILEM>
4250     <CURLYX cnt=3><WHILEM> a1 <WHILEM> a2 <WHILEM> a3 <WHILEM>
4251     <CURLYX cnt=3><WHILEM> a1 <WHILEM> a2 <WHILEM> a3 <WHILEM> b
4252 
4253 (Contrast this with something like CURLYM, which maintains only a single
4254 backtrack state:
4255 
4256     <CURLYM cnt=0> a1
4257     a1 <CURLYM cnt=1> a2
4258     a1 a2 <CURLYM cnt=2> a3
4259     a1 a2 a3 <CURLYM cnt=3> b
4260 )
4261 
4262 Each WHILEM state block marks a point to backtrack to upon partial failure
4263 of A or B, and also contains some minor state data related to that
4264 iteration.  The CURLYX block, pointed to by cur_curlyx, contains the
4265 overall state, such as the count, and pointers to the A and B ops.
4266 
4267 This is complicated slightly by nested CURLYX/WHILEM's. Since cur_curlyx
4268 must always point to the *current* CURLYX block, the rules are:
4269 
4270 When executing CURLYX, save the old cur_curlyx in the CURLYX state block,
4271 and set cur_curlyx to point the new block.
4272 
4273 When popping the CURLYX block after a successful or unsuccessful match,
4274 restore the previous cur_curlyx.
4275 
4276 When WHILEM is about to execute B, save the current cur_curlyx, and set it
4277 to the outer one saved in the CURLYX block.
4278 
4279 When popping the WHILEM block after a successful or unsuccessful B match,
4280 restore the previous cur_curlyx.
4281 
4282 Here's an example for the pattern (AI* BI)*BO
4283 I and O refer to inner and outer, C and W refer to CURLYX and WHILEM:
4284 
4285 cur_
4286 curlyx backtrack stack
4287 ------ ---------------
4288 NULL
4289 CO     <CO prev=NULL> <WO>
4290 CI     <CO prev=NULL> <WO> <CI prev=CO> <WI> ai
4291 CO     <CO prev=NULL> <WO> <CI prev=CO> <WI> ai <WI prev=CI> bi
4292 NULL   <CO prev=NULL> <WO> <CI prev=CO> <WI> ai <WI prev=CI> bi <WO prev=CO> bo
4293 
4294 At this point the pattern succeeds, and we work back down the stack to
4295 clean up, restoring as we go:
4296 
4297 CO     <CO prev=NULL> <WO> <CI prev=CO> <WI> ai <WI prev=CI> bi
4298 CI     <CO prev=NULL> <WO> <CI prev=CO> <WI> ai
4299 CO     <CO prev=NULL> <WO>
4300 NULL
4301 
4302 *******************************************************************/
4303 
4304 #define ST st->u.curlyx
4305 
4306 	case CURLYX:    /* start of /A*B/  (for complex A) */
4307 	{
4308 	    /* No need to save/restore up to this paren */
4309 	    I32 parenfloor = scan->flags;
4310 
4311 	    assert(next); /* keep Coverity happy */
4312 	    if (OP(PREVOPER(next)) == NOTHING) /* LONGJMP */
4313 		next += ARG(next);
4314 
4315 	    /* XXXX Probably it is better to teach regpush to support
4316 	       parenfloor > PL_regsize... */
4317 	    if (parenfloor > (I32)*PL_reglastparen)
4318 		parenfloor = *PL_reglastparen; /* Pessimization... */
4319 
4320 	    ST.prev_curlyx= cur_curlyx;
4321 	    cur_curlyx = st;
4322 	    ST.cp = PL_savestack_ix;
4323 
4324 	    /* these fields contain the state of the current curly.
4325 	     * they are accessed by subsequent WHILEMs */
4326 	    ST.parenfloor = parenfloor;
4327 	    ST.min = ARG1(scan);
4328 	    ST.max = ARG2(scan);
4329 	    ST.A = NEXTOPER(scan) + EXTRA_STEP_2ARGS;
4330 	    ST.B = next;
4331 	    ST.minmod = minmod;
4332 	    minmod = 0;
4333 	    ST.count = -1;	/* this will be updated by WHILEM */
4334 	    ST.lastloc = NULL;  /* this will be updated by WHILEM */
4335 
4336 	    PL_reginput = locinput;
4337 	    PUSH_YES_STATE_GOTO(CURLYX_end, PREVOPER(next));
4338 	    /* NOTREACHED */
4339 	}
4340 
4341 	case CURLYX_end: /* just finished matching all of A*B */
4342 	    cur_curlyx = ST.prev_curlyx;
4343 	    sayYES;
4344 	    /* NOTREACHED */
4345 
4346 	case CURLYX_end_fail: /* just failed to match all of A*B */
4347 	    regcpblow(ST.cp);
4348 	    cur_curlyx = ST.prev_curlyx;
4349 	    sayNO;
4350 	    /* NOTREACHED */
4351 
4352 
4353 #undef ST
4354 #define ST st->u.whilem
4355 
4356 	case WHILEM:     /* just matched an A in /A*B/  (for complex A) */
4357 	{
4358 	    /* see the discussion above about CURLYX/WHILEM */
4359 	    I32 n;
4360 	    assert(cur_curlyx); /* keep Coverity happy */
4361 	    n = ++cur_curlyx->u.curlyx.count; /* how many A's matched */
4362 	    ST.save_lastloc = cur_curlyx->u.curlyx.lastloc;
4363 	    ST.cache_offset = 0;
4364 	    ST.cache_mask = 0;
4365 
4366 	    PL_reginput = locinput;
4367 
4368 	    DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
4369 		  "%*s  whilem: matched %ld out of %ld..%ld\n",
4370 		  REPORT_CODE_OFF+depth*2, "", (long)n,
4371 		  (long)cur_curlyx->u.curlyx.min,
4372 		  (long)cur_curlyx->u.curlyx.max)
4373 	    );
4374 
4375 	    /* First just match a string of min A's. */
4376 
4377 	    if (n < cur_curlyx->u.curlyx.min) {
4378 		cur_curlyx->u.curlyx.lastloc = locinput;
4379 		PUSH_STATE_GOTO(WHILEM_A_pre, cur_curlyx->u.curlyx.A);
4380 		/* NOTREACHED */
4381 	    }
4382 
4383 	    /* If degenerate A matches "", assume A done. */
4384 
4385 	    if (locinput == cur_curlyx->u.curlyx.lastloc) {
4386 		DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
4387 		   "%*s  whilem: empty match detected, trying continuation...\n",
4388 		   REPORT_CODE_OFF+depth*2, "")
4389 		);
4390 		goto do_whilem_B_max;
4391 	    }
4392 
4393 	    /* super-linear cache processing */
4394 
4395 	    if (scan->flags) {
4396 
4397 		if (!PL_reg_maxiter) {
4398 		    /* start the countdown: Postpone detection until we
4399 		     * know the match is not *that* much linear. */
4400 		    PL_reg_maxiter = (PL_regeol - PL_bostr + 1) * (scan->flags>>4);
4401 		    /* possible overflow for long strings and many CURLYX's */
4402 		    if (PL_reg_maxiter < 0)
4403 			PL_reg_maxiter = I32_MAX;
4404 		    PL_reg_leftiter = PL_reg_maxiter;
4405 		}
4406 
4407 		if (PL_reg_leftiter-- == 0) {
4408 		    /* initialise cache */
4409 		    const I32 size = (PL_reg_maxiter + 7)/8;
4410 		    if (PL_reg_poscache) {
4411 			if ((I32)PL_reg_poscache_size < size) {
4412 			    Renew(PL_reg_poscache, size, char);
4413 			    PL_reg_poscache_size = size;
4414 			}
4415 			Zero(PL_reg_poscache, size, char);
4416 		    }
4417 		    else {
4418 			PL_reg_poscache_size = size;
4419 			Newxz(PL_reg_poscache, size, char);
4420 		    }
4421 		    DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
4422       "%swhilem: Detected a super-linear match, switching on caching%s...\n",
4423 			      PL_colors[4], PL_colors[5])
4424 		    );
4425 		}
4426 
4427 		if (PL_reg_leftiter < 0) {
4428 		    /* have we already failed at this position? */
4429 		    I32 offset, mask;
4430 		    offset  = (scan->flags & 0xf) - 1
4431 		  		+ (locinput - PL_bostr)  * (scan->flags>>4);
4432 		    mask    = 1 << (offset % 8);
4433 		    offset /= 8;
4434 		    if (PL_reg_poscache[offset] & mask) {
4435 			DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
4436 			    "%*s  whilem: (cache) already tried at this position...\n",
4437 			    REPORT_CODE_OFF+depth*2, "")
4438 			);
4439 			sayNO; /* cache records failure */
4440 		    }
4441 		    ST.cache_offset = offset;
4442 		    ST.cache_mask   = mask;
4443 		}
4444 	    }
4445 
4446 	    /* Prefer B over A for minimal matching. */
4447 
4448 	    if (cur_curlyx->u.curlyx.minmod) {
4449 		ST.save_curlyx = cur_curlyx;
4450 		cur_curlyx = cur_curlyx->u.curlyx.prev_curlyx;
4451 		ST.cp = regcppush(ST.save_curlyx->u.curlyx.parenfloor);
4452 		REGCP_SET(ST.lastcp);
4453 		PUSH_YES_STATE_GOTO(WHILEM_B_min, ST.save_curlyx->u.curlyx.B);
4454 		/* NOTREACHED */
4455 	    }
4456 
4457 	    /* Prefer A over B for maximal matching. */
4458 
4459 	    if (n < cur_curlyx->u.curlyx.max) { /* More greed allowed? */
4460 		ST.cp = regcppush(cur_curlyx->u.curlyx.parenfloor);
4461 		cur_curlyx->u.curlyx.lastloc = locinput;
4462 		REGCP_SET(ST.lastcp);
4463 		PUSH_STATE_GOTO(WHILEM_A_max, cur_curlyx->u.curlyx.A);
4464 		/* NOTREACHED */
4465 	    }
4466 	    goto do_whilem_B_max;
4467 	}
4468 	/* NOTREACHED */
4469 
4470 	case WHILEM_B_min: /* just matched B in a minimal match */
4471 	case WHILEM_B_max: /* just matched B in a maximal match */
4472 	    cur_curlyx = ST.save_curlyx;
4473 	    sayYES;
4474 	    /* NOTREACHED */
4475 
4476 	case WHILEM_B_max_fail: /* just failed to match B in a maximal match */
4477 	    cur_curlyx = ST.save_curlyx;
4478 	    cur_curlyx->u.curlyx.lastloc = ST.save_lastloc;
4479 	    cur_curlyx->u.curlyx.count--;
4480 	    CACHEsayNO;
4481 	    /* NOTREACHED */
4482 
4483 	case WHILEM_A_min_fail: /* just failed to match A in a minimal match */
4484 	    REGCP_UNWIND(ST.lastcp);
4485 	    regcppop(rex);
4486 	    /* FALL THROUGH */
4487 	case WHILEM_A_pre_fail: /* just failed to match even minimal A */
4488 	    cur_curlyx->u.curlyx.lastloc = ST.save_lastloc;
4489 	    cur_curlyx->u.curlyx.count--;
4490 	    CACHEsayNO;
4491 	    /* NOTREACHED */
4492 
4493 	case WHILEM_A_max_fail: /* just failed to match A in a maximal match */
4494 	    REGCP_UNWIND(ST.lastcp);
4495 	    regcppop(rex);	/* Restore some previous $<digit>s? */
4496 	    PL_reginput = locinput;
4497 	    DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
4498 		"%*s  whilem: failed, trying continuation...\n",
4499 		REPORT_CODE_OFF+depth*2, "")
4500 	    );
4501 	  do_whilem_B_max:
4502 	    if (cur_curlyx->u.curlyx.count >= REG_INFTY
4503 		&& ckWARN(WARN_REGEXP)
4504 		&& !(PL_reg_flags & RF_warned))
4505 	    {
4506 		PL_reg_flags |= RF_warned;
4507 		Perl_warner(aTHX_ packWARN(WARN_REGEXP), "%s limit (%d) exceeded",
4508 		     "Complex regular subexpression recursion",
4509 		     REG_INFTY - 1);
4510 	    }
4511 
4512 	    /* now try B */
4513 	    ST.save_curlyx = cur_curlyx;
4514 	    cur_curlyx = cur_curlyx->u.curlyx.prev_curlyx;
4515 	    PUSH_YES_STATE_GOTO(WHILEM_B_max, ST.save_curlyx->u.curlyx.B);
4516 	    /* NOTREACHED */
4517 
4518 	case WHILEM_B_min_fail: /* just failed to match B in a minimal match */
4519 	    cur_curlyx = ST.save_curlyx;
4520 	    REGCP_UNWIND(ST.lastcp);
4521 	    regcppop(rex);
4522 
4523 	    if (cur_curlyx->u.curlyx.count >= cur_curlyx->u.curlyx.max) {
4524 		/* Maximum greed exceeded */
4525 		if (cur_curlyx->u.curlyx.count >= REG_INFTY
4526 		    && ckWARN(WARN_REGEXP)
4527 		    && !(PL_reg_flags & RF_warned))
4528 		{
4529 		    PL_reg_flags |= RF_warned;
4530 		    Perl_warner(aTHX_ packWARN(WARN_REGEXP),
4531 			"%s limit (%d) exceeded",
4532 			"Complex regular subexpression recursion",
4533 			REG_INFTY - 1);
4534 		}
4535 		cur_curlyx->u.curlyx.count--;
4536 		CACHEsayNO;
4537 	    }
4538 
4539 	    DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
4540 		"%*s  trying longer...\n", REPORT_CODE_OFF+depth*2, "")
4541 	    );
4542 	    /* Try grabbing another A and see if it helps. */
4543 	    PL_reginput = locinput;
4544 	    cur_curlyx->u.curlyx.lastloc = locinput;
4545 	    ST.cp = regcppush(cur_curlyx->u.curlyx.parenfloor);
4546 	    REGCP_SET(ST.lastcp);
4547 	    PUSH_STATE_GOTO(WHILEM_A_min, ST.save_curlyx->u.curlyx.A);
4548 	    /* NOTREACHED */
4549 
4550 #undef  ST
4551 #define ST st->u.branch
4552 
4553 	case BRANCHJ:	    /*  /(...|A|...)/ with long next pointer */
4554 	    next = scan + ARG(scan);
4555 	    if (next == scan)
4556 		next = NULL;
4557 	    scan = NEXTOPER(scan);
4558 	    /* FALL THROUGH */
4559 
4560 	case BRANCH:	    /*  /(...|A|...)/ */
4561 	    scan = NEXTOPER(scan); /* scan now points to inner node */
4562 	    ST.lastparen = *PL_reglastparen;
4563 	    ST.next_branch = next;
4564 	    REGCP_SET(ST.cp);
4565 	    PL_reginput = locinput;
4566 
4567 	    /* Now go into the branch */
4568 	    if (has_cutgroup) {
4569 	        PUSH_YES_STATE_GOTO(BRANCH_next, scan);
4570 	    } else {
4571 	        PUSH_STATE_GOTO(BRANCH_next, scan);
4572 	    }
4573 	    /* NOTREACHED */
4574         case CUTGROUP:
4575             PL_reginput = locinput;
4576             sv_yes_mark = st->u.mark.mark_name = scan->flags ? NULL :
4577                 MUTABLE_SV(rexi->data->data[ ARG( scan ) ]);
4578             PUSH_STATE_GOTO(CUTGROUP_next,next);
4579             /* NOTREACHED */
4580         case CUTGROUP_next_fail:
4581             do_cutgroup = 1;
4582             no_final = 1;
4583             if (st->u.mark.mark_name)
4584                 sv_commit = st->u.mark.mark_name;
4585             sayNO;
4586             /* NOTREACHED */
4587         case BRANCH_next:
4588             sayYES;
4589             /* NOTREACHED */
4590 	case BRANCH_next_fail: /* that branch failed; try the next, if any */
4591 	    if (do_cutgroup) {
4592 	        do_cutgroup = 0;
4593 	        no_final = 0;
4594 	    }
4595 	    REGCP_UNWIND(ST.cp);
4596 	    for (n = *PL_reglastparen; n > ST.lastparen; n--)
4597 		PL_regoffs[n].end = -1;
4598 	    *PL_reglastparen = n;
4599 	    /*dmq: *PL_reglastcloseparen = n; */
4600 	    scan = ST.next_branch;
4601 	    /* no more branches? */
4602 	    if (!scan || (OP(scan) != BRANCH && OP(scan) != BRANCHJ)) {
4603 	        DEBUG_EXECUTE_r({
4604 		    PerlIO_printf( Perl_debug_log,
4605 			"%*s  %sBRANCH failed...%s\n",
4606 			REPORT_CODE_OFF+depth*2, "",
4607 			PL_colors[4],
4608 			PL_colors[5] );
4609 		});
4610 		sayNO_SILENT;
4611             }
4612 	    continue; /* execute next BRANCH[J] op */
4613 	    /* NOTREACHED */
4614 
4615 	case MINMOD:
4616 	    minmod = 1;
4617 	    break;
4618 
4619 #undef  ST
4620 #define ST st->u.curlym
4621 
4622 	case CURLYM:	/* /A{m,n}B/ where A is fixed-length */
4623 
4624 	    /* This is an optimisation of CURLYX that enables us to push
4625 	     * only a single backtracking state, no matter how many matches
4626 	     * there are in {m,n}. It relies on the pattern being constant
4627 	     * length, with no parens to influence future backrefs
4628 	     */
4629 
4630 	    ST.me = scan;
4631 	    scan = NEXTOPER(scan) + NODE_STEP_REGNODE;
4632 
4633 	    /* if paren positive, emulate an OPEN/CLOSE around A */
4634 	    if (ST.me->flags) {
4635 		U32 paren = ST.me->flags;
4636 		if (paren > PL_regsize)
4637 		    PL_regsize = paren;
4638 		if (paren > *PL_reglastparen)
4639 		    *PL_reglastparen = paren;
4640 		scan += NEXT_OFF(scan); /* Skip former OPEN. */
4641 	    }
4642 	    ST.A = scan;
4643 	    ST.B = next;
4644 	    ST.alen = 0;
4645 	    ST.count = 0;
4646 	    ST.minmod = minmod;
4647 	    minmod = 0;
4648 	    ST.c1 = CHRTEST_UNINIT;
4649 	    REGCP_SET(ST.cp);
4650 
4651 	    if (!(ST.minmod ? ARG1(ST.me) : ARG2(ST.me))) /* min/max */
4652 		goto curlym_do_B;
4653 
4654 	  curlym_do_A: /* execute the A in /A{m,n}B/  */
4655 	    PL_reginput = locinput;
4656 	    PUSH_YES_STATE_GOTO(CURLYM_A, ST.A); /* match A */
4657 	    /* NOTREACHED */
4658 
4659 	case CURLYM_A: /* we've just matched an A */
4660 	    locinput = st->locinput;
4661 	    nextchr = UCHARAT(locinput);
4662 
4663 	    ST.count++;
4664 	    /* after first match, determine A's length: u.curlym.alen */
4665 	    if (ST.count == 1) {
4666 		if (PL_reg_match_utf8) {
4667 		    char *s = locinput;
4668 		    while (s < PL_reginput) {
4669 			ST.alen++;
4670 			s += UTF8SKIP(s);
4671 		    }
4672 		}
4673 		else {
4674 		    ST.alen = PL_reginput - locinput;
4675 		}
4676 		if (ST.alen == 0)
4677 		    ST.count = ST.minmod ? ARG1(ST.me) : ARG2(ST.me);
4678 	    }
4679 	    DEBUG_EXECUTE_r(
4680 		PerlIO_printf(Perl_debug_log,
4681 			  "%*s  CURLYM now matched %"IVdf" times, len=%"IVdf"...\n",
4682 			  (int)(REPORT_CODE_OFF+(depth*2)), "",
4683 			  (IV) ST.count, (IV)ST.alen)
4684 	    );
4685 
4686 	    locinput = PL_reginput;
4687 
4688 	    if (cur_eval && cur_eval->u.eval.close_paren &&
4689 	        cur_eval->u.eval.close_paren == (U32)ST.me->flags)
4690 	        goto fake_end;
4691 
4692 	    {
4693 		I32 max = (ST.minmod ? ARG1(ST.me) : ARG2(ST.me));
4694 		if ( max == REG_INFTY || ST.count < max )
4695 		    goto curlym_do_A; /* try to match another A */
4696 	    }
4697 	    goto curlym_do_B; /* try to match B */
4698 
4699 	case CURLYM_A_fail: /* just failed to match an A */
4700 	    REGCP_UNWIND(ST.cp);
4701 
4702 	    if (ST.minmod || ST.count < ARG1(ST.me) /* min*/
4703 	        || (cur_eval && cur_eval->u.eval.close_paren &&
4704 	            cur_eval->u.eval.close_paren == (U32)ST.me->flags))
4705 		sayNO;
4706 
4707 	  curlym_do_B: /* execute the B in /A{m,n}B/  */
4708 	    PL_reginput = locinput;
4709 	    if (ST.c1 == CHRTEST_UNINIT) {
4710 		/* calculate c1 and c2 for possible match of 1st char
4711 		 * following curly */
4712 		ST.c1 = ST.c2 = CHRTEST_VOID;
4713 		if (HAS_TEXT(ST.B) || JUMPABLE(ST.B)) {
4714 		    regnode *text_node = ST.B;
4715 		    if (! HAS_TEXT(text_node))
4716 			FIND_NEXT_IMPT(text_node);
4717 	            /* this used to be
4718 
4719 	                (HAS_TEXT(text_node) && PL_regkind[OP(text_node)] == EXACT)
4720 
4721 	            	But the former is redundant in light of the latter.
4722 
4723 	            	if this changes back then the macro for
4724 	            	IS_TEXT and friends need to change.
4725 	             */
4726 		    if (PL_regkind[OP(text_node)] == EXACT)
4727 		    {
4728 
4729 			ST.c1 = (U8)*STRING(text_node);
4730 			ST.c2 =
4731 			    (IS_TEXTF(text_node))
4732 			    ? PL_fold[ST.c1]
4733 			    : (IS_TEXTFL(text_node))
4734 				? PL_fold_locale[ST.c1]
4735 				: ST.c1;
4736 		    }
4737 		}
4738 	    }
4739 
4740 	    DEBUG_EXECUTE_r(
4741 		PerlIO_printf(Perl_debug_log,
4742 		    "%*s  CURLYM trying tail with matches=%"IVdf"...\n",
4743 		    (int)(REPORT_CODE_OFF+(depth*2)),
4744 		    "", (IV)ST.count)
4745 		);
4746 	    if (ST.c1 != CHRTEST_VOID
4747 		    && UCHARAT(PL_reginput) != ST.c1
4748 		    && UCHARAT(PL_reginput) != ST.c2)
4749 	    {
4750 		/* simulate B failing */
4751 		DEBUG_OPTIMISE_r(
4752 		    PerlIO_printf(Perl_debug_log,
4753 		        "%*s  CURLYM Fast bail c1=%"IVdf" c2=%"IVdf"\n",
4754 		        (int)(REPORT_CODE_OFF+(depth*2)),"",
4755 		        (IV)ST.c1,(IV)ST.c2
4756 		));
4757 		state_num = CURLYM_B_fail;
4758 		goto reenter_switch;
4759 	    }
4760 
4761 	    if (ST.me->flags) {
4762 		/* mark current A as captured */
4763 		I32 paren = ST.me->flags;
4764 		if (ST.count) {
4765 		    PL_regoffs[paren].start
4766 			= HOPc(PL_reginput, -ST.alen) - PL_bostr;
4767 		    PL_regoffs[paren].end = PL_reginput - PL_bostr;
4768 		    /*dmq: *PL_reglastcloseparen = paren; */
4769 		}
4770 		else
4771 		    PL_regoffs[paren].end = -1;
4772 		if (cur_eval && cur_eval->u.eval.close_paren &&
4773 		    cur_eval->u.eval.close_paren == (U32)ST.me->flags)
4774 		{
4775 		    if (ST.count)
4776 	                goto fake_end;
4777 	            else
4778 	                sayNO;
4779 	        }
4780 	    }
4781 
4782 	    PUSH_STATE_GOTO(CURLYM_B, ST.B); /* match B */
4783 	    /* NOTREACHED */
4784 
4785 	case CURLYM_B_fail: /* just failed to match a B */
4786 	    REGCP_UNWIND(ST.cp);
4787 	    if (ST.minmod) {
4788 		I32 max = ARG2(ST.me);
4789 		if (max != REG_INFTY && ST.count == max)
4790 		    sayNO;
4791 		goto curlym_do_A; /* try to match a further A */
4792 	    }
4793 	    /* backtrack one A */
4794 	    if (ST.count == ARG1(ST.me) /* min */)
4795 		sayNO;
4796 	    ST.count--;
4797 	    locinput = HOPc(locinput, -ST.alen);
4798 	    goto curlym_do_B; /* try to match B */
4799 
4800 #undef ST
4801 #define ST st->u.curly
4802 
4803 #define CURLY_SETPAREN(paren, success) \
4804     if (paren) { \
4805 	if (success) { \
4806 	    PL_regoffs[paren].start = HOPc(locinput, -1) - PL_bostr; \
4807 	    PL_regoffs[paren].end = locinput - PL_bostr; \
4808 	    *PL_reglastcloseparen = paren; \
4809 	} \
4810 	else \
4811 	    PL_regoffs[paren].end = -1; \
4812     }
4813 
4814 	case STAR:		/*  /A*B/ where A is width 1 */
4815 	    ST.paren = 0;
4816 	    ST.min = 0;
4817 	    ST.max = REG_INFTY;
4818 	    scan = NEXTOPER(scan);
4819 	    goto repeat;
4820 	case PLUS:		/*  /A+B/ where A is width 1 */
4821 	    ST.paren = 0;
4822 	    ST.min = 1;
4823 	    ST.max = REG_INFTY;
4824 	    scan = NEXTOPER(scan);
4825 	    goto repeat;
4826 	case CURLYN:		/*  /(A){m,n}B/ where A is width 1 */
4827 	    ST.paren = scan->flags;	/* Which paren to set */
4828 	    if (ST.paren > PL_regsize)
4829 		PL_regsize = ST.paren;
4830 	    if (ST.paren > *PL_reglastparen)
4831 		*PL_reglastparen = ST.paren;
4832 	    ST.min = ARG1(scan);  /* min to match */
4833 	    ST.max = ARG2(scan);  /* max to match */
4834 	    if (cur_eval && cur_eval->u.eval.close_paren &&
4835 	        cur_eval->u.eval.close_paren == (U32)ST.paren) {
4836 	        ST.min=1;
4837 	        ST.max=1;
4838 	    }
4839             scan = regnext(NEXTOPER(scan) + NODE_STEP_REGNODE);
4840 	    goto repeat;
4841 	case CURLY:		/*  /A{m,n}B/ where A is width 1 */
4842 	    ST.paren = 0;
4843 	    ST.min = ARG1(scan);  /* min to match */
4844 	    ST.max = ARG2(scan);  /* max to match */
4845 	    scan = NEXTOPER(scan) + NODE_STEP_REGNODE;
4846 	  repeat:
4847 	    /*
4848 	    * Lookahead to avoid useless match attempts
4849 	    * when we know what character comes next.
4850 	    *
4851 	    * Used to only do .*x and .*?x, but now it allows
4852 	    * for )'s, ('s and (?{ ... })'s to be in the way
4853 	    * of the quantifier and the EXACT-like node.  -- japhy
4854 	    */
4855 
4856 	    if (ST.min > ST.max) /* XXX make this a compile-time check? */
4857 		sayNO;
4858 	    if (HAS_TEXT(next) || JUMPABLE(next)) {
4859 		U8 *s;
4860 		regnode *text_node = next;
4861 
4862 		if (! HAS_TEXT(text_node))
4863 		    FIND_NEXT_IMPT(text_node);
4864 
4865 		if (! HAS_TEXT(text_node))
4866 		    ST.c1 = ST.c2 = CHRTEST_VOID;
4867 		else {
4868 		    if ( PL_regkind[OP(text_node)] != EXACT ) {
4869 			ST.c1 = ST.c2 = CHRTEST_VOID;
4870 			goto assume_ok_easy;
4871 		    }
4872 		    else
4873 			s = (U8*)STRING(text_node);
4874 
4875                     /*  Currently we only get here when
4876 
4877                         PL_rekind[OP(text_node)] == EXACT
4878 
4879                         if this changes back then the macro for IS_TEXT and
4880                         friends need to change. */
4881 		    if (!UTF) {
4882 			ST.c2 = ST.c1 = *s;
4883 			if (IS_TEXTF(text_node))
4884 			    ST.c2 = PL_fold[ST.c1];
4885 			else if (IS_TEXTFL(text_node))
4886 			    ST.c2 = PL_fold_locale[ST.c1];
4887 		    }
4888 		    else { /* UTF */
4889 			if (IS_TEXTF(text_node)) {
4890 			     STRLEN ulen1, ulen2;
4891 			     U8 tmpbuf1[UTF8_MAXBYTES_CASE+1];
4892 			     U8 tmpbuf2[UTF8_MAXBYTES_CASE+1];
4893 
4894 			     to_utf8_lower((U8*)s, tmpbuf1, &ulen1);
4895 			     to_utf8_upper((U8*)s, tmpbuf2, &ulen2);
4896 #ifdef EBCDIC
4897 			     ST.c1 = utf8n_to_uvchr(tmpbuf1, UTF8_MAXLEN, 0,
4898 						    ckWARN(WARN_UTF8) ?
4899                                                     0 : UTF8_ALLOW_ANY);
4900 			     ST.c2 = utf8n_to_uvchr(tmpbuf2, UTF8_MAXLEN, 0,
4901                                                     ckWARN(WARN_UTF8) ?
4902                                                     0 : UTF8_ALLOW_ANY);
4903 #else
4904 			     ST.c1 = utf8n_to_uvuni(tmpbuf1, UTF8_MAXBYTES, 0,
4905 						    uniflags);
4906 			     ST.c2 = utf8n_to_uvuni(tmpbuf2, UTF8_MAXBYTES, 0,
4907 						    uniflags);
4908 #endif
4909 			}
4910 			else {
4911 			    ST.c2 = ST.c1 = utf8n_to_uvchr(s, UTF8_MAXBYTES, 0,
4912 						     uniflags);
4913 			}
4914 		    }
4915 		}
4916 	    }
4917 	    else
4918 		ST.c1 = ST.c2 = CHRTEST_VOID;
4919 	assume_ok_easy:
4920 
4921 	    ST.A = scan;
4922 	    ST.B = next;
4923 	    PL_reginput = locinput;
4924 	    if (minmod) {
4925 		minmod = 0;
4926 		if (ST.min && regrepeat(rex, ST.A, ST.min, depth) < ST.min)
4927 		    sayNO;
4928 		ST.count = ST.min;
4929 		locinput = PL_reginput;
4930 		REGCP_SET(ST.cp);
4931 		if (ST.c1 == CHRTEST_VOID)
4932 		    goto curly_try_B_min;
4933 
4934 		ST.oldloc = locinput;
4935 
4936 		/* set ST.maxpos to the furthest point along the
4937 		 * string that could possibly match */
4938 		if  (ST.max == REG_INFTY) {
4939 		    ST.maxpos = PL_regeol - 1;
4940 		    if (do_utf8)
4941 			while (UTF8_IS_CONTINUATION(*(U8*)ST.maxpos))
4942 			    ST.maxpos--;
4943 		}
4944 		else if (do_utf8) {
4945 		    int m = ST.max - ST.min;
4946 		    for (ST.maxpos = locinput;
4947 			 m >0 && ST.maxpos + UTF8SKIP(ST.maxpos) <= PL_regeol; m--)
4948 			ST.maxpos += UTF8SKIP(ST.maxpos);
4949 		}
4950 		else {
4951 		    ST.maxpos = locinput + ST.max - ST.min;
4952 		    if (ST.maxpos >= PL_regeol)
4953 			ST.maxpos = PL_regeol - 1;
4954 		}
4955 		goto curly_try_B_min_known;
4956 
4957 	    }
4958 	    else {
4959 		ST.count = regrepeat(rex, ST.A, ST.max, depth);
4960 		locinput = PL_reginput;
4961 		if (ST.count < ST.min)
4962 		    sayNO;
4963 		if ((ST.count > ST.min)
4964 		    && (PL_regkind[OP(ST.B)] == EOL) && (OP(ST.B) != MEOL))
4965 		{
4966 		    /* A{m,n} must come at the end of the string, there's
4967 		     * no point in backing off ... */
4968 		    ST.min = ST.count;
4969 		    /* ...except that $ and \Z can match before *and* after
4970 		       newline at the end.  Consider "\n\n" =~ /\n+\Z\n/.
4971 		       We may back off by one in this case. */
4972 		    if (UCHARAT(PL_reginput - 1) == '\n' && OP(ST.B) != EOS)
4973 			ST.min--;
4974 		}
4975 		REGCP_SET(ST.cp);
4976 		goto curly_try_B_max;
4977 	    }
4978 	    /* NOTREACHED */
4979 
4980 
4981 	case CURLY_B_min_known_fail:
4982 	    /* failed to find B in a non-greedy match where c1,c2 valid */
4983 	    if (ST.paren && ST.count)
4984 		PL_regoffs[ST.paren].end = -1;
4985 
4986 	    PL_reginput = locinput;	/* Could be reset... */
4987 	    REGCP_UNWIND(ST.cp);
4988 	    /* Couldn't or didn't -- move forward. */
4989 	    ST.oldloc = locinput;
4990 	    if (do_utf8)
4991 		locinput += UTF8SKIP(locinput);
4992 	    else
4993 		locinput++;
4994 	    ST.count++;
4995 	  curly_try_B_min_known:
4996 	     /* find the next place where 'B' could work, then call B */
4997 	    {
4998 		int n;
4999 		if (do_utf8) {
5000 		    n = (ST.oldloc == locinput) ? 0 : 1;
5001 		    if (ST.c1 == ST.c2) {
5002 			STRLEN len;
5003 			/* set n to utf8_distance(oldloc, locinput) */
5004 			while (locinput <= ST.maxpos &&
5005 			       utf8n_to_uvchr((U8*)locinput,
5006 					      UTF8_MAXBYTES, &len,
5007 					      uniflags) != (UV)ST.c1) {
5008 			    locinput += len;
5009 			    n++;
5010 			}
5011 		    }
5012 		    else {
5013 			/* set n to utf8_distance(oldloc, locinput) */
5014 			while (locinput <= ST.maxpos) {
5015 			    STRLEN len;
5016 			    const UV c = utf8n_to_uvchr((U8*)locinput,
5017 						  UTF8_MAXBYTES, &len,
5018 						  uniflags);
5019 			    if (c == (UV)ST.c1 || c == (UV)ST.c2)
5020 				break;
5021 			    locinput += len;
5022 			    n++;
5023 			}
5024 		    }
5025 		}
5026 		else {
5027 		    if (ST.c1 == ST.c2) {
5028 			while (locinput <= ST.maxpos &&
5029 			       UCHARAT(locinput) != ST.c1)
5030 			    locinput++;
5031 		    }
5032 		    else {
5033 			while (locinput <= ST.maxpos
5034 			       && UCHARAT(locinput) != ST.c1
5035 			       && UCHARAT(locinput) != ST.c2)
5036 			    locinput++;
5037 		    }
5038 		    n = locinput - ST.oldloc;
5039 		}
5040 		if (locinput > ST.maxpos)
5041 		    sayNO;
5042 		/* PL_reginput == oldloc now */
5043 		if (n) {
5044 		    ST.count += n;
5045 		    if (regrepeat(rex, ST.A, n, depth) < n)
5046 			sayNO;
5047 		}
5048 		PL_reginput = locinput;
5049 		CURLY_SETPAREN(ST.paren, ST.count);
5050 		if (cur_eval && cur_eval->u.eval.close_paren &&
5051 		    cur_eval->u.eval.close_paren == (U32)ST.paren) {
5052 		    goto fake_end;
5053 	        }
5054 		PUSH_STATE_GOTO(CURLY_B_min_known, ST.B);
5055 	    }
5056 	    /* NOTREACHED */
5057 
5058 
5059 	case CURLY_B_min_fail:
5060 	    /* failed to find B in a non-greedy match where c1,c2 invalid */
5061 	    if (ST.paren && ST.count)
5062 		PL_regoffs[ST.paren].end = -1;
5063 
5064 	    REGCP_UNWIND(ST.cp);
5065 	    /* failed -- move forward one */
5066 	    PL_reginput = locinput;
5067 	    if (regrepeat(rex, ST.A, 1, depth)) {
5068 		ST.count++;
5069 		locinput = PL_reginput;
5070 		if (ST.count <= ST.max || (ST.max == REG_INFTY &&
5071 			ST.count > 0)) /* count overflow ? */
5072 		{
5073 		  curly_try_B_min:
5074 		    CURLY_SETPAREN(ST.paren, ST.count);
5075 		    if (cur_eval && cur_eval->u.eval.close_paren &&
5076 		        cur_eval->u.eval.close_paren == (U32)ST.paren) {
5077                         goto fake_end;
5078                     }
5079 		    PUSH_STATE_GOTO(CURLY_B_min, ST.B);
5080 		}
5081 	    }
5082 	    sayNO;
5083 	    /* NOTREACHED */
5084 
5085 
5086 	curly_try_B_max:
5087 	    /* a successful greedy match: now try to match B */
5088             if (cur_eval && cur_eval->u.eval.close_paren &&
5089                 cur_eval->u.eval.close_paren == (U32)ST.paren) {
5090                 goto fake_end;
5091             }
5092 	    {
5093 		UV c = 0;
5094 		if (ST.c1 != CHRTEST_VOID)
5095 		    c = do_utf8 ? utf8n_to_uvchr((U8*)PL_reginput,
5096 					   UTF8_MAXBYTES, 0, uniflags)
5097 				: (UV) UCHARAT(PL_reginput);
5098 		/* If it could work, try it. */
5099 		if (ST.c1 == CHRTEST_VOID || c == (UV)ST.c1 || c == (UV)ST.c2) {
5100 		    CURLY_SETPAREN(ST.paren, ST.count);
5101 		    PUSH_STATE_GOTO(CURLY_B_max, ST.B);
5102 		    /* NOTREACHED */
5103 		}
5104 	    }
5105 	    /* FALL THROUGH */
5106 	case CURLY_B_max_fail:
5107 	    /* failed to find B in a greedy match */
5108 	    if (ST.paren && ST.count)
5109 		PL_regoffs[ST.paren].end = -1;
5110 
5111 	    REGCP_UNWIND(ST.cp);
5112 	    /*  back up. */
5113 	    if (--ST.count < ST.min)
5114 		sayNO;
5115 	    PL_reginput = locinput = HOPc(locinput, -1);
5116 	    goto curly_try_B_max;
5117 
5118 #undef ST
5119 
5120 	case END:
5121 	    fake_end:
5122 	    if (cur_eval) {
5123 		/* we've just finished A in /(??{A})B/; now continue with B */
5124 		I32 tmpix;
5125 		st->u.eval.toggle_reg_flags
5126 			    = cur_eval->u.eval.toggle_reg_flags;
5127 		PL_reg_flags ^= st->u.eval.toggle_reg_flags;
5128 
5129 		st->u.eval.prev_rex = rex_sv;		/* inner */
5130 		SETREX(rex_sv,cur_eval->u.eval.prev_rex);
5131 		rex = (struct regexp *)SvANY(rex_sv);
5132 		rexi = RXi_GET(rex);
5133 		cur_curlyx = cur_eval->u.eval.prev_curlyx;
5134 		ReREFCNT_inc(rex_sv);
5135 		st->u.eval.cp = regcppush(0);	/* Save *all* the positions. */
5136 
5137 		/* rex was changed so update the pointer in PL_reglastparen and PL_reglastcloseparen */
5138 		PL_reglastparen = &rex->lastparen;
5139 		PL_reglastcloseparen = &rex->lastcloseparen;
5140 
5141 		REGCP_SET(st->u.eval.lastcp);
5142 		PL_reginput = locinput;
5143 
5144 		/* Restore parens of the outer rex without popping the
5145 		 * savestack */
5146 		tmpix = PL_savestack_ix;
5147 		PL_savestack_ix = cur_eval->u.eval.lastcp;
5148 		regcppop(rex);
5149 		PL_savestack_ix = tmpix;
5150 
5151 		st->u.eval.prev_eval = cur_eval;
5152 		cur_eval = cur_eval->u.eval.prev_eval;
5153 		DEBUG_EXECUTE_r(
5154 		    PerlIO_printf(Perl_debug_log, "%*s  EVAL trying tail ... %"UVxf"\n",
5155 				      REPORT_CODE_OFF+depth*2, "",PTR2UV(cur_eval)););
5156                 if ( nochange_depth )
5157 	            nochange_depth--;
5158 
5159                 PUSH_YES_STATE_GOTO(EVAL_AB,
5160 			st->u.eval.prev_eval->u.eval.B); /* match B */
5161 	    }
5162 
5163 	    if (locinput < reginfo->till) {
5164 		DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
5165 				      "%sMatch possible, but length=%ld is smaller than requested=%ld, failing!%s\n",
5166 				      PL_colors[4],
5167 				      (long)(locinput - PL_reg_starttry),
5168 				      (long)(reginfo->till - PL_reg_starttry),
5169 				      PL_colors[5]));
5170 
5171 		sayNO_SILENT;		/* Cannot match: too short. */
5172 	    }
5173 	    PL_reginput = locinput;	/* put where regtry can find it */
5174 	    sayYES;			/* Success! */
5175 
5176 	case SUCCEED: /* successful SUSPEND/UNLESSM/IFMATCH/CURLYM */
5177 	    DEBUG_EXECUTE_r(
5178 	    PerlIO_printf(Perl_debug_log,
5179 		"%*s  %ssubpattern success...%s\n",
5180 		REPORT_CODE_OFF+depth*2, "", PL_colors[4], PL_colors[5]));
5181 	    PL_reginput = locinput;	/* put where regtry can find it */
5182 	    sayYES;			/* Success! */
5183 
5184 #undef  ST
5185 #define ST st->u.ifmatch
5186 
5187 	case SUSPEND:	/* (?>A) */
5188 	    ST.wanted = 1;
5189 	    PL_reginput = locinput;
5190 	    goto do_ifmatch;
5191 
5192 	case UNLESSM:	/* -ve lookaround: (?!A), or with flags, (?<!A) */
5193 	    ST.wanted = 0;
5194 	    goto ifmatch_trivial_fail_test;
5195 
5196 	case IFMATCH:	/* +ve lookaround: (?=A), or with flags, (?<=A) */
5197 	    ST.wanted = 1;
5198 	  ifmatch_trivial_fail_test:
5199 	    if (scan->flags) {
5200 		char * const s = HOPBACKc(locinput, scan->flags);
5201 		if (!s) {
5202 		    /* trivial fail */
5203 		    if (logical) {
5204 			logical = 0;
5205 			sw = 1 - (bool)ST.wanted;
5206 		    }
5207 		    else if (ST.wanted)
5208 			sayNO;
5209 		    next = scan + ARG(scan);
5210 		    if (next == scan)
5211 			next = NULL;
5212 		    break;
5213 		}
5214 		PL_reginput = s;
5215 	    }
5216 	    else
5217 		PL_reginput = locinput;
5218 
5219 	  do_ifmatch:
5220 	    ST.me = scan;
5221 	    ST.logical = logical;
5222 	    logical = 0; /* XXX: reset state of logical once it has been saved into ST */
5223 
5224 	    /* execute body of (?...A) */
5225 	    PUSH_YES_STATE_GOTO(IFMATCH_A, NEXTOPER(NEXTOPER(scan)));
5226 	    /* NOTREACHED */
5227 
5228 	case IFMATCH_A_fail: /* body of (?...A) failed */
5229 	    ST.wanted = !ST.wanted;
5230 	    /* FALL THROUGH */
5231 
5232 	case IFMATCH_A: /* body of (?...A) succeeded */
5233 	    if (ST.logical) {
5234 		sw = (bool)ST.wanted;
5235 	    }
5236 	    else if (!ST.wanted)
5237 		sayNO;
5238 
5239 	    if (OP(ST.me) == SUSPEND)
5240 		locinput = PL_reginput;
5241 	    else {
5242 		locinput = PL_reginput = st->locinput;
5243 		nextchr = UCHARAT(locinput);
5244 	    }
5245 	    scan = ST.me + ARG(ST.me);
5246 	    if (scan == ST.me)
5247 		scan = NULL;
5248 	    continue; /* execute B */
5249 
5250 #undef ST
5251 
5252 	case LONGJMP:
5253 	    next = scan + ARG(scan);
5254 	    if (next == scan)
5255 		next = NULL;
5256 	    break;
5257 	case COMMIT:
5258 	    reginfo->cutpoint = PL_regeol;
5259 	    /* FALLTHROUGH */
5260 	case PRUNE:
5261 	    PL_reginput = locinput;
5262 	    if (!scan->flags)
5263 	        sv_yes_mark = sv_commit = MUTABLE_SV(rexi->data->data[ ARG( scan ) ]);
5264 	    PUSH_STATE_GOTO(COMMIT_next,next);
5265 	    /* NOTREACHED */
5266 	case COMMIT_next_fail:
5267 	    no_final = 1;
5268 	    /* FALLTHROUGH */
5269 	case OPFAIL:
5270 	    sayNO;
5271 	    /* NOTREACHED */
5272 
5273 #define ST st->u.mark
5274         case MARKPOINT:
5275             ST.prev_mark = mark_state;
5276             ST.mark_name = sv_commit = sv_yes_mark
5277                 = MUTABLE_SV(rexi->data->data[ ARG( scan ) ]);
5278             mark_state = st;
5279             ST.mark_loc = PL_reginput = locinput;
5280             PUSH_YES_STATE_GOTO(MARKPOINT_next,next);
5281             /* NOTREACHED */
5282         case MARKPOINT_next:
5283             mark_state = ST.prev_mark;
5284             sayYES;
5285             /* NOTREACHED */
5286         case MARKPOINT_next_fail:
5287             if (popmark && sv_eq(ST.mark_name,popmark))
5288             {
5289                 if (ST.mark_loc > startpoint)
5290 	            reginfo->cutpoint = HOPBACKc(ST.mark_loc, 1);
5291                 popmark = NULL; /* we found our mark */
5292                 sv_commit = ST.mark_name;
5293 
5294                 DEBUG_EXECUTE_r({
5295                         PerlIO_printf(Perl_debug_log,
5296 		            "%*s  %ssetting cutpoint to mark:%"SVf"...%s\n",
5297 		            REPORT_CODE_OFF+depth*2, "",
5298 		            PL_colors[4], SVfARG(sv_commit), PL_colors[5]);
5299 		});
5300             }
5301             mark_state = ST.prev_mark;
5302             sv_yes_mark = mark_state ?
5303                 mark_state->u.mark.mark_name : NULL;
5304             sayNO;
5305             /* NOTREACHED */
5306         case SKIP:
5307             PL_reginput = locinput;
5308             if (scan->flags) {
5309                 /* (*SKIP) : if we fail we cut here*/
5310                 ST.mark_name = NULL;
5311                 ST.mark_loc = locinput;
5312                 PUSH_STATE_GOTO(SKIP_next,next);
5313             } else {
5314                 /* (*SKIP:NAME) : if there is a (*MARK:NAME) fail where it was,
5315                    otherwise do nothing.  Meaning we need to scan
5316                  */
5317                 regmatch_state *cur = mark_state;
5318                 SV *find = MUTABLE_SV(rexi->data->data[ ARG( scan ) ]);
5319 
5320                 while (cur) {
5321                     if ( sv_eq( cur->u.mark.mark_name,
5322                                 find ) )
5323                     {
5324                         ST.mark_name = find;
5325                         PUSH_STATE_GOTO( SKIP_next, next );
5326                     }
5327                     cur = cur->u.mark.prev_mark;
5328                 }
5329             }
5330             /* Didn't find our (*MARK:NAME) so ignore this (*SKIP:NAME) */
5331             break;
5332 	case SKIP_next_fail:
5333 	    if (ST.mark_name) {
5334 	        /* (*CUT:NAME) - Set up to search for the name as we
5335 	           collapse the stack*/
5336 	        popmark = ST.mark_name;
5337 	    } else {
5338 	        /* (*CUT) - No name, we cut here.*/
5339 	        if (ST.mark_loc > startpoint)
5340 	            reginfo->cutpoint = HOPBACKc(ST.mark_loc, 1);
5341 	        /* but we set sv_commit to latest mark_name if there
5342 	           is one so they can test to see how things lead to this
5343 	           cut */
5344                 if (mark_state)
5345                     sv_commit=mark_state->u.mark.mark_name;
5346             }
5347             no_final = 1;
5348             sayNO;
5349             /* NOTREACHED */
5350 #undef ST
5351         case FOLDCHAR:
5352             n = ARG(scan);
5353             if ( n == (U32)what_len_TRICKYFOLD(locinput,do_utf8,ln) ) {
5354                 locinput += ln;
5355             } else if ( 0xDF == n && !do_utf8 && !UTF ) {
5356                 sayNO;
5357             } else  {
5358                 U8 folded[UTF8_MAXBYTES_CASE+1];
5359                 STRLEN foldlen;
5360                 const char * const l = locinput;
5361                 char *e = PL_regeol;
5362                 to_uni_fold(n, folded, &foldlen);
5363 
5364 		if (ibcmp_utf8((const char*) folded, 0,  foldlen, 1,
5365                 	       l, &e, 0,  do_utf8)) {
5366                         sayNO;
5367                 }
5368                 locinput = e;
5369             }
5370             nextchr = UCHARAT(locinput);
5371             break;
5372         case LNBREAK:
5373             if ((n=is_LNBREAK(locinput,do_utf8))) {
5374                 locinput += n;
5375                 nextchr = UCHARAT(locinput);
5376             } else
5377                 sayNO;
5378             break;
5379 
5380 #define CASE_CLASS(nAmE)                              \
5381         case nAmE:                                    \
5382             if ((n=is_##nAmE(locinput,do_utf8))) {    \
5383                 locinput += n;                        \
5384                 nextchr = UCHARAT(locinput);          \
5385             } else                                    \
5386                 sayNO;                                \
5387             break;                                    \
5388         case N##nAmE:                                 \
5389             if ((n=is_##nAmE(locinput,do_utf8))) {    \
5390                 sayNO;                                \
5391             } else {                                  \
5392                 locinput += UTF8SKIP(locinput);       \
5393                 nextchr = UCHARAT(locinput);          \
5394             }                                         \
5395             break
5396 
5397         CASE_CLASS(VERTWS);
5398         CASE_CLASS(HORIZWS);
5399 #undef CASE_CLASS
5400 
5401 	default:
5402 	    PerlIO_printf(Perl_error_log, "%"UVxf" %d\n",
5403 			  PTR2UV(scan), OP(scan));
5404 	    Perl_croak(aTHX_ "regexp memory corruption");
5405 
5406 	} /* end switch */
5407 
5408         /* switch break jumps here */
5409 	scan = next; /* prepare to execute the next op and ... */
5410 	continue;    /* ... jump back to the top, reusing st */
5411 	/* NOTREACHED */
5412 
5413       push_yes_state:
5414 	/* push a state that backtracks on success */
5415 	st->u.yes.prev_yes_state = yes_state;
5416 	yes_state = st;
5417 	/* FALL THROUGH */
5418       push_state:
5419 	/* push a new regex state, then continue at scan  */
5420 	{
5421 	    regmatch_state *newst;
5422 
5423 	    DEBUG_STACK_r({
5424 	        regmatch_state *cur = st;
5425 	        regmatch_state *curyes = yes_state;
5426 	        int curd = depth;
5427 	        regmatch_slab *slab = PL_regmatch_slab;
5428                 for (;curd > -1;cur--,curd--) {
5429                     if (cur < SLAB_FIRST(slab)) {
5430                 	slab = slab->prev;
5431                 	cur = SLAB_LAST(slab);
5432                     }
5433                     PerlIO_printf(Perl_error_log, "%*s#%-3d %-10s %s\n",
5434                         REPORT_CODE_OFF + 2 + depth * 2,"",
5435                         curd, PL_reg_name[cur->resume_state],
5436                         (curyes == cur) ? "yes" : ""
5437                     );
5438                     if (curyes == cur)
5439 	                curyes = cur->u.yes.prev_yes_state;
5440                 }
5441             } else
5442                 DEBUG_STATE_pp("push")
5443             );
5444 	    depth++;
5445 	    st->locinput = locinput;
5446 	    newst = st+1;
5447 	    if (newst >  SLAB_LAST(PL_regmatch_slab))
5448 		newst = S_push_slab(aTHX);
5449 	    PL_regmatch_state = newst;
5450 
5451 	    locinput = PL_reginput;
5452 	    nextchr = UCHARAT(locinput);
5453 	    st = newst;
5454 	    continue;
5455 	    /* NOTREACHED */
5456 	}
5457     }
5458 
5459     /*
5460     * We get here only if there's trouble -- normally "case END" is
5461     * the terminating point.
5462     */
5463     Perl_croak(aTHX_ "corrupted regexp pointers");
5464     /*NOTREACHED*/
5465     sayNO;
5466 
5467 yes:
5468     if (yes_state) {
5469 	/* we have successfully completed a subexpression, but we must now
5470 	 * pop to the state marked by yes_state and continue from there */
5471 	assert(st != yes_state);
5472 #ifdef DEBUGGING
5473 	while (st != yes_state) {
5474 	    st--;
5475 	    if (st < SLAB_FIRST(PL_regmatch_slab)) {
5476 		PL_regmatch_slab = PL_regmatch_slab->prev;
5477 		st = SLAB_LAST(PL_regmatch_slab);
5478 	    }
5479 	    DEBUG_STATE_r({
5480 	        if (no_final) {
5481 	            DEBUG_STATE_pp("pop (no final)");
5482 	        } else {
5483 	            DEBUG_STATE_pp("pop (yes)");
5484 	        }
5485 	    });
5486 	    depth--;
5487 	}
5488 #else
5489 	while (yes_state < SLAB_FIRST(PL_regmatch_slab)
5490 	    || yes_state > SLAB_LAST(PL_regmatch_slab))
5491 	{
5492 	    /* not in this slab, pop slab */
5493 	    depth -= (st - SLAB_FIRST(PL_regmatch_slab) + 1);
5494 	    PL_regmatch_slab = PL_regmatch_slab->prev;
5495 	    st = SLAB_LAST(PL_regmatch_slab);
5496 	}
5497 	depth -= (st - yes_state);
5498 #endif
5499 	st = yes_state;
5500 	yes_state = st->u.yes.prev_yes_state;
5501 	PL_regmatch_state = st;
5502 
5503         if (no_final) {
5504             locinput= st->locinput;
5505             nextchr = UCHARAT(locinput);
5506         }
5507 	state_num = st->resume_state + no_final;
5508 	goto reenter_switch;
5509     }
5510 
5511     DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "%sMatch successful!%s\n",
5512 			  PL_colors[4], PL_colors[5]));
5513 
5514     if (PL_reg_eval_set) {
5515 	/* each successfully executed (?{...}) block does the equivalent of
5516 	 *   local $^R = do {...}
5517 	 * When popping the save stack, all these locals would be undone;
5518 	 * bypass this by setting the outermost saved $^R to the latest
5519 	 * value */
5520 	if (oreplsv != GvSV(PL_replgv))
5521 	    sv_setsv(oreplsv, GvSV(PL_replgv));
5522     }
5523     result = 1;
5524     goto final_exit;
5525 
5526 no:
5527     DEBUG_EXECUTE_r(
5528 	PerlIO_printf(Perl_debug_log,
5529             "%*s  %sfailed...%s\n",
5530             REPORT_CODE_OFF+depth*2, "",
5531             PL_colors[4], PL_colors[5])
5532 	);
5533 
5534 no_silent:
5535     if (no_final) {
5536         if (yes_state) {
5537             goto yes;
5538         } else {
5539             goto final_exit;
5540         }
5541     }
5542     if (depth) {
5543 	/* there's a previous state to backtrack to */
5544 	st--;
5545 	if (st < SLAB_FIRST(PL_regmatch_slab)) {
5546 	    PL_regmatch_slab = PL_regmatch_slab->prev;
5547 	    st = SLAB_LAST(PL_regmatch_slab);
5548 	}
5549 	PL_regmatch_state = st;
5550 	locinput= st->locinput;
5551 	nextchr = UCHARAT(locinput);
5552 
5553 	DEBUG_STATE_pp("pop");
5554 	depth--;
5555 	if (yes_state == st)
5556 	    yes_state = st->u.yes.prev_yes_state;
5557 
5558 	state_num = st->resume_state + 1; /* failure = success + 1 */
5559 	goto reenter_switch;
5560     }
5561     result = 0;
5562 
5563   final_exit:
5564     if (rex->intflags & PREGf_VERBARG_SEEN) {
5565         SV *sv_err = get_sv("REGERROR", 1);
5566         SV *sv_mrk = get_sv("REGMARK", 1);
5567         if (result) {
5568             sv_commit = &PL_sv_no;
5569             if (!sv_yes_mark)
5570                 sv_yes_mark = &PL_sv_yes;
5571         } else {
5572             if (!sv_commit)
5573                 sv_commit = &PL_sv_yes;
5574             sv_yes_mark = &PL_sv_no;
5575         }
5576         sv_setsv(sv_err, sv_commit);
5577         sv_setsv(sv_mrk, sv_yes_mark);
5578     }
5579 
5580     /* clean up; in particular, free all slabs above current one */
5581     LEAVE_SCOPE(oldsave);
5582 
5583     return result;
5584 }
5585 
5586 /*
5587  - regrepeat - repeatedly match something simple, report how many
5588  */
5589 /*
5590  * [This routine now assumes that it will only match on things of length 1.
5591  * That was true before, but now we assume scan - reginput is the count,
5592  * rather than incrementing count on every character.  [Er, except utf8.]]
5593  */
5594 STATIC I32
5595 S_regrepeat(pTHX_ const regexp *prog, const regnode *p, I32 max, int depth)
5596 {
5597     dVAR;
5598     register char *scan;
5599     register I32 c;
5600     register char *loceol = PL_regeol;
5601     register I32 hardcount = 0;
5602     register bool do_utf8 = PL_reg_match_utf8;
5603 #ifndef DEBUGGING
5604     PERL_UNUSED_ARG(depth);
5605 #endif
5606 
5607     PERL_ARGS_ASSERT_REGREPEAT;
5608 
5609     scan = PL_reginput;
5610     if (max == REG_INFTY)
5611 	max = I32_MAX;
5612     else if (max < loceol - scan)
5613 	loceol = scan + max;
5614     switch (OP(p)) {
5615     case REG_ANY:
5616 	if (do_utf8) {
5617 	    loceol = PL_regeol;
5618 	    while (scan < loceol && hardcount < max && *scan != '\n') {
5619 		scan += UTF8SKIP(scan);
5620 		hardcount++;
5621 	    }
5622 	} else {
5623 	    while (scan < loceol && *scan != '\n')
5624 		scan++;
5625 	}
5626 	break;
5627     case SANY:
5628         if (do_utf8) {
5629 	    loceol = PL_regeol;
5630 	    while (scan < loceol && hardcount < max) {
5631 	        scan += UTF8SKIP(scan);
5632 		hardcount++;
5633 	    }
5634 	}
5635 	else
5636 	    scan = loceol;
5637 	break;
5638     case CANY:
5639 	scan = loceol;
5640 	break;
5641     case EXACT:		/* length of string is 1 */
5642 	c = (U8)*STRING(p);
5643 	while (scan < loceol && UCHARAT(scan) == c)
5644 	    scan++;
5645 	break;
5646     case EXACTF:	/* length of string is 1 */
5647 	c = (U8)*STRING(p);
5648 	while (scan < loceol &&
5649 	       (UCHARAT(scan) == c || UCHARAT(scan) == PL_fold[c]))
5650 	    scan++;
5651 	break;
5652     case EXACTFL:	/* length of string is 1 */
5653 	PL_reg_flags |= RF_tainted;
5654 	c = (U8)*STRING(p);
5655 	while (scan < loceol &&
5656 	       (UCHARAT(scan) == c || UCHARAT(scan) == PL_fold_locale[c]))
5657 	    scan++;
5658 	break;
5659     case ANYOF:
5660 	if (do_utf8) {
5661 	    loceol = PL_regeol;
5662 	    while (hardcount < max && scan < loceol &&
5663 		   reginclass(prog, p, (U8*)scan, 0, do_utf8)) {
5664 		scan += UTF8SKIP(scan);
5665 		hardcount++;
5666 	    }
5667 	} else {
5668 	    while (scan < loceol && REGINCLASS(prog, p, (U8*)scan))
5669 		scan++;
5670 	}
5671 	break;
5672     case ALNUM:
5673 	if (do_utf8) {
5674 	    loceol = PL_regeol;
5675 	    LOAD_UTF8_CHARCLASS_ALNUM();
5676 	    while (hardcount < max && scan < loceol &&
5677 		   swash_fetch(PL_utf8_alnum, (U8*)scan, do_utf8)) {
5678 		scan += UTF8SKIP(scan);
5679 		hardcount++;
5680 	    }
5681 	} else {
5682 	    while (scan < loceol && isALNUM(*scan))
5683 		scan++;
5684 	}
5685 	break;
5686     case ALNUML:
5687 	PL_reg_flags |= RF_tainted;
5688 	if (do_utf8) {
5689 	    loceol = PL_regeol;
5690 	    while (hardcount < max && scan < loceol &&
5691 		   isALNUM_LC_utf8((U8*)scan)) {
5692 		scan += UTF8SKIP(scan);
5693 		hardcount++;
5694 	    }
5695 	} else {
5696 	    while (scan < loceol && isALNUM_LC(*scan))
5697 		scan++;
5698 	}
5699 	break;
5700     case NALNUM:
5701 	if (do_utf8) {
5702 	    loceol = PL_regeol;
5703 	    LOAD_UTF8_CHARCLASS_ALNUM();
5704 	    while (hardcount < max && scan < loceol &&
5705 		   !swash_fetch(PL_utf8_alnum, (U8*)scan, do_utf8)) {
5706 		scan += UTF8SKIP(scan);
5707 		hardcount++;
5708 	    }
5709 	} else {
5710 	    while (scan < loceol && !isALNUM(*scan))
5711 		scan++;
5712 	}
5713 	break;
5714     case NALNUML:
5715 	PL_reg_flags |= RF_tainted;
5716 	if (do_utf8) {
5717 	    loceol = PL_regeol;
5718 	    while (hardcount < max && scan < loceol &&
5719 		   !isALNUM_LC_utf8((U8*)scan)) {
5720 		scan += UTF8SKIP(scan);
5721 		hardcount++;
5722 	    }
5723 	} else {
5724 	    while (scan < loceol && !isALNUM_LC(*scan))
5725 		scan++;
5726 	}
5727 	break;
5728     case SPACE:
5729 	if (do_utf8) {
5730 	    loceol = PL_regeol;
5731 	    LOAD_UTF8_CHARCLASS_SPACE();
5732 	    while (hardcount < max && scan < loceol &&
5733 		   (*scan == ' ' ||
5734 		    swash_fetch(PL_utf8_space,(U8*)scan, do_utf8))) {
5735 		scan += UTF8SKIP(scan);
5736 		hardcount++;
5737 	    }
5738 	} else {
5739 	    while (scan < loceol && isSPACE(*scan))
5740 		scan++;
5741 	}
5742 	break;
5743     case SPACEL:
5744 	PL_reg_flags |= RF_tainted;
5745 	if (do_utf8) {
5746 	    loceol = PL_regeol;
5747 	    while (hardcount < max && scan < loceol &&
5748 		   (*scan == ' ' || isSPACE_LC_utf8((U8*)scan))) {
5749 		scan += UTF8SKIP(scan);
5750 		hardcount++;
5751 	    }
5752 	} else {
5753 	    while (scan < loceol && isSPACE_LC(*scan))
5754 		scan++;
5755 	}
5756 	break;
5757     case NSPACE:
5758 	if (do_utf8) {
5759 	    loceol = PL_regeol;
5760 	    LOAD_UTF8_CHARCLASS_SPACE();
5761 	    while (hardcount < max && scan < loceol &&
5762 		   !(*scan == ' ' ||
5763 		     swash_fetch(PL_utf8_space,(U8*)scan, do_utf8))) {
5764 		scan += UTF8SKIP(scan);
5765 		hardcount++;
5766 	    }
5767 	} else {
5768 	    while (scan < loceol && !isSPACE(*scan))
5769 		scan++;
5770 	}
5771 	break;
5772     case NSPACEL:
5773 	PL_reg_flags |= RF_tainted;
5774 	if (do_utf8) {
5775 	    loceol = PL_regeol;
5776 	    while (hardcount < max && scan < loceol &&
5777 		   !(*scan == ' ' || isSPACE_LC_utf8((U8*)scan))) {
5778 		scan += UTF8SKIP(scan);
5779 		hardcount++;
5780 	    }
5781 	} else {
5782 	    while (scan < loceol && !isSPACE_LC(*scan))
5783 		scan++;
5784 	}
5785 	break;
5786     case DIGIT:
5787 	if (do_utf8) {
5788 	    loceol = PL_regeol;
5789 	    LOAD_UTF8_CHARCLASS_DIGIT();
5790 	    while (hardcount < max && scan < loceol &&
5791 		   swash_fetch(PL_utf8_digit, (U8*)scan, do_utf8)) {
5792 		scan += UTF8SKIP(scan);
5793 		hardcount++;
5794 	    }
5795 	} else {
5796 	    while (scan < loceol && isDIGIT(*scan))
5797 		scan++;
5798 	}
5799 	break;
5800     case NDIGIT:
5801 	if (do_utf8) {
5802 	    loceol = PL_regeol;
5803 	    LOAD_UTF8_CHARCLASS_DIGIT();
5804 	    while (hardcount < max && scan < loceol &&
5805 		   !swash_fetch(PL_utf8_digit, (U8*)scan, do_utf8)) {
5806 		scan += UTF8SKIP(scan);
5807 		hardcount++;
5808 	    }
5809 	} else {
5810 	    while (scan < loceol && !isDIGIT(*scan))
5811 		scan++;
5812 	}
5813     case LNBREAK:
5814         if (do_utf8) {
5815 	    loceol = PL_regeol;
5816 	    while (hardcount < max && scan < loceol && (c=is_LNBREAK_utf8(scan))) {
5817 		scan += c;
5818 		hardcount++;
5819 	    }
5820 	} else {
5821 	    /*
5822 	      LNBREAK can match two latin chars, which is ok,
5823 	      because we have a null terminated string, but we
5824 	      have to use hardcount in this situation
5825 	    */
5826 	    while (scan < loceol && (c=is_LNBREAK_latin1(scan)))  {
5827 		scan+=c;
5828 		hardcount++;
5829 	    }
5830 	}
5831 	break;
5832     case HORIZWS:
5833         if (do_utf8) {
5834 	    loceol = PL_regeol;
5835 	    while (hardcount < max && scan < loceol && (c=is_HORIZWS_utf8(scan))) {
5836 		scan += c;
5837 		hardcount++;
5838 	    }
5839 	} else {
5840 	    while (scan < loceol && is_HORIZWS_latin1(scan))
5841 		scan++;
5842 	}
5843 	break;
5844     case NHORIZWS:
5845         if (do_utf8) {
5846 	    loceol = PL_regeol;
5847 	    while (hardcount < max && scan < loceol && !is_HORIZWS_utf8(scan)) {
5848 		scan += UTF8SKIP(scan);
5849 		hardcount++;
5850 	    }
5851 	} else {
5852 	    while (scan < loceol && !is_HORIZWS_latin1(scan))
5853 		scan++;
5854 
5855 	}
5856 	break;
5857     case VERTWS:
5858         if (do_utf8) {
5859 	    loceol = PL_regeol;
5860 	    while (hardcount < max && scan < loceol && (c=is_VERTWS_utf8(scan))) {
5861 		scan += c;
5862 		hardcount++;
5863 	    }
5864 	} else {
5865 	    while (scan < loceol && is_VERTWS_latin1(scan))
5866 		scan++;
5867 
5868 	}
5869 	break;
5870     case NVERTWS:
5871         if (do_utf8) {
5872 	    loceol = PL_regeol;
5873 	    while (hardcount < max && scan < loceol && !is_VERTWS_utf8(scan)) {
5874 		scan += UTF8SKIP(scan);
5875 		hardcount++;
5876 	    }
5877 	} else {
5878 	    while (scan < loceol && !is_VERTWS_latin1(scan))
5879 		scan++;
5880 
5881 	}
5882 	break;
5883 
5884     default:		/* Called on something of 0 width. */
5885 	break;		/* So match right here or not at all. */
5886     }
5887 
5888     if (hardcount)
5889 	c = hardcount;
5890     else
5891 	c = scan - PL_reginput;
5892     PL_reginput = scan;
5893 
5894     DEBUG_r({
5895 	GET_RE_DEBUG_FLAGS_DECL;
5896 	DEBUG_EXECUTE_r({
5897 	    SV * const prop = sv_newmortal();
5898 	    regprop(prog, prop, p);
5899 	    PerlIO_printf(Perl_debug_log,
5900 			"%*s  %s can match %"IVdf" times out of %"IVdf"...\n",
5901 			REPORT_CODE_OFF + depth*2, "", SvPVX_const(prop),(IV)c,(IV)max);
5902 	});
5903     });
5904 
5905     return(c);
5906 }
5907 
5908 
5909 #if !defined(PERL_IN_XSUB_RE) || defined(PLUGGABLE_RE_EXTENSION)
5910 /*
5911 - regclass_swash - prepare the utf8 swash
5912 */
5913 
5914 SV *
5915 Perl_regclass_swash(pTHX_ const regexp *prog, register const regnode* node, bool doinit, SV** listsvp, SV **altsvp)
5916 {
5917     dVAR;
5918     SV *sw  = NULL;
5919     SV *si  = NULL;
5920     SV *alt = NULL;
5921     RXi_GET_DECL(prog,progi);
5922     const struct reg_data * const data = prog ? progi->data : NULL;
5923 
5924     PERL_ARGS_ASSERT_REGCLASS_SWASH;
5925 
5926     if (data && data->count) {
5927 	const U32 n = ARG(node);
5928 
5929 	if (data->what[n] == 's') {
5930 	    SV * const rv = MUTABLE_SV(data->data[n]);
5931 	    AV * const av = MUTABLE_AV(SvRV(rv));
5932 	    SV **const ary = AvARRAY(av);
5933 	    SV **a, **b;
5934 
5935 	    /* See the end of regcomp.c:S_regclass() for
5936 	     * documentation of these array elements. */
5937 
5938 	    si = *ary;
5939 	    a  = SvROK(ary[1]) ? &ary[1] : NULL;
5940 	    b  = SvTYPE(ary[2]) == SVt_PVAV ? &ary[2] : NULL;
5941 
5942 	    if (a)
5943 		sw = *a;
5944 	    else if (si && doinit) {
5945 		sw = swash_init("utf8", "", si, 1, 0);
5946 		(void)av_store(av, 1, sw);
5947 	    }
5948 	    if (b)
5949 	        alt = *b;
5950 	}
5951     }
5952 
5953     if (listsvp)
5954 	*listsvp = si;
5955     if (altsvp)
5956 	*altsvp  = alt;
5957 
5958     return sw;
5959 }
5960 #endif
5961 
5962 /*
5963  - reginclass - determine if a character falls into a character class
5964 
5965   The n is the ANYOF regnode, the p is the target string, lenp
5966   is pointer to the maximum length of how far to go in the p
5967   (if the lenp is zero, UTF8SKIP(p) is used),
5968   do_utf8 tells whether the target string is in UTF-8.
5969 
5970  */
5971 
5972 STATIC bool
5973 S_reginclass(pTHX_ const regexp *prog, register const regnode *n, register const U8* p, STRLEN* lenp, register bool do_utf8)
5974 {
5975     dVAR;
5976     const char flags = ANYOF_FLAGS(n);
5977     bool match = FALSE;
5978     UV c = *p;
5979     STRLEN len = 0;
5980     STRLEN plen;
5981 
5982     PERL_ARGS_ASSERT_REGINCLASS;
5983 
5984     if (do_utf8 && !UTF8_IS_INVARIANT(c)) {
5985 	c = utf8n_to_uvchr(p, UTF8_MAXBYTES, &len,
5986 		(UTF8_ALLOW_DEFAULT & UTF8_ALLOW_ANYUV)
5987 		| UTF8_ALLOW_FFFF | UTF8_CHECK_ONLY);
5988 		/* see [perl #37836] for UTF8_ALLOW_ANYUV; [perl #38293] for
5989 		 * UTF8_ALLOW_FFFF */
5990 	if (len == (STRLEN)-1)
5991 	    Perl_croak(aTHX_ "Malformed UTF-8 character (fatal)");
5992     }
5993 
5994     plen = lenp ? *lenp : UNISKIP(NATIVE_TO_UNI(c));
5995     if (do_utf8 || (flags & ANYOF_UNICODE)) {
5996         if (lenp)
5997 	    *lenp = 0;
5998 	if (do_utf8 && !ANYOF_RUNTIME(n)) {
5999 	    if (len != (STRLEN)-1 && c < 256 && ANYOF_BITMAP_TEST(n, c))
6000 		match = TRUE;
6001 	}
6002 	if (!match && do_utf8 && (flags & ANYOF_UNICODE_ALL) && c >= 256)
6003 	    match = TRUE;
6004 	if (!match) {
6005 	    AV *av;
6006 	    SV * const sw = regclass_swash(prog, n, TRUE, 0, (SV**)&av);
6007 
6008 	    if (sw) {
6009 		U8 * utf8_p;
6010 		if (do_utf8) {
6011 		    utf8_p = (U8 *) p;
6012 		} else {
6013 		    STRLEN len = 1;
6014 		    utf8_p = bytes_to_utf8(p, &len);
6015 		}
6016 		if (swash_fetch(sw, utf8_p, 1))
6017 		    match = TRUE;
6018 		else if (flags & ANYOF_FOLD) {
6019 		    if (!match && lenp && av) {
6020 		        I32 i;
6021 			for (i = 0; i <= av_len(av); i++) {
6022 			    SV* const sv = *av_fetch(av, i, FALSE);
6023 			    STRLEN len;
6024 			    const char * const s = SvPV_const(sv, len);
6025 			    if (len <= plen && memEQ(s, (char*)utf8_p, len)) {
6026 			        *lenp = len;
6027 				match = TRUE;
6028 				break;
6029 			    }
6030 			}
6031 		    }
6032 		    if (!match) {
6033 		        U8 tmpbuf[UTF8_MAXBYTES_CASE+1];
6034 
6035 			STRLEN tmplen;
6036 			to_utf8_fold(utf8_p, tmpbuf, &tmplen);
6037 			if (swash_fetch(sw, tmpbuf, 1))
6038 			    match = TRUE;
6039 		    }
6040 		}
6041 
6042 		/* If we allocated a string above, free it */
6043 		if (! do_utf8) Safefree(utf8_p);
6044 	    }
6045 	}
6046 	if (match && lenp && *lenp == 0)
6047 	    *lenp = UNISKIP(NATIVE_TO_UNI(c));
6048     }
6049     if (!match && c < 256) {
6050 	if (ANYOF_BITMAP_TEST(n, c))
6051 	    match = TRUE;
6052 	else if (flags & ANYOF_FOLD) {
6053 	    U8 f;
6054 
6055 	    if (flags & ANYOF_LOCALE) {
6056 		PL_reg_flags |= RF_tainted;
6057 		f = PL_fold_locale[c];
6058 	    }
6059 	    else
6060 		f = PL_fold[c];
6061 	    if (f != c && ANYOF_BITMAP_TEST(n, f))
6062 		match = TRUE;
6063 	}
6064 
6065 	if (!match && (flags & ANYOF_CLASS)) {
6066 	    PL_reg_flags |= RF_tainted;
6067 	    if (
6068 		(ANYOF_CLASS_TEST(n, ANYOF_ALNUM)   &&  isALNUM_LC(c))  ||
6069 		(ANYOF_CLASS_TEST(n, ANYOF_NALNUM)  && !isALNUM_LC(c))  ||
6070 		(ANYOF_CLASS_TEST(n, ANYOF_SPACE)   &&  isSPACE_LC(c))  ||
6071 		(ANYOF_CLASS_TEST(n, ANYOF_NSPACE)  && !isSPACE_LC(c))  ||
6072 		(ANYOF_CLASS_TEST(n, ANYOF_DIGIT)   &&  isDIGIT_LC(c))  ||
6073 		(ANYOF_CLASS_TEST(n, ANYOF_NDIGIT)  && !isDIGIT_LC(c))  ||
6074 		(ANYOF_CLASS_TEST(n, ANYOF_ALNUMC)  &&  isALNUMC_LC(c)) ||
6075 		(ANYOF_CLASS_TEST(n, ANYOF_NALNUMC) && !isALNUMC_LC(c)) ||
6076 		(ANYOF_CLASS_TEST(n, ANYOF_ALPHA)   &&  isALPHA_LC(c))  ||
6077 		(ANYOF_CLASS_TEST(n, ANYOF_NALPHA)  && !isALPHA_LC(c))  ||
6078 		(ANYOF_CLASS_TEST(n, ANYOF_ASCII)   &&  isASCII(c))     ||
6079 		(ANYOF_CLASS_TEST(n, ANYOF_NASCII)  && !isASCII(c))     ||
6080 		(ANYOF_CLASS_TEST(n, ANYOF_CNTRL)   &&  isCNTRL_LC(c))  ||
6081 		(ANYOF_CLASS_TEST(n, ANYOF_NCNTRL)  && !isCNTRL_LC(c))  ||
6082 		(ANYOF_CLASS_TEST(n, ANYOF_GRAPH)   &&  isGRAPH_LC(c))  ||
6083 		(ANYOF_CLASS_TEST(n, ANYOF_NGRAPH)  && !isGRAPH_LC(c))  ||
6084 		(ANYOF_CLASS_TEST(n, ANYOF_LOWER)   &&  isLOWER_LC(c))  ||
6085 		(ANYOF_CLASS_TEST(n, ANYOF_NLOWER)  && !isLOWER_LC(c))  ||
6086 		(ANYOF_CLASS_TEST(n, ANYOF_PRINT)   &&  isPRINT_LC(c))  ||
6087 		(ANYOF_CLASS_TEST(n, ANYOF_NPRINT)  && !isPRINT_LC(c))  ||
6088 		(ANYOF_CLASS_TEST(n, ANYOF_PUNCT)   &&  isPUNCT_LC(c))  ||
6089 		(ANYOF_CLASS_TEST(n, ANYOF_NPUNCT)  && !isPUNCT_LC(c))  ||
6090 		(ANYOF_CLASS_TEST(n, ANYOF_UPPER)   &&  isUPPER_LC(c))  ||
6091 		(ANYOF_CLASS_TEST(n, ANYOF_NUPPER)  && !isUPPER_LC(c))  ||
6092 		(ANYOF_CLASS_TEST(n, ANYOF_XDIGIT)  &&  isXDIGIT(c))    ||
6093 		(ANYOF_CLASS_TEST(n, ANYOF_NXDIGIT) && !isXDIGIT(c))    ||
6094 		(ANYOF_CLASS_TEST(n, ANYOF_PSXSPC)  &&  isPSXSPC(c))    ||
6095 		(ANYOF_CLASS_TEST(n, ANYOF_NPSXSPC) && !isPSXSPC(c))    ||
6096 		(ANYOF_CLASS_TEST(n, ANYOF_BLANK)   &&  isBLANK(c))     ||
6097 		(ANYOF_CLASS_TEST(n, ANYOF_NBLANK)  && !isBLANK(c))
6098 		) /* How's that for a conditional? */
6099 	    {
6100 		match = TRUE;
6101 	    }
6102 	}
6103     }
6104 
6105     return (flags & ANYOF_INVERT) ? !match : match;
6106 }
6107 
6108 STATIC U8 *
6109 S_reghop3(U8 *s, I32 off, const U8* lim)
6110 {
6111     dVAR;
6112 
6113     PERL_ARGS_ASSERT_REGHOP3;
6114 
6115     if (off >= 0) {
6116 	while (off-- && s < lim) {
6117 	    /* XXX could check well-formedness here */
6118 	    s += UTF8SKIP(s);
6119 	}
6120     }
6121     else {
6122         while (off++ && s > lim) {
6123             s--;
6124             if (UTF8_IS_CONTINUED(*s)) {
6125                 while (s > lim && UTF8_IS_CONTINUATION(*s))
6126                     s--;
6127 	    }
6128             /* XXX could check well-formedness here */
6129 	}
6130     }
6131     return s;
6132 }
6133 
6134 #ifdef XXX_dmq
6135 /* there are a bunch of places where we use two reghop3's that should
6136    be replaced with this routine. but since thats not done yet
6137    we ifdef it out - dmq
6138 */
6139 STATIC U8 *
6140 S_reghop4(U8 *s, I32 off, const U8* llim, const U8* rlim)
6141 {
6142     dVAR;
6143 
6144     PERL_ARGS_ASSERT_REGHOP4;
6145 
6146     if (off >= 0) {
6147         while (off-- && s < rlim) {
6148             /* XXX could check well-formedness here */
6149             s += UTF8SKIP(s);
6150         }
6151     }
6152     else {
6153         while (off++ && s > llim) {
6154             s--;
6155             if (UTF8_IS_CONTINUED(*s)) {
6156                 while (s > llim && UTF8_IS_CONTINUATION(*s))
6157                     s--;
6158             }
6159             /* XXX could check well-formedness here */
6160         }
6161     }
6162     return s;
6163 }
6164 #endif
6165 
6166 STATIC U8 *
6167 S_reghopmaybe3(U8* s, I32 off, const U8* lim)
6168 {
6169     dVAR;
6170 
6171     PERL_ARGS_ASSERT_REGHOPMAYBE3;
6172 
6173     if (off >= 0) {
6174 	while (off-- && s < lim) {
6175 	    /* XXX could check well-formedness here */
6176 	    s += UTF8SKIP(s);
6177 	}
6178 	if (off >= 0)
6179 	    return NULL;
6180     }
6181     else {
6182         while (off++ && s > lim) {
6183             s--;
6184             if (UTF8_IS_CONTINUED(*s)) {
6185                 while (s > lim && UTF8_IS_CONTINUATION(*s))
6186                     s--;
6187 	    }
6188             /* XXX could check well-formedness here */
6189 	}
6190 	if (off <= 0)
6191 	    return NULL;
6192     }
6193     return s;
6194 }
6195 
6196 static void
6197 restore_pos(pTHX_ void *arg)
6198 {
6199     dVAR;
6200     regexp * const rex = (regexp *)arg;
6201     if (PL_reg_eval_set) {
6202 	if (PL_reg_oldsaved) {
6203 	    rex->subbeg = PL_reg_oldsaved;
6204 	    rex->sublen = PL_reg_oldsavedlen;
6205 #ifdef PERL_OLD_COPY_ON_WRITE
6206 	    rex->saved_copy = PL_nrs;
6207 #endif
6208 	    RXp_MATCH_COPIED_on(rex);
6209 	}
6210 	PL_reg_magic->mg_len = PL_reg_oldpos;
6211 	PL_reg_eval_set = 0;
6212 	PL_curpm = PL_reg_oldcurpm;
6213     }
6214 }
6215 
6216 STATIC void
6217 S_to_utf8_substr(pTHX_ register regexp *prog)
6218 {
6219     int i = 1;
6220 
6221     PERL_ARGS_ASSERT_TO_UTF8_SUBSTR;
6222 
6223     do {
6224 	if (prog->substrs->data[i].substr
6225 	    && !prog->substrs->data[i].utf8_substr) {
6226 	    SV* const sv = newSVsv(prog->substrs->data[i].substr);
6227 	    prog->substrs->data[i].utf8_substr = sv;
6228 	    sv_utf8_upgrade(sv);
6229 	    if (SvVALID(prog->substrs->data[i].substr)) {
6230 		const U8 flags = BmFLAGS(prog->substrs->data[i].substr);
6231 		if (flags & FBMcf_TAIL) {
6232 		    /* Trim the trailing \n that fbm_compile added last
6233 		       time.  */
6234 		    SvCUR_set(sv, SvCUR(sv) - 1);
6235 		    /* Whilst this makes the SV technically "invalid" (as its
6236 		       buffer is no longer followed by "\0") when fbm_compile()
6237 		       adds the "\n" back, a "\0" is restored.  */
6238 		}
6239 		fbm_compile(sv, flags);
6240 	    }
6241 	    if (prog->substrs->data[i].substr == prog->check_substr)
6242 		prog->check_utf8 = sv;
6243 	}
6244     } while (i--);
6245 }
6246 
6247 STATIC void
6248 S_to_byte_substr(pTHX_ register regexp *prog)
6249 {
6250     dVAR;
6251     int i = 1;
6252 
6253     PERL_ARGS_ASSERT_TO_BYTE_SUBSTR;
6254 
6255     do {
6256 	if (prog->substrs->data[i].utf8_substr
6257 	    && !prog->substrs->data[i].substr) {
6258 	    SV* sv = newSVsv(prog->substrs->data[i].utf8_substr);
6259 	    if (sv_utf8_downgrade(sv, TRUE)) {
6260 		if (SvVALID(prog->substrs->data[i].utf8_substr)) {
6261 		    const U8 flags
6262 			= BmFLAGS(prog->substrs->data[i].utf8_substr);
6263 		    if (flags & FBMcf_TAIL) {
6264 			/* Trim the trailing \n that fbm_compile added last
6265 			   time.  */
6266 			SvCUR_set(sv, SvCUR(sv) - 1);
6267 		    }
6268 		    fbm_compile(sv, flags);
6269 		}
6270 	    } else {
6271 		SvREFCNT_dec(sv);
6272 		sv = &PL_sv_undef;
6273 	    }
6274 	    prog->substrs->data[i].substr = sv;
6275 	    if (prog->substrs->data[i].utf8_substr == prog->check_utf8)
6276 		prog->check_substr = sv;
6277 	}
6278     } while (i--);
6279 }
6280 
6281 /*
6282  * Local variables:
6283  * c-indentation-style: bsd
6284  * c-basic-offset: 4
6285  * indent-tabs-mode: t
6286  * End:
6287  *
6288  * ex: set ts=8 sts=4 sw=4 noet:
6289  */
6290