xref: /openbsd-src/gnu/usr.bin/perl/regexec.c (revision a28daedfc357b214be5c701aa8ba8adb29a7f1c2)
1 /*    regexec.c
2  */
3 
4 /*
5  * "One Ring to rule them all, One Ring to find them..."
6  */
7 
8 /* This file contains functions for executing a regular expression.  See
9  * also regcomp.c which funnily enough, contains functions for compiling
10  * a regular expression.
11  *
12  * This file is also copied at build time to ext/re/re_exec.c, where
13  * it's built with -DPERL_EXT_RE_BUILD -DPERL_EXT_RE_DEBUG -DPERL_EXT.
14  * This causes the main functions to be compiled under new names and with
15  * debugging support added, which makes "use re 'debug'" work.
16  */
17 
18 /* NOTE: this is derived from Henry Spencer's regexp code, and should not
19  * confused with the original package (see point 3 below).  Thanks, Henry!
20  */
21 
22 /* Additional note: this code is very heavily munged from Henry's version
23  * in places.  In some spots I've traded clarity for efficiency, so don't
24  * blame Henry for some of the lack of readability.
25  */
26 
27 /* The names of the functions have been changed from regcomp and
28  * regexec to  pregcomp and pregexec in order to avoid conflicts
29  * with the POSIX routines of the same names.
30 */
31 
32 #ifdef PERL_EXT_RE_BUILD
33 #include "re_top.h"
34 #endif
35 
36 /*
37  * pregcomp and pregexec -- regsub and regerror are not used in perl
38  *
39  *	Copyright (c) 1986 by University of Toronto.
40  *	Written by Henry Spencer.  Not derived from licensed software.
41  *
42  *	Permission is granted to anyone to use this software for any
43  *	purpose on any computer system, and to redistribute it freely,
44  *	subject to the following restrictions:
45  *
46  *	1. The author is not responsible for the consequences of use of
47  *		this software, no matter how awful, even if they arise
48  *		from defects in it.
49  *
50  *	2. The origin of this software must not be misrepresented, either
51  *		by explicit claim or by omission.
52  *
53  *	3. Altered versions must be plainly marked as such, and must not
54  *		be misrepresented as being the original software.
55  *
56  ****    Alterations to Henry's code are...
57  ****
58  ****    Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999,
59  ****    2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007 by Larry Wall and others
60  ****
61  ****    You may distribute under the terms of either the GNU General Public
62  ****    License or the Artistic License, as specified in the README file.
63  *
64  * Beware that some of this code is subtly aware of the way operator
65  * precedence is structured in regular expressions.  Serious changes in
66  * regular-expression syntax might require a total rethink.
67  */
68 #include "EXTERN.h"
69 #define PERL_IN_REGEXEC_C
70 #include "perl.h"
71 
72 #ifdef PERL_IN_XSUB_RE
73 #  include "re_comp.h"
74 #else
75 #  include "regcomp.h"
76 #endif
77 
78 #define RF_tainted	1		/* tainted information used? */
79 #define RF_warned	2		/* warned about big count? */
80 
81 #define RF_utf8		8		/* Pattern contains multibyte chars? */
82 
83 #define UTF ((PL_reg_flags & RF_utf8) != 0)
84 
85 #define RS_init		1		/* eval environment created */
86 #define RS_set		2		/* replsv value is set */
87 
88 #ifndef STATIC
89 #define	STATIC	static
90 #endif
91 
92 #define REGINCLASS(prog,p,c)  (ANYOF_FLAGS(p) ? reginclass(prog,p,c,0,0) : ANYOF_BITMAP_TEST(p,*(c)))
93 
94 /*
95  * Forwards.
96  */
97 
98 #define CHR_SVLEN(sv) (do_utf8 ? sv_len_utf8(sv) : SvCUR(sv))
99 #define CHR_DIST(a,b) (PL_reg_match_utf8 ? utf8_distance(a,b) : a - b)
100 
101 #define HOPc(pos,off) \
102 	(char *)(PL_reg_match_utf8 \
103 	    ? reghop3((U8*)pos, off, (U8*)(off >= 0 ? PL_regeol : PL_bostr)) \
104 	    : (U8*)(pos + off))
105 #define HOPBACKc(pos, off) \
106 	(char*)(PL_reg_match_utf8\
107 	    ? reghopmaybe3((U8*)pos, -off, (U8*)PL_bostr) \
108 	    : (pos - off >= PL_bostr)		\
109 		? (U8*)pos - off		\
110 		: NULL)
111 
112 #define HOP3(pos,off,lim) (PL_reg_match_utf8 ? reghop3((U8*)(pos), off, (U8*)(lim)) : (U8*)(pos + off))
113 #define HOP3c(pos,off,lim) ((char*)HOP3(pos,off,lim))
114 
115 #define LOAD_UTF8_CHARCLASS(class,str) STMT_START { \
116     if (!CAT2(PL_utf8_,class)) { bool ok; ENTER; save_re_context(); ok=CAT2(is_utf8_,class)((const U8*)str); assert(ok); LEAVE; } } STMT_END
117 #define LOAD_UTF8_CHARCLASS_ALNUM() LOAD_UTF8_CHARCLASS(alnum,"a")
118 #define LOAD_UTF8_CHARCLASS_DIGIT() LOAD_UTF8_CHARCLASS(digit,"0")
119 #define LOAD_UTF8_CHARCLASS_SPACE() LOAD_UTF8_CHARCLASS(space," ")
120 #define LOAD_UTF8_CHARCLASS_MARK()  LOAD_UTF8_CHARCLASS(mark, "\xcd\x86")
121 
122 /* TODO: Combine JUMPABLE and HAS_TEXT to cache OP(rn) */
123 
124 /* for use after a quantifier and before an EXACT-like node -- japhy */
125 /* it would be nice to rework regcomp.sym to generate this stuff. sigh */
126 #define JUMPABLE(rn) (      \
127     OP(rn) == OPEN ||       \
128     (OP(rn) == CLOSE && (!cur_eval || cur_eval->u.eval.close_paren != ARG(rn))) || \
129     OP(rn) == EVAL ||   \
130     OP(rn) == SUSPEND || OP(rn) == IFMATCH || \
131     OP(rn) == PLUS || OP(rn) == MINMOD || \
132     OP(rn) == KEEPS || (PL_regkind[OP(rn)] == VERB) || \
133     (PL_regkind[OP(rn)] == CURLY && ARG1(rn) > 0) \
134 )
135 #define IS_EXACT(rn) (PL_regkind[OP(rn)] == EXACT)
136 
137 #define HAS_TEXT(rn) ( IS_EXACT(rn) || PL_regkind[OP(rn)] == REF )
138 
139 #if 0
140 /* Currently these are only used when PL_regkind[OP(rn)] == EXACT so
141    we don't need this definition. */
142 #define IS_TEXT(rn)   ( OP(rn)==EXACT   || OP(rn)==REF   || OP(rn)==NREF   )
143 #define IS_TEXTF(rn)  ( OP(rn)==EXACTF  || OP(rn)==REFF  || OP(rn)==NREFF  )
144 #define IS_TEXTFL(rn) ( OP(rn)==EXACTFL || OP(rn)==REFFL || OP(rn)==NREFFL )
145 
146 #else
147 /* ... so we use this as its faster. */
148 #define IS_TEXT(rn)   ( OP(rn)==EXACT   )
149 #define IS_TEXTF(rn)  ( OP(rn)==EXACTF  )
150 #define IS_TEXTFL(rn) ( OP(rn)==EXACTFL )
151 
152 #endif
153 
154 /*
155   Search for mandatory following text node; for lookahead, the text must
156   follow but for lookbehind (rn->flags != 0) we skip to the next step.
157 */
158 #define FIND_NEXT_IMPT(rn) STMT_START { \
159     while (JUMPABLE(rn)) { \
160 	const OPCODE type = OP(rn); \
161 	if (type == SUSPEND || PL_regkind[type] == CURLY) \
162 	    rn = NEXTOPER(NEXTOPER(rn)); \
163 	else if (type == PLUS) \
164 	    rn = NEXTOPER(rn); \
165 	else if (type == IFMATCH) \
166 	    rn = (rn->flags == 0) ? NEXTOPER(NEXTOPER(rn)) : rn + ARG(rn); \
167 	else rn += NEXT_OFF(rn); \
168     } \
169 } STMT_END
170 
171 
172 static void restore_pos(pTHX_ void *arg);
173 
174 STATIC CHECKPOINT
175 S_regcppush(pTHX_ I32 parenfloor)
176 {
177     dVAR;
178     const int retval = PL_savestack_ix;
179 #define REGCP_PAREN_ELEMS 4
180     const int paren_elems_to_push = (PL_regsize - parenfloor) * REGCP_PAREN_ELEMS;
181     int p;
182     GET_RE_DEBUG_FLAGS_DECL;
183 
184     if (paren_elems_to_push < 0)
185 	Perl_croak(aTHX_ "panic: paren_elems_to_push < 0");
186 
187 #define REGCP_OTHER_ELEMS 7
188     SSGROW(paren_elems_to_push + REGCP_OTHER_ELEMS);
189 
190     for (p = PL_regsize; p > parenfloor; p--) {
191 /* REGCP_PARENS_ELEMS are pushed per pairs of parentheses. */
192 	SSPUSHINT(PL_regoffs[p].end);
193 	SSPUSHINT(PL_regoffs[p].start);
194 	SSPUSHPTR(PL_reg_start_tmp[p]);
195 	SSPUSHINT(p);
196 	DEBUG_BUFFERS_r(PerlIO_printf(Perl_debug_log,
197 	  "     saving \\%"UVuf" %"IVdf"(%"IVdf")..%"IVdf"\n",
198 		      (UV)p, (IV)PL_regoffs[p].start,
199 		      (IV)(PL_reg_start_tmp[p] - PL_bostr),
200 		      (IV)PL_regoffs[p].end
201 	));
202     }
203 /* REGCP_OTHER_ELEMS are pushed in any case, parentheses or no. */
204     SSPUSHPTR(PL_regoffs);
205     SSPUSHINT(PL_regsize);
206     SSPUSHINT(*PL_reglastparen);
207     SSPUSHINT(*PL_reglastcloseparen);
208     SSPUSHPTR(PL_reginput);
209 #define REGCP_FRAME_ELEMS 2
210 /* REGCP_FRAME_ELEMS are part of the REGCP_OTHER_ELEMS and
211  * are needed for the regexp context stack bookkeeping. */
212     SSPUSHINT(paren_elems_to_push + REGCP_OTHER_ELEMS - REGCP_FRAME_ELEMS);
213     SSPUSHINT(SAVEt_REGCONTEXT); /* Magic cookie. */
214 
215     return retval;
216 }
217 
218 /* These are needed since we do not localize EVAL nodes: */
219 #define REGCP_SET(cp)                                           \
220     DEBUG_STATE_r(                                              \
221             PerlIO_printf(Perl_debug_log,		        \
222 	        "  Setting an EVAL scope, savestack=%"IVdf"\n",	\
223 	        (IV)PL_savestack_ix));                          \
224     cp = PL_savestack_ix
225 
226 #define REGCP_UNWIND(cp)                                        \
227     DEBUG_STATE_r(                                              \
228         if (cp != PL_savestack_ix) 		                \
229     	    PerlIO_printf(Perl_debug_log,		        \
230 		"  Clearing an EVAL scope, savestack=%"IVdf"..%"IVdf"\n", \
231 	        (IV)(cp), (IV)PL_savestack_ix));                \
232     regcpblow(cp)
233 
234 STATIC char *
235 S_regcppop(pTHX_ const regexp *rex)
236 {
237     dVAR;
238     U32 i;
239     char *input;
240 
241     GET_RE_DEBUG_FLAGS_DECL;
242 
243     /* Pop REGCP_OTHER_ELEMS before the parentheses loop starts. */
244     i = SSPOPINT;
245     assert(i == SAVEt_REGCONTEXT); /* Check that the magic cookie is there. */
246     i = SSPOPINT; /* Parentheses elements to pop. */
247     input = (char *) SSPOPPTR;
248     *PL_reglastcloseparen = SSPOPINT;
249     *PL_reglastparen = SSPOPINT;
250     PL_regsize = SSPOPINT;
251     PL_regoffs=(regexp_paren_pair *) SSPOPPTR;
252 
253 
254     /* Now restore the parentheses context. */
255     for (i -= (REGCP_OTHER_ELEMS - REGCP_FRAME_ELEMS);
256 	 i > 0; i -= REGCP_PAREN_ELEMS) {
257 	I32 tmps;
258 	U32 paren = (U32)SSPOPINT;
259 	PL_reg_start_tmp[paren] = (char *) SSPOPPTR;
260 	PL_regoffs[paren].start = SSPOPINT;
261 	tmps = SSPOPINT;
262 	if (paren <= *PL_reglastparen)
263 	    PL_regoffs[paren].end = tmps;
264 	DEBUG_BUFFERS_r(
265 	    PerlIO_printf(Perl_debug_log,
266 			  "     restoring \\%"UVuf" to %"IVdf"(%"IVdf")..%"IVdf"%s\n",
267 			  (UV)paren, (IV)PL_regoffs[paren].start,
268 			  (IV)(PL_reg_start_tmp[paren] - PL_bostr),
269 			  (IV)PL_regoffs[paren].end,
270 			  (paren > *PL_reglastparen ? "(no)" : ""));
271 	);
272     }
273     DEBUG_BUFFERS_r(
274 	if (*PL_reglastparen + 1 <= rex->nparens) {
275 	    PerlIO_printf(Perl_debug_log,
276 			  "     restoring \\%"IVdf"..\\%"IVdf" to undef\n",
277 			  (IV)(*PL_reglastparen + 1), (IV)rex->nparens);
278 	}
279     );
280 #if 1
281     /* It would seem that the similar code in regtry()
282      * already takes care of this, and in fact it is in
283      * a better location to since this code can #if 0-ed out
284      * but the code in regtry() is needed or otherwise tests
285      * requiring null fields (pat.t#187 and split.t#{13,14}
286      * (as of patchlevel 7877)  will fail.  Then again,
287      * this code seems to be necessary or otherwise
288      * this erroneously leaves $1 defined: "1" =~ /^(?:(\d)x)?\d$/
289      * --jhi updated by dapm */
290     for (i = *PL_reglastparen + 1; i <= rex->nparens; i++) {
291 	if (i > PL_regsize)
292 	    PL_regoffs[i].start = -1;
293 	PL_regoffs[i].end = -1;
294     }
295 #endif
296     return input;
297 }
298 
299 #define regcpblow(cp) LEAVE_SCOPE(cp)	/* Ignores regcppush()ed data. */
300 
301 /*
302  * pregexec and friends
303  */
304 
305 #ifndef PERL_IN_XSUB_RE
306 /*
307  - pregexec - match a regexp against a string
308  */
309 I32
310 Perl_pregexec(pTHX_ REGEXP * const prog, char* stringarg, register char *strend,
311 	 char *strbeg, I32 minend, SV *screamer, U32 nosave)
312 /* strend: pointer to null at end of string */
313 /* strbeg: real beginning of string */
314 /* minend: end of match must be >=minend after stringarg. */
315 /* nosave: For optimizations. */
316 {
317     return
318 	regexec_flags(prog, stringarg, strend, strbeg, minend, screamer, NULL,
319 		      nosave ? 0 : REXEC_COPY_STR);
320 }
321 #endif
322 
323 /*
324  * Need to implement the following flags for reg_anch:
325  *
326  * USE_INTUIT_NOML		- Useful to call re_intuit_start() first
327  * USE_INTUIT_ML
328  * INTUIT_AUTORITATIVE_NOML	- Can trust a positive answer
329  * INTUIT_AUTORITATIVE_ML
330  * INTUIT_ONCE_NOML		- Intuit can match in one location only.
331  * INTUIT_ONCE_ML
332  *
333  * Another flag for this function: SECOND_TIME (so that float substrs
334  * with giant delta may be not rechecked).
335  */
336 
337 /* Assumptions: if ANCH_GPOS, then strpos is anchored. XXXX Check GPOS logic */
338 
339 /* If SCREAM, then SvPVX_const(sv) should be compatible with strpos and strend.
340    Otherwise, only SvCUR(sv) is used to get strbeg. */
341 
342 /* XXXX We assume that strpos is strbeg unless sv. */
343 
344 /* XXXX Some places assume that there is a fixed substring.
345 	An update may be needed if optimizer marks as "INTUITable"
346 	RExen without fixed substrings.  Similarly, it is assumed that
347 	lengths of all the strings are no more than minlen, thus they
348 	cannot come from lookahead.
349 	(Or minlen should take into account lookahead.)
350   NOTE: Some of this comment is not correct. minlen does now take account
351   of lookahead/behind. Further research is required. -- demerphq
352 
353 */
354 
355 /* A failure to find a constant substring means that there is no need to make
356    an expensive call to REx engine, thus we celebrate a failure.  Similarly,
357    finding a substring too deep into the string means that less calls to
358    regtry() should be needed.
359 
360    REx compiler's optimizer found 4 possible hints:
361 	a) Anchored substring;
362 	b) Fixed substring;
363 	c) Whether we are anchored (beginning-of-line or \G);
364 	d) First node (of those at offset 0) which may distingush positions;
365    We use a)b)d) and multiline-part of c), and try to find a position in the
366    string which does not contradict any of them.
367  */
368 
369 /* Most of decisions we do here should have been done at compile time.
370    The nodes of the REx which we used for the search should have been
371    deleted from the finite automaton. */
372 
373 char *
374 Perl_re_intuit_start(pTHX_ REGEXP * const prog, SV *sv, char *strpos,
375 		     char *strend, const U32 flags, re_scream_pos_data *data)
376 {
377     dVAR;
378     register I32 start_shift = 0;
379     /* Should be nonnegative! */
380     register I32 end_shift   = 0;
381     register char *s;
382     register SV *check;
383     char *strbeg;
384     char *t;
385     const bool do_utf8 = (sv && SvUTF8(sv)) ? 1 : 0; /* if no sv we have to assume bytes */
386     I32 ml_anch;
387     register char *other_last = NULL;	/* other substr checked before this */
388     char *check_at = NULL;		/* check substr found at this pos */
389     const I32 multiline = prog->extflags & RXf_PMf_MULTILINE;
390     RXi_GET_DECL(prog,progi);
391 #ifdef DEBUGGING
392     const char * const i_strpos = strpos;
393 #endif
394 
395     GET_RE_DEBUG_FLAGS_DECL;
396 
397     RX_MATCH_UTF8_set(prog,do_utf8);
398 
399     if (prog->extflags & RXf_UTF8) {
400 	PL_reg_flags |= RF_utf8;
401     }
402     DEBUG_EXECUTE_r(
403         debug_start_match(prog, do_utf8, strpos, strend,
404             sv ? "Guessing start of match in sv for"
405                : "Guessing start of match in string for");
406 	      );
407 
408     /* CHR_DIST() would be more correct here but it makes things slow. */
409     if (prog->minlen > strend - strpos) {
410 	DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
411 			      "String too short... [re_intuit_start]\n"));
412 	goto fail;
413     }
414 
415     strbeg = (sv && SvPOK(sv)) ? strend - SvCUR(sv) : strpos;
416     PL_regeol = strend;
417     if (do_utf8) {
418 	if (!prog->check_utf8 && prog->check_substr)
419 	    to_utf8_substr(prog);
420 	check = prog->check_utf8;
421     } else {
422 	if (!prog->check_substr && prog->check_utf8)
423 	    to_byte_substr(prog);
424 	check = prog->check_substr;
425     }
426     if (check == &PL_sv_undef) {
427 	DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
428 		"Non-utf8 string cannot match utf8 check string\n"));
429 	goto fail;
430     }
431     if (prog->extflags & RXf_ANCH) {	/* Match at beg-of-str or after \n */
432 	ml_anch = !( (prog->extflags & RXf_ANCH_SINGLE)
433 		     || ( (prog->extflags & RXf_ANCH_BOL)
434 			  && !multiline ) );	/* Check after \n? */
435 
436 	if (!ml_anch) {
437 	  if ( !(prog->extflags & RXf_ANCH_GPOS) /* Checked by the caller */
438 		&& !(prog->intflags & PREGf_IMPLICIT) /* not a real BOL */
439 	       /* SvCUR is not set on references: SvRV and SvPVX_const overlap */
440 	       && sv && !SvROK(sv)
441 	       && (strpos != strbeg)) {
442 	      DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "Not at start...\n"));
443 	      goto fail;
444 	  }
445 	  if (prog->check_offset_min == prog->check_offset_max &&
446 	      !(prog->extflags & RXf_CANY_SEEN)) {
447 	    /* Substring at constant offset from beg-of-str... */
448 	    I32 slen;
449 
450 	    s = HOP3c(strpos, prog->check_offset_min, strend);
451 
452 	    if (SvTAIL(check)) {
453 		slen = SvCUR(check);	/* >= 1 */
454 
455 		if ( strend - s > slen || strend - s < slen - 1
456 		     || (strend - s == slen && strend[-1] != '\n')) {
457 		    DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "String too long...\n"));
458 		    goto fail_finish;
459 		}
460 		/* Now should match s[0..slen-2] */
461 		slen--;
462 		if (slen && (*SvPVX_const(check) != *s
463 			     || (slen > 1
464 				 && memNE(SvPVX_const(check), s, slen)))) {
465 		  report_neq:
466 		    DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "String not equal...\n"));
467 		    goto fail_finish;
468 		}
469 	    }
470 	    else if (*SvPVX_const(check) != *s
471 		     || ((slen = SvCUR(check)) > 1
472 			 && memNE(SvPVX_const(check), s, slen)))
473 		goto report_neq;
474 	    check_at = s;
475 	    goto success_at_start;
476 	  }
477 	}
478 	/* Match is anchored, but substr is not anchored wrt beg-of-str. */
479 	s = strpos;
480 	start_shift = prog->check_offset_min; /* okay to underestimate on CC */
481 	end_shift = prog->check_end_shift;
482 
483 	if (!ml_anch) {
484 	    const I32 end = prog->check_offset_max + CHR_SVLEN(check)
485 					 - (SvTAIL(check) != 0);
486 	    const I32 eshift = CHR_DIST((U8*)strend, (U8*)s) - end;
487 
488 	    if (end_shift < eshift)
489 		end_shift = eshift;
490 	}
491     }
492     else {				/* Can match at random position */
493 	ml_anch = 0;
494 	s = strpos;
495 	start_shift = prog->check_offset_min;  /* okay to underestimate on CC */
496 	end_shift = prog->check_end_shift;
497 
498 	/* end shift should be non negative here */
499     }
500 
501 #ifdef QDEBUGGING	/* 7/99: reports of failure (with the older version) */
502     if (end_shift < 0)
503 	Perl_croak(aTHX_ "panic: end_shift: %"IVdf" pattern:\n%s\n ",
504 		   (IV)end_shift, prog->precomp);
505 #endif
506 
507   restart:
508     /* Find a possible match in the region s..strend by looking for
509        the "check" substring in the region corrected by start/end_shift. */
510 
511     {
512         I32 srch_start_shift = start_shift;
513         I32 srch_end_shift = end_shift;
514         if (srch_start_shift < 0 && strbeg - s > srch_start_shift) {
515 	    srch_end_shift -= ((strbeg - s) - srch_start_shift);
516 	    srch_start_shift = strbeg - s;
517 	}
518     DEBUG_OPTIMISE_MORE_r({
519         PerlIO_printf(Perl_debug_log, "Check offset min: %"IVdf" Start shift: %"IVdf" End shift %"IVdf" Real End Shift: %"IVdf"\n",
520             (IV)prog->check_offset_min,
521             (IV)srch_start_shift,
522             (IV)srch_end_shift,
523             (IV)prog->check_end_shift);
524     });
525 
526     if (flags & REXEC_SCREAM) {
527 	I32 p = -1;			/* Internal iterator of scream. */
528 	I32 * const pp = data ? data->scream_pos : &p;
529 
530 	if (PL_screamfirst[BmRARE(check)] >= 0
531 	    || ( BmRARE(check) == '\n'
532 		 && (BmPREVIOUS(check) == SvCUR(check) - 1)
533 		 && SvTAIL(check) ))
534 	    s = screaminstr(sv, check,
535 			    srch_start_shift + (s - strbeg), srch_end_shift, pp, 0);
536 	else
537 	    goto fail_finish;
538 	/* we may be pointing at the wrong string */
539 	if (s && RX_MATCH_COPIED(prog))
540 	    s = strbeg + (s - SvPVX_const(sv));
541 	if (data)
542 	    *data->scream_olds = s;
543     }
544     else {
545         U8* start_point;
546         U8* end_point;
547         if (prog->extflags & RXf_CANY_SEEN) {
548             start_point= (U8*)(s + srch_start_shift);
549             end_point= (U8*)(strend - srch_end_shift);
550         } else {
551 	    start_point= HOP3(s, srch_start_shift, srch_start_shift < 0 ? strbeg : strend);
552             end_point= HOP3(strend, -srch_end_shift, strbeg);
553 	}
554 	DEBUG_OPTIMISE_MORE_r({
555             PerlIO_printf(Perl_debug_log, "fbm_instr len=%d str=<%.*s>\n",
556                 (int)(end_point - start_point),
557                 (int)(end_point - start_point) > 20 ? 20 : (int)(end_point - start_point),
558                 start_point);
559         });
560 
561 	s = fbm_instr( start_point, end_point,
562 		      check, multiline ? FBMrf_MULTILINE : 0);
563     }
564     }
565     /* Update the count-of-usability, remove useless subpatterns,
566 	unshift s.  */
567 
568     DEBUG_EXECUTE_r({
569         RE_PV_QUOTED_DECL(quoted, do_utf8, PERL_DEBUG_PAD_ZERO(0),
570             SvPVX_const(check), RE_SV_DUMPLEN(check), 30);
571         PerlIO_printf(Perl_debug_log, "%s %s substr %s%s%s",
572 			  (s ? "Found" : "Did not find"),
573 	    (check == (do_utf8 ? prog->anchored_utf8 : prog->anchored_substr)
574 	        ? "anchored" : "floating"),
575 	    quoted,
576 	    RE_SV_TAIL(check),
577 	    (s ? " at offset " : "...\n") );
578     });
579 
580     if (!s)
581 	goto fail_finish;
582     /* Finish the diagnostic message */
583     DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "%ld...\n", (long)(s - i_strpos)) );
584 
585     /* XXX dmq: first branch is for positive lookbehind...
586        Our check string is offset from the beginning of the pattern.
587        So we need to do any stclass tests offset forward from that
588        point. I think. :-(
589      */
590 
591 
592 
593     check_at=s;
594 
595 
596     /* Got a candidate.  Check MBOL anchoring, and the *other* substr.
597        Start with the other substr.
598        XXXX no SCREAM optimization yet - and a very coarse implementation
599        XXXX /ttx+/ results in anchored="ttx", floating="x".  floating will
600 		*always* match.  Probably should be marked during compile...
601        Probably it is right to do no SCREAM here...
602      */
603 
604     if (do_utf8 ? (prog->float_utf8 && prog->anchored_utf8)
605                 : (prog->float_substr && prog->anchored_substr))
606     {
607 	/* Take into account the "other" substring. */
608 	/* XXXX May be hopelessly wrong for UTF... */
609 	if (!other_last)
610 	    other_last = strpos;
611 	if (check == (do_utf8 ? prog->float_utf8 : prog->float_substr)) {
612 	  do_other_anchored:
613 	    {
614 		char * const last = HOP3c(s, -start_shift, strbeg);
615 		char *last1, *last2;
616 		char * const saved_s = s;
617 		SV* must;
618 
619 		t = s - prog->check_offset_max;
620 		if (s - strpos > prog->check_offset_max  /* signed-corrected t > strpos */
621 		    && (!do_utf8
622 			|| ((t = (char*)reghopmaybe3((U8*)s, -(prog->check_offset_max), (U8*)strpos))
623 			    && t > strpos)))
624 		    NOOP;
625 		else
626 		    t = strpos;
627 		t = HOP3c(t, prog->anchored_offset, strend);
628 		if (t < other_last)	/* These positions already checked */
629 		    t = other_last;
630 		last2 = last1 = HOP3c(strend, -prog->minlen, strbeg);
631 		if (last < last1)
632 		    last1 = last;
633                 /* XXXX It is not documented what units *_offsets are in.
634                    We assume bytes, but this is clearly wrong.
635                    Meaning this code needs to be carefully reviewed for errors.
636                    dmq.
637                   */
638 
639 		/* On end-of-str: see comment below. */
640 		must = do_utf8 ? prog->anchored_utf8 : prog->anchored_substr;
641 		if (must == &PL_sv_undef) {
642 		    s = (char*)NULL;
643 		    DEBUG_r(must = prog->anchored_utf8);	/* for debug */
644 		}
645 		else
646 		    s = fbm_instr(
647 			(unsigned char*)t,
648 			HOP3(HOP3(last1, prog->anchored_offset, strend)
649 				+ SvCUR(must), -(SvTAIL(must)!=0), strbeg),
650 			must,
651 			multiline ? FBMrf_MULTILINE : 0
652 		    );
653                 DEBUG_EXECUTE_r({
654                     RE_PV_QUOTED_DECL(quoted, do_utf8, PERL_DEBUG_PAD_ZERO(0),
655                         SvPVX_const(must), RE_SV_DUMPLEN(must), 30);
656                     PerlIO_printf(Perl_debug_log, "%s anchored substr %s%s",
657 			(s ? "Found" : "Contradicts"),
658                         quoted, RE_SV_TAIL(must));
659                 });
660 
661 
662 		if (!s) {
663 		    if (last1 >= last2) {
664 			DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
665 						", giving up...\n"));
666 			goto fail_finish;
667 		    }
668 		    DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
669 			", trying floating at offset %ld...\n",
670 			(long)(HOP3c(saved_s, 1, strend) - i_strpos)));
671 		    other_last = HOP3c(last1, prog->anchored_offset+1, strend);
672 		    s = HOP3c(last, 1, strend);
673 		    goto restart;
674 		}
675 		else {
676 		    DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, " at offset %ld...\n",
677 			  (long)(s - i_strpos)));
678 		    t = HOP3c(s, -prog->anchored_offset, strbeg);
679 		    other_last = HOP3c(s, 1, strend);
680 		    s = saved_s;
681 		    if (t == strpos)
682 			goto try_at_start;
683 		    goto try_at_offset;
684 		}
685 	    }
686 	}
687 	else {		/* Take into account the floating substring. */
688 	    char *last, *last1;
689 	    char * const saved_s = s;
690 	    SV* must;
691 
692 	    t = HOP3c(s, -start_shift, strbeg);
693 	    last1 = last =
694 		HOP3c(strend, -prog->minlen + prog->float_min_offset, strbeg);
695 	    if (CHR_DIST((U8*)last, (U8*)t) > prog->float_max_offset)
696 		last = HOP3c(t, prog->float_max_offset, strend);
697 	    s = HOP3c(t, prog->float_min_offset, strend);
698 	    if (s < other_last)
699 		s = other_last;
700  /* XXXX It is not documented what units *_offsets are in.  Assume bytes.  */
701 	    must = do_utf8 ? prog->float_utf8 : prog->float_substr;
702 	    /* fbm_instr() takes into account exact value of end-of-str
703 	       if the check is SvTAIL(ed).  Since false positives are OK,
704 	       and end-of-str is not later than strend we are OK. */
705 	    if (must == &PL_sv_undef) {
706 		s = (char*)NULL;
707 		DEBUG_r(must = prog->float_utf8);	/* for debug message */
708 	    }
709 	    else
710 		s = fbm_instr((unsigned char*)s,
711 			      (unsigned char*)last + SvCUR(must)
712 				  - (SvTAIL(must)!=0),
713 			      must, multiline ? FBMrf_MULTILINE : 0);
714 	    DEBUG_EXECUTE_r({
715 	        RE_PV_QUOTED_DECL(quoted, do_utf8, PERL_DEBUG_PAD_ZERO(0),
716 	            SvPVX_const(must), RE_SV_DUMPLEN(must), 30);
717 	        PerlIO_printf(Perl_debug_log, "%s floating substr %s%s",
718 		    (s ? "Found" : "Contradicts"),
719 		    quoted, RE_SV_TAIL(must));
720             });
721 	    if (!s) {
722 		if (last1 == last) {
723 		    DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
724 					    ", giving up...\n"));
725 		    goto fail_finish;
726 		}
727 		DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
728 		    ", trying anchored starting at offset %ld...\n",
729 		    (long)(saved_s + 1 - i_strpos)));
730 		other_last = last;
731 		s = HOP3c(t, 1, strend);
732 		goto restart;
733 	    }
734 	    else {
735 		DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, " at offset %ld...\n",
736 		      (long)(s - i_strpos)));
737 		other_last = s; /* Fix this later. --Hugo */
738 		s = saved_s;
739 		if (t == strpos)
740 		    goto try_at_start;
741 		goto try_at_offset;
742 	    }
743 	}
744     }
745 
746 
747     t= (char*)HOP3( s, -prog->check_offset_max, (prog->check_offset_max<0) ? strend : strpos);
748 
749     DEBUG_OPTIMISE_MORE_r(
750         PerlIO_printf(Perl_debug_log,
751             "Check offset min:%"IVdf" max:%"IVdf" S:%"IVdf" t:%"IVdf" D:%"IVdf" end:%"IVdf"\n",
752             (IV)prog->check_offset_min,
753             (IV)prog->check_offset_max,
754             (IV)(s-strpos),
755             (IV)(t-strpos),
756             (IV)(t-s),
757             (IV)(strend-strpos)
758         )
759     );
760 
761     if (s - strpos > prog->check_offset_max  /* signed-corrected t > strpos */
762         && (!do_utf8
763 	    || ((t = (char*)reghopmaybe3((U8*)s, -prog->check_offset_max, (U8*) ((prog->check_offset_max<0) ? strend : strpos)))
764 		 && t > strpos)))
765     {
766 	/* Fixed substring is found far enough so that the match
767 	   cannot start at strpos. */
768       try_at_offset:
769 	if (ml_anch && t[-1] != '\n') {
770 	    /* Eventually fbm_*() should handle this, but often
771 	       anchored_offset is not 0, so this check will not be wasted. */
772 	    /* XXXX In the code below we prefer to look for "^" even in
773 	       presence of anchored substrings.  And we search even
774 	       beyond the found float position.  These pessimizations
775 	       are historical artefacts only.  */
776 	  find_anchor:
777 	    while (t < strend - prog->minlen) {
778 		if (*t == '\n') {
779 		    if (t < check_at - prog->check_offset_min) {
780 			if (do_utf8 ? prog->anchored_utf8 : prog->anchored_substr) {
781 			    /* Since we moved from the found position,
782 			       we definitely contradict the found anchored
783 			       substr.  Due to the above check we do not
784 			       contradict "check" substr.
785 			       Thus we can arrive here only if check substr
786 			       is float.  Redo checking for "other"=="fixed".
787 			     */
788 			    strpos = t + 1;
789 			    DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "Found /%s^%s/m at offset %ld, rescanning for anchored from offset %ld...\n",
790 				PL_colors[0], PL_colors[1], (long)(strpos - i_strpos), (long)(strpos - i_strpos + prog->anchored_offset)));
791 			    goto do_other_anchored;
792 			}
793 			/* We don't contradict the found floating substring. */
794 			/* XXXX Why not check for STCLASS? */
795 			s = t + 1;
796 			DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "Found /%s^%s/m at offset %ld...\n",
797 			    PL_colors[0], PL_colors[1], (long)(s - i_strpos)));
798 			goto set_useful;
799 		    }
800 		    /* Position contradicts check-string */
801 		    /* XXXX probably better to look for check-string
802 		       than for "\n", so one should lower the limit for t? */
803 		    DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "Found /%s^%s/m, restarting lookup for check-string at offset %ld...\n",
804 			PL_colors[0], PL_colors[1], (long)(t + 1 - i_strpos)));
805 		    other_last = strpos = s = t + 1;
806 		    goto restart;
807 		}
808 		t++;
809 	    }
810 	    DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "Did not find /%s^%s/m...\n",
811 			PL_colors[0], PL_colors[1]));
812 	    goto fail_finish;
813 	}
814 	else {
815 	    DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "Starting position does not contradict /%s^%s/m...\n",
816 			PL_colors[0], PL_colors[1]));
817 	}
818 	s = t;
819       set_useful:
820 	++BmUSEFUL(do_utf8 ? prog->check_utf8 : prog->check_substr);	/* hooray/5 */
821     }
822     else {
823 	/* The found string does not prohibit matching at strpos,
824 	   - no optimization of calling REx engine can be performed,
825 	   unless it was an MBOL and we are not after MBOL,
826 	   or a future STCLASS check will fail this. */
827       try_at_start:
828 	/* Even in this situation we may use MBOL flag if strpos is offset
829 	   wrt the start of the string. */
830 	if (ml_anch && sv && !SvROK(sv)	/* See prev comment on SvROK */
831 	    && (strpos != strbeg) && strpos[-1] != '\n'
832 	    /* May be due to an implicit anchor of m{.*foo}  */
833 	    && !(prog->intflags & PREGf_IMPLICIT))
834 	{
835 	    t = strpos;
836 	    goto find_anchor;
837 	}
838 	DEBUG_EXECUTE_r( if (ml_anch)
839 	    PerlIO_printf(Perl_debug_log, "Position at offset %ld does not contradict /%s^%s/m...\n",
840 			  (long)(strpos - i_strpos), PL_colors[0], PL_colors[1]);
841 	);
842       success_at_start:
843 	if (!(prog->intflags & PREGf_NAUGHTY)	/* XXXX If strpos moved? */
844 	    && (do_utf8 ? (
845 		prog->check_utf8		/* Could be deleted already */
846 		&& --BmUSEFUL(prog->check_utf8) < 0
847 		&& (prog->check_utf8 == prog->float_utf8)
848 	    ) : (
849 		prog->check_substr		/* Could be deleted already */
850 		&& --BmUSEFUL(prog->check_substr) < 0
851 		&& (prog->check_substr == prog->float_substr)
852 	    )))
853 	{
854 	    /* If flags & SOMETHING - do not do it many times on the same match */
855 	    DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "... Disabling check substring...\n"));
856 	    SvREFCNT_dec(do_utf8 ? prog->check_utf8 : prog->check_substr);
857 	    if (do_utf8 ? prog->check_substr : prog->check_utf8)
858 		SvREFCNT_dec(do_utf8 ? prog->check_substr : prog->check_utf8);
859 	    prog->check_substr = prog->check_utf8 = NULL;	/* disable */
860 	    prog->float_substr = prog->float_utf8 = NULL;	/* clear */
861 	    check = NULL;			/* abort */
862 	    s = strpos;
863 	    /* XXXX This is a remnant of the old implementation.  It
864 	            looks wasteful, since now INTUIT can use many
865 	            other heuristics. */
866 	    prog->extflags &= ~RXf_USE_INTUIT;
867 	}
868 	else
869 	    s = strpos;
870     }
871 
872     /* Last resort... */
873     /* XXXX BmUSEFUL already changed, maybe multiple change is meaningful... */
874     /* trie stclasses are too expensive to use here, we are better off to
875        leave it to regmatch itself */
876     if (progi->regstclass && PL_regkind[OP(progi->regstclass)]!=TRIE) {
877 	/* minlen == 0 is possible if regstclass is \b or \B,
878 	   and the fixed substr is ''$.
879 	   Since minlen is already taken into account, s+1 is before strend;
880 	   accidentally, minlen >= 1 guaranties no false positives at s + 1
881 	   even for \b or \B.  But (minlen? 1 : 0) below assumes that
882 	   regstclass does not come from lookahead...  */
883 	/* If regstclass takes bytelength more than 1: If charlength==1, OK.
884 	   This leaves EXACTF only, which is dealt with in find_byclass().  */
885         const U8* const str = (U8*)STRING(progi->regstclass);
886         const int cl_l = (PL_regkind[OP(progi->regstclass)] == EXACT
887 		    ? CHR_DIST(str+STR_LEN(progi->regstclass), str)
888 		    : 1);
889 	char * endpos;
890 	if (prog->anchored_substr || prog->anchored_utf8 || ml_anch)
891             endpos= HOP3c(s, (prog->minlen ? cl_l : 0), strend);
892         else if (prog->float_substr || prog->float_utf8)
893 	    endpos= HOP3c(HOP3c(check_at, -start_shift, strbeg), cl_l, strend);
894         else
895             endpos= strend;
896 
897         DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "start_shift: %"IVdf" check_at: %"IVdf" s: %"IVdf" endpos: %"IVdf"\n",
898 				      (IV)start_shift, (IV)(check_at - strbeg), (IV)(s - strbeg), (IV)(endpos - strbeg)));
899 
900 	t = s;
901         s = find_byclass(prog, progi->regstclass, s, endpos, NULL);
902 	if (!s) {
903 #ifdef DEBUGGING
904 	    const char *what = NULL;
905 #endif
906 	    if (endpos == strend) {
907 		DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
908 				"Could not match STCLASS...\n") );
909 		goto fail;
910 	    }
911 	    DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
912 				   "This position contradicts STCLASS...\n") );
913 	    if ((prog->extflags & RXf_ANCH) && !ml_anch)
914 		goto fail;
915 	    /* Contradict one of substrings */
916 	    if (prog->anchored_substr || prog->anchored_utf8) {
917 		if ((do_utf8 ? prog->anchored_utf8 : prog->anchored_substr) == check) {
918 		    DEBUG_EXECUTE_r( what = "anchored" );
919 		  hop_and_restart:
920 		    s = HOP3c(t, 1, strend);
921 		    if (s + start_shift + end_shift > strend) {
922 			/* XXXX Should be taken into account earlier? */
923 			DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
924 					       "Could not match STCLASS...\n") );
925 			goto fail;
926 		    }
927 		    if (!check)
928 			goto giveup;
929 		    DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
930 				"Looking for %s substr starting at offset %ld...\n",
931 				 what, (long)(s + start_shift - i_strpos)) );
932 		    goto restart;
933 		}
934 		/* Have both, check_string is floating */
935 		if (t + start_shift >= check_at) /* Contradicts floating=check */
936 		    goto retry_floating_check;
937 		/* Recheck anchored substring, but not floating... */
938 		s = check_at;
939 		if (!check)
940 		    goto giveup;
941 		DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
942 			  "Looking for anchored substr starting at offset %ld...\n",
943 			  (long)(other_last - i_strpos)) );
944 		goto do_other_anchored;
945 	    }
946 	    /* Another way we could have checked stclass at the
947                current position only: */
948 	    if (ml_anch) {
949 		s = t = t + 1;
950 		if (!check)
951 		    goto giveup;
952 		DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
953 			  "Looking for /%s^%s/m starting at offset %ld...\n",
954 			  PL_colors[0], PL_colors[1], (long)(t - i_strpos)) );
955 		goto try_at_offset;
956 	    }
957 	    if (!(do_utf8 ? prog->float_utf8 : prog->float_substr))	/* Could have been deleted */
958 		goto fail;
959 	    /* Check is floating subtring. */
960 	  retry_floating_check:
961 	    t = check_at - start_shift;
962 	    DEBUG_EXECUTE_r( what = "floating" );
963 	    goto hop_and_restart;
964 	}
965 	if (t != s) {
966             DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
967 			"By STCLASS: moving %ld --> %ld\n",
968                                   (long)(t - i_strpos), (long)(s - i_strpos))
969                    );
970         }
971         else {
972             DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
973                                   "Does not contradict STCLASS...\n");
974                    );
975         }
976     }
977   giveup:
978     DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "%s%s:%s match at offset %ld\n",
979 			  PL_colors[4], (check ? "Guessed" : "Giving up"),
980 			  PL_colors[5], (long)(s - i_strpos)) );
981     return s;
982 
983   fail_finish:				/* Substring not found */
984     if (prog->check_substr || prog->check_utf8)		/* could be removed already */
985 	BmUSEFUL(do_utf8 ? prog->check_utf8 : prog->check_substr) += 5; /* hooray */
986   fail:
987     DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "%sMatch rejected by optimizer%s\n",
988 			  PL_colors[4], PL_colors[5]));
989     return NULL;
990 }
991 
992 #define DECL_TRIE_TYPE(scan) \
993     const enum { trie_plain, trie_utf8, trie_utf8_fold, trie_latin_utf8_fold } \
994 		    trie_type = (scan->flags != EXACT) \
995 		              ? (do_utf8 ? trie_utf8_fold : (UTF ? trie_latin_utf8_fold : trie_plain)) \
996                               : (do_utf8 ? trie_utf8 : trie_plain)
997 
998 #define REXEC_TRIE_READ_CHAR(trie_type, trie, widecharmap, uc, uscan, len,  \
999 uvc, charid, foldlen, foldbuf, uniflags) STMT_START {                       \
1000     switch (trie_type) {                                                    \
1001     case trie_utf8_fold:                                                    \
1002 	if ( foldlen>0 ) {                                                  \
1003 	    uvc = utf8n_to_uvuni( uscan, UTF8_MAXLEN, &len, uniflags );     \
1004 	    foldlen -= len;                                                 \
1005 	    uscan += len;                                                   \
1006 	    len=0;                                                          \
1007 	} else {                                                            \
1008 	    uvc = utf8n_to_uvuni( (U8*)uc, UTF8_MAXLEN, &len, uniflags );   \
1009 	    uvc = to_uni_fold( uvc, foldbuf, &foldlen );                    \
1010 	    foldlen -= UNISKIP( uvc );                                      \
1011 	    uscan = foldbuf + UNISKIP( uvc );                               \
1012 	}                                                                   \
1013 	break;                                                              \
1014     case trie_latin_utf8_fold:                                              \
1015 	if ( foldlen>0 ) {                                                  \
1016 	    uvc = utf8n_to_uvuni( uscan, UTF8_MAXLEN, &len, uniflags );     \
1017 	    foldlen -= len;                                                 \
1018 	    uscan += len;                                                   \
1019 	    len=0;                                                          \
1020 	} else {                                                            \
1021 	    len = 1;                                                        \
1022 	    uvc = to_uni_fold( *(U8*)uc, foldbuf, &foldlen );               \
1023 	    foldlen -= UNISKIP( uvc );                                      \
1024 	    uscan = foldbuf + UNISKIP( uvc );                               \
1025 	}                                                                   \
1026 	break;                                                              \
1027     case trie_utf8:                                                         \
1028 	uvc = utf8n_to_uvuni( (U8*)uc, UTF8_MAXLEN, &len, uniflags );       \
1029 	break;                                                              \
1030     case trie_plain:                                                        \
1031 	uvc = (UV)*uc;                                                      \
1032 	len = 1;                                                            \
1033     }                                                                       \
1034 									    \
1035     if (uvc < 256) {                                                        \
1036 	charid = trie->charmap[ uvc ];                                      \
1037     }                                                                       \
1038     else {                                                                  \
1039 	charid = 0;                                                         \
1040 	if (widecharmap) {                                                  \
1041 	    SV** const svpp = hv_fetch(widecharmap,                         \
1042 			(char*)&uvc, sizeof(UV), 0);                        \
1043 	    if (svpp)                                                       \
1044 		charid = (U16)SvIV(*svpp);                                  \
1045 	}                                                                   \
1046     }                                                                       \
1047 } STMT_END
1048 
1049 #define REXEC_FBC_EXACTISH_CHECK(CoNd)                 \
1050 {                                                      \
1051     char *my_strend= (char *)strend;                   \
1052     if ( (CoNd)                                        \
1053 	 && (ln == len ||                              \
1054 	     !ibcmp_utf8(s, &my_strend, 0,  do_utf8,   \
1055 			m, NULL, ln, (bool)UTF))       \
1056 	 && (!reginfo || regtry(reginfo, &s)) )        \
1057 	goto got_it;                                   \
1058     else {                                             \
1059 	 U8 foldbuf[UTF8_MAXBYTES_CASE+1];             \
1060 	 uvchr_to_utf8(tmpbuf, c);                     \
1061 	 f = to_utf8_fold(tmpbuf, foldbuf, &foldlen);  \
1062 	 if ( f != c                                   \
1063 	      && (f == c1 || f == c2)                  \
1064 	      && (ln == len ||                         \
1065 	        !ibcmp_utf8(s, &my_strend, 0,  do_utf8,\
1066 			      m, NULL, ln, (bool)UTF)) \
1067 	      && (!reginfo || regtry(reginfo, &s)) )   \
1068 	      goto got_it;                             \
1069     }                                                  \
1070 }                                                      \
1071 s += len
1072 
1073 #define REXEC_FBC_EXACTISH_SCAN(CoNd)                     \
1074 STMT_START {                                              \
1075     while (s <= e) {                                      \
1076 	if ( (CoNd)                                       \
1077 	     && (ln == 1 || !(OP(c) == EXACTF             \
1078 			      ? ibcmp(s, m, ln)           \
1079 			      : ibcmp_locale(s, m, ln)))  \
1080 	     && (!reginfo || regtry(reginfo, &s)) )        \
1081 	    goto got_it;                                  \
1082 	s++;                                              \
1083     }                                                     \
1084 } STMT_END
1085 
1086 #define REXEC_FBC_UTF8_SCAN(CoDe)                     \
1087 STMT_START {                                          \
1088     while (s + (uskip = UTF8SKIP(s)) <= strend) {     \
1089 	CoDe                                          \
1090 	s += uskip;                                   \
1091     }                                                 \
1092 } STMT_END
1093 
1094 #define REXEC_FBC_SCAN(CoDe)                          \
1095 STMT_START {                                          \
1096     while (s < strend) {                              \
1097 	CoDe                                          \
1098 	s++;                                          \
1099     }                                                 \
1100 } STMT_END
1101 
1102 #define REXEC_FBC_UTF8_CLASS_SCAN(CoNd)               \
1103 REXEC_FBC_UTF8_SCAN(                                  \
1104     if (CoNd) {                                       \
1105 	if (tmp && (!reginfo || regtry(reginfo, &s)))  \
1106 	    goto got_it;                              \
1107 	else                                          \
1108 	    tmp = doevery;                            \
1109     }                                                 \
1110     else                                              \
1111 	tmp = 1;                                      \
1112 )
1113 
1114 #define REXEC_FBC_CLASS_SCAN(CoNd)                    \
1115 REXEC_FBC_SCAN(                                       \
1116     if (CoNd) {                                       \
1117 	if (tmp && (!reginfo || regtry(reginfo, &s)))  \
1118 	    goto got_it;                              \
1119 	else                                          \
1120 	    tmp = doevery;                            \
1121     }                                                 \
1122     else                                              \
1123 	tmp = 1;                                      \
1124 )
1125 
1126 #define REXEC_FBC_TRYIT               \
1127 if ((!reginfo || regtry(reginfo, &s))) \
1128     goto got_it
1129 
1130 #define REXEC_FBC_CSCAN(CoNdUtF8,CoNd)                         \
1131     if (do_utf8) {                                             \
1132 	REXEC_FBC_UTF8_CLASS_SCAN(CoNdUtF8);                   \
1133     }                                                          \
1134     else {                                                     \
1135 	REXEC_FBC_CLASS_SCAN(CoNd);                            \
1136     }                                                          \
1137     break
1138 
1139 #define REXEC_FBC_CSCAN_PRELOAD(UtFpReLoAd,CoNdUtF8,CoNd)      \
1140     if (do_utf8) {                                             \
1141 	UtFpReLoAd;                                            \
1142 	REXEC_FBC_UTF8_CLASS_SCAN(CoNdUtF8);                   \
1143     }                                                          \
1144     else {                                                     \
1145 	REXEC_FBC_CLASS_SCAN(CoNd);                            \
1146     }                                                          \
1147     break
1148 
1149 #define REXEC_FBC_CSCAN_TAINT(CoNdUtF8,CoNd)                   \
1150     PL_reg_flags |= RF_tainted;                                \
1151     if (do_utf8) {                                             \
1152 	REXEC_FBC_UTF8_CLASS_SCAN(CoNdUtF8);                   \
1153     }                                                          \
1154     else {                                                     \
1155 	REXEC_FBC_CLASS_SCAN(CoNd);                            \
1156     }                                                          \
1157     break
1158 
1159 #define DUMP_EXEC_POS(li,s,doutf8) \
1160     dump_exec_pos(li,s,(PL_regeol),(PL_bostr),(PL_reg_starttry),doutf8)
1161 
1162 /* We know what class REx starts with.  Try to find this position... */
1163 /* if reginfo is NULL, its a dryrun */
1164 /* annoyingly all the vars in this routine have different names from their counterparts
1165    in regmatch. /grrr */
1166 
1167 STATIC char *
1168 S_find_byclass(pTHX_ regexp * prog, const regnode *c, char *s,
1169     const char *strend, regmatch_info *reginfo)
1170 {
1171 	dVAR;
1172 	const I32 doevery = (prog->intflags & PREGf_SKIP) == 0;
1173 	char *m;
1174 	STRLEN ln;
1175 	STRLEN lnc;
1176 	register STRLEN uskip;
1177 	unsigned int c1;
1178 	unsigned int c2;
1179 	char *e;
1180 	register I32 tmp = 1;	/* Scratch variable? */
1181 	register const bool do_utf8 = PL_reg_match_utf8;
1182         RXi_GET_DECL(prog,progi);
1183 
1184 	/* We know what class it must start with. */
1185 	switch (OP(c)) {
1186 	case ANYOF:
1187 	    if (do_utf8) {
1188 		 REXEC_FBC_UTF8_CLASS_SCAN((ANYOF_FLAGS(c) & ANYOF_UNICODE) ||
1189 			  !UTF8_IS_INVARIANT((U8)s[0]) ?
1190 			  reginclass(prog, c, (U8*)s, 0, do_utf8) :
1191 			  REGINCLASS(prog, c, (U8*)s));
1192 	    }
1193 	    else {
1194 		 while (s < strend) {
1195 		      STRLEN skip = 1;
1196 
1197 		      if (REGINCLASS(prog, c, (U8*)s) ||
1198 			  (ANYOF_FOLD_SHARP_S(c, s, strend) &&
1199 			   /* The assignment of 2 is intentional:
1200 			    * for the folded sharp s, the skip is 2. */
1201 			   (skip = SHARP_S_SKIP))) {
1202 			   if (tmp && (!reginfo || regtry(reginfo, &s)))
1203 				goto got_it;
1204 			   else
1205 				tmp = doevery;
1206 		      }
1207 		      else
1208 			   tmp = 1;
1209 		      s += skip;
1210 		 }
1211 	    }
1212 	    break;
1213 	case CANY:
1214 	    REXEC_FBC_SCAN(
1215 	        if (tmp && (!reginfo || regtry(reginfo, &s)))
1216 		    goto got_it;
1217 		else
1218 		    tmp = doevery;
1219 	    );
1220 	    break;
1221 	case EXACTF:
1222 	    m   = STRING(c);
1223 	    ln  = STR_LEN(c);	/* length to match in octets/bytes */
1224 	    lnc = (I32) ln;	/* length to match in characters */
1225 	    if (UTF) {
1226 	        STRLEN ulen1, ulen2;
1227 		U8 *sm = (U8 *) m;
1228 		U8 tmpbuf1[UTF8_MAXBYTES_CASE+1];
1229 		U8 tmpbuf2[UTF8_MAXBYTES_CASE+1];
1230 		/* used by commented-out code below */
1231 		/*const U32 uniflags = UTF8_ALLOW_DEFAULT;*/
1232 
1233                 /* XXX: Since the node will be case folded at compile
1234                    time this logic is a little odd, although im not
1235                    sure that its actually wrong. --dmq */
1236 
1237 		c1 = to_utf8_lower((U8*)m, tmpbuf1, &ulen1);
1238 		c2 = to_utf8_upper((U8*)m, tmpbuf2, &ulen2);
1239 
1240 		/* XXX: This is kinda strange. to_utf8_XYZ returns the
1241                    codepoint of the first character in the converted
1242                    form, yet originally we did the extra step.
1243                    No tests fail by commenting this code out however
1244                    so Ive left it out. -- dmq.
1245 
1246 		c1 = utf8n_to_uvchr(tmpbuf1, UTF8_MAXBYTES_CASE,
1247 				    0, uniflags);
1248 		c2 = utf8n_to_uvchr(tmpbuf2, UTF8_MAXBYTES_CASE,
1249 				    0, uniflags);
1250                 */
1251 
1252 		lnc = 0;
1253 		while (sm < ((U8 *) m + ln)) {
1254 		    lnc++;
1255 		    sm += UTF8SKIP(sm);
1256 		}
1257 	    }
1258 	    else {
1259 		c1 = *(U8*)m;
1260 		c2 = PL_fold[c1];
1261 	    }
1262 	    goto do_exactf;
1263 	case EXACTFL:
1264 	    m   = STRING(c);
1265 	    ln  = STR_LEN(c);
1266 	    lnc = (I32) ln;
1267 	    c1 = *(U8*)m;
1268 	    c2 = PL_fold_locale[c1];
1269 	  do_exactf:
1270 	    e = HOP3c(strend, -((I32)lnc), s);
1271 
1272 	    if (!reginfo && e < s)
1273 		e = s;			/* Due to minlen logic of intuit() */
1274 
1275 	    /* The idea in the EXACTF* cases is to first find the
1276 	     * first character of the EXACTF* node and then, if
1277 	     * necessary, case-insensitively compare the full
1278 	     * text of the node.  The c1 and c2 are the first
1279 	     * characters (though in Unicode it gets a bit
1280 	     * more complicated because there are more cases
1281 	     * than just upper and lower: one needs to use
1282 	     * the so-called folding case for case-insensitive
1283 	     * matching (called "loose matching" in Unicode).
1284 	     * ibcmp_utf8() will do just that. */
1285 
1286 	    if (do_utf8 || UTF) {
1287 	        UV c, f;
1288 	        U8 tmpbuf [UTF8_MAXBYTES+1];
1289 		STRLEN len = 1;
1290 		STRLEN foldlen;
1291 		const U32 uniflags = UTF8_ALLOW_DEFAULT;
1292 		if (c1 == c2) {
1293 		    /* Upper and lower of 1st char are equal -
1294 		     * probably not a "letter". */
1295 		    while (s <= e) {
1296 		        if (do_utf8) {
1297 		            c = utf8n_to_uvchr((U8*)s, UTF8_MAXBYTES, &len,
1298 					   uniflags);
1299                         } else {
1300                             c = *((U8*)s);
1301                         }
1302 			REXEC_FBC_EXACTISH_CHECK(c == c1);
1303 		    }
1304 		}
1305 		else {
1306 		    while (s <= e) {
1307 		        if (do_utf8) {
1308 		            c = utf8n_to_uvchr((U8*)s, UTF8_MAXBYTES, &len,
1309 					   uniflags);
1310                         } else {
1311                             c = *((U8*)s);
1312                         }
1313 
1314 			/* Handle some of the three Greek sigmas cases.
1315 			 * Note that not all the possible combinations
1316 			 * are handled here: some of them are handled
1317 			 * by the standard folding rules, and some of
1318 			 * them (the character class or ANYOF cases)
1319 			 * are handled during compiletime in
1320 			 * regexec.c:S_regclass(). */
1321 			if (c == (UV)UNICODE_GREEK_CAPITAL_LETTER_SIGMA ||
1322 			    c == (UV)UNICODE_GREEK_SMALL_LETTER_FINAL_SIGMA)
1323 			    c = (UV)UNICODE_GREEK_SMALL_LETTER_SIGMA;
1324 
1325 			REXEC_FBC_EXACTISH_CHECK(c == c1 || c == c2);
1326 		    }
1327 		}
1328 	    }
1329 	    else {
1330 	        /* Neither pattern nor string are UTF8 */
1331 		if (c1 == c2)
1332 		    REXEC_FBC_EXACTISH_SCAN(*(U8*)s == c1);
1333 		else
1334 		    REXEC_FBC_EXACTISH_SCAN(*(U8*)s == c1 || *(U8*)s == c2);
1335 	    }
1336 	    break;
1337 	case BOUNDL:
1338 	    PL_reg_flags |= RF_tainted;
1339 	    /* FALL THROUGH */
1340 	case BOUND:
1341 	    if (do_utf8) {
1342 		if (s == PL_bostr)
1343 		    tmp = '\n';
1344 		else {
1345 		    U8 * const r = reghop3((U8*)s, -1, (U8*)PL_bostr);
1346 		    tmp = utf8n_to_uvchr(r, UTF8SKIP(r), 0, UTF8_ALLOW_DEFAULT);
1347 		}
1348 		tmp = ((OP(c) == BOUND ?
1349 			isALNUM_uni(tmp) : isALNUM_LC_uvchr(UNI_TO_NATIVE(tmp))) != 0);
1350 		LOAD_UTF8_CHARCLASS_ALNUM();
1351 		REXEC_FBC_UTF8_SCAN(
1352 		    if (tmp == !(OP(c) == BOUND ?
1353 				 (bool)swash_fetch(PL_utf8_alnum, (U8*)s, do_utf8) :
1354 				 isALNUM_LC_utf8((U8*)s)))
1355 		    {
1356 			tmp = !tmp;
1357 			REXEC_FBC_TRYIT;
1358 		}
1359 		);
1360 	    }
1361 	    else {
1362 		tmp = (s != PL_bostr) ? UCHARAT(s - 1) : '\n';
1363 		tmp = ((OP(c) == BOUND ? isALNUM(tmp) : isALNUM_LC(tmp)) != 0);
1364 		REXEC_FBC_SCAN(
1365 		    if (tmp ==
1366 			!(OP(c) == BOUND ? isALNUM(*s) : isALNUM_LC(*s))) {
1367 			tmp = !tmp;
1368 			REXEC_FBC_TRYIT;
1369 		}
1370 		);
1371 	    }
1372 	    if ((!prog->minlen && tmp) && (!reginfo || regtry(reginfo, &s)))
1373 		goto got_it;
1374 	    break;
1375 	case NBOUNDL:
1376 	    PL_reg_flags |= RF_tainted;
1377 	    /* FALL THROUGH */
1378 	case NBOUND:
1379 	    if (do_utf8) {
1380 		if (s == PL_bostr)
1381 		    tmp = '\n';
1382 		else {
1383 		    U8 * const r = reghop3((U8*)s, -1, (U8*)PL_bostr);
1384 		    tmp = utf8n_to_uvchr(r, UTF8SKIP(r), 0, UTF8_ALLOW_DEFAULT);
1385 		}
1386 		tmp = ((OP(c) == NBOUND ?
1387 			isALNUM_uni(tmp) : isALNUM_LC_uvchr(UNI_TO_NATIVE(tmp))) != 0);
1388 		LOAD_UTF8_CHARCLASS_ALNUM();
1389 		REXEC_FBC_UTF8_SCAN(
1390 		    if (tmp == !(OP(c) == NBOUND ?
1391 				 (bool)swash_fetch(PL_utf8_alnum, (U8*)s, do_utf8) :
1392 				 isALNUM_LC_utf8((U8*)s)))
1393 			tmp = !tmp;
1394 		    else REXEC_FBC_TRYIT;
1395 		);
1396 	    }
1397 	    else {
1398 		tmp = (s != PL_bostr) ? UCHARAT(s - 1) : '\n';
1399 		tmp = ((OP(c) == NBOUND ?
1400 			isALNUM(tmp) : isALNUM_LC(tmp)) != 0);
1401 		REXEC_FBC_SCAN(
1402 		    if (tmp ==
1403 			!(OP(c) == NBOUND ? isALNUM(*s) : isALNUM_LC(*s)))
1404 			tmp = !tmp;
1405 		    else REXEC_FBC_TRYIT;
1406 		);
1407 	    }
1408 	    if ((!prog->minlen && !tmp) && (!reginfo || regtry(reginfo, &s)))
1409 		goto got_it;
1410 	    break;
1411 	case ALNUM:
1412 	    REXEC_FBC_CSCAN_PRELOAD(
1413 		LOAD_UTF8_CHARCLASS_ALNUM(),
1414 		swash_fetch(PL_utf8_alnum, (U8*)s, do_utf8),
1415 		isALNUM(*s)
1416 	    );
1417 	case ALNUML:
1418 	    REXEC_FBC_CSCAN_TAINT(
1419 		isALNUM_LC_utf8((U8*)s),
1420 		isALNUM_LC(*s)
1421 	    );
1422 	case NALNUM:
1423 	    REXEC_FBC_CSCAN_PRELOAD(
1424 		LOAD_UTF8_CHARCLASS_ALNUM(),
1425 		!swash_fetch(PL_utf8_alnum, (U8*)s, do_utf8),
1426 		!isALNUM(*s)
1427 	    );
1428 	case NALNUML:
1429 	    REXEC_FBC_CSCAN_TAINT(
1430 		!isALNUM_LC_utf8((U8*)s),
1431 		!isALNUM_LC(*s)
1432 	    );
1433 	case SPACE:
1434 	    REXEC_FBC_CSCAN_PRELOAD(
1435 		LOAD_UTF8_CHARCLASS_SPACE(),
1436 		*s == ' ' || swash_fetch(PL_utf8_space,(U8*)s, do_utf8),
1437 		isSPACE(*s)
1438 	    );
1439 	case SPACEL:
1440 	    REXEC_FBC_CSCAN_TAINT(
1441 		*s == ' ' || isSPACE_LC_utf8((U8*)s),
1442 		isSPACE_LC(*s)
1443 	    );
1444 	case NSPACE:
1445 	    REXEC_FBC_CSCAN_PRELOAD(
1446 		LOAD_UTF8_CHARCLASS_SPACE(),
1447 		!(*s == ' ' || swash_fetch(PL_utf8_space,(U8*)s, do_utf8)),
1448 		!isSPACE(*s)
1449 	    );
1450 	case NSPACEL:
1451 	    REXEC_FBC_CSCAN_TAINT(
1452 		!(*s == ' ' || isSPACE_LC_utf8((U8*)s)),
1453 		!isSPACE_LC(*s)
1454 	    );
1455 	case DIGIT:
1456 	    REXEC_FBC_CSCAN_PRELOAD(
1457 		LOAD_UTF8_CHARCLASS_DIGIT(),
1458 		swash_fetch(PL_utf8_digit,(U8*)s, do_utf8),
1459 		isDIGIT(*s)
1460 	    );
1461 	case DIGITL:
1462 	    REXEC_FBC_CSCAN_TAINT(
1463 		isDIGIT_LC_utf8((U8*)s),
1464 		isDIGIT_LC(*s)
1465 	    );
1466 	case NDIGIT:
1467 	    REXEC_FBC_CSCAN_PRELOAD(
1468 		LOAD_UTF8_CHARCLASS_DIGIT(),
1469 		!swash_fetch(PL_utf8_digit,(U8*)s, do_utf8),
1470 		!isDIGIT(*s)
1471 	    );
1472 	case NDIGITL:
1473 	    REXEC_FBC_CSCAN_TAINT(
1474 		!isDIGIT_LC_utf8((U8*)s),
1475 		!isDIGIT_LC(*s)
1476 	    );
1477 	case LNBREAK:
1478 	    REXEC_FBC_CSCAN(
1479 		is_LNBREAK_utf8(s),
1480 		is_LNBREAK_latin1(s)
1481 	    );
1482 	case VERTWS:
1483 	    REXEC_FBC_CSCAN(
1484 		is_VERTWS_utf8(s),
1485 		is_VERTWS_latin1(s)
1486 	    );
1487 	case NVERTWS:
1488 	    REXEC_FBC_CSCAN(
1489 		!is_VERTWS_utf8(s),
1490 		!is_VERTWS_latin1(s)
1491 	    );
1492 	case HORIZWS:
1493 	    REXEC_FBC_CSCAN(
1494 		is_HORIZWS_utf8(s),
1495 		is_HORIZWS_latin1(s)
1496 	    );
1497 	case NHORIZWS:
1498 	    REXEC_FBC_CSCAN(
1499 		!is_HORIZWS_utf8(s),
1500 		!is_HORIZWS_latin1(s)
1501 	    );
1502 	case AHOCORASICKC:
1503 	case AHOCORASICK:
1504 	    {
1505 	        DECL_TRIE_TYPE(c);
1506                 /* what trie are we using right now */
1507         	reg_ac_data *aho
1508         	    = (reg_ac_data*)progi->data->data[ ARG( c ) ];
1509         	reg_trie_data *trie
1510 		    = (reg_trie_data*)progi->data->data[ aho->trie ];
1511 		HV *widecharmap = (HV*) progi->data->data[ aho->trie + 1 ];
1512 
1513 		const char *last_start = strend - trie->minlen;
1514 #ifdef DEBUGGING
1515 		const char *real_start = s;
1516 #endif
1517 		STRLEN maxlen = trie->maxlen;
1518 		SV *sv_points;
1519 		U8 **points; /* map of where we were in the input string
1520 		                when reading a given char. For ASCII this
1521 		                is unnecessary overhead as the relationship
1522 		                is always 1:1, but for Unicode, especially
1523 		                case folded Unicode this is not true. */
1524 		U8 foldbuf[ UTF8_MAXBYTES_CASE + 1 ];
1525 		U8 *bitmap=NULL;
1526 
1527 
1528                 GET_RE_DEBUG_FLAGS_DECL;
1529 
1530                 /* We can't just allocate points here. We need to wrap it in
1531                  * an SV so it gets freed properly if there is a croak while
1532                  * running the match */
1533                 ENTER;
1534 	        SAVETMPS;
1535                 sv_points=newSV(maxlen * sizeof(U8 *));
1536                 SvCUR_set(sv_points,
1537                     maxlen * sizeof(U8 *));
1538                 SvPOK_on(sv_points);
1539                 sv_2mortal(sv_points);
1540                 points=(U8**)SvPV_nolen(sv_points );
1541                 if ( trie_type != trie_utf8_fold
1542                      && (trie->bitmap || OP(c)==AHOCORASICKC) )
1543                 {
1544                     if (trie->bitmap)
1545                         bitmap=(U8*)trie->bitmap;
1546                     else
1547                         bitmap=(U8*)ANYOF_BITMAP(c);
1548                 }
1549                 /* this is the Aho-Corasick algorithm modified a touch
1550                    to include special handling for long "unknown char"
1551                    sequences. The basic idea being that we use AC as long
1552                    as we are dealing with a possible matching char, when
1553                    we encounter an unknown char (and we have not encountered
1554                    an accepting state) we scan forward until we find a legal
1555                    starting char.
1556                    AC matching is basically that of trie matching, except
1557                    that when we encounter a failing transition, we fall back
1558                    to the current states "fail state", and try the current char
1559                    again, a process we repeat until we reach the root state,
1560                    state 1, or a legal transition. If we fail on the root state
1561                    then we can either terminate if we have reached an accepting
1562                    state previously, or restart the entire process from the beginning
1563                    if we have not.
1564 
1565                  */
1566                 while (s <= last_start) {
1567                     const U32 uniflags = UTF8_ALLOW_DEFAULT;
1568                     U8 *uc = (U8*)s;
1569                     U16 charid = 0;
1570                     U32 base = 1;
1571                     U32 state = 1;
1572                     UV uvc = 0;
1573                     STRLEN len = 0;
1574                     STRLEN foldlen = 0;
1575                     U8 *uscan = (U8*)NULL;
1576                     U8 *leftmost = NULL;
1577 #ifdef DEBUGGING
1578                     U32 accepted_word= 0;
1579 #endif
1580                     U32 pointpos = 0;
1581 
1582                     while ( state && uc <= (U8*)strend ) {
1583                         int failed=0;
1584                         U32 word = aho->states[ state ].wordnum;
1585 
1586                         if( state==1 ) {
1587                             if ( bitmap ) {
1588                                 DEBUG_TRIE_EXECUTE_r(
1589                                     if ( uc <= (U8*)last_start && !BITMAP_TEST(bitmap,*uc) ) {
1590                                         dump_exec_pos( (char *)uc, c, strend, real_start,
1591                                             (char *)uc, do_utf8 );
1592                                         PerlIO_printf( Perl_debug_log,
1593                                             " Scanning for legal start char...\n");
1594                                     }
1595                                 );
1596                                 while ( uc <= (U8*)last_start  && !BITMAP_TEST(bitmap,*uc) ) {
1597                                     uc++;
1598                                 }
1599                                 s= (char *)uc;
1600                             }
1601                             if (uc >(U8*)last_start) break;
1602                         }
1603 
1604                         if ( word ) {
1605                             U8 *lpos= points[ (pointpos - trie->wordlen[word-1] ) % maxlen ];
1606                             if (!leftmost || lpos < leftmost) {
1607                                 DEBUG_r(accepted_word=word);
1608                                 leftmost= lpos;
1609                             }
1610                             if (base==0) break;
1611 
1612                         }
1613                         points[pointpos++ % maxlen]= uc;
1614 			REXEC_TRIE_READ_CHAR(trie_type, trie, widecharmap, uc,
1615 					     uscan, len, uvc, charid, foldlen,
1616 					     foldbuf, uniflags);
1617                         DEBUG_TRIE_EXECUTE_r({
1618                             dump_exec_pos( (char *)uc, c, strend, real_start,
1619                                 s,   do_utf8 );
1620                             PerlIO_printf(Perl_debug_log,
1621                                 " Charid:%3u CP:%4"UVxf" ",
1622                                  charid, uvc);
1623                         });
1624 
1625                         do {
1626 #ifdef DEBUGGING
1627                             word = aho->states[ state ].wordnum;
1628 #endif
1629                             base = aho->states[ state ].trans.base;
1630 
1631                             DEBUG_TRIE_EXECUTE_r({
1632                                 if (failed)
1633                                     dump_exec_pos( (char *)uc, c, strend, real_start,
1634                                         s,   do_utf8 );
1635                                 PerlIO_printf( Perl_debug_log,
1636                                     "%sState: %4"UVxf", word=%"UVxf,
1637                                     failed ? " Fail transition to " : "",
1638                                     (UV)state, (UV)word);
1639                             });
1640                             if ( base ) {
1641                                 U32 tmp;
1642                                 if (charid &&
1643                                      (base + charid > trie->uniquecharcount )
1644                                      && (base + charid - 1 - trie->uniquecharcount
1645                                             < trie->lasttrans)
1646                                      && trie->trans[base + charid - 1 -
1647                                             trie->uniquecharcount].check == state
1648                                      && (tmp=trie->trans[base + charid - 1 -
1649                                         trie->uniquecharcount ].next))
1650                                 {
1651                                     DEBUG_TRIE_EXECUTE_r(
1652                                         PerlIO_printf( Perl_debug_log," - legal\n"));
1653                                     state = tmp;
1654                                     break;
1655                                 }
1656                                 else {
1657                                     DEBUG_TRIE_EXECUTE_r(
1658                                         PerlIO_printf( Perl_debug_log," - fail\n"));
1659                                     failed = 1;
1660                                     state = aho->fail[state];
1661                                 }
1662                             }
1663                             else {
1664                                 /* we must be accepting here */
1665                                 DEBUG_TRIE_EXECUTE_r(
1666                                         PerlIO_printf( Perl_debug_log," - accepting\n"));
1667                                 failed = 1;
1668                                 break;
1669                             }
1670                         } while(state);
1671                         uc += len;
1672                         if (failed) {
1673                             if (leftmost)
1674                                 break;
1675                             if (!state) state = 1;
1676                         }
1677                     }
1678                     if ( aho->states[ state ].wordnum ) {
1679                         U8 *lpos = points[ (pointpos - trie->wordlen[aho->states[ state ].wordnum-1]) % maxlen ];
1680                         if (!leftmost || lpos < leftmost) {
1681                             DEBUG_r(accepted_word=aho->states[ state ].wordnum);
1682                             leftmost = lpos;
1683                         }
1684                     }
1685                     if (leftmost) {
1686                         s = (char*)leftmost;
1687                         DEBUG_TRIE_EXECUTE_r({
1688                             PerlIO_printf(
1689                                 Perl_debug_log,"Matches word #%"UVxf" at position %"IVdf". Trying full pattern...\n",
1690                                 (UV)accepted_word, (IV)(s - real_start)
1691                             );
1692                         });
1693                         if (!reginfo || regtry(reginfo, &s)) {
1694                             FREETMPS;
1695 		            LEAVE;
1696                             goto got_it;
1697                         }
1698                         s = HOPc(s,1);
1699                         DEBUG_TRIE_EXECUTE_r({
1700                             PerlIO_printf( Perl_debug_log,"Pattern failed. Looking for new start point...\n");
1701                         });
1702                     } else {
1703                         DEBUG_TRIE_EXECUTE_r(
1704                             PerlIO_printf( Perl_debug_log,"No match.\n"));
1705                         break;
1706                     }
1707                 }
1708                 FREETMPS;
1709                 LEAVE;
1710 	    }
1711 	    break;
1712 	default:
1713 	    Perl_croak(aTHX_ "panic: unknown regstclass %d", (int)OP(c));
1714 	    break;
1715 	}
1716 	return 0;
1717       got_it:
1718 	return s;
1719 }
1720 
1721 static void
1722 S_swap_match_buff (pTHX_ regexp *prog) {
1723     regexp_paren_pair *t;
1724 
1725     if (!prog->swap) {
1726     /* We have to be careful. If the previous successful match
1727        was from this regex we don't want a subsequent paritally
1728        successful match to clobber the old results.
1729        So when we detect this possibility we add a swap buffer
1730        to the re, and switch the buffer each match. If we fail
1731        we switch it back, otherwise we leave it swapped.
1732     */
1733         Newxz(prog->swap, (prog->nparens + 1), regexp_paren_pair);
1734     }
1735     t = prog->swap;
1736     prog->swap = prog->offs;
1737     prog->offs = t;
1738 }
1739 
1740 
1741 /*
1742  - regexec_flags - match a regexp against a string
1743  */
1744 I32
1745 Perl_regexec_flags(pTHX_ REGEXP * const prog, char *stringarg, register char *strend,
1746 	      char *strbeg, I32 minend, SV *sv, void *data, U32 flags)
1747 /* strend: pointer to null at end of string */
1748 /* strbeg: real beginning of string */
1749 /* minend: end of match must be >=minend after stringarg. */
1750 /* data: May be used for some additional optimizations.
1751          Currently its only used, with a U32 cast, for transmitting
1752          the ganch offset when doing a /g match. This will change */
1753 /* nosave: For optimizations. */
1754 {
1755     dVAR;
1756     /*register*/ char *s;
1757     register regnode *c;
1758     /*register*/ char *startpos = stringarg;
1759     I32 minlen;		/* must match at least this many chars */
1760     I32 dontbother = 0;	/* how many characters not to try at end */
1761     I32 end_shift = 0;			/* Same for the end. */		/* CC */
1762     I32 scream_pos = -1;		/* Internal iterator of scream. */
1763     char *scream_olds = NULL;
1764     const bool do_utf8 = (bool)DO_UTF8(sv);
1765     I32 multiline;
1766     RXi_GET_DECL(prog,progi);
1767     regmatch_info reginfo;  /* create some info to pass to regtry etc */
1768     bool swap_on_fail = 0;
1769 
1770     GET_RE_DEBUG_FLAGS_DECL;
1771 
1772     PERL_UNUSED_ARG(data);
1773 
1774     /* Be paranoid... */
1775     if (prog == NULL || startpos == NULL) {
1776 	Perl_croak(aTHX_ "NULL regexp parameter");
1777 	return 0;
1778     }
1779 
1780     multiline = prog->extflags & RXf_PMf_MULTILINE;
1781     reginfo.prog = prog;
1782 
1783     RX_MATCH_UTF8_set(prog, do_utf8);
1784     DEBUG_EXECUTE_r(
1785         debug_start_match(prog, do_utf8, startpos, strend,
1786         "Matching");
1787     );
1788 
1789     minlen = prog->minlen;
1790 
1791     if (strend - startpos < (minlen+(prog->check_offset_min<0?prog->check_offset_min:0))) {
1792         DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
1793 			      "String too short [regexec_flags]...\n"));
1794 	goto phooey;
1795     }
1796 
1797 
1798     /* Check validity of program. */
1799     if (UCHARAT(progi->program) != REG_MAGIC) {
1800 	Perl_croak(aTHX_ "corrupted regexp program");
1801     }
1802 
1803     PL_reg_flags = 0;
1804     PL_reg_eval_set = 0;
1805     PL_reg_maxiter = 0;
1806 
1807     if (prog->extflags & RXf_UTF8)
1808 	PL_reg_flags |= RF_utf8;
1809 
1810     /* Mark beginning of line for ^ and lookbehind. */
1811     reginfo.bol = startpos; /* XXX not used ??? */
1812     PL_bostr  = strbeg;
1813     reginfo.sv = sv;
1814 
1815     /* Mark end of line for $ (and such) */
1816     PL_regeol = strend;
1817 
1818     /* see how far we have to get to not match where we matched before */
1819     reginfo.till = startpos+minend;
1820 
1821     /* If there is a "must appear" string, look for it. */
1822     s = startpos;
1823 
1824     if (prog->extflags & RXf_GPOS_SEEN) { /* Need to set reginfo->ganch */
1825 	MAGIC *mg;
1826 
1827 	if (flags & REXEC_IGNOREPOS)	/* Means: check only at start */
1828 	    reginfo.ganch = startpos + prog->gofs;
1829 	else if (sv && SvTYPE(sv) >= SVt_PVMG
1830 		  && SvMAGIC(sv)
1831 		  && (mg = mg_find(sv, PERL_MAGIC_regex_global))
1832 		  && mg->mg_len >= 0) {
1833 	    reginfo.ganch = strbeg + mg->mg_len;	/* Defined pos() */
1834 	    if (prog->extflags & RXf_ANCH_GPOS) {
1835 	        if (s > reginfo.ganch)
1836 		    goto phooey;
1837 		s = reginfo.ganch - prog->gofs;
1838 	    }
1839 	}
1840 	else if (data) {
1841 	    reginfo.ganch = strbeg + PTR2UV(data);
1842 	} else				/* pos() not defined */
1843 	    reginfo.ganch = strbeg;
1844     }
1845     if (PL_curpm && (PM_GETRE(PL_curpm) == prog)) {
1846         swap_on_fail = 1;
1847         swap_match_buff(prog); /* do we need a save destructor here for
1848                                   eval dies? */
1849     }
1850     if (!(flags & REXEC_CHECKED) && (prog->check_substr != NULL || prog->check_utf8 != NULL)) {
1851 	re_scream_pos_data d;
1852 
1853 	d.scream_olds = &scream_olds;
1854 	d.scream_pos = &scream_pos;
1855 	s = re_intuit_start(prog, sv, s, strend, flags, &d);
1856 	if (!s) {
1857 	    DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "Not present...\n"));
1858 	    goto phooey;	/* not present */
1859 	}
1860     }
1861 
1862 
1863 
1864     /* Simplest case:  anchored match need be tried only once. */
1865     /*  [unless only anchor is BOL and multiline is set] */
1866     if (prog->extflags & (RXf_ANCH & ~RXf_ANCH_GPOS)) {
1867 	if (s == startpos && regtry(&reginfo, &startpos))
1868 	    goto got_it;
1869 	else if (multiline || (prog->intflags & PREGf_IMPLICIT)
1870 		 || (prog->extflags & RXf_ANCH_MBOL)) /* XXXX SBOL? */
1871 	{
1872 	    char *end;
1873 
1874 	    if (minlen)
1875 		dontbother = minlen - 1;
1876 	    end = HOP3c(strend, -dontbother, strbeg) - 1;
1877 	    /* for multiline we only have to try after newlines */
1878 	    if (prog->check_substr || prog->check_utf8) {
1879 		if (s == startpos)
1880 		    goto after_try;
1881 		while (1) {
1882 		    if (regtry(&reginfo, &s))
1883 			goto got_it;
1884 		  after_try:
1885 		    if (s > end)
1886 			goto phooey;
1887 		    if (prog->extflags & RXf_USE_INTUIT) {
1888 			s = re_intuit_start(prog, sv, s + 1, strend, flags, NULL);
1889 			if (!s)
1890 			    goto phooey;
1891 		    }
1892 		    else
1893 			s++;
1894 		}
1895 	    } else {
1896 		if (s > startpos)
1897 		    s--;
1898 		while (s < end) {
1899 		    if (*s++ == '\n') {	/* don't need PL_utf8skip here */
1900 			if (regtry(&reginfo, &s))
1901 			    goto got_it;
1902 		    }
1903 		}
1904 	    }
1905 	}
1906 	goto phooey;
1907     } else if (RXf_GPOS_CHECK == (prog->extflags & RXf_GPOS_CHECK))
1908     {
1909         /* the warning about reginfo.ganch being used without intialization
1910            is bogus -- we set it above, when prog->extflags & RXf_GPOS_SEEN
1911            and we only enter this block when the same bit is set. */
1912         char *tmp_s = reginfo.ganch - prog->gofs;
1913 	if (regtry(&reginfo, &tmp_s))
1914 	    goto got_it;
1915 	goto phooey;
1916     }
1917 
1918     /* Messy cases:  unanchored match. */
1919     if ((prog->anchored_substr || prog->anchored_utf8) && prog->intflags & PREGf_SKIP) {
1920 	/* we have /x+whatever/ */
1921 	/* it must be a one character string (XXXX Except UTF?) */
1922 	char ch;
1923 #ifdef DEBUGGING
1924 	int did_match = 0;
1925 #endif
1926 	if (!(do_utf8 ? prog->anchored_utf8 : prog->anchored_substr))
1927 	    do_utf8 ? to_utf8_substr(prog) : to_byte_substr(prog);
1928 	ch = SvPVX_const(do_utf8 ? prog->anchored_utf8 : prog->anchored_substr)[0];
1929 
1930 	if (do_utf8) {
1931 	    REXEC_FBC_SCAN(
1932 		if (*s == ch) {
1933 		    DEBUG_EXECUTE_r( did_match = 1 );
1934 		    if (regtry(&reginfo, &s)) goto got_it;
1935 		    s += UTF8SKIP(s);
1936 		    while (s < strend && *s == ch)
1937 			s += UTF8SKIP(s);
1938 		}
1939 	    );
1940 	}
1941 	else {
1942 	    REXEC_FBC_SCAN(
1943 		if (*s == ch) {
1944 		    DEBUG_EXECUTE_r( did_match = 1 );
1945 		    if (regtry(&reginfo, &s)) goto got_it;
1946 		    s++;
1947 		    while (s < strend && *s == ch)
1948 			s++;
1949 		}
1950 	    );
1951 	}
1952 	DEBUG_EXECUTE_r(if (!did_match)
1953 		PerlIO_printf(Perl_debug_log,
1954                                   "Did not find anchored character...\n")
1955                );
1956     }
1957     else if (prog->anchored_substr != NULL
1958 	      || prog->anchored_utf8 != NULL
1959 	      || ((prog->float_substr != NULL || prog->float_utf8 != NULL)
1960 		  && prog->float_max_offset < strend - s)) {
1961 	SV *must;
1962 	I32 back_max;
1963 	I32 back_min;
1964 	char *last;
1965 	char *last1;		/* Last position checked before */
1966 #ifdef DEBUGGING
1967 	int did_match = 0;
1968 #endif
1969 	if (prog->anchored_substr || prog->anchored_utf8) {
1970 	    if (!(do_utf8 ? prog->anchored_utf8 : prog->anchored_substr))
1971 		do_utf8 ? to_utf8_substr(prog) : to_byte_substr(prog);
1972 	    must = do_utf8 ? prog->anchored_utf8 : prog->anchored_substr;
1973 	    back_max = back_min = prog->anchored_offset;
1974 	} else {
1975 	    if (!(do_utf8 ? prog->float_utf8 : prog->float_substr))
1976 		do_utf8 ? to_utf8_substr(prog) : to_byte_substr(prog);
1977 	    must = do_utf8 ? prog->float_utf8 : prog->float_substr;
1978 	    back_max = prog->float_max_offset;
1979 	    back_min = prog->float_min_offset;
1980 	}
1981 
1982 
1983 	if (must == &PL_sv_undef)
1984 	    /* could not downgrade utf8 check substring, so must fail */
1985 	    goto phooey;
1986 
1987         if (back_min<0) {
1988 	    last = strend;
1989 	} else {
1990             last = HOP3c(strend,	/* Cannot start after this */
1991         	  -(I32)(CHR_SVLEN(must)
1992         		 - (SvTAIL(must) != 0) + back_min), strbeg);
1993         }
1994 	if (s > PL_bostr)
1995 	    last1 = HOPc(s, -1);
1996 	else
1997 	    last1 = s - 1;	/* bogus */
1998 
1999 	/* XXXX check_substr already used to find "s", can optimize if
2000 	   check_substr==must. */
2001 	scream_pos = -1;
2002 	dontbother = end_shift;
2003 	strend = HOPc(strend, -dontbother);
2004 	while ( (s <= last) &&
2005 		((flags & REXEC_SCREAM)
2006 		 ? (s = screaminstr(sv, must, HOP3c(s, back_min, (back_min<0 ? strbeg : strend)) - strbeg,
2007 				    end_shift, &scream_pos, 0))
2008 		 : (s = fbm_instr((unsigned char*)HOP3(s, back_min, (back_min<0 ? strbeg : strend)),
2009 				  (unsigned char*)strend, must,
2010 				  multiline ? FBMrf_MULTILINE : 0))) ) {
2011 	    /* we may be pointing at the wrong string */
2012 	    if ((flags & REXEC_SCREAM) && RX_MATCH_COPIED(prog))
2013 		s = strbeg + (s - SvPVX_const(sv));
2014 	    DEBUG_EXECUTE_r( did_match = 1 );
2015 	    if (HOPc(s, -back_max) > last1) {
2016 		last1 = HOPc(s, -back_min);
2017 		s = HOPc(s, -back_max);
2018 	    }
2019 	    else {
2020 		char * const t = (last1 >= PL_bostr) ? HOPc(last1, 1) : last1 + 1;
2021 
2022 		last1 = HOPc(s, -back_min);
2023 		s = t;
2024 	    }
2025 	    if (do_utf8) {
2026 		while (s <= last1) {
2027 		    if (regtry(&reginfo, &s))
2028 			goto got_it;
2029 		    s += UTF8SKIP(s);
2030 		}
2031 	    }
2032 	    else {
2033 		while (s <= last1) {
2034 		    if (regtry(&reginfo, &s))
2035 			goto got_it;
2036 		    s++;
2037 		}
2038 	    }
2039 	}
2040 	DEBUG_EXECUTE_r(if (!did_match) {
2041             RE_PV_QUOTED_DECL(quoted, do_utf8, PERL_DEBUG_PAD_ZERO(0),
2042                 SvPVX_const(must), RE_SV_DUMPLEN(must), 30);
2043             PerlIO_printf(Perl_debug_log, "Did not find %s substr %s%s...\n",
2044 			      ((must == prog->anchored_substr || must == prog->anchored_utf8)
2045 			       ? "anchored" : "floating"),
2046                 quoted, RE_SV_TAIL(must));
2047         });
2048 	goto phooey;
2049     }
2050     else if ( (c = progi->regstclass) ) {
2051 	if (minlen) {
2052 	    const OPCODE op = OP(progi->regstclass);
2053 	    /* don't bother with what can't match */
2054 	    if (PL_regkind[op] != EXACT && op != CANY && PL_regkind[op] != TRIE)
2055 	        strend = HOPc(strend, -(minlen - 1));
2056 	}
2057 	DEBUG_EXECUTE_r({
2058 	    SV * const prop = sv_newmortal();
2059 	    regprop(prog, prop, c);
2060 	    {
2061 		RE_PV_QUOTED_DECL(quoted,do_utf8,PERL_DEBUG_PAD_ZERO(1),
2062 		    s,strend-s,60);
2063 		PerlIO_printf(Perl_debug_log,
2064 		    "Matching stclass %.*s against %s (%d chars)\n",
2065 		    (int)SvCUR(prop), SvPVX_const(prop),
2066 		     quoted, (int)(strend - s));
2067 	    }
2068 	});
2069         if (find_byclass(prog, c, s, strend, &reginfo))
2070 	    goto got_it;
2071 	DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "Contradicts stclass... [regexec_flags]\n"));
2072     }
2073     else {
2074 	dontbother = 0;
2075 	if (prog->float_substr != NULL || prog->float_utf8 != NULL) {
2076 	    /* Trim the end. */
2077 	    char *last;
2078 	    SV* float_real;
2079 
2080 	    if (!(do_utf8 ? prog->float_utf8 : prog->float_substr))
2081 		do_utf8 ? to_utf8_substr(prog) : to_byte_substr(prog);
2082 	    float_real = do_utf8 ? prog->float_utf8 : prog->float_substr;
2083 
2084 	    if (flags & REXEC_SCREAM) {
2085 		last = screaminstr(sv, float_real, s - strbeg,
2086 				   end_shift, &scream_pos, 1); /* last one */
2087 		if (!last)
2088 		    last = scream_olds; /* Only one occurrence. */
2089 		/* we may be pointing at the wrong string */
2090 		else if (RX_MATCH_COPIED(prog))
2091 		    s = strbeg + (s - SvPVX_const(sv));
2092 	    }
2093 	    else {
2094 		STRLEN len;
2095                 const char * const little = SvPV_const(float_real, len);
2096 
2097 		if (SvTAIL(float_real)) {
2098 		    if (memEQ(strend - len + 1, little, len - 1))
2099 			last = strend - len + 1;
2100 		    else if (!multiline)
2101 			last = memEQ(strend - len, little, len)
2102 			    ? strend - len : NULL;
2103 		    else
2104 			goto find_last;
2105 		} else {
2106 		  find_last:
2107 		    if (len)
2108 			last = rninstr(s, strend, little, little + len);
2109 		    else
2110 			last = strend;	/* matching "$" */
2111 		}
2112 	    }
2113 	    if (last == NULL) {
2114 		DEBUG_EXECUTE_r(
2115 		    PerlIO_printf(Perl_debug_log,
2116 			"%sCan't trim the tail, match fails (should not happen)%s\n",
2117 	                PL_colors[4], PL_colors[5]));
2118 		goto phooey; /* Should not happen! */
2119 	    }
2120 	    dontbother = strend - last + prog->float_min_offset;
2121 	}
2122 	if (minlen && (dontbother < minlen))
2123 	    dontbother = minlen - 1;
2124 	strend -= dontbother; 		   /* this one's always in bytes! */
2125 	/* We don't know much -- general case. */
2126 	if (do_utf8) {
2127 	    for (;;) {
2128 		if (regtry(&reginfo, &s))
2129 		    goto got_it;
2130 		if (s >= strend)
2131 		    break;
2132 		s += UTF8SKIP(s);
2133 	    };
2134 	}
2135 	else {
2136 	    do {
2137 		if (regtry(&reginfo, &s))
2138 		    goto got_it;
2139 	    } while (s++ < strend);
2140 	}
2141     }
2142 
2143     /* Failure. */
2144     goto phooey;
2145 
2146 got_it:
2147     RX_MATCH_TAINTED_set(prog, PL_reg_flags & RF_tainted);
2148 
2149     if (PL_reg_eval_set)
2150 	restore_pos(aTHX_ prog);
2151     if (prog->paren_names)
2152         (void)hv_iterinit(prog->paren_names);
2153 
2154     /* make sure $`, $&, $', and $digit will work later */
2155     if ( !(flags & REXEC_NOT_FIRST) ) {
2156 	RX_MATCH_COPY_FREE(prog);
2157 	if (flags & REXEC_COPY_STR) {
2158 	    const I32 i = PL_regeol - startpos + (stringarg - strbeg);
2159 #ifdef PERL_OLD_COPY_ON_WRITE
2160 	    if ((SvIsCOW(sv)
2161 		 || (SvFLAGS(sv) & CAN_COW_MASK) == CAN_COW_FLAGS)) {
2162 		if (DEBUG_C_TEST) {
2163 		    PerlIO_printf(Perl_debug_log,
2164 				  "Copy on write: regexp capture, type %d\n",
2165 				  (int) SvTYPE(sv));
2166 		}
2167 		prog->saved_copy = sv_setsv_cow(prog->saved_copy, sv);
2168 		prog->subbeg = (char *)SvPVX_const(prog->saved_copy);
2169 		assert (SvPOKp(prog->saved_copy));
2170 	    } else
2171 #endif
2172 	    {
2173 		RX_MATCH_COPIED_on(prog);
2174 		s = savepvn(strbeg, i);
2175 		prog->subbeg = s;
2176 	    }
2177 	    prog->sublen = i;
2178 	}
2179 	else {
2180 	    prog->subbeg = strbeg;
2181 	    prog->sublen = PL_regeol - strbeg;	/* strend may have been modified */
2182 	}
2183     }
2184 
2185     return 1;
2186 
2187 phooey:
2188     DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "%sMatch failed%s\n",
2189 			  PL_colors[4], PL_colors[5]));
2190     if (PL_reg_eval_set)
2191 	restore_pos(aTHX_ prog);
2192     if (swap_on_fail)
2193         /* we failed :-( roll it back */
2194         swap_match_buff(prog);
2195 
2196     return 0;
2197 }
2198 
2199 
2200 /*
2201  - regtry - try match at specific point
2202  */
2203 STATIC I32			/* 0 failure, 1 success */
2204 S_regtry(pTHX_ regmatch_info *reginfo, char **startpos)
2205 {
2206     dVAR;
2207     CHECKPOINT lastcp;
2208     regexp *prog = reginfo->prog;
2209     RXi_GET_DECL(prog,progi);
2210     GET_RE_DEBUG_FLAGS_DECL;
2211     reginfo->cutpoint=NULL;
2212 
2213     if ((prog->extflags & RXf_EVAL_SEEN) && !PL_reg_eval_set) {
2214 	MAGIC *mg;
2215 
2216 	PL_reg_eval_set = RS_init;
2217 	DEBUG_EXECUTE_r(DEBUG_s(
2218 	    PerlIO_printf(Perl_debug_log, "  setting stack tmpbase at %"IVdf"\n",
2219 			  (IV)(PL_stack_sp - PL_stack_base));
2220 	    ));
2221 	SAVESTACK_CXPOS();
2222 	cxstack[cxstack_ix].blk_oldsp = PL_stack_sp - PL_stack_base;
2223 	/* Otherwise OP_NEXTSTATE will free whatever on stack now.  */
2224 	SAVETMPS;
2225 	/* Apparently this is not needed, judging by wantarray. */
2226 	/* SAVEI8(cxstack[cxstack_ix].blk_gimme);
2227 	   cxstack[cxstack_ix].blk_gimme = G_SCALAR; */
2228 
2229 	if (reginfo->sv) {
2230 	    /* Make $_ available to executed code. */
2231 	    if (reginfo->sv != DEFSV) {
2232 		SAVE_DEFSV;
2233 		DEFSV = reginfo->sv;
2234 	    }
2235 
2236 	    if (!(SvTYPE(reginfo->sv) >= SVt_PVMG && SvMAGIC(reginfo->sv)
2237 		  && (mg = mg_find(reginfo->sv, PERL_MAGIC_regex_global)))) {
2238 		/* prepare for quick setting of pos */
2239 #ifdef PERL_OLD_COPY_ON_WRITE
2240 		if (SvIsCOW(reginfo->sv))
2241 		    sv_force_normal_flags(reginfo->sv, 0);
2242 #endif
2243 		mg = sv_magicext(reginfo->sv, NULL, PERL_MAGIC_regex_global,
2244 				 &PL_vtbl_mglob, NULL, 0);
2245 		mg->mg_len = -1;
2246 	    }
2247 	    PL_reg_magic    = mg;
2248 	    PL_reg_oldpos   = mg->mg_len;
2249 	    SAVEDESTRUCTOR_X(restore_pos, prog);
2250         }
2251         if (!PL_reg_curpm) {
2252 	    Newxz(PL_reg_curpm, 1, PMOP);
2253 #ifdef USE_ITHREADS
2254             {
2255 		SV* const repointer = newSViv(0);
2256                 /* so we know which PL_regex_padav element is PL_reg_curpm */
2257                 SvFLAGS(repointer) |= SVf_BREAK;
2258                 av_push(PL_regex_padav,repointer);
2259                 PL_reg_curpm->op_pmoffset = av_len(PL_regex_padav);
2260                 PL_regex_pad = AvARRAY(PL_regex_padav);
2261             }
2262 #endif
2263         }
2264 	PM_SETRE(PL_reg_curpm, prog);
2265 	PL_reg_oldcurpm = PL_curpm;
2266 	PL_curpm = PL_reg_curpm;
2267 	if (RX_MATCH_COPIED(prog)) {
2268 	    /*  Here is a serious problem: we cannot rewrite subbeg,
2269 		since it may be needed if this match fails.  Thus
2270 		$` inside (?{}) could fail... */
2271 	    PL_reg_oldsaved = prog->subbeg;
2272 	    PL_reg_oldsavedlen = prog->sublen;
2273 #ifdef PERL_OLD_COPY_ON_WRITE
2274 	    PL_nrs = prog->saved_copy;
2275 #endif
2276 	    RX_MATCH_COPIED_off(prog);
2277 	}
2278 	else
2279 	    PL_reg_oldsaved = NULL;
2280 	prog->subbeg = PL_bostr;
2281 	prog->sublen = PL_regeol - PL_bostr; /* strend may have been modified */
2282     }
2283     DEBUG_EXECUTE_r(PL_reg_starttry = *startpos);
2284     prog->offs[0].start = *startpos - PL_bostr;
2285     PL_reginput = *startpos;
2286     PL_reglastparen = &prog->lastparen;
2287     PL_reglastcloseparen = &prog->lastcloseparen;
2288     prog->lastparen = 0;
2289     prog->lastcloseparen = 0;
2290     PL_regsize = 0;
2291     PL_regoffs = prog->offs;
2292     if (PL_reg_start_tmpl <= prog->nparens) {
2293 	PL_reg_start_tmpl = prog->nparens*3/2 + 3;
2294         if(PL_reg_start_tmp)
2295             Renew(PL_reg_start_tmp, PL_reg_start_tmpl, char*);
2296         else
2297             Newx(PL_reg_start_tmp, PL_reg_start_tmpl, char*);
2298     }
2299 
2300     /* XXXX What this code is doing here?!!!  There should be no need
2301        to do this again and again, PL_reglastparen should take care of
2302        this!  --ilya*/
2303 
2304     /* Tests pat.t#187 and split.t#{13,14} seem to depend on this code.
2305      * Actually, the code in regcppop() (which Ilya may be meaning by
2306      * PL_reglastparen), is not needed at all by the test suite
2307      * (op/regexp, op/pat, op/split), but that code is needed otherwise
2308      * this erroneously leaves $1 defined: "1" =~ /^(?:(\d)x)?\d$/
2309      * Meanwhile, this code *is* needed for the
2310      * above-mentioned test suite tests to succeed.  The common theme
2311      * on those tests seems to be returning null fields from matches.
2312      * --jhi updated by dapm */
2313 #if 1
2314     if (prog->nparens) {
2315 	regexp_paren_pair *pp = PL_regoffs;
2316 	register I32 i;
2317 	for (i = prog->nparens; i > (I32)*PL_reglastparen; i--) {
2318 	    ++pp;
2319 	    pp->start = -1;
2320 	    pp->end = -1;
2321 	}
2322     }
2323 #endif
2324     REGCP_SET(lastcp);
2325     if (regmatch(reginfo, progi->program + 1)) {
2326 	PL_regoffs[0].end = PL_reginput - PL_bostr;
2327 	return 1;
2328     }
2329     if (reginfo->cutpoint)
2330         *startpos= reginfo->cutpoint;
2331     REGCP_UNWIND(lastcp);
2332     return 0;
2333 }
2334 
2335 
2336 #define sayYES goto yes
2337 #define sayNO goto no
2338 #define sayNO_SILENT goto no_silent
2339 
2340 /* we dont use STMT_START/END here because it leads to
2341    "unreachable code" warnings, which are bogus, but distracting. */
2342 #define CACHEsayNO \
2343     if (ST.cache_mask) \
2344        PL_reg_poscache[ST.cache_offset] |= ST.cache_mask; \
2345     sayNO
2346 
2347 /* this is used to determine how far from the left messages like
2348    'failed...' are printed. It should be set such that messages
2349    are inline with the regop output that created them.
2350 */
2351 #define REPORT_CODE_OFF 32
2352 
2353 
2354 /* Make sure there is a test for this +1 options in re_tests */
2355 #define TRIE_INITAL_ACCEPT_BUFFLEN 4;
2356 
2357 #define CHRTEST_UNINIT -1001 /* c1/c2 haven't been calculated yet */
2358 #define CHRTEST_VOID   -1000 /* the c1/c2 "next char" test should be skipped */
2359 
2360 #define SLAB_FIRST(s) (&(s)->states[0])
2361 #define SLAB_LAST(s)  (&(s)->states[PERL_REGMATCH_SLAB_SLOTS-1])
2362 
2363 /* grab a new slab and return the first slot in it */
2364 
2365 STATIC regmatch_state *
2366 S_push_slab(pTHX)
2367 {
2368 #if PERL_VERSION < 9 && !defined(PERL_CORE)
2369     dMY_CXT;
2370 #endif
2371     regmatch_slab *s = PL_regmatch_slab->next;
2372     if (!s) {
2373 	Newx(s, 1, regmatch_slab);
2374 	s->prev = PL_regmatch_slab;
2375 	s->next = NULL;
2376 	PL_regmatch_slab->next = s;
2377     }
2378     PL_regmatch_slab = s;
2379     return SLAB_FIRST(s);
2380 }
2381 
2382 
2383 /* push a new state then goto it */
2384 
2385 #define PUSH_STATE_GOTO(state, node) \
2386     scan = node; \
2387     st->resume_state = state; \
2388     goto push_state;
2389 
2390 /* push a new state with success backtracking, then goto it */
2391 
2392 #define PUSH_YES_STATE_GOTO(state, node) \
2393     scan = node; \
2394     st->resume_state = state; \
2395     goto push_yes_state;
2396 
2397 
2398 
2399 /*
2400 
2401 regmatch() - main matching routine
2402 
2403 This is basically one big switch statement in a loop. We execute an op,
2404 set 'next' to point the next op, and continue. If we come to a point which
2405 we may need to backtrack to on failure such as (A|B|C), we push a
2406 backtrack state onto the backtrack stack. On failure, we pop the top
2407 state, and re-enter the loop at the state indicated. If there are no more
2408 states to pop, we return failure.
2409 
2410 Sometimes we also need to backtrack on success; for example /A+/, where
2411 after successfully matching one A, we need to go back and try to
2412 match another one; similarly for lookahead assertions: if the assertion
2413 completes successfully, we backtrack to the state just before the assertion
2414 and then carry on.  In these cases, the pushed state is marked as
2415 'backtrack on success too'. This marking is in fact done by a chain of
2416 pointers, each pointing to the previous 'yes' state. On success, we pop to
2417 the nearest yes state, discarding any intermediate failure-only states.
2418 Sometimes a yes state is pushed just to force some cleanup code to be
2419 called at the end of a successful match or submatch; e.g. (??{$re}) uses
2420 it to free the inner regex.
2421 
2422 Note that failure backtracking rewinds the cursor position, while
2423 success backtracking leaves it alone.
2424 
2425 A pattern is complete when the END op is executed, while a subpattern
2426 such as (?=foo) is complete when the SUCCESS op is executed. Both of these
2427 ops trigger the "pop to last yes state if any, otherwise return true"
2428 behaviour.
2429 
2430 A common convention in this function is to use A and B to refer to the two
2431 subpatterns (or to the first nodes thereof) in patterns like /A*B/: so A is
2432 the subpattern to be matched possibly multiple times, while B is the entire
2433 rest of the pattern. Variable and state names reflect this convention.
2434 
2435 The states in the main switch are the union of ops and failure/success of
2436 substates associated with with that op.  For example, IFMATCH is the op
2437 that does lookahead assertions /(?=A)B/ and so the IFMATCH state means
2438 'execute IFMATCH'; while IFMATCH_A is a state saying that we have just
2439 successfully matched A and IFMATCH_A_fail is a state saying that we have
2440 just failed to match A. Resume states always come in pairs. The backtrack
2441 state we push is marked as 'IFMATCH_A', but when that is popped, we resume
2442 at IFMATCH_A or IFMATCH_A_fail, depending on whether we are backtracking
2443 on success or failure.
2444 
2445 The struct that holds a backtracking state is actually a big union, with
2446 one variant for each major type of op. The variable st points to the
2447 top-most backtrack struct. To make the code clearer, within each
2448 block of code we #define ST to alias the relevant union.
2449 
2450 Here's a concrete example of a (vastly oversimplified) IFMATCH
2451 implementation:
2452 
2453     switch (state) {
2454     ....
2455 
2456 #define ST st->u.ifmatch
2457 
2458     case IFMATCH: // we are executing the IFMATCH op, (?=A)B
2459 	ST.foo = ...; // some state we wish to save
2460 	...
2461 	// push a yes backtrack state with a resume value of
2462 	// IFMATCH_A/IFMATCH_A_fail, then continue execution at the
2463 	// first node of A:
2464 	PUSH_YES_STATE_GOTO(IFMATCH_A, A);
2465 	// NOTREACHED
2466 
2467     case IFMATCH_A: // we have successfully executed A; now continue with B
2468 	next = B;
2469 	bar = ST.foo; // do something with the preserved value
2470 	break;
2471 
2472     case IFMATCH_A_fail: // A failed, so the assertion failed
2473 	...;   // do some housekeeping, then ...
2474 	sayNO; // propagate the failure
2475 
2476 #undef ST
2477 
2478     ...
2479     }
2480 
2481 For any old-timers reading this who are familiar with the old recursive
2482 approach, the code above is equivalent to:
2483 
2484     case IFMATCH: // we are executing the IFMATCH op, (?=A)B
2485     {
2486 	int foo = ...
2487 	...
2488 	if (regmatch(A)) {
2489 	    next = B;
2490 	    bar = foo;
2491 	    break;
2492 	}
2493 	...;   // do some housekeeping, then ...
2494 	sayNO; // propagate the failure
2495     }
2496 
2497 The topmost backtrack state, pointed to by st, is usually free. If you
2498 want to claim it, populate any ST.foo fields in it with values you wish to
2499 save, then do one of
2500 
2501 	PUSH_STATE_GOTO(resume_state, node);
2502 	PUSH_YES_STATE_GOTO(resume_state, node);
2503 
2504 which sets that backtrack state's resume value to 'resume_state', pushes a
2505 new free entry to the top of the backtrack stack, then goes to 'node'.
2506 On backtracking, the free slot is popped, and the saved state becomes the
2507 new free state. An ST.foo field in this new top state can be temporarily
2508 accessed to retrieve values, but once the main loop is re-entered, it
2509 becomes available for reuse.
2510 
2511 Note that the depth of the backtrack stack constantly increases during the
2512 left-to-right execution of the pattern, rather than going up and down with
2513 the pattern nesting. For example the stack is at its maximum at Z at the
2514 end of the pattern, rather than at X in the following:
2515 
2516     /(((X)+)+)+....(Y)+....Z/
2517 
2518 The only exceptions to this are lookahead/behind assertions and the cut,
2519 (?>A), which pop all the backtrack states associated with A before
2520 continuing.
2521 
2522 Bascktrack state structs are allocated in slabs of about 4K in size.
2523 PL_regmatch_state and st always point to the currently active state,
2524 and PL_regmatch_slab points to the slab currently containing
2525 PL_regmatch_state.  The first time regmatch() is called, the first slab is
2526 allocated, and is never freed until interpreter destruction. When the slab
2527 is full, a new one is allocated and chained to the end. At exit from
2528 regmatch(), slabs allocated since entry are freed.
2529 
2530 */
2531 
2532 
2533 #define DEBUG_STATE_pp(pp)				    \
2534     DEBUG_STATE_r({					    \
2535 	DUMP_EXEC_POS(locinput, scan, do_utf8);		    \
2536 	PerlIO_printf(Perl_debug_log,			    \
2537 	    "    %*s"pp" %s%s%s%s%s\n",			    \
2538 	    depth*2, "",				    \
2539 	    PL_reg_name[st->resume_state],                     \
2540 	    ((st==yes_state||st==mark_state) ? "[" : ""),   \
2541 	    ((st==yes_state) ? "Y" : ""),                   \
2542 	    ((st==mark_state) ? "M" : ""),                  \
2543 	    ((st==yes_state||st==mark_state) ? "]" : "")    \
2544 	);                                                  \
2545     });
2546 
2547 
2548 #define REG_NODE_NUM(x) ((x) ? (int)((x)-prog) : -1)
2549 
2550 #ifdef DEBUGGING
2551 
2552 STATIC void
2553 S_debug_start_match(pTHX_ const regexp *prog, const bool do_utf8,
2554     const char *start, const char *end, const char *blurb)
2555 {
2556     const bool utf8_pat= prog->extflags & RXf_UTF8 ? 1 : 0;
2557     if (!PL_colorset)
2558             reginitcolors();
2559     {
2560         RE_PV_QUOTED_DECL(s0, utf8_pat, PERL_DEBUG_PAD_ZERO(0),
2561             prog->precomp, prog->prelen, 60);
2562 
2563         RE_PV_QUOTED_DECL(s1, do_utf8, PERL_DEBUG_PAD_ZERO(1),
2564             start, end - start, 60);
2565 
2566         PerlIO_printf(Perl_debug_log,
2567             "%s%s REx%s %s against %s\n",
2568 		       PL_colors[4], blurb, PL_colors[5], s0, s1);
2569 
2570         if (do_utf8||utf8_pat)
2571             PerlIO_printf(Perl_debug_log, "UTF-8 %s%s%s...\n",
2572                 utf8_pat ? "pattern" : "",
2573                 utf8_pat && do_utf8 ? " and " : "",
2574                 do_utf8 ? "string" : ""
2575             );
2576     }
2577 }
2578 
2579 STATIC void
2580 S_dump_exec_pos(pTHX_ const char *locinput,
2581                       const regnode *scan,
2582                       const char *loc_regeol,
2583                       const char *loc_bostr,
2584                       const char *loc_reg_starttry,
2585                       const bool do_utf8)
2586 {
2587     const int docolor = *PL_colors[0] || *PL_colors[2] || *PL_colors[4];
2588     const int taill = (docolor ? 10 : 7); /* 3 chars for "> <" */
2589     int l = (loc_regeol - locinput) > taill ? taill : (loc_regeol - locinput);
2590     /* The part of the string before starttry has one color
2591        (pref0_len chars), between starttry and current
2592        position another one (pref_len - pref0_len chars),
2593        after the current position the third one.
2594        We assume that pref0_len <= pref_len, otherwise we
2595        decrease pref0_len.  */
2596     int pref_len = (locinput - loc_bostr) > (5 + taill) - l
2597 	? (5 + taill) - l : locinput - loc_bostr;
2598     int pref0_len;
2599 
2600     while (do_utf8 && UTF8_IS_CONTINUATION(*(U8*)(locinput - pref_len)))
2601 	pref_len++;
2602     pref0_len = pref_len  - (locinput - loc_reg_starttry);
2603     if (l + pref_len < (5 + taill) && l < loc_regeol - locinput)
2604 	l = ( loc_regeol - locinput > (5 + taill) - pref_len
2605 	      ? (5 + taill) - pref_len : loc_regeol - locinput);
2606     while (do_utf8 && UTF8_IS_CONTINUATION(*(U8*)(locinput + l)))
2607 	l--;
2608     if (pref0_len < 0)
2609 	pref0_len = 0;
2610     if (pref0_len > pref_len)
2611 	pref0_len = pref_len;
2612     {
2613 	const int is_uni = (do_utf8 && OP(scan) != CANY) ? 1 : 0;
2614 
2615 	RE_PV_COLOR_DECL(s0,len0,is_uni,PERL_DEBUG_PAD(0),
2616 	    (locinput - pref_len),pref0_len, 60, 4, 5);
2617 
2618 	RE_PV_COLOR_DECL(s1,len1,is_uni,PERL_DEBUG_PAD(1),
2619 		    (locinput - pref_len + pref0_len),
2620 		    pref_len - pref0_len, 60, 2, 3);
2621 
2622 	RE_PV_COLOR_DECL(s2,len2,is_uni,PERL_DEBUG_PAD(2),
2623 		    locinput, loc_regeol - locinput, 10, 0, 1);
2624 
2625 	const STRLEN tlen=len0+len1+len2;
2626 	PerlIO_printf(Perl_debug_log,
2627 		    "%4"IVdf" <%.*s%.*s%s%.*s>%*s|",
2628 		    (IV)(locinput - loc_bostr),
2629 		    len0, s0,
2630 		    len1, s1,
2631 		    (docolor ? "" : "> <"),
2632 		    len2, s2,
2633 		    (int)(tlen > 19 ? 0 :  19 - tlen),
2634 		    "");
2635     }
2636 }
2637 
2638 #endif
2639 
2640 /* reg_check_named_buff_matched()
2641  * Checks to see if a named buffer has matched. The data array of
2642  * buffer numbers corresponding to the buffer is expected to reside
2643  * in the regexp->data->data array in the slot stored in the ARG() of
2644  * node involved. Note that this routine doesn't actually care about the
2645  * name, that information is not preserved from compilation to execution.
2646  * Returns the index of the leftmost defined buffer with the given name
2647  * or 0 if non of the buffers matched.
2648  */
2649 STATIC I32
2650 S_reg_check_named_buff_matched(pTHX_ const regexp *rex, const regnode *scan) {
2651     I32 n;
2652     RXi_GET_DECL(rex,rexi);
2653     SV *sv_dat=(SV*)rexi->data->data[ ARG( scan ) ];
2654     I32 *nums=(I32*)SvPVX(sv_dat);
2655     for ( n=0; n<SvIVX(sv_dat); n++ ) {
2656         if ((I32)*PL_reglastparen >= nums[n] &&
2657             PL_regoffs[nums[n]].end != -1)
2658         {
2659             return nums[n];
2660         }
2661     }
2662     return 0;
2663 }
2664 
2665 
2666 /* free all slabs above current one  - called during LEAVE_SCOPE */
2667 
2668 STATIC void
2669 S_clear_backtrack_stack(pTHX_ void *p)
2670 {
2671     regmatch_slab *s = PL_regmatch_slab->next;
2672     PERL_UNUSED_ARG(p);
2673 
2674     if (!s)
2675 	return;
2676     PL_regmatch_slab->next = NULL;
2677     while (s) {
2678 	regmatch_slab * const osl = s;
2679 	s = s->next;
2680 	Safefree(osl);
2681     }
2682 }
2683 
2684 
2685 #define SETREX(Re1,Re2) \
2686     if (PL_reg_eval_set) PM_SETRE((PL_reg_curpm), (Re2)); \
2687     Re1 = (Re2)
2688 
2689 STATIC I32			/* 0 failure, 1 success */
2690 S_regmatch(pTHX_ regmatch_info *reginfo, regnode *prog)
2691 {
2692 #if PERL_VERSION < 9 && !defined(PERL_CORE)
2693     dMY_CXT;
2694 #endif
2695     dVAR;
2696     register const bool do_utf8 = PL_reg_match_utf8;
2697     const U32 uniflags = UTF8_ALLOW_DEFAULT;
2698 
2699     regexp *rex = reginfo->prog;
2700     RXi_GET_DECL(rex,rexi);
2701 
2702     I32	oldsave;
2703 
2704     /* the current state. This is a cached copy of PL_regmatch_state */
2705     register regmatch_state *st;
2706 
2707     /* cache heavy used fields of st in registers */
2708     register regnode *scan;
2709     register regnode *next;
2710     register U32 n = 0;	/* general value; init to avoid compiler warning */
2711     register I32 ln = 0; /* len or last;  init to avoid compiler warning */
2712     register char *locinput = PL_reginput;
2713     register I32 nextchr;   /* is always set to UCHARAT(locinput) */
2714 
2715     bool result = 0;	    /* return value of S_regmatch */
2716     int depth = 0;	    /* depth of backtrack stack */
2717     U32 nochange_depth = 0; /* depth of GOSUB recursion with nochange */
2718     const U32 max_nochange_depth =
2719         (3 * rex->nparens > MAX_RECURSE_EVAL_NOCHANGE_DEPTH) ?
2720         3 * rex->nparens : MAX_RECURSE_EVAL_NOCHANGE_DEPTH;
2721 
2722     regmatch_state *yes_state = NULL; /* state to pop to on success of
2723 							    subpattern */
2724     /* mark_state piggy backs on the yes_state logic so that when we unwind
2725        the stack on success we can update the mark_state as we go */
2726     regmatch_state *mark_state = NULL; /* last mark state we have seen */
2727 
2728     regmatch_state *cur_eval = NULL; /* most recent EVAL_AB state */
2729     struct regmatch_state  *cur_curlyx = NULL; /* most recent curlyx */
2730     U32 state_num;
2731     bool no_final = 0;      /* prevent failure from backtracking? */
2732     bool do_cutgroup = 0;   /* no_final only until next branch/trie entry */
2733     char *startpoint = PL_reginput;
2734     SV *popmark = NULL;     /* are we looking for a mark? */
2735     SV *sv_commit = NULL;   /* last mark name seen in failure */
2736     SV *sv_yes_mark = NULL; /* last mark name we have seen
2737                                during a successfull match */
2738     U32 lastopen = 0;       /* last open we saw */
2739     bool has_cutgroup = RX_HAS_CUTGROUP(rex) ? 1 : 0;
2740 
2741     SV* const oreplsv = GvSV(PL_replgv);
2742 
2743 
2744     /* these three flags are set by various ops to signal information to
2745      * the very next op. They have a useful lifetime of exactly one loop
2746      * iteration, and are not preserved or restored by state pushes/pops
2747      */
2748     bool sw = 0;	    /* the condition value in (?(cond)a|b) */
2749     bool minmod = 0;	    /* the next "{n,m}" is a "{n,m}?" */
2750     int logical = 0;	    /* the following EVAL is:
2751 				0: (?{...})
2752 				1: (?(?{...})X|Y)
2753 				2: (??{...})
2754 			       or the following IFMATCH/UNLESSM is:
2755 			        false: plain (?=foo)
2756 				true:  used as a condition: (?(?=foo))
2757 			    */
2758 
2759 #ifdef DEBUGGING
2760     GET_RE_DEBUG_FLAGS_DECL;
2761 #endif
2762 
2763     DEBUG_OPTIMISE_r( DEBUG_EXECUTE_r({
2764 	    PerlIO_printf(Perl_debug_log,"regmatch start\n");
2765     }));
2766     /* on first ever call to regmatch, allocate first slab */
2767     if (!PL_regmatch_slab) {
2768 	Newx(PL_regmatch_slab, 1, regmatch_slab);
2769 	PL_regmatch_slab->prev = NULL;
2770 	PL_regmatch_slab->next = NULL;
2771 	PL_regmatch_state = SLAB_FIRST(PL_regmatch_slab);
2772     }
2773 
2774     oldsave = PL_savestack_ix;
2775     SAVEDESTRUCTOR_X(S_clear_backtrack_stack, NULL);
2776     SAVEVPTR(PL_regmatch_slab);
2777     SAVEVPTR(PL_regmatch_state);
2778 
2779     /* grab next free state slot */
2780     st = ++PL_regmatch_state;
2781     if (st >  SLAB_LAST(PL_regmatch_slab))
2782 	st = PL_regmatch_state = S_push_slab(aTHX);
2783 
2784     /* Note that nextchr is a byte even in UTF */
2785     nextchr = UCHARAT(locinput);
2786     scan = prog;
2787     while (scan != NULL) {
2788 
2789         DEBUG_EXECUTE_r( {
2790 	    SV * const prop = sv_newmortal();
2791 	    regnode *rnext=regnext(scan);
2792 	    DUMP_EXEC_POS( locinput, scan, do_utf8 );
2793 	    regprop(rex, prop, scan);
2794 
2795 	    PerlIO_printf(Perl_debug_log,
2796 		    "%3"IVdf":%*s%s(%"IVdf")\n",
2797 		    (IV)(scan - rexi->program), depth*2, "",
2798 		    SvPVX_const(prop),
2799 		    (PL_regkind[OP(scan)] == END || !rnext) ?
2800 		        0 : (IV)(rnext - rexi->program));
2801 	});
2802 
2803 	next = scan + NEXT_OFF(scan);
2804 	if (next == scan)
2805 	    next = NULL;
2806 	state_num = OP(scan);
2807 
2808       reenter_switch:
2809 	switch (state_num) {
2810 	case BOL:
2811 	    if (locinput == PL_bostr)
2812 	    {
2813 		/* reginfo->till = reginfo->bol; */
2814 		break;
2815 	    }
2816 	    sayNO;
2817 	case MBOL:
2818 	    if (locinput == PL_bostr ||
2819 		((nextchr || locinput < PL_regeol) && locinput[-1] == '\n'))
2820 	    {
2821 		break;
2822 	    }
2823 	    sayNO;
2824 	case SBOL:
2825 	    if (locinput == PL_bostr)
2826 		break;
2827 	    sayNO;
2828 	case GPOS:
2829 	    if (locinput == reginfo->ganch)
2830 		break;
2831 	    sayNO;
2832 
2833 	case KEEPS:
2834 	    /* update the startpoint */
2835 	    st->u.keeper.val = PL_regoffs[0].start;
2836 	    PL_reginput = locinput;
2837 	    PL_regoffs[0].start = locinput - PL_bostr;
2838 	    PUSH_STATE_GOTO(KEEPS_next, next);
2839 	    /*NOT-REACHED*/
2840 	case KEEPS_next_fail:
2841 	    /* rollback the start point change */
2842 	    PL_regoffs[0].start = st->u.keeper.val;
2843 	    sayNO_SILENT;
2844 	    /*NOT-REACHED*/
2845 	case EOL:
2846 		goto seol;
2847 	case MEOL:
2848 	    if ((nextchr || locinput < PL_regeol) && nextchr != '\n')
2849 		sayNO;
2850 	    break;
2851 	case SEOL:
2852 	  seol:
2853 	    if ((nextchr || locinput < PL_regeol) && nextchr != '\n')
2854 		sayNO;
2855 	    if (PL_regeol - locinput > 1)
2856 		sayNO;
2857 	    break;
2858 	case EOS:
2859 	    if (PL_regeol != locinput)
2860 		sayNO;
2861 	    break;
2862 	case SANY:
2863 	    if (!nextchr && locinput >= PL_regeol)
2864 		sayNO;
2865  	    if (do_utf8) {
2866 	        locinput += PL_utf8skip[nextchr];
2867 		if (locinput > PL_regeol)
2868  		    sayNO;
2869  		nextchr = UCHARAT(locinput);
2870  	    }
2871  	    else
2872  		nextchr = UCHARAT(++locinput);
2873 	    break;
2874 	case CANY:
2875 	    if (!nextchr && locinput >= PL_regeol)
2876 		sayNO;
2877 	    nextchr = UCHARAT(++locinput);
2878 	    break;
2879 	case REG_ANY:
2880 	    if ((!nextchr && locinput >= PL_regeol) || nextchr == '\n')
2881 		sayNO;
2882 	    if (do_utf8) {
2883 		locinput += PL_utf8skip[nextchr];
2884 		if (locinput > PL_regeol)
2885 		    sayNO;
2886 		nextchr = UCHARAT(locinput);
2887 	    }
2888 	    else
2889 		nextchr = UCHARAT(++locinput);
2890 	    break;
2891 
2892 #undef  ST
2893 #define ST st->u.trie
2894         case TRIEC:
2895             /* In this case the charclass data is available inline so
2896                we can fail fast without a lot of extra overhead.
2897              */
2898             if (scan->flags == EXACT || !do_utf8) {
2899                 if(!ANYOF_BITMAP_TEST(scan, *locinput)) {
2900                     DEBUG_EXECUTE_r(
2901                         PerlIO_printf(Perl_debug_log,
2902                     	          "%*s  %sfailed to match trie start class...%s\n",
2903                     	          REPORT_CODE_OFF+depth*2, "", PL_colors[4], PL_colors[5])
2904                     );
2905                     sayNO_SILENT;
2906                     /* NOTREACHED */
2907                 }
2908             }
2909             /* FALL THROUGH */
2910 	case TRIE:
2911 	    {
2912                 /* what type of TRIE am I? (utf8 makes this contextual) */
2913                 DECL_TRIE_TYPE(scan);
2914 
2915                 /* what trie are we using right now */
2916 		reg_trie_data * const trie
2917         	    = (reg_trie_data*)rexi->data->data[ ARG( scan ) ];
2918 		HV * widecharmap = (HV *)rexi->data->data[ ARG( scan ) + 1 ];
2919                 U32 state = trie->startstate;
2920 
2921         	if (trie->bitmap && trie_type != trie_utf8_fold &&
2922         	    !TRIE_BITMAP_TEST(trie,*locinput)
2923         	) {
2924         	    if (trie->states[ state ].wordnum) {
2925         	         DEBUG_EXECUTE_r(
2926                             PerlIO_printf(Perl_debug_log,
2927                         	          "%*s  %smatched empty string...%s\n",
2928                         	          REPORT_CODE_OFF+depth*2, "", PL_colors[4], PL_colors[5])
2929                         );
2930         	        break;
2931         	    } else {
2932         	        DEBUG_EXECUTE_r(
2933                             PerlIO_printf(Perl_debug_log,
2934                         	          "%*s  %sfailed to match trie start class...%s\n",
2935                         	          REPORT_CODE_OFF+depth*2, "", PL_colors[4], PL_colors[5])
2936                         );
2937         	        sayNO_SILENT;
2938         	   }
2939                 }
2940 
2941             {
2942 		U8 *uc = ( U8* )locinput;
2943 
2944 		STRLEN len = 0;
2945 		STRLEN foldlen = 0;
2946 		U8 *uscan = (U8*)NULL;
2947 		STRLEN bufflen=0;
2948 		SV *sv_accept_buff = NULL;
2949 		U8 foldbuf[ UTF8_MAXBYTES_CASE + 1 ];
2950 
2951 	    	ST.accepted = 0; /* how many accepting states we have seen */
2952 		ST.B = next;
2953 		ST.jump = trie->jump;
2954 		ST.me = scan;
2955 	        /*
2956         	   traverse the TRIE keeping track of all accepting states
2957         	   we transition through until we get to a failing node.
2958         	*/
2959 
2960 		while ( state && uc <= (U8*)PL_regeol ) {
2961                     U32 base = trie->states[ state ].trans.base;
2962                     UV uvc = 0;
2963                     U16 charid;
2964                     /* We use charid to hold the wordnum as we don't use it
2965                        for charid until after we have done the wordnum logic.
2966                        We define an alias just so that the wordnum logic reads
2967                        more naturally. */
2968 
2969 #define got_wordnum charid
2970                     got_wordnum = trie->states[ state ].wordnum;
2971 
2972 		    if ( got_wordnum ) {
2973 			if ( ! ST.accepted ) {
2974 			    ENTER;
2975 			    /* SAVETMPS; */ /* XXX is this necessary? dmq */
2976 			    bufflen = TRIE_INITAL_ACCEPT_BUFFLEN;
2977 			    sv_accept_buff=newSV(bufflen *
2978 					    sizeof(reg_trie_accepted) - 1);
2979 			    SvCUR_set(sv_accept_buff, 0);
2980 			    SvPOK_on(sv_accept_buff);
2981 			    sv_2mortal(sv_accept_buff);
2982 			    SAVETMPS;
2983 			    ST.accept_buff =
2984 				(reg_trie_accepted*)SvPV_nolen(sv_accept_buff );
2985 			}
2986 			do {
2987 			    if (ST.accepted >= bufflen) {
2988 				bufflen *= 2;
2989 				ST.accept_buff =(reg_trie_accepted*)
2990 				    SvGROW(sv_accept_buff,
2991 				       	bufflen * sizeof(reg_trie_accepted));
2992 			    }
2993 			    SvCUR_set(sv_accept_buff,SvCUR(sv_accept_buff)
2994 				+ sizeof(reg_trie_accepted));
2995 
2996 
2997 			    ST.accept_buff[ST.accepted].wordnum = got_wordnum;
2998 			    ST.accept_buff[ST.accepted].endpos = uc;
2999 			    ++ST.accepted;
3000 		        } while (trie->nextword && (got_wordnum= trie->nextword[got_wordnum]));
3001 		    }
3002 #undef got_wordnum
3003 
3004 		    DEBUG_TRIE_EXECUTE_r({
3005 		                DUMP_EXEC_POS( (char *)uc, scan, do_utf8 );
3006 			        PerlIO_printf( Perl_debug_log,
3007 			            "%*s  %sState: %4"UVxf" Accepted: %4"UVxf" ",
3008 			            2+depth * 2, "", PL_colors[4],
3009 			            (UV)state, (UV)ST.accepted );
3010 		    });
3011 
3012 		    if ( base ) {
3013 			REXEC_TRIE_READ_CHAR(trie_type, trie, widecharmap, uc,
3014 					     uscan, len, uvc, charid, foldlen,
3015 					     foldbuf, uniflags);
3016 
3017 			if (charid &&
3018 			     (base + charid > trie->uniquecharcount )
3019 			     && (base + charid - 1 - trie->uniquecharcount
3020 				    < trie->lasttrans)
3021 			     && trie->trans[base + charid - 1 -
3022 				    trie->uniquecharcount].check == state)
3023 			{
3024 			    state = trie->trans[base + charid - 1 -
3025 				trie->uniquecharcount ].next;
3026 			}
3027 			else {
3028 			    state = 0;
3029 			}
3030 			uc += len;
3031 
3032 		    }
3033 		    else {
3034 			state = 0;
3035 		    }
3036 		    DEBUG_TRIE_EXECUTE_r(
3037 		        PerlIO_printf( Perl_debug_log,
3038 		            "Charid:%3x CP:%4"UVxf" After State: %4"UVxf"%s\n",
3039 		            charid, uvc, (UV)state, PL_colors[5] );
3040 		    );
3041 		}
3042 		if (!ST.accepted )
3043 		   sayNO;
3044 
3045 		DEBUG_EXECUTE_r(
3046 		    PerlIO_printf( Perl_debug_log,
3047 			"%*s  %sgot %"IVdf" possible matches%s\n",
3048 			REPORT_CODE_OFF + depth * 2, "",
3049 			PL_colors[4], (IV)ST.accepted, PL_colors[5] );
3050 		);
3051 	    }}
3052             goto trie_first_try; /* jump into the fail handler */
3053 	    /* NOTREACHED */
3054 	case TRIE_next_fail: /* we failed - try next alterative */
3055             if ( ST.jump) {
3056                 REGCP_UNWIND(ST.cp);
3057 	        for (n = *PL_reglastparen; n > ST.lastparen; n--)
3058 		    PL_regoffs[n].end = -1;
3059 	        *PL_reglastparen = n;
3060 	    }
3061           trie_first_try:
3062             if (do_cutgroup) {
3063                 do_cutgroup = 0;
3064                 no_final = 0;
3065             }
3066 
3067             if ( ST.jump) {
3068                 ST.lastparen = *PL_reglastparen;
3069 	        REGCP_SET(ST.cp);
3070             }
3071 	    if ( ST.accepted == 1 ) {
3072 		/* only one choice left - just continue */
3073 		DEBUG_EXECUTE_r({
3074 		    AV *const trie_words
3075 			= (AV *) rexi->data->data[ARG(ST.me)+TRIE_WORDS_OFFSET];
3076 		    SV ** const tmp = av_fetch( trie_words,
3077 		        ST.accept_buff[ 0 ].wordnum-1, 0 );
3078 		    SV *sv= tmp ? sv_newmortal() : NULL;
3079 
3080 		    PerlIO_printf( Perl_debug_log,
3081 			"%*s  %sonly one match left: #%d <%s>%s\n",
3082 			REPORT_CODE_OFF+depth*2, "", PL_colors[4],
3083 			ST.accept_buff[ 0 ].wordnum,
3084 			tmp ? pv_pretty(sv, SvPV_nolen_const(*tmp), SvCUR(*tmp), 0,
3085 	                        PL_colors[0], PL_colors[1],
3086 	                        (SvUTF8(*tmp) ? PERL_PV_ESCAPE_UNI : 0)
3087                             )
3088 			: "not compiled under -Dr",
3089 			PL_colors[5] );
3090 		});
3091 		PL_reginput = (char *)ST.accept_buff[ 0 ].endpos;
3092 		/* in this case we free tmps/leave before we call regmatch
3093 		   as we wont be using accept_buff again. */
3094 
3095 		locinput = PL_reginput;
3096 		nextchr = UCHARAT(locinput);
3097     		if ( !ST.jump || !ST.jump[ST.accept_buff[0].wordnum])
3098     		    scan = ST.B;
3099     		else
3100     		    scan = ST.me + ST.jump[ST.accept_buff[0].wordnum];
3101 		if (!has_cutgroup) {
3102 		    FREETMPS;
3103 		    LEAVE;
3104                 } else {
3105                     ST.accepted--;
3106                     PUSH_YES_STATE_GOTO(TRIE_next, scan);
3107                 }
3108 
3109 		continue; /* execute rest of RE */
3110 	    }
3111 
3112 	    if ( !ST.accepted-- ) {
3113 	        DEBUG_EXECUTE_r({
3114 		    PerlIO_printf( Perl_debug_log,
3115 			"%*s  %sTRIE failed...%s\n",
3116 			REPORT_CODE_OFF+depth*2, "",
3117 			PL_colors[4],
3118 			PL_colors[5] );
3119 		});
3120 		FREETMPS;
3121 		LEAVE;
3122 		sayNO_SILENT;
3123 		/*NOTREACHED*/
3124 	    }
3125 
3126 	    /*
3127 	       There are at least two accepting states left.  Presumably
3128 	       the number of accepting states is going to be low,
3129 	       typically two. So we simply scan through to find the one
3130 	       with lowest wordnum.  Once we find it, we swap the last
3131 	       state into its place and decrement the size. We then try to
3132 	       match the rest of the pattern at the point where the word
3133 	       ends. If we succeed, control just continues along the
3134 	       regex; if we fail we return here to try the next accepting
3135 	       state
3136 	     */
3137 
3138 	    {
3139 		U32 best = 0;
3140 		U32 cur;
3141 		for( cur = 1 ; cur <= ST.accepted ; cur++ ) {
3142 		    DEBUG_TRIE_EXECUTE_r(
3143 			PerlIO_printf( Perl_debug_log,
3144 			    "%*s  %sgot %"IVdf" (%d) as best, looking at %"IVdf" (%d)%s\n",
3145 			    REPORT_CODE_OFF + depth * 2, "", PL_colors[4],
3146 			    (IV)best, ST.accept_buff[ best ].wordnum, (IV)cur,
3147 			    ST.accept_buff[ cur ].wordnum, PL_colors[5] );
3148 		    );
3149 
3150 		    if (ST.accept_buff[cur].wordnum <
3151 			    ST.accept_buff[best].wordnum)
3152 			best = cur;
3153 		}
3154 
3155 		DEBUG_EXECUTE_r({
3156 		    AV *const trie_words
3157 			= (AV *) rexi->data->data[ARG(ST.me)+TRIE_WORDS_OFFSET];
3158 		    SV ** const tmp = av_fetch( trie_words,
3159 		        ST.accept_buff[ best ].wordnum - 1, 0 );
3160 		    regnode *nextop=(!ST.jump || !ST.jump[ST.accept_buff[best].wordnum]) ?
3161 		                    ST.B :
3162 		                    ST.me + ST.jump[ST.accept_buff[best].wordnum];
3163 		    SV *sv= tmp ? sv_newmortal() : NULL;
3164 
3165 		    PerlIO_printf( Perl_debug_log,
3166 		        "%*s  %strying alternation #%d <%s> at node #%d %s\n",
3167 			REPORT_CODE_OFF+depth*2, "", PL_colors[4],
3168 			ST.accept_buff[best].wordnum,
3169 			tmp ? pv_pretty(sv, SvPV_nolen_const(*tmp), SvCUR(*tmp), 0,
3170 	                        PL_colors[0], PL_colors[1],
3171 	                        (SvUTF8(*tmp) ? PERL_PV_ESCAPE_UNI : 0)
3172                             ) : "not compiled under -Dr",
3173 			    REG_NODE_NUM(nextop),
3174 			PL_colors[5] );
3175 		});
3176 
3177 		if ( best<ST.accepted ) {
3178 		    reg_trie_accepted tmp = ST.accept_buff[ best ];
3179 		    ST.accept_buff[ best ] = ST.accept_buff[ ST.accepted ];
3180 		    ST.accept_buff[ ST.accepted ] = tmp;
3181 		    best = ST.accepted;
3182 		}
3183 		PL_reginput = (char *)ST.accept_buff[ best ].endpos;
3184 		if ( !ST.jump || !ST.jump[ST.accept_buff[best].wordnum]) {
3185 		    scan = ST.B;
3186 		} else {
3187 		    scan = ST.me + ST.jump[ST.accept_buff[best].wordnum];
3188                 }
3189                 PUSH_YES_STATE_GOTO(TRIE_next, scan);
3190                 /* NOTREACHED */
3191 	    }
3192 	    /* NOTREACHED */
3193         case TRIE_next:
3194             FREETMPS;
3195 	    LEAVE;
3196 	    sayYES;
3197 #undef  ST
3198 
3199 	case EXACT: {
3200 	    char *s = STRING(scan);
3201 	    ln = STR_LEN(scan);
3202 	    if (do_utf8 != UTF) {
3203 		/* The target and the pattern have differing utf8ness. */
3204 		char *l = locinput;
3205 		const char * const e = s + ln;
3206 
3207 		if (do_utf8) {
3208 		    /* The target is utf8, the pattern is not utf8. */
3209 		    while (s < e) {
3210 			STRLEN ulen;
3211 			if (l >= PL_regeol)
3212 			     sayNO;
3213 			if (NATIVE_TO_UNI(*(U8*)s) !=
3214 			    utf8n_to_uvuni((U8*)l, UTF8_MAXBYTES, &ulen,
3215 					    uniflags))
3216 			     sayNO;
3217 			l += ulen;
3218 			s ++;
3219 		    }
3220 		}
3221 		else {
3222 		    /* The target is not utf8, the pattern is utf8. */
3223 		    while (s < e) {
3224 			STRLEN ulen;
3225 			if (l >= PL_regeol)
3226 			    sayNO;
3227 			if (NATIVE_TO_UNI(*((U8*)l)) !=
3228 			    utf8n_to_uvuni((U8*)s, UTF8_MAXBYTES, &ulen,
3229 					   uniflags))
3230 			    sayNO;
3231 			s += ulen;
3232 			l ++;
3233 		    }
3234 		}
3235 		locinput = l;
3236 		nextchr = UCHARAT(locinput);
3237 		break;
3238 	    }
3239 	    /* The target and the pattern have the same utf8ness. */
3240 	    /* Inline the first character, for speed. */
3241 	    if (UCHARAT(s) != nextchr)
3242 		sayNO;
3243 	    if (PL_regeol - locinput < ln)
3244 		sayNO;
3245 	    if (ln > 1 && memNE(s, locinput, ln))
3246 		sayNO;
3247 	    locinput += ln;
3248 	    nextchr = UCHARAT(locinput);
3249 	    break;
3250 	    }
3251 	case EXACTFL:
3252 	    PL_reg_flags |= RF_tainted;
3253 	    /* FALL THROUGH */
3254 	case EXACTF: {
3255 	    char * const s = STRING(scan);
3256 	    ln = STR_LEN(scan);
3257 
3258 	    if (do_utf8 || UTF) {
3259 	      /* Either target or the pattern are utf8. */
3260 		const char * const l = locinput;
3261 		char *e = PL_regeol;
3262 
3263 		if (ibcmp_utf8(s, 0,  ln, (bool)UTF,
3264 			       l, &e, 0,  do_utf8)) {
3265 		     /* One more case for the sharp s:
3266 		      * pack("U0U*", 0xDF) =~ /ss/i,
3267 		      * the 0xC3 0x9F are the UTF-8
3268 		      * byte sequence for the U+00DF. */
3269 
3270 		     if (!(do_utf8 &&
3271 		           toLOWER(s[0]) == 's' &&
3272 			   ln >= 2 &&
3273 			   toLOWER(s[1]) == 's' &&
3274 			   (U8)l[0] == 0xC3 &&
3275 			   e - l >= 2 &&
3276 			   (U8)l[1] == 0x9F))
3277 			  sayNO;
3278 		}
3279 		locinput = e;
3280 		nextchr = UCHARAT(locinput);
3281 		break;
3282 	    }
3283 
3284 	    /* Neither the target and the pattern are utf8. */
3285 
3286 	    /* Inline the first character, for speed. */
3287 	    if (UCHARAT(s) != nextchr &&
3288 		UCHARAT(s) != ((OP(scan) == EXACTF)
3289 			       ? PL_fold : PL_fold_locale)[nextchr])
3290 		sayNO;
3291 	    if (PL_regeol - locinput < ln)
3292 		sayNO;
3293 	    if (ln > 1 && (OP(scan) == EXACTF
3294 			   ? ibcmp(s, locinput, ln)
3295 			   : ibcmp_locale(s, locinput, ln)))
3296 		sayNO;
3297 	    locinput += ln;
3298 	    nextchr = UCHARAT(locinput);
3299 	    break;
3300 	    }
3301 	case ANYOF:
3302 	    if (do_utf8) {
3303 	        STRLEN inclasslen = PL_regeol - locinput;
3304 
3305 	        if (!reginclass(rex, scan, (U8*)locinput, &inclasslen, do_utf8))
3306 		    goto anyof_fail;
3307 		if (locinput >= PL_regeol)
3308 		    sayNO;
3309 		locinput += inclasslen ? inclasslen : UTF8SKIP(locinput);
3310 		nextchr = UCHARAT(locinput);
3311 		break;
3312 	    }
3313 	    else {
3314 		if (nextchr < 0)
3315 		    nextchr = UCHARAT(locinput);
3316 		if (!REGINCLASS(rex, scan, (U8*)locinput))
3317 		    goto anyof_fail;
3318 		if (!nextchr && locinput >= PL_regeol)
3319 		    sayNO;
3320 		nextchr = UCHARAT(++locinput);
3321 		break;
3322 	    }
3323 	anyof_fail:
3324 	    /* If we might have the case of the German sharp s
3325 	     * in a casefolding Unicode character class. */
3326 
3327 	    if (ANYOF_FOLD_SHARP_S(scan, locinput, PL_regeol)) {
3328 		 locinput += SHARP_S_SKIP;
3329 		 nextchr = UCHARAT(locinput);
3330 	    }
3331 	    else
3332 		 sayNO;
3333 	    break;
3334 	case ALNUML:
3335 	    PL_reg_flags |= RF_tainted;
3336 	    /* FALL THROUGH */
3337 	case ALNUM:
3338 	    if (!nextchr)
3339 		sayNO;
3340 	    if (do_utf8) {
3341 		LOAD_UTF8_CHARCLASS_ALNUM();
3342 		if (!(OP(scan) == ALNUM
3343 		      ? (bool)swash_fetch(PL_utf8_alnum, (U8*)locinput, do_utf8)
3344 		      : isALNUM_LC_utf8((U8*)locinput)))
3345 		{
3346 		    sayNO;
3347 		}
3348 		locinput += PL_utf8skip[nextchr];
3349 		nextchr = UCHARAT(locinput);
3350 		break;
3351 	    }
3352 	    if (!(OP(scan) == ALNUM
3353 		  ? isALNUM(nextchr) : isALNUM_LC(nextchr)))
3354 		sayNO;
3355 	    nextchr = UCHARAT(++locinput);
3356 	    break;
3357 	case NALNUML:
3358 	    PL_reg_flags |= RF_tainted;
3359 	    /* FALL THROUGH */
3360 	case NALNUM:
3361 	    if (!nextchr && locinput >= PL_regeol)
3362 		sayNO;
3363 	    if (do_utf8) {
3364 		LOAD_UTF8_CHARCLASS_ALNUM();
3365 		if (OP(scan) == NALNUM
3366 		    ? (bool)swash_fetch(PL_utf8_alnum, (U8*)locinput, do_utf8)
3367 		    : isALNUM_LC_utf8((U8*)locinput))
3368 		{
3369 		    sayNO;
3370 		}
3371 		locinput += PL_utf8skip[nextchr];
3372 		nextchr = UCHARAT(locinput);
3373 		break;
3374 	    }
3375 	    if (OP(scan) == NALNUM
3376 		? isALNUM(nextchr) : isALNUM_LC(nextchr))
3377 		sayNO;
3378 	    nextchr = UCHARAT(++locinput);
3379 	    break;
3380 	case BOUNDL:
3381 	case NBOUNDL:
3382 	    PL_reg_flags |= RF_tainted;
3383 	    /* FALL THROUGH */
3384 	case BOUND:
3385 	case NBOUND:
3386 	    /* was last char in word? */
3387 	    if (do_utf8) {
3388 		if (locinput == PL_bostr)
3389 		    ln = '\n';
3390 		else {
3391 		    const U8 * const r = reghop3((U8*)locinput, -1, (U8*)PL_bostr);
3392 
3393 		    ln = utf8n_to_uvchr(r, UTF8SKIP(r), 0, uniflags);
3394 		}
3395 		if (OP(scan) == BOUND || OP(scan) == NBOUND) {
3396 		    ln = isALNUM_uni(ln);
3397 		    LOAD_UTF8_CHARCLASS_ALNUM();
3398 		    n = swash_fetch(PL_utf8_alnum, (U8*)locinput, do_utf8);
3399 		}
3400 		else {
3401 		    ln = isALNUM_LC_uvchr(UNI_TO_NATIVE(ln));
3402 		    n = isALNUM_LC_utf8((U8*)locinput);
3403 		}
3404 	    }
3405 	    else {
3406 		ln = (locinput != PL_bostr) ?
3407 		    UCHARAT(locinput - 1) : '\n';
3408 		if (OP(scan) == BOUND || OP(scan) == NBOUND) {
3409 		    ln = isALNUM(ln);
3410 		    n = isALNUM(nextchr);
3411 		}
3412 		else {
3413 		    ln = isALNUM_LC(ln);
3414 		    n = isALNUM_LC(nextchr);
3415 		}
3416 	    }
3417 	    if (((!ln) == (!n)) == (OP(scan) == BOUND ||
3418 				    OP(scan) == BOUNDL))
3419 		    sayNO;
3420 	    break;
3421 	case SPACEL:
3422 	    PL_reg_flags |= RF_tainted;
3423 	    /* FALL THROUGH */
3424 	case SPACE:
3425 	    if (!nextchr)
3426 		sayNO;
3427 	    if (do_utf8) {
3428 		if (UTF8_IS_CONTINUED(nextchr)) {
3429 		    LOAD_UTF8_CHARCLASS_SPACE();
3430 		    if (!(OP(scan) == SPACE
3431 			  ? (bool)swash_fetch(PL_utf8_space, (U8*)locinput, do_utf8)
3432 			  : isSPACE_LC_utf8((U8*)locinput)))
3433 		    {
3434 			sayNO;
3435 		    }
3436 		    locinput += PL_utf8skip[nextchr];
3437 		    nextchr = UCHARAT(locinput);
3438 		    break;
3439 		}
3440 		if (!(OP(scan) == SPACE
3441 		      ? isSPACE(nextchr) : isSPACE_LC(nextchr)))
3442 		    sayNO;
3443 		nextchr = UCHARAT(++locinput);
3444 	    }
3445 	    else {
3446 		if (!(OP(scan) == SPACE
3447 		      ? isSPACE(nextchr) : isSPACE_LC(nextchr)))
3448 		    sayNO;
3449 		nextchr = UCHARAT(++locinput);
3450 	    }
3451 	    break;
3452 	case NSPACEL:
3453 	    PL_reg_flags |= RF_tainted;
3454 	    /* FALL THROUGH */
3455 	case NSPACE:
3456 	    if (!nextchr && locinput >= PL_regeol)
3457 		sayNO;
3458 	    if (do_utf8) {
3459 		LOAD_UTF8_CHARCLASS_SPACE();
3460 		if (OP(scan) == NSPACE
3461 		    ? (bool)swash_fetch(PL_utf8_space, (U8*)locinput, do_utf8)
3462 		    : isSPACE_LC_utf8((U8*)locinput))
3463 		{
3464 		    sayNO;
3465 		}
3466 		locinput += PL_utf8skip[nextchr];
3467 		nextchr = UCHARAT(locinput);
3468 		break;
3469 	    }
3470 	    if (OP(scan) == NSPACE
3471 		? isSPACE(nextchr) : isSPACE_LC(nextchr))
3472 		sayNO;
3473 	    nextchr = UCHARAT(++locinput);
3474 	    break;
3475 	case DIGITL:
3476 	    PL_reg_flags |= RF_tainted;
3477 	    /* FALL THROUGH */
3478 	case DIGIT:
3479 	    if (!nextchr)
3480 		sayNO;
3481 	    if (do_utf8) {
3482 		LOAD_UTF8_CHARCLASS_DIGIT();
3483 		if (!(OP(scan) == DIGIT
3484 		      ? (bool)swash_fetch(PL_utf8_digit, (U8*)locinput, do_utf8)
3485 		      : isDIGIT_LC_utf8((U8*)locinput)))
3486 		{
3487 		    sayNO;
3488 		}
3489 		locinput += PL_utf8skip[nextchr];
3490 		nextchr = UCHARAT(locinput);
3491 		break;
3492 	    }
3493 	    if (!(OP(scan) == DIGIT
3494 		  ? isDIGIT(nextchr) : isDIGIT_LC(nextchr)))
3495 		sayNO;
3496 	    nextchr = UCHARAT(++locinput);
3497 	    break;
3498 	case NDIGITL:
3499 	    PL_reg_flags |= RF_tainted;
3500 	    /* FALL THROUGH */
3501 	case NDIGIT:
3502 	    if (!nextchr && locinput >= PL_regeol)
3503 		sayNO;
3504 	    if (do_utf8) {
3505 		LOAD_UTF8_CHARCLASS_DIGIT();
3506 		if (OP(scan) == NDIGIT
3507 		    ? (bool)swash_fetch(PL_utf8_digit, (U8*)locinput, do_utf8)
3508 		    : isDIGIT_LC_utf8((U8*)locinput))
3509 		{
3510 		    sayNO;
3511 		}
3512 		locinput += PL_utf8skip[nextchr];
3513 		nextchr = UCHARAT(locinput);
3514 		break;
3515 	    }
3516 	    if (OP(scan) == NDIGIT
3517 		? isDIGIT(nextchr) : isDIGIT_LC(nextchr))
3518 		sayNO;
3519 	    nextchr = UCHARAT(++locinput);
3520 	    break;
3521 	case CLUMP:
3522 	    if (locinput >= PL_regeol)
3523 		sayNO;
3524 	    if  (do_utf8) {
3525 		LOAD_UTF8_CHARCLASS_MARK();
3526 		if (swash_fetch(PL_utf8_mark,(U8*)locinput, do_utf8))
3527 		    sayNO;
3528 		locinput += PL_utf8skip[nextchr];
3529 		while (locinput < PL_regeol &&
3530 		       swash_fetch(PL_utf8_mark,(U8*)locinput, do_utf8))
3531 		    locinput += UTF8SKIP(locinput);
3532 		if (locinput > PL_regeol)
3533 		    sayNO;
3534 	    }
3535 	    else
3536 	       locinput++;
3537 	    nextchr = UCHARAT(locinput);
3538 	    break;
3539 
3540 	case NREFFL:
3541 	{
3542 	    char *s;
3543 	    char type;
3544 	    PL_reg_flags |= RF_tainted;
3545 	    /* FALL THROUGH */
3546 	case NREF:
3547 	case NREFF:
3548 	    type = OP(scan);
3549 	    n = reg_check_named_buff_matched(rex,scan);
3550 
3551             if ( n ) {
3552                 type = REF + ( type - NREF );
3553                 goto do_ref;
3554             } else {
3555                 sayNO;
3556             }
3557             /* unreached */
3558 	case REFFL:
3559 	    PL_reg_flags |= RF_tainted;
3560 	    /* FALL THROUGH */
3561         case REF:
3562 	case REFF:
3563 	    n = ARG(scan);  /* which paren pair */
3564 	    type = OP(scan);
3565 	  do_ref:
3566 	    ln = PL_regoffs[n].start;
3567 	    PL_reg_leftiter = PL_reg_maxiter;		/* Void cache */
3568 	    if (*PL_reglastparen < n || ln == -1)
3569 		sayNO;			/* Do not match unless seen CLOSEn. */
3570 	    if (ln == PL_regoffs[n].end)
3571 		break;
3572 
3573 	    s = PL_bostr + ln;
3574 	    if (do_utf8 && type != REF) {	/* REF can do byte comparison */
3575 		char *l = locinput;
3576 		const char *e = PL_bostr + PL_regoffs[n].end;
3577 		/*
3578 		 * Note that we can't do the "other character" lookup trick as
3579 		 * in the 8-bit case (no pun intended) because in Unicode we
3580 		 * have to map both upper and title case to lower case.
3581 		 */
3582 		if (type == REFF) {
3583 		    while (s < e) {
3584 			STRLEN ulen1, ulen2;
3585 			U8 tmpbuf1[UTF8_MAXBYTES_CASE+1];
3586 			U8 tmpbuf2[UTF8_MAXBYTES_CASE+1];
3587 
3588 			if (l >= PL_regeol)
3589 			    sayNO;
3590 			toLOWER_utf8((U8*)s, tmpbuf1, &ulen1);
3591 			toLOWER_utf8((U8*)l, tmpbuf2, &ulen2);
3592 			if (ulen1 != ulen2 || memNE((char *)tmpbuf1, (char *)tmpbuf2, ulen1))
3593 			    sayNO;
3594 			s += ulen1;
3595 			l += ulen2;
3596 		    }
3597 		}
3598 		locinput = l;
3599 		nextchr = UCHARAT(locinput);
3600 		break;
3601 	    }
3602 
3603 	    /* Inline the first character, for speed. */
3604 	    if (UCHARAT(s) != nextchr &&
3605 		(type == REF ||
3606 		 (UCHARAT(s) != (type == REFF
3607 				  ? PL_fold : PL_fold_locale)[nextchr])))
3608 		sayNO;
3609 	    ln = PL_regoffs[n].end - ln;
3610 	    if (locinput + ln > PL_regeol)
3611 		sayNO;
3612 	    if (ln > 1 && (type == REF
3613 			   ? memNE(s, locinput, ln)
3614 			   : (type == REFF
3615 			      ? ibcmp(s, locinput, ln)
3616 			      : ibcmp_locale(s, locinput, ln))))
3617 		sayNO;
3618 	    locinput += ln;
3619 	    nextchr = UCHARAT(locinput);
3620 	    break;
3621 	}
3622 	case NOTHING:
3623 	case TAIL:
3624 	    break;
3625 	case BACK:
3626 	    break;
3627 
3628 #undef  ST
3629 #define ST st->u.eval
3630 	{
3631 	    SV *ret;
3632             regexp *re;
3633             regexp_internal *rei;
3634             regnode *startpoint;
3635 
3636 	case GOSTART:
3637 	case GOSUB: /*    /(...(?1))/   /(...(?&foo))/   */
3638 	    if (cur_eval && cur_eval->locinput==locinput) {
3639                 if (cur_eval->u.eval.close_paren == (U32)ARG(scan))
3640                     Perl_croak(aTHX_ "Infinite recursion in regex");
3641                 if ( ++nochange_depth > max_nochange_depth )
3642                     Perl_croak(aTHX_
3643                         "Pattern subroutine nesting without pos change"
3644                         " exceeded limit in regex");
3645             } else {
3646                 nochange_depth = 0;
3647             }
3648             re = rex;
3649             rei = rexi;
3650             (void)ReREFCNT_inc(rex);
3651             if (OP(scan)==GOSUB) {
3652                 startpoint = scan + ARG2L(scan);
3653                 ST.close_paren = ARG(scan);
3654             } else {
3655                 startpoint = rei->program+1;
3656                 ST.close_paren = 0;
3657             }
3658             goto eval_recurse_doit;
3659             /* NOTREACHED */
3660         case EVAL:  /*   /(?{A})B/   /(??{A})B/  and /(?(?{A})X|Y)B/   */
3661             if (cur_eval && cur_eval->locinput==locinput) {
3662 		if ( ++nochange_depth > max_nochange_depth )
3663                     Perl_croak(aTHX_ "EVAL without pos change exceeded limit in regex");
3664             } else {
3665                 nochange_depth = 0;
3666             }
3667 	    {
3668 		/* execute the code in the {...} */
3669 		dSP;
3670 		SV ** const before = SP;
3671 		OP_4tree * const oop = PL_op;
3672 		COP * const ocurcop = PL_curcop;
3673 		PAD *old_comppad;
3674 
3675 		n = ARG(scan);
3676 		PL_op = (OP_4tree*)rexi->data->data[n];
3677 		DEBUG_STATE_r( PerlIO_printf(Perl_debug_log,
3678 		    "  re_eval 0x%"UVxf"\n", PTR2UV(PL_op)) );
3679 		PAD_SAVE_LOCAL(old_comppad, (PAD*)rexi->data->data[n + 2]);
3680 		PL_regoffs[0].end = PL_reg_magic->mg_len = locinput - PL_bostr;
3681 
3682                 if (sv_yes_mark) {
3683                     SV *sv_mrk = get_sv("REGMARK", 1);
3684                     sv_setsv(sv_mrk, sv_yes_mark);
3685                 }
3686 
3687 		CALLRUNOPS(aTHX);			/* Scalar context. */
3688 		SPAGAIN;
3689 		if (SP == before)
3690 		    ret = &PL_sv_undef;   /* protect against empty (?{}) blocks. */
3691 		else {
3692 		    ret = POPs;
3693 		    PUTBACK;
3694 		}
3695 
3696 		PL_op = oop;
3697 		PAD_RESTORE_LOCAL(old_comppad);
3698 		PL_curcop = ocurcop;
3699 		if (!logical) {
3700 		    /* /(?{...})/ */
3701 		    sv_setsv(save_scalar(PL_replgv), ret);
3702 		    break;
3703 		}
3704 	    }
3705 	    if (logical == 2) { /* Postponed subexpression: /(??{...})/ */
3706 		logical = 0;
3707 		{
3708 		    /* extract RE object from returned value; compiling if
3709 		     * necessary */
3710 
3711 		    MAGIC *mg = NULL;
3712 		    const SV *sv;
3713 		    if(SvROK(ret) && SvSMAGICAL(sv = SvRV(ret)))
3714 			mg = mg_find(sv, PERL_MAGIC_qr);
3715 		    else if (SvSMAGICAL(ret)) {
3716 			if (SvGMAGICAL(ret))
3717 			    sv_unmagic(ret, PERL_MAGIC_qr);
3718 			else
3719 			    mg = mg_find(ret, PERL_MAGIC_qr);
3720 		    }
3721 
3722 		    if (mg) {
3723 			re = reg_temp_copy((regexp *)mg->mg_obj); /*XXX:dmq*/
3724 		    }
3725 		    else {
3726 			U32 pm_flags = 0;
3727 			const I32 osize = PL_regsize;
3728 
3729 			if (DO_UTF8(ret)) pm_flags |= RXf_UTF8;
3730 			re = CALLREGCOMP(ret, pm_flags);
3731 			if (!(SvFLAGS(ret)
3732 			      & (SVs_TEMP | SVs_PADTMP | SVf_READONLY
3733 				| SVs_GMG)))
3734 			    sv_magic(ret,(SV*)ReREFCNT_inc(re),
3735 					PERL_MAGIC_qr,0,0);
3736 			PL_regsize = osize;
3737 		    }
3738 		}
3739                 RX_MATCH_COPIED_off(re);
3740                 re->subbeg = rex->subbeg;
3741                 re->sublen = rex->sublen;
3742 		rei = RXi_GET(re);
3743                 DEBUG_EXECUTE_r(
3744                     debug_start_match(re, do_utf8, locinput, PL_regeol,
3745                         "Matching embedded");
3746 		);
3747 		startpoint = rei->program + 1;
3748                	ST.close_paren = 0; /* only used for GOSUB */
3749                	/* borrowed from regtry */
3750                 if (PL_reg_start_tmpl <= re->nparens) {
3751                     PL_reg_start_tmpl = re->nparens*3/2 + 3;
3752                     if(PL_reg_start_tmp)
3753                         Renew(PL_reg_start_tmp, PL_reg_start_tmpl, char*);
3754                     else
3755                         Newx(PL_reg_start_tmp, PL_reg_start_tmpl, char*);
3756                 }
3757 
3758         eval_recurse_doit: /* Share code with GOSUB below this line */
3759 		/* run the pattern returned from (??{...}) */
3760 		ST.cp = regcppush(0);	/* Save *all* the positions. */
3761 		REGCP_SET(ST.lastcp);
3762 
3763 		PL_regoffs = re->offs; /* essentially NOOP on GOSUB */
3764 
3765 		*PL_reglastparen = 0;
3766 		*PL_reglastcloseparen = 0;
3767 		PL_reginput = locinput;
3768 		PL_regsize = 0;
3769 
3770 		/* XXXX This is too dramatic a measure... */
3771 		PL_reg_maxiter = 0;
3772 
3773 		ST.toggle_reg_flags = PL_reg_flags;
3774 		if (re->extflags & RXf_UTF8)
3775 		    PL_reg_flags |= RF_utf8;
3776 		else
3777 		    PL_reg_flags &= ~RF_utf8;
3778 		ST.toggle_reg_flags ^= PL_reg_flags; /* diff of old and new */
3779 
3780 		ST.prev_rex = rex;
3781 		ST.prev_curlyx = cur_curlyx;
3782 		SETREX(rex,re);
3783 		rexi = rei;
3784 		cur_curlyx = NULL;
3785 		ST.B = next;
3786 		ST.prev_eval = cur_eval;
3787 		cur_eval = st;
3788 		/* now continue from first node in postoned RE */
3789 		PUSH_YES_STATE_GOTO(EVAL_AB, startpoint);
3790 		/* NOTREACHED */
3791 	    }
3792 	    /* logical is 1,   /(?(?{...})X|Y)/ */
3793 	    sw = (bool)SvTRUE(ret);
3794 	    logical = 0;
3795 	    break;
3796 	}
3797 
3798 	case EVAL_AB: /* cleanup after a successful (??{A})B */
3799 	    /* note: this is called twice; first after popping B, then A */
3800 	    PL_reg_flags ^= ST.toggle_reg_flags;
3801 	    ReREFCNT_dec(rex);
3802 	    SETREX(rex,ST.prev_rex);
3803 	    rexi = RXi_GET(rex);
3804 	    regcpblow(ST.cp);
3805 	    cur_eval = ST.prev_eval;
3806 	    cur_curlyx = ST.prev_curlyx;
3807 	    /* XXXX This is too dramatic a measure... */
3808 	    PL_reg_maxiter = 0;
3809             if ( nochange_depth )
3810 	        nochange_depth--;
3811 	    sayYES;
3812 
3813 
3814 	case EVAL_AB_fail: /* unsuccessfully ran A or B in (??{A})B */
3815 	    /* note: this is called twice; first after popping B, then A */
3816 	    PL_reg_flags ^= ST.toggle_reg_flags;
3817 	    ReREFCNT_dec(rex);
3818 	    SETREX(rex,ST.prev_rex);
3819 	    rexi = RXi_GET(rex);
3820 	    PL_reginput = locinput;
3821 	    REGCP_UNWIND(ST.lastcp);
3822 	    regcppop(rex);
3823 	    cur_eval = ST.prev_eval;
3824 	    cur_curlyx = ST.prev_curlyx;
3825 	    /* XXXX This is too dramatic a measure... */
3826 	    PL_reg_maxiter = 0;
3827 	    if ( nochange_depth )
3828 	        nochange_depth--;
3829 	    sayNO_SILENT;
3830 #undef ST
3831 
3832 	case OPEN:
3833 	    n = ARG(scan);  /* which paren pair */
3834 	    PL_reg_start_tmp[n] = locinput;
3835 	    if (n > PL_regsize)
3836 		PL_regsize = n;
3837             lastopen = n;
3838 	    break;
3839 	case CLOSE:
3840 	    n = ARG(scan);  /* which paren pair */
3841 	    PL_regoffs[n].start = PL_reg_start_tmp[n] - PL_bostr;
3842 	    PL_regoffs[n].end = locinput - PL_bostr;
3843 	    /*if (n > PL_regsize)
3844 		PL_regsize = n;*/
3845 	    if (n > *PL_reglastparen)
3846 		*PL_reglastparen = n;
3847 	    *PL_reglastcloseparen = n;
3848             if (cur_eval && cur_eval->u.eval.close_paren == n) {
3849 	        goto fake_end;
3850 	    }
3851 	    break;
3852         case ACCEPT:
3853             if (ARG(scan)){
3854                 regnode *cursor;
3855                 for (cursor=scan;
3856                      cursor && OP(cursor)!=END;
3857                      cursor=regnext(cursor))
3858                 {
3859                     if ( OP(cursor)==CLOSE ){
3860                         n = ARG(cursor);
3861                         if ( n <= lastopen ) {
3862                             PL_regoffs[n].start
3863 				= PL_reg_start_tmp[n] - PL_bostr;
3864                             PL_regoffs[n].end = locinput - PL_bostr;
3865                             /*if (n > PL_regsize)
3866                             PL_regsize = n;*/
3867                             if (n > *PL_reglastparen)
3868                                 *PL_reglastparen = n;
3869                             *PL_reglastcloseparen = n;
3870                             if ( n == ARG(scan) || (cur_eval &&
3871                                 cur_eval->u.eval.close_paren == n))
3872                                 break;
3873                         }
3874                     }
3875                 }
3876             }
3877 	    goto fake_end;
3878 	    /*NOTREACHED*/
3879 	case GROUPP:
3880 	    n = ARG(scan);  /* which paren pair */
3881 	    sw = (bool)(*PL_reglastparen >= n && PL_regoffs[n].end != -1);
3882 	    break;
3883 	case NGROUPP:
3884 	    /* reg_check_named_buff_matched returns 0 for no match */
3885 	    sw = (bool)(0 < reg_check_named_buff_matched(rex,scan));
3886 	    break;
3887         case INSUBP:
3888             n = ARG(scan);
3889             sw = (cur_eval && (!n || cur_eval->u.eval.close_paren == n));
3890             break;
3891         case DEFINEP:
3892             sw = 0;
3893             break;
3894 	case IFTHEN:
3895 	    PL_reg_leftiter = PL_reg_maxiter;		/* Void cache */
3896 	    if (sw)
3897 		next = NEXTOPER(NEXTOPER(scan));
3898 	    else {
3899 		next = scan + ARG(scan);
3900 		if (OP(next) == IFTHEN) /* Fake one. */
3901 		    next = NEXTOPER(NEXTOPER(next));
3902 	    }
3903 	    break;
3904 	case LOGICAL:
3905 	    logical = scan->flags;
3906 	    break;
3907 
3908 /*******************************************************************
3909 
3910 The CURLYX/WHILEM pair of ops handle the most generic case of the /A*B/
3911 pattern, where A and B are subpatterns. (For simple A, CURLYM or
3912 STAR/PLUS/CURLY/CURLYN are used instead.)
3913 
3914 A*B is compiled as <CURLYX><A><WHILEM><B>
3915 
3916 On entry to the subpattern, CURLYX is called. This pushes a CURLYX
3917 state, which contains the current count, initialised to -1. It also sets
3918 cur_curlyx to point to this state, with any previous value saved in the
3919 state block.
3920 
3921 CURLYX then jumps straight to the WHILEM op, rather than executing A,
3922 since the pattern may possibly match zero times (i.e. it's a while {} loop
3923 rather than a do {} while loop).
3924 
3925 Each entry to WHILEM represents a successful match of A. The count in the
3926 CURLYX block is incremented, another WHILEM state is pushed, and execution
3927 passes to A or B depending on greediness and the current count.
3928 
3929 For example, if matching against the string a1a2a3b (where the aN are
3930 substrings that match /A/), then the match progresses as follows: (the
3931 pushed states are interspersed with the bits of strings matched so far):
3932 
3933     <CURLYX cnt=-1>
3934     <CURLYX cnt=0><WHILEM>
3935     <CURLYX cnt=1><WHILEM> a1 <WHILEM>
3936     <CURLYX cnt=2><WHILEM> a1 <WHILEM> a2 <WHILEM>
3937     <CURLYX cnt=3><WHILEM> a1 <WHILEM> a2 <WHILEM> a3 <WHILEM>
3938     <CURLYX cnt=3><WHILEM> a1 <WHILEM> a2 <WHILEM> a3 <WHILEM> b
3939 
3940 (Contrast this with something like CURLYM, which maintains only a single
3941 backtrack state:
3942 
3943     <CURLYM cnt=0> a1
3944     a1 <CURLYM cnt=1> a2
3945     a1 a2 <CURLYM cnt=2> a3
3946     a1 a2 a3 <CURLYM cnt=3> b
3947 )
3948 
3949 Each WHILEM state block marks a point to backtrack to upon partial failure
3950 of A or B, and also contains some minor state data related to that
3951 iteration.  The CURLYX block, pointed to by cur_curlyx, contains the
3952 overall state, such as the count, and pointers to the A and B ops.
3953 
3954 This is complicated slightly by nested CURLYX/WHILEM's. Since cur_curlyx
3955 must always point to the *current* CURLYX block, the rules are:
3956 
3957 When executing CURLYX, save the old cur_curlyx in the CURLYX state block,
3958 and set cur_curlyx to point the new block.
3959 
3960 When popping the CURLYX block after a successful or unsuccessful match,
3961 restore the previous cur_curlyx.
3962 
3963 When WHILEM is about to execute B, save the current cur_curlyx, and set it
3964 to the outer one saved in the CURLYX block.
3965 
3966 When popping the WHILEM block after a successful or unsuccessful B match,
3967 restore the previous cur_curlyx.
3968 
3969 Here's an example for the pattern (AI* BI)*BO
3970 I and O refer to inner and outer, C and W refer to CURLYX and WHILEM:
3971 
3972 cur_
3973 curlyx backtrack stack
3974 ------ ---------------
3975 NULL
3976 CO     <CO prev=NULL> <WO>
3977 CI     <CO prev=NULL> <WO> <CI prev=CO> <WI> ai
3978 CO     <CO prev=NULL> <WO> <CI prev=CO> <WI> ai <WI prev=CI> bi
3979 NULL   <CO prev=NULL> <WO> <CI prev=CO> <WI> ai <WI prev=CI> bi <WO prev=CO> bo
3980 
3981 At this point the pattern succeeds, and we work back down the stack to
3982 clean up, restoring as we go:
3983 
3984 CO     <CO prev=NULL> <WO> <CI prev=CO> <WI> ai <WI prev=CI> bi
3985 CI     <CO prev=NULL> <WO> <CI prev=CO> <WI> ai
3986 CO     <CO prev=NULL> <WO>
3987 NULL
3988 
3989 *******************************************************************/
3990 
3991 #define ST st->u.curlyx
3992 
3993 	case CURLYX:    /* start of /A*B/  (for complex A) */
3994 	{
3995 	    /* No need to save/restore up to this paren */
3996 	    I32 parenfloor = scan->flags;
3997 
3998 	    assert(next); /* keep Coverity happy */
3999 	    if (OP(PREVOPER(next)) == NOTHING) /* LONGJMP */
4000 		next += ARG(next);
4001 
4002 	    /* XXXX Probably it is better to teach regpush to support
4003 	       parenfloor > PL_regsize... */
4004 	    if (parenfloor > (I32)*PL_reglastparen)
4005 		parenfloor = *PL_reglastparen; /* Pessimization... */
4006 
4007 	    ST.prev_curlyx= cur_curlyx;
4008 	    cur_curlyx = st;
4009 	    ST.cp = PL_savestack_ix;
4010 
4011 	    /* these fields contain the state of the current curly.
4012 	     * they are accessed by subsequent WHILEMs */
4013 	    ST.parenfloor = parenfloor;
4014 	    ST.min = ARG1(scan);
4015 	    ST.max = ARG2(scan);
4016 	    ST.A = NEXTOPER(scan) + EXTRA_STEP_2ARGS;
4017 	    ST.B = next;
4018 	    ST.minmod = minmod;
4019 	    minmod = 0;
4020 	    ST.count = -1;	/* this will be updated by WHILEM */
4021 	    ST.lastloc = NULL;  /* this will be updated by WHILEM */
4022 
4023 	    PL_reginput = locinput;
4024 	    PUSH_YES_STATE_GOTO(CURLYX_end, PREVOPER(next));
4025 	    /* NOTREACHED */
4026 	}
4027 
4028 	case CURLYX_end: /* just finished matching all of A*B */
4029 	    cur_curlyx = ST.prev_curlyx;
4030 	    sayYES;
4031 	    /* NOTREACHED */
4032 
4033 	case CURLYX_end_fail: /* just failed to match all of A*B */
4034 	    regcpblow(ST.cp);
4035 	    cur_curlyx = ST.prev_curlyx;
4036 	    sayNO;
4037 	    /* NOTREACHED */
4038 
4039 
4040 #undef ST
4041 #define ST st->u.whilem
4042 
4043 	case WHILEM:     /* just matched an A in /A*B/  (for complex A) */
4044 	{
4045 	    /* see the discussion above about CURLYX/WHILEM */
4046 	    I32 n;
4047 	    assert(cur_curlyx); /* keep Coverity happy */
4048 	    n = ++cur_curlyx->u.curlyx.count; /* how many A's matched */
4049 	    ST.save_lastloc = cur_curlyx->u.curlyx.lastloc;
4050 	    ST.cache_offset = 0;
4051 	    ST.cache_mask = 0;
4052 
4053 	    PL_reginput = locinput;
4054 
4055 	    DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
4056 		  "%*s  whilem: matched %ld out of %ld..%ld\n",
4057 		  REPORT_CODE_OFF+depth*2, "", (long)n,
4058 		  (long)cur_curlyx->u.curlyx.min,
4059 		  (long)cur_curlyx->u.curlyx.max)
4060 	    );
4061 
4062 	    /* First just match a string of min A's. */
4063 
4064 	    if (n < cur_curlyx->u.curlyx.min) {
4065 		cur_curlyx->u.curlyx.lastloc = locinput;
4066 		PUSH_STATE_GOTO(WHILEM_A_pre, cur_curlyx->u.curlyx.A);
4067 		/* NOTREACHED */
4068 	    }
4069 
4070 	    /* If degenerate A matches "", assume A done. */
4071 
4072 	    if (locinput == cur_curlyx->u.curlyx.lastloc) {
4073 		DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
4074 		   "%*s  whilem: empty match detected, trying continuation...\n",
4075 		   REPORT_CODE_OFF+depth*2, "")
4076 		);
4077 		goto do_whilem_B_max;
4078 	    }
4079 
4080 	    /* super-linear cache processing */
4081 
4082 	    if (scan->flags) {
4083 
4084 		if (!PL_reg_maxiter) {
4085 		    /* start the countdown: Postpone detection until we
4086 		     * know the match is not *that* much linear. */
4087 		    PL_reg_maxiter = (PL_regeol - PL_bostr + 1) * (scan->flags>>4);
4088 		    /* possible overflow for long strings and many CURLYX's */
4089 		    if (PL_reg_maxiter < 0)
4090 			PL_reg_maxiter = I32_MAX;
4091 		    PL_reg_leftiter = PL_reg_maxiter;
4092 		}
4093 
4094 		if (PL_reg_leftiter-- == 0) {
4095 		    /* initialise cache */
4096 		    const I32 size = (PL_reg_maxiter + 7)/8;
4097 		    if (PL_reg_poscache) {
4098 			if ((I32)PL_reg_poscache_size < size) {
4099 			    Renew(PL_reg_poscache, size, char);
4100 			    PL_reg_poscache_size = size;
4101 			}
4102 			Zero(PL_reg_poscache, size, char);
4103 		    }
4104 		    else {
4105 			PL_reg_poscache_size = size;
4106 			Newxz(PL_reg_poscache, size, char);
4107 		    }
4108 		    DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
4109       "%swhilem: Detected a super-linear match, switching on caching%s...\n",
4110 			      PL_colors[4], PL_colors[5])
4111 		    );
4112 		}
4113 
4114 		if (PL_reg_leftiter < 0) {
4115 		    /* have we already failed at this position? */
4116 		    I32 offset, mask;
4117 		    offset  = (scan->flags & 0xf) - 1
4118 		  		+ (locinput - PL_bostr)  * (scan->flags>>4);
4119 		    mask    = 1 << (offset % 8);
4120 		    offset /= 8;
4121 		    if (PL_reg_poscache[offset] & mask) {
4122 			DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
4123 			    "%*s  whilem: (cache) already tried at this position...\n",
4124 			    REPORT_CODE_OFF+depth*2, "")
4125 			);
4126 			sayNO; /* cache records failure */
4127 		    }
4128 		    ST.cache_offset = offset;
4129 		    ST.cache_mask   = mask;
4130 		}
4131 	    }
4132 
4133 	    /* Prefer B over A for minimal matching. */
4134 
4135 	    if (cur_curlyx->u.curlyx.minmod) {
4136 		ST.save_curlyx = cur_curlyx;
4137 		cur_curlyx = cur_curlyx->u.curlyx.prev_curlyx;
4138 		ST.cp = regcppush(ST.save_curlyx->u.curlyx.parenfloor);
4139 		REGCP_SET(ST.lastcp);
4140 		PUSH_YES_STATE_GOTO(WHILEM_B_min, ST.save_curlyx->u.curlyx.B);
4141 		/* NOTREACHED */
4142 	    }
4143 
4144 	    /* Prefer A over B for maximal matching. */
4145 
4146 	    if (n < cur_curlyx->u.curlyx.max) { /* More greed allowed? */
4147 		ST.cp = regcppush(cur_curlyx->u.curlyx.parenfloor);
4148 		cur_curlyx->u.curlyx.lastloc = locinput;
4149 		REGCP_SET(ST.lastcp);
4150 		PUSH_STATE_GOTO(WHILEM_A_max, cur_curlyx->u.curlyx.A);
4151 		/* NOTREACHED */
4152 	    }
4153 	    goto do_whilem_B_max;
4154 	}
4155 	/* NOTREACHED */
4156 
4157 	case WHILEM_B_min: /* just matched B in a minimal match */
4158 	case WHILEM_B_max: /* just matched B in a maximal match */
4159 	    cur_curlyx = ST.save_curlyx;
4160 	    sayYES;
4161 	    /* NOTREACHED */
4162 
4163 	case WHILEM_B_max_fail: /* just failed to match B in a maximal match */
4164 	    cur_curlyx = ST.save_curlyx;
4165 	    cur_curlyx->u.curlyx.lastloc = ST.save_lastloc;
4166 	    cur_curlyx->u.curlyx.count--;
4167 	    CACHEsayNO;
4168 	    /* NOTREACHED */
4169 
4170 	case WHILEM_A_min_fail: /* just failed to match A in a minimal match */
4171 	    REGCP_UNWIND(ST.lastcp);
4172 	    regcppop(rex);
4173 	    /* FALL THROUGH */
4174 	case WHILEM_A_pre_fail: /* just failed to match even minimal A */
4175 	    cur_curlyx->u.curlyx.lastloc = ST.save_lastloc;
4176 	    cur_curlyx->u.curlyx.count--;
4177 	    CACHEsayNO;
4178 	    /* NOTREACHED */
4179 
4180 	case WHILEM_A_max_fail: /* just failed to match A in a maximal match */
4181 	    REGCP_UNWIND(ST.lastcp);
4182 	    regcppop(rex);	/* Restore some previous $<digit>s? */
4183 	    PL_reginput = locinput;
4184 	    DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
4185 		"%*s  whilem: failed, trying continuation...\n",
4186 		REPORT_CODE_OFF+depth*2, "")
4187 	    );
4188 	  do_whilem_B_max:
4189 	    if (cur_curlyx->u.curlyx.count >= REG_INFTY
4190 		&& ckWARN(WARN_REGEXP)
4191 		&& !(PL_reg_flags & RF_warned))
4192 	    {
4193 		PL_reg_flags |= RF_warned;
4194 		Perl_warner(aTHX_ packWARN(WARN_REGEXP), "%s limit (%d) exceeded",
4195 		     "Complex regular subexpression recursion",
4196 		     REG_INFTY - 1);
4197 	    }
4198 
4199 	    /* now try B */
4200 	    ST.save_curlyx = cur_curlyx;
4201 	    cur_curlyx = cur_curlyx->u.curlyx.prev_curlyx;
4202 	    PUSH_YES_STATE_GOTO(WHILEM_B_max, ST.save_curlyx->u.curlyx.B);
4203 	    /* NOTREACHED */
4204 
4205 	case WHILEM_B_min_fail: /* just failed to match B in a minimal match */
4206 	    cur_curlyx = ST.save_curlyx;
4207 	    REGCP_UNWIND(ST.lastcp);
4208 	    regcppop(rex);
4209 
4210 	    if (cur_curlyx->u.curlyx.count >= cur_curlyx->u.curlyx.max) {
4211 		/* Maximum greed exceeded */
4212 		if (cur_curlyx->u.curlyx.count >= REG_INFTY
4213 		    && ckWARN(WARN_REGEXP)
4214 		    && !(PL_reg_flags & RF_warned))
4215 		{
4216 		    PL_reg_flags |= RF_warned;
4217 		    Perl_warner(aTHX_ packWARN(WARN_REGEXP),
4218 			"%s limit (%d) exceeded",
4219 			"Complex regular subexpression recursion",
4220 			REG_INFTY - 1);
4221 		}
4222 		cur_curlyx->u.curlyx.count--;
4223 		CACHEsayNO;
4224 	    }
4225 
4226 	    DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
4227 		"%*s  trying longer...\n", REPORT_CODE_OFF+depth*2, "")
4228 	    );
4229 	    /* Try grabbing another A and see if it helps. */
4230 	    PL_reginput = locinput;
4231 	    cur_curlyx->u.curlyx.lastloc = locinput;
4232 	    ST.cp = regcppush(cur_curlyx->u.curlyx.parenfloor);
4233 	    REGCP_SET(ST.lastcp);
4234 	    PUSH_STATE_GOTO(WHILEM_A_min, ST.save_curlyx->u.curlyx.A);
4235 	    /* NOTREACHED */
4236 
4237 #undef  ST
4238 #define ST st->u.branch
4239 
4240 	case BRANCHJ:	    /*  /(...|A|...)/ with long next pointer */
4241 	    next = scan + ARG(scan);
4242 	    if (next == scan)
4243 		next = NULL;
4244 	    scan = NEXTOPER(scan);
4245 	    /* FALL THROUGH */
4246 
4247 	case BRANCH:	    /*  /(...|A|...)/ */
4248 	    scan = NEXTOPER(scan); /* scan now points to inner node */
4249 	    ST.lastparen = *PL_reglastparen;
4250 	    ST.next_branch = next;
4251 	    REGCP_SET(ST.cp);
4252 	    PL_reginput = locinput;
4253 
4254 	    /* Now go into the branch */
4255 	    if (has_cutgroup) {
4256 	        PUSH_YES_STATE_GOTO(BRANCH_next, scan);
4257 	    } else {
4258 	        PUSH_STATE_GOTO(BRANCH_next, scan);
4259 	    }
4260 	    /* NOTREACHED */
4261         case CUTGROUP:
4262             PL_reginput = locinput;
4263             sv_yes_mark = st->u.mark.mark_name = scan->flags ? NULL :
4264                 (SV*)rexi->data->data[ ARG( scan ) ];
4265             PUSH_STATE_GOTO(CUTGROUP_next,next);
4266             /* NOTREACHED */
4267         case CUTGROUP_next_fail:
4268             do_cutgroup = 1;
4269             no_final = 1;
4270             if (st->u.mark.mark_name)
4271                 sv_commit = st->u.mark.mark_name;
4272             sayNO;
4273             /* NOTREACHED */
4274         case BRANCH_next:
4275             sayYES;
4276             /* NOTREACHED */
4277 	case BRANCH_next_fail: /* that branch failed; try the next, if any */
4278 	    if (do_cutgroup) {
4279 	        do_cutgroup = 0;
4280 	        no_final = 0;
4281 	    }
4282 	    REGCP_UNWIND(ST.cp);
4283 	    for (n = *PL_reglastparen; n > ST.lastparen; n--)
4284 		PL_regoffs[n].end = -1;
4285 	    *PL_reglastparen = n;
4286 	    /*dmq: *PL_reglastcloseparen = n; */
4287 	    scan = ST.next_branch;
4288 	    /* no more branches? */
4289 	    if (!scan || (OP(scan) != BRANCH && OP(scan) != BRANCHJ)) {
4290 	        DEBUG_EXECUTE_r({
4291 		    PerlIO_printf( Perl_debug_log,
4292 			"%*s  %sBRANCH failed...%s\n",
4293 			REPORT_CODE_OFF+depth*2, "",
4294 			PL_colors[4],
4295 			PL_colors[5] );
4296 		});
4297 		sayNO_SILENT;
4298             }
4299 	    continue; /* execute next BRANCH[J] op */
4300 	    /* NOTREACHED */
4301 
4302 	case MINMOD:
4303 	    minmod = 1;
4304 	    break;
4305 
4306 #undef  ST
4307 #define ST st->u.curlym
4308 
4309 	case CURLYM:	/* /A{m,n}B/ where A is fixed-length */
4310 
4311 	    /* This is an optimisation of CURLYX that enables us to push
4312 	     * only a single backtracking state, no matter now many matches
4313 	     * there are in {m,n}. It relies on the pattern being constant
4314 	     * length, with no parens to influence future backrefs
4315 	     */
4316 
4317 	    ST.me = scan;
4318 	    scan = NEXTOPER(scan) + NODE_STEP_REGNODE;
4319 
4320 	    /* if paren positive, emulate an OPEN/CLOSE around A */
4321 	    if (ST.me->flags) {
4322 		U32 paren = ST.me->flags;
4323 		if (paren > PL_regsize)
4324 		    PL_regsize = paren;
4325 		if (paren > *PL_reglastparen)
4326 		    *PL_reglastparen = paren;
4327 		scan += NEXT_OFF(scan); /* Skip former OPEN. */
4328 	    }
4329 	    ST.A = scan;
4330 	    ST.B = next;
4331 	    ST.alen = 0;
4332 	    ST.count = 0;
4333 	    ST.minmod = minmod;
4334 	    minmod = 0;
4335 	    ST.c1 = CHRTEST_UNINIT;
4336 	    REGCP_SET(ST.cp);
4337 
4338 	    if (!(ST.minmod ? ARG1(ST.me) : ARG2(ST.me))) /* min/max */
4339 		goto curlym_do_B;
4340 
4341 	  curlym_do_A: /* execute the A in /A{m,n}B/  */
4342 	    PL_reginput = locinput;
4343 	    PUSH_YES_STATE_GOTO(CURLYM_A, ST.A); /* match A */
4344 	    /* NOTREACHED */
4345 
4346 	case CURLYM_A: /* we've just matched an A */
4347 	    locinput = st->locinput;
4348 	    nextchr = UCHARAT(locinput);
4349 
4350 	    ST.count++;
4351 	    /* after first match, determine A's length: u.curlym.alen */
4352 	    if (ST.count == 1) {
4353 		if (PL_reg_match_utf8) {
4354 		    char *s = locinput;
4355 		    while (s < PL_reginput) {
4356 			ST.alen++;
4357 			s += UTF8SKIP(s);
4358 		    }
4359 		}
4360 		else {
4361 		    ST.alen = PL_reginput - locinput;
4362 		}
4363 		if (ST.alen == 0)
4364 		    ST.count = ST.minmod ? ARG1(ST.me) : ARG2(ST.me);
4365 	    }
4366 	    DEBUG_EXECUTE_r(
4367 		PerlIO_printf(Perl_debug_log,
4368 			  "%*s  CURLYM now matched %"IVdf" times, len=%"IVdf"...\n",
4369 			  (int)(REPORT_CODE_OFF+(depth*2)), "",
4370 			  (IV) ST.count, (IV)ST.alen)
4371 	    );
4372 
4373 	    locinput = PL_reginput;
4374 
4375 	    if (cur_eval && cur_eval->u.eval.close_paren &&
4376 	        cur_eval->u.eval.close_paren == (U32)ST.me->flags)
4377 	        goto fake_end;
4378 
4379 	    if ( ST.count < (ST.minmod ? ARG1(ST.me) : ARG2(ST.me)) )
4380 		goto curlym_do_A; /* try to match another A */
4381 	    goto curlym_do_B; /* try to match B */
4382 
4383 	case CURLYM_A_fail: /* just failed to match an A */
4384 	    REGCP_UNWIND(ST.cp);
4385 
4386 	    if (ST.minmod || ST.count < ARG1(ST.me) /* min*/
4387 	        || (cur_eval && cur_eval->u.eval.close_paren &&
4388 	            cur_eval->u.eval.close_paren == (U32)ST.me->flags))
4389 		sayNO;
4390 
4391 	  curlym_do_B: /* execute the B in /A{m,n}B/  */
4392 	    PL_reginput = locinput;
4393 	    if (ST.c1 == CHRTEST_UNINIT) {
4394 		/* calculate c1 and c2 for possible match of 1st char
4395 		 * following curly */
4396 		ST.c1 = ST.c2 = CHRTEST_VOID;
4397 		if (HAS_TEXT(ST.B) || JUMPABLE(ST.B)) {
4398 		    regnode *text_node = ST.B;
4399 		    if (! HAS_TEXT(text_node))
4400 			FIND_NEXT_IMPT(text_node);
4401 	            /* this used to be
4402 
4403 	                (HAS_TEXT(text_node) && PL_regkind[OP(text_node)] == EXACT)
4404 
4405 	            	But the former is redundant in light of the latter.
4406 
4407 	            	if this changes back then the macro for
4408 	            	IS_TEXT and friends need to change.
4409 	             */
4410 		    if (PL_regkind[OP(text_node)] == EXACT)
4411 		    {
4412 
4413 			ST.c1 = (U8)*STRING(text_node);
4414 			ST.c2 =
4415 			    (IS_TEXTF(text_node))
4416 			    ? PL_fold[ST.c1]
4417 			    : (IS_TEXTFL(text_node))
4418 				? PL_fold_locale[ST.c1]
4419 				: ST.c1;
4420 		    }
4421 		}
4422 	    }
4423 
4424 	    DEBUG_EXECUTE_r(
4425 		PerlIO_printf(Perl_debug_log,
4426 		    "%*s  CURLYM trying tail with matches=%"IVdf"...\n",
4427 		    (int)(REPORT_CODE_OFF+(depth*2)),
4428 		    "", (IV)ST.count)
4429 		);
4430 	    if (ST.c1 != CHRTEST_VOID
4431 		    && UCHARAT(PL_reginput) != ST.c1
4432 		    && UCHARAT(PL_reginput) != ST.c2)
4433 	    {
4434 		/* simulate B failing */
4435 		DEBUG_OPTIMISE_r(
4436 		    PerlIO_printf(Perl_debug_log,
4437 		        "%*s  CURLYM Fast bail c1=%"IVdf" c2=%"IVdf"\n",
4438 		        (int)(REPORT_CODE_OFF+(depth*2)),"",
4439 		        (IV)ST.c1,(IV)ST.c2
4440 		));
4441 		state_num = CURLYM_B_fail;
4442 		goto reenter_switch;
4443 	    }
4444 
4445 	    if (ST.me->flags) {
4446 		/* mark current A as captured */
4447 		I32 paren = ST.me->flags;
4448 		if (ST.count) {
4449 		    PL_regoffs[paren].start
4450 			= HOPc(PL_reginput, -ST.alen) - PL_bostr;
4451 		    PL_regoffs[paren].end = PL_reginput - PL_bostr;
4452 		    /*dmq: *PL_reglastcloseparen = paren; */
4453 		}
4454 		else
4455 		    PL_regoffs[paren].end = -1;
4456 		if (cur_eval && cur_eval->u.eval.close_paren &&
4457 		    cur_eval->u.eval.close_paren == (U32)ST.me->flags)
4458 		{
4459 		    if (ST.count)
4460 	                goto fake_end;
4461 	            else
4462 	                sayNO;
4463 	        }
4464 	    }
4465 
4466 	    PUSH_STATE_GOTO(CURLYM_B, ST.B); /* match B */
4467 	    /* NOTREACHED */
4468 
4469 	case CURLYM_B_fail: /* just failed to match a B */
4470 	    REGCP_UNWIND(ST.cp);
4471 	    if (ST.minmod) {
4472 		if (ST.count == ARG2(ST.me) /* max */)
4473 		    sayNO;
4474 		goto curlym_do_A; /* try to match a further A */
4475 	    }
4476 	    /* backtrack one A */
4477 	    if (ST.count == ARG1(ST.me) /* min */)
4478 		sayNO;
4479 	    ST.count--;
4480 	    locinput = HOPc(locinput, -ST.alen);
4481 	    goto curlym_do_B; /* try to match B */
4482 
4483 #undef ST
4484 #define ST st->u.curly
4485 
4486 #define CURLY_SETPAREN(paren, success) \
4487     if (paren) { \
4488 	if (success) { \
4489 	    PL_regoffs[paren].start = HOPc(locinput, -1) - PL_bostr; \
4490 	    PL_regoffs[paren].end = locinput - PL_bostr; \
4491 	    *PL_reglastcloseparen = paren; \
4492 	} \
4493 	else \
4494 	    PL_regoffs[paren].end = -1; \
4495     }
4496 
4497 	case STAR:		/*  /A*B/ where A is width 1 */
4498 	    ST.paren = 0;
4499 	    ST.min = 0;
4500 	    ST.max = REG_INFTY;
4501 	    scan = NEXTOPER(scan);
4502 	    goto repeat;
4503 	case PLUS:		/*  /A+B/ where A is width 1 */
4504 	    ST.paren = 0;
4505 	    ST.min = 1;
4506 	    ST.max = REG_INFTY;
4507 	    scan = NEXTOPER(scan);
4508 	    goto repeat;
4509 	case CURLYN:		/*  /(A){m,n}B/ where A is width 1 */
4510 	    ST.paren = scan->flags;	/* Which paren to set */
4511 	    if (ST.paren > PL_regsize)
4512 		PL_regsize = ST.paren;
4513 	    if (ST.paren > *PL_reglastparen)
4514 		*PL_reglastparen = ST.paren;
4515 	    ST.min = ARG1(scan);  /* min to match */
4516 	    ST.max = ARG2(scan);  /* max to match */
4517 	    if (cur_eval && cur_eval->u.eval.close_paren &&
4518 	        cur_eval->u.eval.close_paren == (U32)ST.paren) {
4519 	        ST.min=1;
4520 	        ST.max=1;
4521 	    }
4522             scan = regnext(NEXTOPER(scan) + NODE_STEP_REGNODE);
4523 	    goto repeat;
4524 	case CURLY:		/*  /A{m,n}B/ where A is width 1 */
4525 	    ST.paren = 0;
4526 	    ST.min = ARG1(scan);  /* min to match */
4527 	    ST.max = ARG2(scan);  /* max to match */
4528 	    scan = NEXTOPER(scan) + NODE_STEP_REGNODE;
4529 	  repeat:
4530 	    /*
4531 	    * Lookahead to avoid useless match attempts
4532 	    * when we know what character comes next.
4533 	    *
4534 	    * Used to only do .*x and .*?x, but now it allows
4535 	    * for )'s, ('s and (?{ ... })'s to be in the way
4536 	    * of the quantifier and the EXACT-like node.  -- japhy
4537 	    */
4538 
4539 	    if (ST.min > ST.max) /* XXX make this a compile-time check? */
4540 		sayNO;
4541 	    if (HAS_TEXT(next) || JUMPABLE(next)) {
4542 		U8 *s;
4543 		regnode *text_node = next;
4544 
4545 		if (! HAS_TEXT(text_node))
4546 		    FIND_NEXT_IMPT(text_node);
4547 
4548 		if (! HAS_TEXT(text_node))
4549 		    ST.c1 = ST.c2 = CHRTEST_VOID;
4550 		else {
4551 		    if ( PL_regkind[OP(text_node)] != EXACT ) {
4552 			ST.c1 = ST.c2 = CHRTEST_VOID;
4553 			goto assume_ok_easy;
4554 		    }
4555 		    else
4556 			s = (U8*)STRING(text_node);
4557 
4558                     /*  Currently we only get here when
4559 
4560                         PL_rekind[OP(text_node)] == EXACT
4561 
4562                         if this changes back then the macro for IS_TEXT and
4563                         friends need to change. */
4564 		    if (!UTF) {
4565 			ST.c2 = ST.c1 = *s;
4566 			if (IS_TEXTF(text_node))
4567 			    ST.c2 = PL_fold[ST.c1];
4568 			else if (IS_TEXTFL(text_node))
4569 			    ST.c2 = PL_fold_locale[ST.c1];
4570 		    }
4571 		    else { /* UTF */
4572 			if (IS_TEXTF(text_node)) {
4573 			     STRLEN ulen1, ulen2;
4574 			     U8 tmpbuf1[UTF8_MAXBYTES_CASE+1];
4575 			     U8 tmpbuf2[UTF8_MAXBYTES_CASE+1];
4576 
4577 			     to_utf8_lower((U8*)s, tmpbuf1, &ulen1);
4578 			     to_utf8_upper((U8*)s, tmpbuf2, &ulen2);
4579 #ifdef EBCDIC
4580 			     ST.c1 = utf8n_to_uvchr(tmpbuf1, UTF8_MAXLEN, 0,
4581 						    ckWARN(WARN_UTF8) ?
4582                                                     0 : UTF8_ALLOW_ANY);
4583 			     ST.c2 = utf8n_to_uvchr(tmpbuf2, UTF8_MAXLEN, 0,
4584                                                     ckWARN(WARN_UTF8) ?
4585                                                     0 : UTF8_ALLOW_ANY);
4586 #else
4587 			     ST.c1 = utf8n_to_uvuni(tmpbuf1, UTF8_MAXBYTES, 0,
4588 						    uniflags);
4589 			     ST.c2 = utf8n_to_uvuni(tmpbuf2, UTF8_MAXBYTES, 0,
4590 						    uniflags);
4591 #endif
4592 			}
4593 			else {
4594 			    ST.c2 = ST.c1 = utf8n_to_uvchr(s, UTF8_MAXBYTES, 0,
4595 						     uniflags);
4596 			}
4597 		    }
4598 		}
4599 	    }
4600 	    else
4601 		ST.c1 = ST.c2 = CHRTEST_VOID;
4602 	assume_ok_easy:
4603 
4604 	    ST.A = scan;
4605 	    ST.B = next;
4606 	    PL_reginput = locinput;
4607 	    if (minmod) {
4608 		minmod = 0;
4609 		if (ST.min && regrepeat(rex, ST.A, ST.min, depth) < ST.min)
4610 		    sayNO;
4611 		ST.count = ST.min;
4612 		locinput = PL_reginput;
4613 		REGCP_SET(ST.cp);
4614 		if (ST.c1 == CHRTEST_VOID)
4615 		    goto curly_try_B_min;
4616 
4617 		ST.oldloc = locinput;
4618 
4619 		/* set ST.maxpos to the furthest point along the
4620 		 * string that could possibly match */
4621 		if  (ST.max == REG_INFTY) {
4622 		    ST.maxpos = PL_regeol - 1;
4623 		    if (do_utf8)
4624 			while (UTF8_IS_CONTINUATION(*(U8*)ST.maxpos))
4625 			    ST.maxpos--;
4626 		}
4627 		else if (do_utf8) {
4628 		    int m = ST.max - ST.min;
4629 		    for (ST.maxpos = locinput;
4630 			 m >0 && ST.maxpos + UTF8SKIP(ST.maxpos) <= PL_regeol; m--)
4631 			ST.maxpos += UTF8SKIP(ST.maxpos);
4632 		}
4633 		else {
4634 		    ST.maxpos = locinput + ST.max - ST.min;
4635 		    if (ST.maxpos >= PL_regeol)
4636 			ST.maxpos = PL_regeol - 1;
4637 		}
4638 		goto curly_try_B_min_known;
4639 
4640 	    }
4641 	    else {
4642 		ST.count = regrepeat(rex, ST.A, ST.max, depth);
4643 		locinput = PL_reginput;
4644 		if (ST.count < ST.min)
4645 		    sayNO;
4646 		if ((ST.count > ST.min)
4647 		    && (PL_regkind[OP(ST.B)] == EOL) && (OP(ST.B) != MEOL))
4648 		{
4649 		    /* A{m,n} must come at the end of the string, there's
4650 		     * no point in backing off ... */
4651 		    ST.min = ST.count;
4652 		    /* ...except that $ and \Z can match before *and* after
4653 		       newline at the end.  Consider "\n\n" =~ /\n+\Z\n/.
4654 		       We may back off by one in this case. */
4655 		    if (UCHARAT(PL_reginput - 1) == '\n' && OP(ST.B) != EOS)
4656 			ST.min--;
4657 		}
4658 		REGCP_SET(ST.cp);
4659 		goto curly_try_B_max;
4660 	    }
4661 	    /* NOTREACHED */
4662 
4663 
4664 	case CURLY_B_min_known_fail:
4665 	    /* failed to find B in a non-greedy match where c1,c2 valid */
4666 	    if (ST.paren && ST.count)
4667 		PL_regoffs[ST.paren].end = -1;
4668 
4669 	    PL_reginput = locinput;	/* Could be reset... */
4670 	    REGCP_UNWIND(ST.cp);
4671 	    /* Couldn't or didn't -- move forward. */
4672 	    ST.oldloc = locinput;
4673 	    if (do_utf8)
4674 		locinput += UTF8SKIP(locinput);
4675 	    else
4676 		locinput++;
4677 	    ST.count++;
4678 	  curly_try_B_min_known:
4679 	     /* find the next place where 'B' could work, then call B */
4680 	    {
4681 		int n;
4682 		if (do_utf8) {
4683 		    n = (ST.oldloc == locinput) ? 0 : 1;
4684 		    if (ST.c1 == ST.c2) {
4685 			STRLEN len;
4686 			/* set n to utf8_distance(oldloc, locinput) */
4687 			while (locinput <= ST.maxpos &&
4688 			       utf8n_to_uvchr((U8*)locinput,
4689 					      UTF8_MAXBYTES, &len,
4690 					      uniflags) != (UV)ST.c1) {
4691 			    locinput += len;
4692 			    n++;
4693 			}
4694 		    }
4695 		    else {
4696 			/* set n to utf8_distance(oldloc, locinput) */
4697 			while (locinput <= ST.maxpos) {
4698 			    STRLEN len;
4699 			    const UV c = utf8n_to_uvchr((U8*)locinput,
4700 						  UTF8_MAXBYTES, &len,
4701 						  uniflags);
4702 			    if (c == (UV)ST.c1 || c == (UV)ST.c2)
4703 				break;
4704 			    locinput += len;
4705 			    n++;
4706 			}
4707 		    }
4708 		}
4709 		else {
4710 		    if (ST.c1 == ST.c2) {
4711 			while (locinput <= ST.maxpos &&
4712 			       UCHARAT(locinput) != ST.c1)
4713 			    locinput++;
4714 		    }
4715 		    else {
4716 			while (locinput <= ST.maxpos
4717 			       && UCHARAT(locinput) != ST.c1
4718 			       && UCHARAT(locinput) != ST.c2)
4719 			    locinput++;
4720 		    }
4721 		    n = locinput - ST.oldloc;
4722 		}
4723 		if (locinput > ST.maxpos)
4724 		    sayNO;
4725 		/* PL_reginput == oldloc now */
4726 		if (n) {
4727 		    ST.count += n;
4728 		    if (regrepeat(rex, ST.A, n, depth) < n)
4729 			sayNO;
4730 		}
4731 		PL_reginput = locinput;
4732 		CURLY_SETPAREN(ST.paren, ST.count);
4733 		if (cur_eval && cur_eval->u.eval.close_paren &&
4734 		    cur_eval->u.eval.close_paren == (U32)ST.paren) {
4735 		    goto fake_end;
4736 	        }
4737 		PUSH_STATE_GOTO(CURLY_B_min_known, ST.B);
4738 	    }
4739 	    /* NOTREACHED */
4740 
4741 
4742 	case CURLY_B_min_fail:
4743 	    /* failed to find B in a non-greedy match where c1,c2 invalid */
4744 	    if (ST.paren && ST.count)
4745 		PL_regoffs[ST.paren].end = -1;
4746 
4747 	    REGCP_UNWIND(ST.cp);
4748 	    /* failed -- move forward one */
4749 	    PL_reginput = locinput;
4750 	    if (regrepeat(rex, ST.A, 1, depth)) {
4751 		ST.count++;
4752 		locinput = PL_reginput;
4753 		if (ST.count <= ST.max || (ST.max == REG_INFTY &&
4754 			ST.count > 0)) /* count overflow ? */
4755 		{
4756 		  curly_try_B_min:
4757 		    CURLY_SETPAREN(ST.paren, ST.count);
4758 		    if (cur_eval && cur_eval->u.eval.close_paren &&
4759 		        cur_eval->u.eval.close_paren == (U32)ST.paren) {
4760                         goto fake_end;
4761                     }
4762 		    PUSH_STATE_GOTO(CURLY_B_min, ST.B);
4763 		}
4764 	    }
4765 	    sayNO;
4766 	    /* NOTREACHED */
4767 
4768 
4769 	curly_try_B_max:
4770 	    /* a successful greedy match: now try to match B */
4771             if (cur_eval && cur_eval->u.eval.close_paren &&
4772                 cur_eval->u.eval.close_paren == (U32)ST.paren) {
4773                 goto fake_end;
4774             }
4775 	    {
4776 		UV c = 0;
4777 		if (ST.c1 != CHRTEST_VOID)
4778 		    c = do_utf8 ? utf8n_to_uvchr((U8*)PL_reginput,
4779 					   UTF8_MAXBYTES, 0, uniflags)
4780 				: (UV) UCHARAT(PL_reginput);
4781 		/* If it could work, try it. */
4782 		if (ST.c1 == CHRTEST_VOID || c == (UV)ST.c1 || c == (UV)ST.c2) {
4783 		    CURLY_SETPAREN(ST.paren, ST.count);
4784 		    PUSH_STATE_GOTO(CURLY_B_max, ST.B);
4785 		    /* NOTREACHED */
4786 		}
4787 	    }
4788 	    /* FALL THROUGH */
4789 	case CURLY_B_max_fail:
4790 	    /* failed to find B in a greedy match */
4791 	    if (ST.paren && ST.count)
4792 		PL_regoffs[ST.paren].end = -1;
4793 
4794 	    REGCP_UNWIND(ST.cp);
4795 	    /*  back up. */
4796 	    if (--ST.count < ST.min)
4797 		sayNO;
4798 	    PL_reginput = locinput = HOPc(locinput, -1);
4799 	    goto curly_try_B_max;
4800 
4801 #undef ST
4802 
4803 	case END:
4804 	    fake_end:
4805 	    if (cur_eval) {
4806 		/* we've just finished A in /(??{A})B/; now continue with B */
4807 		I32 tmpix;
4808 		st->u.eval.toggle_reg_flags
4809 			    = cur_eval->u.eval.toggle_reg_flags;
4810 		PL_reg_flags ^= st->u.eval.toggle_reg_flags;
4811 
4812 		st->u.eval.prev_rex = rex;		/* inner */
4813 		SETREX(rex,cur_eval->u.eval.prev_rex);
4814 		rexi = RXi_GET(rex);
4815 		cur_curlyx = cur_eval->u.eval.prev_curlyx;
4816 		ReREFCNT_inc(rex);
4817 		st->u.eval.cp = regcppush(0);	/* Save *all* the positions. */
4818 		REGCP_SET(st->u.eval.lastcp);
4819 		PL_reginput = locinput;
4820 
4821 		/* Restore parens of the outer rex without popping the
4822 		 * savestack */
4823 		tmpix = PL_savestack_ix;
4824 		PL_savestack_ix = cur_eval->u.eval.lastcp;
4825 		regcppop(rex);
4826 		PL_savestack_ix = tmpix;
4827 
4828 		st->u.eval.prev_eval = cur_eval;
4829 		cur_eval = cur_eval->u.eval.prev_eval;
4830 		DEBUG_EXECUTE_r(
4831 		    PerlIO_printf(Perl_debug_log, "%*s  EVAL trying tail ... %"UVxf"\n",
4832 				      REPORT_CODE_OFF+depth*2, "",PTR2UV(cur_eval)););
4833                 if ( nochange_depth )
4834 	            nochange_depth--;
4835 
4836                 PUSH_YES_STATE_GOTO(EVAL_AB,
4837 			st->u.eval.prev_eval->u.eval.B); /* match B */
4838 	    }
4839 
4840 	    if (locinput < reginfo->till) {
4841 		DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
4842 				      "%sMatch possible, but length=%ld is smaller than requested=%ld, failing!%s\n",
4843 				      PL_colors[4],
4844 				      (long)(locinput - PL_reg_starttry),
4845 				      (long)(reginfo->till - PL_reg_starttry),
4846 				      PL_colors[5]));
4847 
4848 		sayNO_SILENT;		/* Cannot match: too short. */
4849 	    }
4850 	    PL_reginput = locinput;	/* put where regtry can find it */
4851 	    sayYES;			/* Success! */
4852 
4853 	case SUCCEED: /* successful SUSPEND/UNLESSM/IFMATCH/CURLYM */
4854 	    DEBUG_EXECUTE_r(
4855 	    PerlIO_printf(Perl_debug_log,
4856 		"%*s  %ssubpattern success...%s\n",
4857 		REPORT_CODE_OFF+depth*2, "", PL_colors[4], PL_colors[5]));
4858 	    PL_reginput = locinput;	/* put where regtry can find it */
4859 	    sayYES;			/* Success! */
4860 
4861 #undef  ST
4862 #define ST st->u.ifmatch
4863 
4864 	case SUSPEND:	/* (?>A) */
4865 	    ST.wanted = 1;
4866 	    PL_reginput = locinput;
4867 	    goto do_ifmatch;
4868 
4869 	case UNLESSM:	/* -ve lookaround: (?!A), or with flags, (?<!A) */
4870 	    ST.wanted = 0;
4871 	    goto ifmatch_trivial_fail_test;
4872 
4873 	case IFMATCH:	/* +ve lookaround: (?=A), or with flags, (?<=A) */
4874 	    ST.wanted = 1;
4875 	  ifmatch_trivial_fail_test:
4876 	    if (scan->flags) {
4877 		char * const s = HOPBACKc(locinput, scan->flags);
4878 		if (!s) {
4879 		    /* trivial fail */
4880 		    if (logical) {
4881 			logical = 0;
4882 			sw = 1 - (bool)ST.wanted;
4883 		    }
4884 		    else if (ST.wanted)
4885 			sayNO;
4886 		    next = scan + ARG(scan);
4887 		    if (next == scan)
4888 			next = NULL;
4889 		    break;
4890 		}
4891 		PL_reginput = s;
4892 	    }
4893 	    else
4894 		PL_reginput = locinput;
4895 
4896 	  do_ifmatch:
4897 	    ST.me = scan;
4898 	    ST.logical = logical;
4899 	    /* execute body of (?...A) */
4900 	    PUSH_YES_STATE_GOTO(IFMATCH_A, NEXTOPER(NEXTOPER(scan)));
4901 	    /* NOTREACHED */
4902 
4903 	case IFMATCH_A_fail: /* body of (?...A) failed */
4904 	    ST.wanted = !ST.wanted;
4905 	    /* FALL THROUGH */
4906 
4907 	case IFMATCH_A: /* body of (?...A) succeeded */
4908 	    if (ST.logical) {
4909 		sw = (bool)ST.wanted;
4910 	    }
4911 	    else if (!ST.wanted)
4912 		sayNO;
4913 
4914 	    if (OP(ST.me) == SUSPEND)
4915 		locinput = PL_reginput;
4916 	    else {
4917 		locinput = PL_reginput = st->locinput;
4918 		nextchr = UCHARAT(locinput);
4919 	    }
4920 	    scan = ST.me + ARG(ST.me);
4921 	    if (scan == ST.me)
4922 		scan = NULL;
4923 	    continue; /* execute B */
4924 
4925 #undef ST
4926 
4927 	case LONGJMP:
4928 	    next = scan + ARG(scan);
4929 	    if (next == scan)
4930 		next = NULL;
4931 	    break;
4932 	case COMMIT:
4933 	    reginfo->cutpoint = PL_regeol;
4934 	    /* FALLTHROUGH */
4935 	case PRUNE:
4936 	    PL_reginput = locinput;
4937 	    if (!scan->flags)
4938 	        sv_yes_mark = sv_commit = (SV*)rexi->data->data[ ARG( scan ) ];
4939 	    PUSH_STATE_GOTO(COMMIT_next,next);
4940 	    /* NOTREACHED */
4941 	case COMMIT_next_fail:
4942 	    no_final = 1;
4943 	    /* FALLTHROUGH */
4944 	case OPFAIL:
4945 	    sayNO;
4946 	    /* NOTREACHED */
4947 
4948 #define ST st->u.mark
4949         case MARKPOINT:
4950             ST.prev_mark = mark_state;
4951             ST.mark_name = sv_commit = sv_yes_mark
4952                 = (SV*)rexi->data->data[ ARG( scan ) ];
4953             mark_state = st;
4954             ST.mark_loc = PL_reginput = locinput;
4955             PUSH_YES_STATE_GOTO(MARKPOINT_next,next);
4956             /* NOTREACHED */
4957         case MARKPOINT_next:
4958             mark_state = ST.prev_mark;
4959             sayYES;
4960             /* NOTREACHED */
4961         case MARKPOINT_next_fail:
4962             if (popmark && sv_eq(ST.mark_name,popmark))
4963             {
4964                 if (ST.mark_loc > startpoint)
4965 	            reginfo->cutpoint = HOPBACKc(ST.mark_loc, 1);
4966                 popmark = NULL; /* we found our mark */
4967                 sv_commit = ST.mark_name;
4968 
4969                 DEBUG_EXECUTE_r({
4970                         PerlIO_printf(Perl_debug_log,
4971 		            "%*s  %ssetting cutpoint to mark:%"SVf"...%s\n",
4972 		            REPORT_CODE_OFF+depth*2, "",
4973 		            PL_colors[4], SVfARG(sv_commit), PL_colors[5]);
4974 		});
4975             }
4976             mark_state = ST.prev_mark;
4977             sv_yes_mark = mark_state ?
4978                 mark_state->u.mark.mark_name : NULL;
4979             sayNO;
4980             /* NOTREACHED */
4981         case SKIP:
4982             PL_reginput = locinput;
4983             if (scan->flags) {
4984                 /* (*SKIP) : if we fail we cut here*/
4985                 ST.mark_name = NULL;
4986                 ST.mark_loc = locinput;
4987                 PUSH_STATE_GOTO(SKIP_next,next);
4988             } else {
4989                 /* (*SKIP:NAME) : if there is a (*MARK:NAME) fail where it was,
4990                    otherwise do nothing.  Meaning we need to scan
4991                  */
4992                 regmatch_state *cur = mark_state;
4993                 SV *find = (SV*)rexi->data->data[ ARG( scan ) ];
4994 
4995                 while (cur) {
4996                     if ( sv_eq( cur->u.mark.mark_name,
4997                                 find ) )
4998                     {
4999                         ST.mark_name = find;
5000                         PUSH_STATE_GOTO( SKIP_next, next );
5001                     }
5002                     cur = cur->u.mark.prev_mark;
5003                 }
5004             }
5005             /* Didn't find our (*MARK:NAME) so ignore this (*SKIP:NAME) */
5006             break;
5007 	case SKIP_next_fail:
5008 	    if (ST.mark_name) {
5009 	        /* (*CUT:NAME) - Set up to search for the name as we
5010 	           collapse the stack*/
5011 	        popmark = ST.mark_name;
5012 	    } else {
5013 	        /* (*CUT) - No name, we cut here.*/
5014 	        if (ST.mark_loc > startpoint)
5015 	            reginfo->cutpoint = HOPBACKc(ST.mark_loc, 1);
5016 	        /* but we set sv_commit to latest mark_name if there
5017 	           is one so they can test to see how things lead to this
5018 	           cut */
5019                 if (mark_state)
5020                     sv_commit=mark_state->u.mark.mark_name;
5021             }
5022             no_final = 1;
5023             sayNO;
5024             /* NOTREACHED */
5025 #undef ST
5026         case FOLDCHAR:
5027             n = ARG(scan);
5028             if ( n == (U32)what_len_TRICKYFOLD(locinput,do_utf8,ln) ) {
5029                 locinput += ln;
5030             } else if ( 0xDF == n && !do_utf8 && !UTF ) {
5031                 sayNO;
5032             } else  {
5033                 U8 folded[UTF8_MAXBYTES_CASE+1];
5034                 STRLEN foldlen;
5035                 const char * const l = locinput;
5036                 char *e = PL_regeol;
5037                 to_uni_fold(n, folded, &foldlen);
5038 
5039 		if (ibcmp_utf8((const char*) folded, 0,  foldlen, 1,
5040                 	       l, &e, 0,  do_utf8)) {
5041                         sayNO;
5042                 }
5043                 locinput = e;
5044             }
5045             nextchr = UCHARAT(locinput);
5046             break;
5047         case LNBREAK:
5048             if ((n=is_LNBREAK(locinput,do_utf8))) {
5049                 locinput += n;
5050                 nextchr = UCHARAT(locinput);
5051             } else
5052                 sayNO;
5053             break;
5054 
5055 #define CASE_CLASS(nAmE)                              \
5056         case nAmE:                                    \
5057             if ((n=is_##nAmE(locinput,do_utf8))) {    \
5058                 locinput += n;                        \
5059                 nextchr = UCHARAT(locinput);          \
5060             } else                                    \
5061                 sayNO;                                \
5062             break;                                    \
5063         case N##nAmE:                                 \
5064             if ((n=is_##nAmE(locinput,do_utf8))) {    \
5065                 sayNO;                                \
5066             } else {                                  \
5067                 locinput += UTF8SKIP(locinput);       \
5068                 nextchr = UCHARAT(locinput);          \
5069             }                                         \
5070             break
5071 
5072         CASE_CLASS(VERTWS);
5073         CASE_CLASS(HORIZWS);
5074 #undef CASE_CLASS
5075 
5076 	default:
5077 	    PerlIO_printf(Perl_error_log, "%"UVxf" %d\n",
5078 			  PTR2UV(scan), OP(scan));
5079 	    Perl_croak(aTHX_ "regexp memory corruption");
5080 
5081 	} /* end switch */
5082 
5083         /* switch break jumps here */
5084 	scan = next; /* prepare to execute the next op and ... */
5085 	continue;    /* ... jump back to the top, reusing st */
5086 	/* NOTREACHED */
5087 
5088       push_yes_state:
5089 	/* push a state that backtracks on success */
5090 	st->u.yes.prev_yes_state = yes_state;
5091 	yes_state = st;
5092 	/* FALL THROUGH */
5093       push_state:
5094 	/* push a new regex state, then continue at scan  */
5095 	{
5096 	    regmatch_state *newst;
5097 
5098 	    DEBUG_STACK_r({
5099 	        regmatch_state *cur = st;
5100 	        regmatch_state *curyes = yes_state;
5101 	        int curd = depth;
5102 	        regmatch_slab *slab = PL_regmatch_slab;
5103                 for (;curd > -1;cur--,curd--) {
5104                     if (cur < SLAB_FIRST(slab)) {
5105                 	slab = slab->prev;
5106                 	cur = SLAB_LAST(slab);
5107                     }
5108                     PerlIO_printf(Perl_error_log, "%*s#%-3d %-10s %s\n",
5109                         REPORT_CODE_OFF + 2 + depth * 2,"",
5110                         curd, PL_reg_name[cur->resume_state],
5111                         (curyes == cur) ? "yes" : ""
5112                     );
5113                     if (curyes == cur)
5114 	                curyes = cur->u.yes.prev_yes_state;
5115                 }
5116             } else
5117                 DEBUG_STATE_pp("push")
5118             );
5119 	    depth++;
5120 	    st->locinput = locinput;
5121 	    newst = st+1;
5122 	    if (newst >  SLAB_LAST(PL_regmatch_slab))
5123 		newst = S_push_slab(aTHX);
5124 	    PL_regmatch_state = newst;
5125 
5126 	    locinput = PL_reginput;
5127 	    nextchr = UCHARAT(locinput);
5128 	    st = newst;
5129 	    continue;
5130 	    /* NOTREACHED */
5131 	}
5132     }
5133 
5134     /*
5135     * We get here only if there's trouble -- normally "case END" is
5136     * the terminating point.
5137     */
5138     Perl_croak(aTHX_ "corrupted regexp pointers");
5139     /*NOTREACHED*/
5140     sayNO;
5141 
5142 yes:
5143     if (yes_state) {
5144 	/* we have successfully completed a subexpression, but we must now
5145 	 * pop to the state marked by yes_state and continue from there */
5146 	assert(st != yes_state);
5147 #ifdef DEBUGGING
5148 	while (st != yes_state) {
5149 	    st--;
5150 	    if (st < SLAB_FIRST(PL_regmatch_slab)) {
5151 		PL_regmatch_slab = PL_regmatch_slab->prev;
5152 		st = SLAB_LAST(PL_regmatch_slab);
5153 	    }
5154 	    DEBUG_STATE_r({
5155 	        if (no_final) {
5156 	            DEBUG_STATE_pp("pop (no final)");
5157 	        } else {
5158 	            DEBUG_STATE_pp("pop (yes)");
5159 	        }
5160 	    });
5161 	    depth--;
5162 	}
5163 #else
5164 	while (yes_state < SLAB_FIRST(PL_regmatch_slab)
5165 	    || yes_state > SLAB_LAST(PL_regmatch_slab))
5166 	{
5167 	    /* not in this slab, pop slab */
5168 	    depth -= (st - SLAB_FIRST(PL_regmatch_slab) + 1);
5169 	    PL_regmatch_slab = PL_regmatch_slab->prev;
5170 	    st = SLAB_LAST(PL_regmatch_slab);
5171 	}
5172 	depth -= (st - yes_state);
5173 #endif
5174 	st = yes_state;
5175 	yes_state = st->u.yes.prev_yes_state;
5176 	PL_regmatch_state = st;
5177 
5178         if (no_final) {
5179             locinput= st->locinput;
5180             nextchr = UCHARAT(locinput);
5181         }
5182 	state_num = st->resume_state + no_final;
5183 	goto reenter_switch;
5184     }
5185 
5186     DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "%sMatch successful!%s\n",
5187 			  PL_colors[4], PL_colors[5]));
5188 
5189     if (PL_reg_eval_set) {
5190 	/* each successfully executed (?{...}) block does the equivalent of
5191 	 *   local $^R = do {...}
5192 	 * When popping the save stack, all these locals would be undone;
5193 	 * bypass this by setting the outermost saved $^R to the latest
5194 	 * value */
5195 	if (oreplsv != GvSV(PL_replgv))
5196 	    sv_setsv(oreplsv, GvSV(PL_replgv));
5197     }
5198     result = 1;
5199     goto final_exit;
5200 
5201 no:
5202     DEBUG_EXECUTE_r(
5203 	PerlIO_printf(Perl_debug_log,
5204             "%*s  %sfailed...%s\n",
5205             REPORT_CODE_OFF+depth*2, "",
5206             PL_colors[4], PL_colors[5])
5207 	);
5208 
5209 no_silent:
5210     if (no_final) {
5211         if (yes_state) {
5212             goto yes;
5213         } else {
5214             goto final_exit;
5215         }
5216     }
5217     if (depth) {
5218 	/* there's a previous state to backtrack to */
5219 	st--;
5220 	if (st < SLAB_FIRST(PL_regmatch_slab)) {
5221 	    PL_regmatch_slab = PL_regmatch_slab->prev;
5222 	    st = SLAB_LAST(PL_regmatch_slab);
5223 	}
5224 	PL_regmatch_state = st;
5225 	locinput= st->locinput;
5226 	nextchr = UCHARAT(locinput);
5227 
5228 	DEBUG_STATE_pp("pop");
5229 	depth--;
5230 	if (yes_state == st)
5231 	    yes_state = st->u.yes.prev_yes_state;
5232 
5233 	state_num = st->resume_state + 1; /* failure = success + 1 */
5234 	goto reenter_switch;
5235     }
5236     result = 0;
5237 
5238   final_exit:
5239     if (rex->intflags & PREGf_VERBARG_SEEN) {
5240         SV *sv_err = get_sv("REGERROR", 1);
5241         SV *sv_mrk = get_sv("REGMARK", 1);
5242         if (result) {
5243             sv_commit = &PL_sv_no;
5244             if (!sv_yes_mark)
5245                 sv_yes_mark = &PL_sv_yes;
5246         } else {
5247             if (!sv_commit)
5248                 sv_commit = &PL_sv_yes;
5249             sv_yes_mark = &PL_sv_no;
5250         }
5251         sv_setsv(sv_err, sv_commit);
5252         sv_setsv(sv_mrk, sv_yes_mark);
5253     }
5254 
5255     /* clean up; in particular, free all slabs above current one */
5256     LEAVE_SCOPE(oldsave);
5257 
5258     return result;
5259 }
5260 
5261 /*
5262  - regrepeat - repeatedly match something simple, report how many
5263  */
5264 /*
5265  * [This routine now assumes that it will only match on things of length 1.
5266  * That was true before, but now we assume scan - reginput is the count,
5267  * rather than incrementing count on every character.  [Er, except utf8.]]
5268  */
5269 STATIC I32
5270 S_regrepeat(pTHX_ const regexp *prog, const regnode *p, I32 max, int depth)
5271 {
5272     dVAR;
5273     register char *scan;
5274     register I32 c;
5275     register char *loceol = PL_regeol;
5276     register I32 hardcount = 0;
5277     register bool do_utf8 = PL_reg_match_utf8;
5278 #ifndef DEBUGGING
5279     PERL_UNUSED_ARG(depth);
5280 #endif
5281 
5282     scan = PL_reginput;
5283     if (max == REG_INFTY)
5284 	max = I32_MAX;
5285     else if (max < loceol - scan)
5286 	loceol = scan + max;
5287     switch (OP(p)) {
5288     case REG_ANY:
5289 	if (do_utf8) {
5290 	    loceol = PL_regeol;
5291 	    while (scan < loceol && hardcount < max && *scan != '\n') {
5292 		scan += UTF8SKIP(scan);
5293 		hardcount++;
5294 	    }
5295 	} else {
5296 	    while (scan < loceol && *scan != '\n')
5297 		scan++;
5298 	}
5299 	break;
5300     case SANY:
5301         if (do_utf8) {
5302 	    loceol = PL_regeol;
5303 	    while (scan < loceol && hardcount < max) {
5304 	        scan += UTF8SKIP(scan);
5305 		hardcount++;
5306 	    }
5307 	}
5308 	else
5309 	    scan = loceol;
5310 	break;
5311     case CANY:
5312 	scan = loceol;
5313 	break;
5314     case EXACT:		/* length of string is 1 */
5315 	c = (U8)*STRING(p);
5316 	while (scan < loceol && UCHARAT(scan) == c)
5317 	    scan++;
5318 	break;
5319     case EXACTF:	/* length of string is 1 */
5320 	c = (U8)*STRING(p);
5321 	while (scan < loceol &&
5322 	       (UCHARAT(scan) == c || UCHARAT(scan) == PL_fold[c]))
5323 	    scan++;
5324 	break;
5325     case EXACTFL:	/* length of string is 1 */
5326 	PL_reg_flags |= RF_tainted;
5327 	c = (U8)*STRING(p);
5328 	while (scan < loceol &&
5329 	       (UCHARAT(scan) == c || UCHARAT(scan) == PL_fold_locale[c]))
5330 	    scan++;
5331 	break;
5332     case ANYOF:
5333 	if (do_utf8) {
5334 	    loceol = PL_regeol;
5335 	    while (hardcount < max && scan < loceol &&
5336 		   reginclass(prog, p, (U8*)scan, 0, do_utf8)) {
5337 		scan += UTF8SKIP(scan);
5338 		hardcount++;
5339 	    }
5340 	} else {
5341 	    while (scan < loceol && REGINCLASS(prog, p, (U8*)scan))
5342 		scan++;
5343 	}
5344 	break;
5345     case ALNUM:
5346 	if (do_utf8) {
5347 	    loceol = PL_regeol;
5348 	    LOAD_UTF8_CHARCLASS_ALNUM();
5349 	    while (hardcount < max && scan < loceol &&
5350 		   swash_fetch(PL_utf8_alnum, (U8*)scan, do_utf8)) {
5351 		scan += UTF8SKIP(scan);
5352 		hardcount++;
5353 	    }
5354 	} else {
5355 	    while (scan < loceol && isALNUM(*scan))
5356 		scan++;
5357 	}
5358 	break;
5359     case ALNUML:
5360 	PL_reg_flags |= RF_tainted;
5361 	if (do_utf8) {
5362 	    loceol = PL_regeol;
5363 	    while (hardcount < max && scan < loceol &&
5364 		   isALNUM_LC_utf8((U8*)scan)) {
5365 		scan += UTF8SKIP(scan);
5366 		hardcount++;
5367 	    }
5368 	} else {
5369 	    while (scan < loceol && isALNUM_LC(*scan))
5370 		scan++;
5371 	}
5372 	break;
5373     case NALNUM:
5374 	if (do_utf8) {
5375 	    loceol = PL_regeol;
5376 	    LOAD_UTF8_CHARCLASS_ALNUM();
5377 	    while (hardcount < max && scan < loceol &&
5378 		   !swash_fetch(PL_utf8_alnum, (U8*)scan, do_utf8)) {
5379 		scan += UTF8SKIP(scan);
5380 		hardcount++;
5381 	    }
5382 	} else {
5383 	    while (scan < loceol && !isALNUM(*scan))
5384 		scan++;
5385 	}
5386 	break;
5387     case NALNUML:
5388 	PL_reg_flags |= RF_tainted;
5389 	if (do_utf8) {
5390 	    loceol = PL_regeol;
5391 	    while (hardcount < max && scan < loceol &&
5392 		   !isALNUM_LC_utf8((U8*)scan)) {
5393 		scan += UTF8SKIP(scan);
5394 		hardcount++;
5395 	    }
5396 	} else {
5397 	    while (scan < loceol && !isALNUM_LC(*scan))
5398 		scan++;
5399 	}
5400 	break;
5401     case SPACE:
5402 	if (do_utf8) {
5403 	    loceol = PL_regeol;
5404 	    LOAD_UTF8_CHARCLASS_SPACE();
5405 	    while (hardcount < max && scan < loceol &&
5406 		   (*scan == ' ' ||
5407 		    swash_fetch(PL_utf8_space,(U8*)scan, do_utf8))) {
5408 		scan += UTF8SKIP(scan);
5409 		hardcount++;
5410 	    }
5411 	} else {
5412 	    while (scan < loceol && isSPACE(*scan))
5413 		scan++;
5414 	}
5415 	break;
5416     case SPACEL:
5417 	PL_reg_flags |= RF_tainted;
5418 	if (do_utf8) {
5419 	    loceol = PL_regeol;
5420 	    while (hardcount < max && scan < loceol &&
5421 		   (*scan == ' ' || isSPACE_LC_utf8((U8*)scan))) {
5422 		scan += UTF8SKIP(scan);
5423 		hardcount++;
5424 	    }
5425 	} else {
5426 	    while (scan < loceol && isSPACE_LC(*scan))
5427 		scan++;
5428 	}
5429 	break;
5430     case NSPACE:
5431 	if (do_utf8) {
5432 	    loceol = PL_regeol;
5433 	    LOAD_UTF8_CHARCLASS_SPACE();
5434 	    while (hardcount < max && scan < loceol &&
5435 		   !(*scan == ' ' ||
5436 		     swash_fetch(PL_utf8_space,(U8*)scan, do_utf8))) {
5437 		scan += UTF8SKIP(scan);
5438 		hardcount++;
5439 	    }
5440 	} else {
5441 	    while (scan < loceol && !isSPACE(*scan))
5442 		scan++;
5443 	}
5444 	break;
5445     case NSPACEL:
5446 	PL_reg_flags |= RF_tainted;
5447 	if (do_utf8) {
5448 	    loceol = PL_regeol;
5449 	    while (hardcount < max && scan < loceol &&
5450 		   !(*scan == ' ' || isSPACE_LC_utf8((U8*)scan))) {
5451 		scan += UTF8SKIP(scan);
5452 		hardcount++;
5453 	    }
5454 	} else {
5455 	    while (scan < loceol && !isSPACE_LC(*scan))
5456 		scan++;
5457 	}
5458 	break;
5459     case DIGIT:
5460 	if (do_utf8) {
5461 	    loceol = PL_regeol;
5462 	    LOAD_UTF8_CHARCLASS_DIGIT();
5463 	    while (hardcount < max && scan < loceol &&
5464 		   swash_fetch(PL_utf8_digit, (U8*)scan, do_utf8)) {
5465 		scan += UTF8SKIP(scan);
5466 		hardcount++;
5467 	    }
5468 	} else {
5469 	    while (scan < loceol && isDIGIT(*scan))
5470 		scan++;
5471 	}
5472 	break;
5473     case NDIGIT:
5474 	if (do_utf8) {
5475 	    loceol = PL_regeol;
5476 	    LOAD_UTF8_CHARCLASS_DIGIT();
5477 	    while (hardcount < max && scan < loceol &&
5478 		   !swash_fetch(PL_utf8_digit, (U8*)scan, do_utf8)) {
5479 		scan += UTF8SKIP(scan);
5480 		hardcount++;
5481 	    }
5482 	} else {
5483 	    while (scan < loceol && !isDIGIT(*scan))
5484 		scan++;
5485 	}
5486     case LNBREAK:
5487         if (do_utf8) {
5488 	    loceol = PL_regeol;
5489 	    while (hardcount < max && scan < loceol && (c=is_LNBREAK_utf8(scan))) {
5490 		scan += c;
5491 		hardcount++;
5492 	    }
5493 	} else {
5494 	    /*
5495 	      LNBREAK can match two latin chars, which is ok,
5496 	      because we have a null terminated string, but we
5497 	      have to use hardcount in this situation
5498 	    */
5499 	    while (scan < loceol && (c=is_LNBREAK_latin1(scan)))  {
5500 		scan+=c;
5501 		hardcount++;
5502 	    }
5503 	}
5504 	break;
5505     case HORIZWS:
5506         if (do_utf8) {
5507 	    loceol = PL_regeol;
5508 	    while (hardcount < max && scan < loceol && (c=is_HORIZWS_utf8(scan))) {
5509 		scan += c;
5510 		hardcount++;
5511 	    }
5512 	} else {
5513 	    while (scan < loceol && is_HORIZWS_latin1(scan))
5514 		scan++;
5515 	}
5516 	break;
5517     case NHORIZWS:
5518         if (do_utf8) {
5519 	    loceol = PL_regeol;
5520 	    while (hardcount < max && scan < loceol && !is_HORIZWS_utf8(scan)) {
5521 		scan += UTF8SKIP(scan);
5522 		hardcount++;
5523 	    }
5524 	} else {
5525 	    while (scan < loceol && !is_HORIZWS_latin1(scan))
5526 		scan++;
5527 
5528 	}
5529 	break;
5530     case VERTWS:
5531         if (do_utf8) {
5532 	    loceol = PL_regeol;
5533 	    while (hardcount < max && scan < loceol && (c=is_VERTWS_utf8(scan))) {
5534 		scan += c;
5535 		hardcount++;
5536 	    }
5537 	} else {
5538 	    while (scan < loceol && is_VERTWS_latin1(scan))
5539 		scan++;
5540 
5541 	}
5542 	break;
5543     case NVERTWS:
5544         if (do_utf8) {
5545 	    loceol = PL_regeol;
5546 	    while (hardcount < max && scan < loceol && !is_VERTWS_utf8(scan)) {
5547 		scan += UTF8SKIP(scan);
5548 		hardcount++;
5549 	    }
5550 	} else {
5551 	    while (scan < loceol && !is_VERTWS_latin1(scan))
5552 		scan++;
5553 
5554 	}
5555 	break;
5556 
5557     default:		/* Called on something of 0 width. */
5558 	break;		/* So match right here or not at all. */
5559     }
5560 
5561     if (hardcount)
5562 	c = hardcount;
5563     else
5564 	c = scan - PL_reginput;
5565     PL_reginput = scan;
5566 
5567     DEBUG_r({
5568 	GET_RE_DEBUG_FLAGS_DECL;
5569 	DEBUG_EXECUTE_r({
5570 	    SV * const prop = sv_newmortal();
5571 	    regprop(prog, prop, p);
5572 	    PerlIO_printf(Perl_debug_log,
5573 			"%*s  %s can match %"IVdf" times out of %"IVdf"...\n",
5574 			REPORT_CODE_OFF + depth*2, "", SvPVX_const(prop),(IV)c,(IV)max);
5575 	});
5576     });
5577 
5578     return(c);
5579 }
5580 
5581 
5582 #if !defined(PERL_IN_XSUB_RE) || defined(PLUGGABLE_RE_EXTENSION)
5583 /*
5584 - regclass_swash - prepare the utf8 swash
5585 */
5586 
5587 SV *
5588 Perl_regclass_swash(pTHX_ const regexp *prog, register const regnode* node, bool doinit, SV** listsvp, SV **altsvp)
5589 {
5590     dVAR;
5591     SV *sw  = NULL;
5592     SV *si  = NULL;
5593     SV *alt = NULL;
5594     RXi_GET_DECL(prog,progi);
5595     const struct reg_data * const data = prog ? progi->data : NULL;
5596 
5597     if (data && data->count) {
5598 	const U32 n = ARG(node);
5599 
5600 	if (data->what[n] == 's') {
5601 	    SV * const rv = (SV*)data->data[n];
5602 	    AV * const av = (AV*)SvRV((SV*)rv);
5603 	    SV **const ary = AvARRAY(av);
5604 	    SV **a, **b;
5605 
5606 	    /* See the end of regcomp.c:S_regclass() for
5607 	     * documentation of these array elements. */
5608 
5609 	    si = *ary;
5610 	    a  = SvROK(ary[1]) ? &ary[1] : NULL;
5611 	    b  = SvTYPE(ary[2]) == SVt_PVAV ? &ary[2] : NULL;
5612 
5613 	    if (a)
5614 		sw = *a;
5615 	    else if (si && doinit) {
5616 		sw = swash_init("utf8", "", si, 1, 0);
5617 		(void)av_store(av, 1, sw);
5618 	    }
5619 	    if (b)
5620 	        alt = *b;
5621 	}
5622     }
5623 
5624     if (listsvp)
5625 	*listsvp = si;
5626     if (altsvp)
5627 	*altsvp  = alt;
5628 
5629     return sw;
5630 }
5631 #endif
5632 
5633 /*
5634  - reginclass - determine if a character falls into a character class
5635 
5636   The n is the ANYOF regnode, the p is the target string, lenp
5637   is pointer to the maximum length of how far to go in the p
5638   (if the lenp is zero, UTF8SKIP(p) is used),
5639   do_utf8 tells whether the target string is in UTF-8.
5640 
5641  */
5642 
5643 STATIC bool
5644 S_reginclass(pTHX_ const regexp *prog, register const regnode *n, register const U8* p, STRLEN* lenp, register bool do_utf8)
5645 {
5646     dVAR;
5647     const char flags = ANYOF_FLAGS(n);
5648     bool match = FALSE;
5649     UV c = *p;
5650     STRLEN len = 0;
5651     STRLEN plen;
5652 
5653     if (do_utf8 && !UTF8_IS_INVARIANT(c)) {
5654 	c = utf8n_to_uvchr(p, UTF8_MAXBYTES, &len,
5655 		(UTF8_ALLOW_DEFAULT & UTF8_ALLOW_ANYUV) | UTF8_CHECK_ONLY);
5656 		/* see [perl #37836] for UTF8_ALLOW_ANYUV */
5657 	if (len == (STRLEN)-1)
5658 	    Perl_croak(aTHX_ "Malformed UTF-8 character (fatal)");
5659     }
5660 
5661     plen = lenp ? *lenp : UNISKIP(NATIVE_TO_UNI(c));
5662     if (do_utf8 || (flags & ANYOF_UNICODE)) {
5663         if (lenp)
5664 	    *lenp = 0;
5665 	if (do_utf8 && !ANYOF_RUNTIME(n)) {
5666 	    if (len != (STRLEN)-1 && c < 256 && ANYOF_BITMAP_TEST(n, c))
5667 		match = TRUE;
5668 	}
5669 	if (!match && do_utf8 && (flags & ANYOF_UNICODE_ALL) && c >= 256)
5670 	    match = TRUE;
5671 	if (!match) {
5672 	    AV *av;
5673 	    SV * const sw = regclass_swash(prog, n, TRUE, 0, (SV**)&av);
5674 
5675 	    if (sw) {
5676 		if (swash_fetch(sw, p, do_utf8))
5677 		    match = TRUE;
5678 		else if (flags & ANYOF_FOLD) {
5679 		    if (!match && lenp && av) {
5680 		        I32 i;
5681 			for (i = 0; i <= av_len(av); i++) {
5682 			    SV* const sv = *av_fetch(av, i, FALSE);
5683 			    STRLEN len;
5684 			    const char * const s = SvPV_const(sv, len);
5685 
5686 			    if (len <= plen && memEQ(s, (char*)p, len)) {
5687 			        *lenp = len;
5688 				match = TRUE;
5689 				break;
5690 			    }
5691 			}
5692 		    }
5693 		    if (!match) {
5694 		        U8 tmpbuf[UTF8_MAXBYTES_CASE+1];
5695 			STRLEN tmplen;
5696 
5697 		        to_utf8_fold(p, tmpbuf, &tmplen);
5698 			if (swash_fetch(sw, tmpbuf, do_utf8))
5699 			    match = TRUE;
5700 		    }
5701 		}
5702 	    }
5703 	}
5704 	if (match && lenp && *lenp == 0)
5705 	    *lenp = UNISKIP(NATIVE_TO_UNI(c));
5706     }
5707     if (!match && c < 256) {
5708 	if (ANYOF_BITMAP_TEST(n, c))
5709 	    match = TRUE;
5710 	else if (flags & ANYOF_FOLD) {
5711 	    U8 f;
5712 
5713 	    if (flags & ANYOF_LOCALE) {
5714 		PL_reg_flags |= RF_tainted;
5715 		f = PL_fold_locale[c];
5716 	    }
5717 	    else
5718 		f = PL_fold[c];
5719 	    if (f != c && ANYOF_BITMAP_TEST(n, f))
5720 		match = TRUE;
5721 	}
5722 
5723 	if (!match && (flags & ANYOF_CLASS)) {
5724 	    PL_reg_flags |= RF_tainted;
5725 	    if (
5726 		(ANYOF_CLASS_TEST(n, ANYOF_ALNUM)   &&  isALNUM_LC(c))  ||
5727 		(ANYOF_CLASS_TEST(n, ANYOF_NALNUM)  && !isALNUM_LC(c))  ||
5728 		(ANYOF_CLASS_TEST(n, ANYOF_SPACE)   &&  isSPACE_LC(c))  ||
5729 		(ANYOF_CLASS_TEST(n, ANYOF_NSPACE)  && !isSPACE_LC(c))  ||
5730 		(ANYOF_CLASS_TEST(n, ANYOF_DIGIT)   &&  isDIGIT_LC(c))  ||
5731 		(ANYOF_CLASS_TEST(n, ANYOF_NDIGIT)  && !isDIGIT_LC(c))  ||
5732 		(ANYOF_CLASS_TEST(n, ANYOF_ALNUMC)  &&  isALNUMC_LC(c)) ||
5733 		(ANYOF_CLASS_TEST(n, ANYOF_NALNUMC) && !isALNUMC_LC(c)) ||
5734 		(ANYOF_CLASS_TEST(n, ANYOF_ALPHA)   &&  isALPHA_LC(c))  ||
5735 		(ANYOF_CLASS_TEST(n, ANYOF_NALPHA)  && !isALPHA_LC(c))  ||
5736 		(ANYOF_CLASS_TEST(n, ANYOF_ASCII)   &&  isASCII(c))     ||
5737 		(ANYOF_CLASS_TEST(n, ANYOF_NASCII)  && !isASCII(c))     ||
5738 		(ANYOF_CLASS_TEST(n, ANYOF_CNTRL)   &&  isCNTRL_LC(c))  ||
5739 		(ANYOF_CLASS_TEST(n, ANYOF_NCNTRL)  && !isCNTRL_LC(c))  ||
5740 		(ANYOF_CLASS_TEST(n, ANYOF_GRAPH)   &&  isGRAPH_LC(c))  ||
5741 		(ANYOF_CLASS_TEST(n, ANYOF_NGRAPH)  && !isGRAPH_LC(c))  ||
5742 		(ANYOF_CLASS_TEST(n, ANYOF_LOWER)   &&  isLOWER_LC(c))  ||
5743 		(ANYOF_CLASS_TEST(n, ANYOF_NLOWER)  && !isLOWER_LC(c))  ||
5744 		(ANYOF_CLASS_TEST(n, ANYOF_PRINT)   &&  isPRINT_LC(c))  ||
5745 		(ANYOF_CLASS_TEST(n, ANYOF_NPRINT)  && !isPRINT_LC(c))  ||
5746 		(ANYOF_CLASS_TEST(n, ANYOF_PUNCT)   &&  isPUNCT_LC(c))  ||
5747 		(ANYOF_CLASS_TEST(n, ANYOF_NPUNCT)  && !isPUNCT_LC(c))  ||
5748 		(ANYOF_CLASS_TEST(n, ANYOF_UPPER)   &&  isUPPER_LC(c))  ||
5749 		(ANYOF_CLASS_TEST(n, ANYOF_NUPPER)  && !isUPPER_LC(c))  ||
5750 		(ANYOF_CLASS_TEST(n, ANYOF_XDIGIT)  &&  isXDIGIT(c))    ||
5751 		(ANYOF_CLASS_TEST(n, ANYOF_NXDIGIT) && !isXDIGIT(c))    ||
5752 		(ANYOF_CLASS_TEST(n, ANYOF_PSXSPC)  &&  isPSXSPC(c))    ||
5753 		(ANYOF_CLASS_TEST(n, ANYOF_NPSXSPC) && !isPSXSPC(c))    ||
5754 		(ANYOF_CLASS_TEST(n, ANYOF_BLANK)   &&  isBLANK(c))     ||
5755 		(ANYOF_CLASS_TEST(n, ANYOF_NBLANK)  && !isBLANK(c))
5756 		) /* How's that for a conditional? */
5757 	    {
5758 		match = TRUE;
5759 	    }
5760 	}
5761     }
5762 
5763     return (flags & ANYOF_INVERT) ? !match : match;
5764 }
5765 
5766 STATIC U8 *
5767 S_reghop3(U8 *s, I32 off, const U8* lim)
5768 {
5769     dVAR;
5770     if (off >= 0) {
5771 	while (off-- && s < lim) {
5772 	    /* XXX could check well-formedness here */
5773 	    s += UTF8SKIP(s);
5774 	}
5775     }
5776     else {
5777         while (off++ && s > lim) {
5778             s--;
5779             if (UTF8_IS_CONTINUED(*s)) {
5780                 while (s > lim && UTF8_IS_CONTINUATION(*s))
5781                     s--;
5782 	    }
5783             /* XXX could check well-formedness here */
5784 	}
5785     }
5786     return s;
5787 }
5788 
5789 #ifdef XXX_dmq
5790 /* there are a bunch of places where we use two reghop3's that should
5791    be replaced with this routine. but since thats not done yet
5792    we ifdef it out - dmq
5793 */
5794 STATIC U8 *
5795 S_reghop4(U8 *s, I32 off, const U8* llim, const U8* rlim)
5796 {
5797     dVAR;
5798     if (off >= 0) {
5799         while (off-- && s < rlim) {
5800             /* XXX could check well-formedness here */
5801             s += UTF8SKIP(s);
5802         }
5803     }
5804     else {
5805         while (off++ && s > llim) {
5806             s--;
5807             if (UTF8_IS_CONTINUED(*s)) {
5808                 while (s > llim && UTF8_IS_CONTINUATION(*s))
5809                     s--;
5810             }
5811             /* XXX could check well-formedness here */
5812         }
5813     }
5814     return s;
5815 }
5816 #endif
5817 
5818 STATIC U8 *
5819 S_reghopmaybe3(U8* s, I32 off, const U8* lim)
5820 {
5821     dVAR;
5822     if (off >= 0) {
5823 	while (off-- && s < lim) {
5824 	    /* XXX could check well-formedness here */
5825 	    s += UTF8SKIP(s);
5826 	}
5827 	if (off >= 0)
5828 	    return NULL;
5829     }
5830     else {
5831         while (off++ && s > lim) {
5832             s--;
5833             if (UTF8_IS_CONTINUED(*s)) {
5834                 while (s > lim && UTF8_IS_CONTINUATION(*s))
5835                     s--;
5836 	    }
5837             /* XXX could check well-formedness here */
5838 	}
5839 	if (off <= 0)
5840 	    return NULL;
5841     }
5842     return s;
5843 }
5844 
5845 static void
5846 restore_pos(pTHX_ void *arg)
5847 {
5848     dVAR;
5849     regexp * const rex = (regexp *)arg;
5850     if (PL_reg_eval_set) {
5851 	if (PL_reg_oldsaved) {
5852 	    rex->subbeg = PL_reg_oldsaved;
5853 	    rex->sublen = PL_reg_oldsavedlen;
5854 #ifdef PERL_OLD_COPY_ON_WRITE
5855 	    rex->saved_copy = PL_nrs;
5856 #endif
5857 	    RX_MATCH_COPIED_on(rex);
5858 	}
5859 	PL_reg_magic->mg_len = PL_reg_oldpos;
5860 	PL_reg_eval_set = 0;
5861 	PL_curpm = PL_reg_oldcurpm;
5862     }
5863 }
5864 
5865 STATIC void
5866 S_to_utf8_substr(pTHX_ register regexp *prog)
5867 {
5868     int i = 1;
5869     do {
5870 	if (prog->substrs->data[i].substr
5871 	    && !prog->substrs->data[i].utf8_substr) {
5872 	    SV* const sv = newSVsv(prog->substrs->data[i].substr);
5873 	    prog->substrs->data[i].utf8_substr = sv;
5874 	    sv_utf8_upgrade(sv);
5875 	    if (SvVALID(prog->substrs->data[i].substr)) {
5876 		const U8 flags = BmFLAGS(prog->substrs->data[i].substr);
5877 		if (flags & FBMcf_TAIL) {
5878 		    /* Trim the trailing \n that fbm_compile added last
5879 		       time.  */
5880 		    SvCUR_set(sv, SvCUR(sv) - 1);
5881 		    /* Whilst this makes the SV technically "invalid" (as its
5882 		       buffer is no longer followed by "\0") when fbm_compile()
5883 		       adds the "\n" back, a "\0" is restored.  */
5884 		}
5885 		fbm_compile(sv, flags);
5886 	    }
5887 	    if (prog->substrs->data[i].substr == prog->check_substr)
5888 		prog->check_utf8 = sv;
5889 	}
5890     } while (i--);
5891 }
5892 
5893 STATIC void
5894 S_to_byte_substr(pTHX_ register regexp *prog)
5895 {
5896     dVAR;
5897     int i = 1;
5898     do {
5899 	if (prog->substrs->data[i].utf8_substr
5900 	    && !prog->substrs->data[i].substr) {
5901 	    SV* sv = newSVsv(prog->substrs->data[i].utf8_substr);
5902 	    if (sv_utf8_downgrade(sv, TRUE)) {
5903 		if (SvVALID(prog->substrs->data[i].utf8_substr)) {
5904 		    const U8 flags
5905 			= BmFLAGS(prog->substrs->data[i].utf8_substr);
5906 		    if (flags & FBMcf_TAIL) {
5907 			/* Trim the trailing \n that fbm_compile added last
5908 			   time.  */
5909 			SvCUR_set(sv, SvCUR(sv) - 1);
5910 		    }
5911 		    fbm_compile(sv, flags);
5912 		}
5913 	    } else {
5914 		SvREFCNT_dec(sv);
5915 		sv = &PL_sv_undef;
5916 	    }
5917 	    prog->substrs->data[i].substr = sv;
5918 	    if (prog->substrs->data[i].utf8_substr == prog->check_utf8)
5919 		prog->check_substr = sv;
5920 	}
5921     } while (i--);
5922 }
5923 
5924 /*
5925  * Local variables:
5926  * c-indentation-style: bsd
5927  * c-basic-offset: 4
5928  * indent-tabs-mode: t
5929  * End:
5930  *
5931  * ex: set ts=8 sts=4 sw=4 noet:
5932  */
5933